Compare commits

...

121 Commits

Author SHA1 Message Date
SachinVin
6dfd42dfc8 backend\A64\exception_handler_posix.cpp: remove unused header 2020-05-30 23:05:06 +05:30
SachinVin
adeb4940dd backend\A64\exception_handler_posix.cpp: Fix typo in FindCodeBlockInfo 2020-05-29 20:19:14 +05:30
SachinVin
26ba798719 tests/A32: remove unused function 2020-05-23 19:55:11 +05:30
SachinVin
94e90aba68 backend/A64:port single stepping fix 2020-05-23 19:55:11 +05:30
SachinVin
47e508dd12 travis : a64: remove docker; dont fuzz against unicorn 2020-05-23 19:55:10 +05:30
SachinVin
d0c69355fb backend/A64: Use ASSERT_FALSE where possible 2020-05-23 19:55:10 +05:30
SachinVin
1b9d22bfee backend\A64\block_of_code.cpp: Remove stray semicolon 2020-05-23 19:55:10 +05:30
SachinVin
aef2d8d317 backend\A64\reg_alloc.cpp: Fix assert 2020-05-23 19:55:09 +05:30
SachinVin
877de72c34 CmakeLists: DYNARMIC_FRONTENDS optin for A64 backend 2020-05-23 19:55:09 +05:30
SachinVin
b0230f7def frontend/A32: remove decoder hack vfp instructions 2020-05-23 19:55:09 +05:30
SachinVin
b25b721a6a a64_emiter: CountLeadingZeros intrinsic shortcuts 2020-05-23 19:55:09 +05:30
BreadFish64
73ee4b9480 emit_a64: get rid of useless NOP generation
We don't actually patch anything in those locations beside a jump.
2020-05-23 19:55:08 +05:30
SachinVin
470be4f7dc emit_a64: Do not clear fast_dispatch_table unnecessarily
port 4305c74 - emit_x64: Do not clear fast_dispatch_table unnecessarily
2020-05-23 19:55:08 +05:30
SachinVin
231feee518 backend/A64/block_of_code.cpp: Clean up C style casts 2020-05-23 19:55:08 +05:30
SachinVin
27e21530b3 backend/A64/a32_emit_a64.cpp: EmitA32{Get,Set}Fpscr, set the guest_fpcr to host fpcr 2020-05-23 19:55:07 +05:30
SachinVin
66e7693204 backend/A64: Add Step 2020-05-23 19:55:07 +05:30
SachinVin
a3072d68cb backend/A64/block_of_code: Always specify codeptr to run from 2020-05-23 19:55:06 +05:30
BreadFish64
018b07f186 backend/A64: fix mp 2020-05-23 19:55:06 +05:30
SachinVin
8571f06596 backend/A64: Move SP to FP in GenMemoryAccessors + Minor cleanup and 2020-05-23 19:55:06 +05:30
SachinVin
9c74e334b1 backend/A64: Use X26 for storing remaining cycles. 2020-05-23 19:55:05 +05:30
BreadFish64
b6733a089a backend/A64: add fastmem support
fix crash on game close

fix generic exception handler

reorder hostloc gpr list

use temp register instead of X0 for writes

go back to regular std::partition
2020-05-23 19:55:05 +05:30
BreadFish64
45a758a6f2 merge fastmem 2020-05-23 19:55:05 +05:30
SachinVin
f7fd0cff8f backend\A64\constant_pool.cpp: Correct offset calculation 2020-05-23 19:55:04 +05:30
SachinVin
c99ad2a4f3 backend/A64/a32_jitstate: Upstream changes from x64 backend 2020-05-23 19:55:04 +05:30
SachinVin
968e8cddd3 backend/A64: Add test for q flag being incorrectly set 2020-05-23 19:55:04 +05:30
SachinVin
c7f7a99428 backend/A64/a32_emit_a64.cpp: Use unused HostCall registers 2020-05-23 19:55:03 +05:30
SachinVin
79c7b026ed backend/A64/a32_emit_a64.cpp: Use MOVP2R instead of MOVI2R. 2020-05-23 19:55:03 +05:30
SachinVin
7db182a5c8 backend/A64/abi: Fix FP caller and callee save registers 2020-05-23 19:55:03 +05:30
SachinVin
a7ef959570 a64/block_of_code: use GetWritableCodePtr() instead of const_cast<...>(GetCodePtr()) 2020-05-23 19:55:03 +05:30
SachinVin
534ad728a8 backend/A64/constant_pool: Clean up unused stuff 2020-05-23 19:55:02 +05:30
SachinVin
c8ec8f8945 emit_a64_data_processing.cpp: remove pointless DoNZCV. 2020-05-23 19:55:02 +05:30
SachinVin
6f643b2352 IR + backend/*: add SetCpsrNZCVRaw and change arg1 type of SetCpsrNZCV to IR::NZCV 2020-05-23 19:55:02 +05:30
SachinVin
43d37293b1 backend/A64: Fix ASR impl 2020-05-23 19:55:01 +05:30
SachinVin
e12d635bde a64_emitter: Use Correct alias for ZR and WZR in CMP 2020-05-23 19:55:01 +05:30
SachinVin
8c66a1609e backend/A64: Use CSLE instead of branches for LSL LSR and ASR + minor cleanup 2020-05-23 19:55:01 +05:30
SachinVin
878db6d65d backend/A64: Use correct register size for EmitNot64 2020-05-23 19:55:01 +05:30
SachinVin
f8594f3bb9 tests/A32: Check if Q flag is cleared properly 2020-05-23 19:55:00 +05:30
SachinVin
296bbdd0b0 backend/A64: SignedSaturatedSub and SignedSaturatedAdd 2020-05-23 19:55:00 +05:30
SachinVin
a6c2d1952a backend/A64/emit_a64_saturation.cpp: Implement EmitSignedSaturation and EmitUnsignedSaturation
Implements SSAT SSAT16 USAT USAT16 QASX QSAX UQASX UQSAX
2020-05-23 19:55:00 +05:30
SachinVin
011d62d958 backend/A64: add emit_a64_saturation.cpp 2020-05-23 19:54:59 +05:30
SachinVin
ad59325b45 backend/A64: Fix EmitA32SetCpsr 2020-05-23 19:54:59 +05:30
SachinVin
61ea47ad7b backend/A64/devirtualize: remove unused DevirtualizeItanium 2020-05-23 19:54:59 +05:30
SachinVin
bb39f419e2 backend/A64: refactor to fpscr from mxcsr 2020-05-23 19:54:58 +05:30
SachinVin
47c0632e16 backend/A64: Use ScratchGpr() instead of ABI_SCRATCH1 where possible 2020-05-23 19:54:58 +05:30
SachinVin
60303dbfa8 backend/A64: support for always_little_endian 2020-05-23 19:54:58 +05:30
SachinVin
19cd6f0309 backend/a64: Add hook_hint_instructions option
534eb0f
2020-05-23 19:54:57 +05:30
SachinVin
3d4caa5ee1 backend /A64: cleanup 2020-05-23 19:54:57 +05:30
SachinVin
d027786e4e gitignore: add .vs dir 2020-05-23 19:54:57 +05:30
SachinVin
0c7e261aac Minor style fix 2020-05-23 19:54:57 +05:30
SachinVin
6b167a68e4 backend\A64\emit_a64_packed.cpp: Implement AddSub halving and non halving 2020-05-23 19:54:56 +05:30
SachinVin
a87b13cabf backend\A64: Instructions that got implemented on the way 2020-05-23 19:54:56 +05:30
SachinVin
17e64406aa backend\A64\emit_a64_packed.cpp: Implement Unsigned Sum of Absolute Differences 2020-05-23 19:54:55 +05:30
SachinVin
871617ac3b a64 emitter: Absolute Difference and add across vector instructions 2020-05-23 19:54:55 +05:30
SachinVin
f9ba12a9e6 backend\A64\emit_a64_packed.cpp: Implement Packed Select 2020-05-23 19:54:54 +05:30
SachinVin
607a3c7110 Backend/a64: Fix asset when falling back to interpreter 2020-05-23 19:54:54 +05:30
SachinVin
a5564f588d backend\A64\emit_a64_packed.cpp: Implement Packed Halving Add/Sub instructions 2020-05-23 19:54:53 +05:30
SachinVin
fd01d6fe0a backend\A64\emit_a64_packed.cpp: Implement Packed Saturating instructions 2020-05-23 19:54:53 +05:30
SachinVin
b4fb2569ad backend\A64\emit_a64_packed.cpp: Implement SignedPacked*- ADD and SUB 2020-05-23 19:54:52 +05:30
SachinVin
8f98852249 a64 emitter: Vector Halving and Saturation instructions 2020-05-23 19:54:52 +05:30
SachinVin
9059505a2f backend\A64\emit_a64_packed.cpp: Implement UnsignedPacked*- ADD and SUB...
with few other in the emitter
2020-05-23 19:54:51 +05:30
SachinVin
5ad5784ef8 a64 emitter: fix Scalar Saturating Instructions 2020-05-23 19:54:51 +05:30
SachinVin
f0eee83098 A64 Emitter: Implement Saturating Add and Sub 2020-05-23 19:54:50 +05:30
SachinVin
ebd185968d backend\A64\emit_a64_data_processing.cpp: Implement Division 2020-05-23 19:54:50 +05:30
SachinVin
def0137021 backend\A64\emit_a64_data_processing.cpp: Implement 64bit CLZ 2020-05-23 19:54:50 +05:30
SachinVin
9f227edfe4 backend\A64\emit_a64_data_processing.cpp: Implement 64bit LSL and ROR Instructions
Also EmitTestBit
2020-05-23 19:54:49 +05:30
SachinVin
bb70cdd28c backend\A64\emit_a64_data_processing.cpp: Implement 64bit Logical Instructions 2020-05-23 19:54:49 +05:30
SachinVin
f851695f51 backend/a64: implememnt CheckBit 2020-05-23 19:54:49 +05:30
SachinVin
6d25995375 backend/a64: Redesign Const Pool 2020-05-23 19:54:48 +05:30
SachinVin
410c2010e9 backend\A64\emit_a64_floating_point.cpp: Fix include paths 2020-05-23 19:54:48 +05:30
SachinVin
8e3ad2feb5 backend\A64\a32_emit_a64.cpp: Fix Coproc* after rebase 2020-05-23 19:54:48 +05:30
SachinVin
fe49607add backend/a64/opcodes.inc: Coproc instructions 2020-05-23 19:54:47 +05:30
SachinVin
324e3c1fd1 a64 emitter: Fix LDR literal 2020-05-23 19:54:47 +05:30
SachinVin
3f220d94c6 a64 emitter: Move IsInRange* and MaskImm* into anon namespace 2020-05-23 19:54:47 +05:30
SachinVin
410dcf87a5 backend\A64\emit_a64_floating_point.cpp: Implement VADD VSUB VMUL and other stuff 2020-05-23 19:54:46 +05:30
SachinVin
4459188bfc backend\A64\emit_a64_floating_point.cpp: Implement VABS VNEG VCMP and a few others 2020-05-23 19:54:46 +05:30
SachinVin
23dc3cee01 frontend/A32/Decoder : (backend/a64)VMOV 2020-05-23 19:54:45 +05:30
SachinVin
72c8e5e536 backend\A64\emit_a64_floating_point.cpp: Implement VCVT instructions 2020-05-23 19:54:45 +05:30
SachinVin
50301cffbd backend\A64\emit_a64_floating_point.cpp: part 1 2020-05-23 19:54:44 +05:30
SachinVin
62f7b030e1 backend/a64/reg_alloc: Fix EmitMove for FPRs 2020-05-23 19:54:44 +05:30
SachinVin
b92195f2ae A64 emitter: Support for 64bit FMOV 2020-05-23 19:54:44 +05:30
SachinVin
1bd416aefb a64 backend: Load "guest_FPSR" 2020-05-23 19:54:43 +05:30
SachinVin
7661987e04 A64 backend: Add Get/SetExtendedRegister and Get/SetGEFlags 2020-05-23 19:54:43 +05:30
SachinVin
1a59aaec11 tests: Dont compile A64 tests for non x64 backend 2020-05-23 19:54:43 +05:30
SachinVin
952eb5c83f travis a64: unicorn 2020-05-23 19:54:43 +05:30
SachinVin
1c9ac3284e travis a64 backend 2020-05-23 19:54:42 +05:30
SachinVin
4da93c3130 Frontend/A32: a64 backend; Interpret SEL 2020-05-23 19:54:42 +05:30
SachinVin
8106f2a81b frontend/A32: A64 Backend implemented instructions 2020-05-23 19:54:42 +05:30
SachinVin
db07bfa933 backend\A64\emit_a64_data_processing.cpp: Implement REV and CLZ ops 2020-05-23 19:54:41 +05:30
SachinVin
6835cf34a1 backend\A64\emit_a64_data_processing.cpp: Implement Sext an Zext ops 2020-05-23 19:54:41 +05:30
SachinVin
e3054389a6 backend\A64\emit_a64_data_processing.cpp: Implement Logical ops 2020-05-23 19:54:40 +05:30
SachinVin
d37ec336a4 backend\A64\emit_a64_data_processing.cpp: Implement Arithmetic ops 2020-05-23 19:54:40 +05:30
SachinVin
e086d0df7f backend\A64\emit_a64_data_processing.cpp: Implement Shift and Rotate ops 2020-05-23 19:54:40 +05:30
SachinVin
8781a0f184 backend\A64\emit_a64_data_processing.cpp:Implement ops 2020-05-23 19:54:39 +05:30
SachinVin
a66bcdfc91 backend\A64\emit_a64_data_processing.cpp: Mostly empty file 2020-05-23 19:54:39 +05:30
SachinVin
9df55fc951 backend/a64: Add a32_interface 2020-05-23 19:54:38 +05:30
SachinVin
cb56c74d19 backend/a64: Port a32_emit_a64 2020-05-23 19:54:38 +05:30
SachinVin
4b48391fd3 backend/a64: Port block_of_code and emit_a64 2020-05-23 19:54:38 +05:30
SachinVin
0708019057 backend/a64: Port callback functions 2020-05-23 19:54:37 +05:30
SachinVin
f3bb2e5f92 backend/a64: Port exception handler 2020-05-23 19:54:37 +05:30
SachinVin
0d6b748b2a backend/a64: Port const pool 2020-05-23 19:54:37 +05:30
SachinVin
5c9179e2db backend/a64: Port reg_alloc 2020-05-23 19:54:36 +05:30
SachinVin
a37f9c4cc6 backend/a64: Port ABI functions 2020-05-23 19:54:36 +05:30
SachinVin
ab07872025 backend/a64: Port perfmap 2020-05-23 19:54:36 +05:30
SachinVin
be80e558c9 backend/a64: Port hostloc 2020-05-23 19:54:36 +05:30
SachinVin
9ca0155c19 backend/a64: Devirtualize functions for a64 2020-05-23 19:54:35 +05:30
SachinVin
fbb03a2a1b backend/a64: Port block_range_info 2020-05-23 19:54:35 +05:30
SachinVin
19b7fba235 CMakeModules\DetectArchitecture.cmake: Refactor ARCHITECTURE to DYNARMIC_ARCHITECTURE
Don't rely on super-project's definition of ARCHITECTURE
2020-05-23 19:54:35 +05:30
SachinVin
9bcbdacd2b [HACK] A32/exception_generating: Interpret undefined instructions 2020-05-23 19:54:35 +05:30
SachinVin
c72550f7d9 [HACK] CMakeLists: Do not build A64 tests on AArch64 2020-05-23 19:54:34 +05:30
MerryMage
8fdeb84822 fuzz_thumb: Add [JitA64] tag to supported instructions 2020-05-23 19:54:34 +05:30
SachinVin
4e4f2b8ef0 backend/A64: Port a32_jitstate 2020-05-23 19:54:34 +05:30
MerryMage
8de86b391f code_block: Support Windows and fix munmap check 2020-05-23 19:54:33 +05:30
SachinVin
0a55e1b11e ir_opt: Port a32_merge_interpreter_blocks 2020-05-23 19:54:33 +05:30
SachinVin
f654dbb29b assert: Use __android_log_print on Android 2020-05-23 19:54:33 +05:30
SachinVin
668d20391a CMakeLists: xbyak should only be linked on x64 2020-05-23 19:54:32 +05:30
SachinVin
0ce4fa4480 a64_emitter: Fix ABI push and pop 2020-05-23 19:54:32 +05:30
SachinVin
ddc8b7f932 a64_emitter: More style cleanup 2020-05-23 19:54:32 +05:30
SachinVin
6010c48bd0 a64_emitter: Style cleanup 2020-05-23 19:54:31 +05:30
BreadFish64
b8369d77ac Backend/A64: add jitstate_info.h 2020-05-23 19:54:31 +05:30
BreadFish64
7905eeb94b Backend/A64: Add Dolphin's ARM emitter 2020-05-23 19:54:31 +05:30
BreadFish64
f7664d9161 Add aarch64 CI 2020-05-23 19:54:31 +05:30
60 changed files with 13893 additions and 19 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
# Built files
build/
docs/Doxygen/
.vs/
# Generated files
src/backend/x64/mig/

View File

@@ -30,6 +30,20 @@ matrix:
- ninja-build
install: ./.travis/build-x86_64-linux/deps.sh
script: ./.travis/build-x86_64-linux/build.sh
- env: NAME="Linux aarch64 Build"
os: linux
dist: bionic
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-8-aarch64-linux-gnu
- g++-8-aarch64-linux-gnu
- ninja-build
- qemu-user
install: ./.travis/build-aarch64-linux/deps.sh
script: ./.travis/build-aarch64-linux/build.sh
- env: NAME="macOS Build"
os: osx
sudo: false

View File

@@ -0,0 +1,13 @@
#!/bin/sh
# Travis CI build step: cross-compile dynarmic for AArch64 with GCC 8 and
# run the test suite under qemu user-mode emulation.

# Abort on the first failing command; echo each command as it runs.
set -e
set -x

# Use the aarch64 cross toolchain (installed via the travis apt addon).
export CC=aarch64-linux-gnu-gcc-8
export CXX=aarch64-linux-gnu-g++-8

mkdir build && cd build
# Boost headers come from the externals/ext-boost checkout made by deps.sh.
cmake .. -DBoost_INCLUDE_DIRS=${PWD}/../externals/ext-boost -DCMAKE_BUILD_TYPE=Release -G Ninja
ninja

# -L points qemu's ELF loader at the cross sysroot so shared libraries resolve.
# NOTE(review): "-d yes" appears to be the Catch test-runner durations flag -- confirm.
qemu-aarch64 -L /usr/aarch64-linux-gnu ./tests/dynarmic_tests -d yes

View File

@@ -0,0 +1,9 @@
#!/bin/sh
# Travis CI dependency step: fetch the Boost headers the build needs by
# cloning a pre-packaged checkout into externals/.
set -e
set -x

# TODO: This isn't ideal.
cd externals
git clone https://github.com/MerryMage/ext-boost
cd ..

View File

@@ -105,10 +105,10 @@ else()
detect_architecture("__x86_64__" x86_64)
detect_architecture("__aarch64__" Aarch64)
endif()
if (NOT DEFINED ARCHITECTURE)
if (NOT DEFINED DYNARMIC_ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
message(STATUS "Target architecture: ${DYNARMIC_ARCHITECTURE}")
# Include Boost
if (NOT TARGET boost)

View File

@@ -1,6 +1,6 @@
include(CheckSymbolExists)
function(detect_architecture symbol arch)
if (NOT DEFINED ARCHITECTURE)
if (NOT DEFINED DYNARMIC_ARCHITECTURE)
set(CMAKE_REQUIRED_QUIET 1)
check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch})
unset(CMAKE_REQUIRED_QUIET)
@@ -8,7 +8,7 @@ function(detect_architecture symbol arch)
# The output variable needs to be unique across invocations otherwise
# CMake's crazy scope rules will keep it defined
if (ARCHITECTURE_${arch})
set(ARCHITECTURE "${arch}" PARENT_SCOPE)
set(DYNARMIC_ARCHITECTURE "${arch}" PARENT_SCOPE)
set(ARCHITECTURE_${arch} 1 PARENT_SCOPE)
add_definitions(-DARCHITECTURE_${arch}=1)
endif()

View File

@@ -154,6 +154,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
frontend/A32/translate/translate_thumb.cpp
ir_opt/a32_constant_memory_reads_pass.cpp
ir_opt/a32_get_set_elimination_pass.cpp
ir_opt/a32_merge_interpret_blocks.cpp
)
endif()
@@ -322,6 +323,68 @@ if (ARCHITECTURE_x86_64)
else()
target_sources(dynarmic PRIVATE backend/x64/exception_handler_generic.cpp)
endif()
target_link_libraries(dynarmic PRIVATE xbyak)
elseif(ARCHITECTURE_Aarch64)
target_sources(dynarmic PRIVATE
backend/A64/emitter/a64_emitter.cpp
backend/A64/emitter/a64_emitter.h
backend/A64/emitter/arm_common.h
backend/A64/emitter/code_block.h
# backend/A64/a64_emit_a64.cpp
# backend/A64/a64_emit_a64.h
# backend/A64/a64_exclusive_monitor.cpp
# backend/A64/a64_interface.cpp
# backend/A64/a64_jitstate.cpp
# backend/A64/a64_jitstate.h
backend/A64/abi.cpp
backend/A64/abi.h
backend/A64/block_of_code.cpp
backend/A64/block_of_code.h
backend/A64/block_range_information.cpp
backend/A64/block_range_information.h
backend/A64/callback.cpp
backend/A64/callback.h
backend/A64/constant_pool.cpp
backend/A64/constant_pool.h
backend/A64/devirtualize.h
backend/A64/emit_a64.cpp
backend/A64/emit_a64.h
# backend/A64/emit_a64_aes.cpp
# backend/A64/emit_a64_crc32.cpp
backend/A64/emit_a64_data_processing.cpp
backend/A64/emit_a64_floating_point.cpp
backend/A64/emit_a64_packed.cpp
backend/A64/emit_a64_saturation.cpp
# backend/A64/emit_a64_sm4.cpp
# backend/A64/emit_a64_vector.cpp
# backend/A64/emit_a64_vector_floating_point.cpp
backend/A64/exception_handler.h
backend/A64/hostloc.cpp
backend/A64/hostloc.h
backend/A64/jitstate_info.h
backend/A64/opcodes.inc
backend/A64/perf_map.cpp
backend/A64/perf_map.h
backend/A64/reg_alloc.cpp
backend/A64/reg_alloc.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
backend/A64/a32_emit_a64.cpp
backend/A64/a32_emit_a64.h
backend/A64/a32_interface.cpp
backend/A64/a32_jitstate.cpp
backend/A64/a32_jitstate.h
)
endif()
if (UNIX)
target_sources(dynarmic PRIVATE backend/A64/exception_handler_posix.cpp)
else()
target_sources(dynarmic PRIVATE backend/A64/exception_handler_generic.cpp)
endif()
else()
message(FATAL_ERROR "Unsupported architecture")
endif()
@@ -338,9 +401,12 @@ target_link_libraries(dynarmic
boost
fmt::fmt
mp
xbyak
$<$<BOOL:DYNARMIC_USE_LLVM>:${llvm_libs}>
)
if(ANDROID)
target_link_libraries(dynarmic PRIVATE log)
endif()
if (DYNARMIC_ENABLE_CPU_FEATURE_DETECTION)
target_compile_definitions(dynarmic PRIVATE DYNARMIC_ENABLE_CPU_FEATURE_DETECTION=1)
endif()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,138 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <set>
#include <tuple>
#include <unordered_map>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_range_information.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/exception_handler.h"
#include "dynarmic/A32/a32.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::BackendA64 {
struct A64State;
class RegAlloc;
// Per-block emission context for the A32 frontend. Bundles the register
// allocator and the IR block being emitted (via the EmitContext base) and
// exposes queries about the block's location and floating-point state.
struct A32EmitContext final : public EmitContext {
A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
// Location descriptor of the block currently being emitted.
A32::LocationDescriptor Location() const;
bool IsSingleStep() const;
// Floating-point state queries required by the EmitContext interface.
FP::RoundingMode FPSCR_RMode() const override;
u32 FPCR() const override;
bool FPSCR_FTZ() const override;
bool FPSCR_DN() const override;
// Byte offset of `inst` within the block; used as part of the stable
// DoNotFastmemMarker key in A32EmitA64.
std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
};
// AArch64 code emitter for the A32 (ARM/Thumb) frontend. Translates IR basic
// blocks into host machine code and owns the per-emitter caches (fast
// dispatch table, block ranges, fastmem patch bookkeeping).
class A32EmitA64 final : public EmitA64 {
public:
A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
~A32EmitA64() override;
/**
* Emit host machine code for a basic block with intermediate representation `ir`.
* @note ir is modified.
*/
BlockDescriptor Emit(IR::Block& ir);
void ClearCache() override;
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
// Invoked by the exception handler when a fastmem access faults at `PC`.
void FastmemCallback(CodePtr PC);
protected:
const A32::UserConfig config;
A32::Jit* jit_interface;
BlockRangeInformation<u32> block_ranges;
ExceptionHandler exception_handler;
void EmitCondPrelude(const A32EmitContext& ctx);
// One slot of the fast dispatch table: a location hash plus the compiled
// code pointer for that location. Sentinel hash of all-ones marks "empty".
struct FastDispatchEntry {
u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
const void* code_ptr = nullptr;
};
static_assert(sizeof(FastDispatchEntry) == 0x10);
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
static constexpr size_t fast_dispatch_table_size = 0x10000;
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable();
// Locations (block descriptor + instruction offset) that must not use the
// fastmem fast path again, e.g. after a fault was observed there.
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
std::set<DoNotFastmemMarker> do_not_fastmem;
DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
void DoNotFastmem(const DoNotFastmemMarker& marker);
bool ShouldFastmem(const DoNotFastmemMarker& marker) const;
// Entry points of the generated memory-access thunks (one per width),
// filled in by GenMemoryAccessors().
const void* read_memory_8;
const void* read_memory_16;
const void* read_memory_32;
const void* read_memory_64;
const void* write_memory_8;
const void* write_memory_16;
const void* write_memory_32;
const void* write_memory_64;
void GenMemoryAccessors();
template<typename T>
void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
template<typename T>
void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
// Shared terminal handler stubs, filled in by GenTerminalHandlers().
const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
void GenTerminalHandlers();
// Microinstruction emitters
// Declares one EmitA32<Name> member per A32 opcode listed in opcodes.inc.
#define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
#define A64OPC(...)
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Fastmem
// Per-patch-site callback used to rewrite a faulting fastmem access.
struct FastmemPatchInfo {
std::function<void()> callback;
};
std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;
// Terminal instruction emitters
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override;
};
} // namespace Dynarmic::BackendA64

View File

@@ -0,0 +1,314 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <memory>
#include <boost/icl/interval_set.hpp>
#include <fmt/format.h>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>
#include "backend/A64/a32_emit_a64.h"
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/callback.h"
#include "backend/A64/devirtualize.h"
#include "backend/A64/jitstate_info.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
#include "ir_opt/passes.h"
namespace Dynarmic::A32 {
using namespace BackendA64;
// Builds the callback bundle the dispatcher loop needs: block lookup, tick
// accounting, and the fastmem base pointer from the user configuration.
static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
return RunCodeCallbacks{
// Block lookup thunk; `arg` (the Jit::Impl*) is passed back as its argument.
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
// Cycle accounting callbacks devirtualized from the user's UserCallbacks.
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
reinterpret_cast<u64>(config.fastmem_pointer),
};
}
// Private implementation of the public A32 Jit interface. Owns the guest
// state, the code cache, the emitter, and the cache-invalidation queue.
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig config)
: block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
, emitter(block_of_code, config, jit)
, config(std::move(config))
, jit_interface(jit)
{}
A32JitState jit_state;
BlockOfCode block_of_code;
A32EmitA64 emitter;
const A32::UserConfig config;
// Requests made during execution to invalidate the cache are queued up here.
size_t invalid_cache_generation = 0;
boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false;
// Runs guest code from the current location until halted.
void Execute() {
const CodePtr current_codeptr = [this]{
// RSB optimization
// If the return-stack-buffer entry below the current pointer matches the
// current location hash, reuse its cached code pointer and skip lookup.
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
jit_state.rsb_ptr = new_rsb_ptr;
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
}
return GetCurrentBlock();
}();
block_of_code.RunCode(&jit_state, current_codeptr);
}
// Executes a single guest instruction using the single-stepping block variant.
void Step() {
block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
}
// Returns a textual disassembly of the compiled block for `descriptor`.
// Instruction-level output requires an LLVM-enabled build.
std::string Disassemble(const IR::LocationDescriptor& descriptor) {
auto block = GetBasicBlock(descriptor);
std::string result = fmt::format("address: {}\nsize: {} bytes\n", block.entrypoint, block.size);
#ifdef DYNARMIC_USE_LLVM
for (const u32* pos = reinterpret_cast<const u32*>(block.entrypoint);
reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(block.entrypoint) + block.size; pos += 1) {
fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
}
#endif
return result;
}
// Applies any queued invalidation: either a full cache clear or just the
// queued address ranges. Bumps the generation counter so saved Contexts
// know their RSB entries are stale.
void PerformCacheInvalidation() {
if (invalidate_entire_cache) {
jit_state.ResetRSB();
block_of_code.ClearCache();
emitter.ClearCache();
invalid_cache_ranges.clear();
invalidate_entire_cache = false;
invalid_cache_generation++;
return;
}
if (invalid_cache_ranges.empty()) {
return;
}
jit_state.ResetRSB();
emitter.InvalidateCacheRanges(invalid_cache_ranges);
invalid_cache_ranges.clear();
invalid_cache_generation++;
}
// Invalidates now if idle, otherwise requests a halt so the invalidation
// can run once execution stops.
void RequestCacheInvalidation() {
if (jit_interface->is_executing) {
jit_state.halt_requested = true;
return;
}
PerformCacheInvalidation();
}
private:
Jit* jit_interface;
// C-style trampoline handed to the dispatcher; `this_voidptr` is the Impl*.
static CodePtr GetCurrentBlockThunk(void* this_voidptr) {
Jit::Impl& this_ = *static_cast<Jit::Impl*>(this_voidptr);
return this_.GetCurrentBlock();
}
IR::LocationDescriptor GetCurrentLocation() const {
return IR::LocationDescriptor{jit_state.GetUniqueHash()};
}
CodePtr GetCurrentBlock() {
return GetBasicBlock(GetCurrentLocation()).entrypoint;
}
CodePtr GetCurrentSingleStep() {
return GetBasicBlock(A32::LocationDescriptor{GetCurrentLocation()}.SetSingleStepping(true)).entrypoint;
}
// Returns the compiled block for `descriptor`, translating and emitting it
// on a cache miss. May trigger a full cache flush if code space runs low.
A32EmitA64::BlockDescriptor GetBasicBlock(IR::LocationDescriptor descriptor) {
auto block = emitter.GetBasicBlock(descriptor);
if (block)
return *block;
constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
invalidate_entire_cache = true;
PerformCacheInvalidation();
}
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); }, {config.define_unpredictable_behaviour, config.hook_hint_instructions});
if (config.enable_optimizations) {
Optimization::A32GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32MergeInterpretBlocksPass(ir_block, config.callbacks);
}
Optimization::VerificationPass(ir_block);
return emitter.Emit(ir_block);
}
};
Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config))) {}
Jit::~Jit() = default;
// Runs guest code until a halt is requested, then applies queued invalidations.
void Jit::Run() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = false;
impl->Execute();
impl->PerformCacheInvalidation();
}
// Executes a single guest instruction. halt_requested is pre-set so the
// dispatcher stops after one step.
void Jit::Step() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = true;
impl->Step();
impl->PerformCacheInvalidation();
}
// Flags the whole code cache for invalidation (deferred if currently executing).
void Jit::ClearCache() {
impl->invalidate_entire_cache = true;
impl->RequestCacheInvalidation();
}
// Queues [start_address, start_address + length - 1] for invalidation.
void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
impl->invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
impl->RequestCacheInvalidation();
}
// Resets all guest state. Must not be called while executing.
void Jit::Reset() {
ASSERT(!is_executing);
impl->jit_state = {};
}
// Asks the dispatcher to stop at the next opportunity; safe to call mid-run.
void Jit::HaltExecution() {
impl->jit_state.halt_requested = true;
}
// Direct accessors into the guest register file (R0-R15).
std::array<u32, 16>& Jit::Regs() {
return impl->jit_state.Reg;
}
const std::array<u32, 16>& Jit::Regs() const {
return impl->jit_state.Reg;
}
// Direct accessors into the guest extended (VFP) register file.
std::array<u32, 64>& Jit::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<u32, 64>& Jit::ExtRegs() const {
return impl->jit_state.ExtReg;
}
// CPSR/FPSCR accessors; conversion to/from the split JIT representation is
// handled by A32JitState.
u32 Jit::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Jit::SetCpsr(u32 value) {
return impl->jit_state.SetCpsr(value);
}
u32 Jit::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Jit::SetFpscr(u32 value) {
return impl->jit_state.SetFpscr(value);
}
// Convenience overload: snapshots the current state into a fresh Context.
Context Jit::SaveContext() const {
Context ctx;
SaveContext(ctx);
return ctx;
}
// Backing storage for a saved Context: a copy of the guest JIT state plus
// the cache generation it was captured at (used to detect stale RSB entries
// on restore).
struct Context::Impl {
A32JitState jit_state;
size_t invalid_cache_generation;
};
// A fresh Context starts with an invalidated RSB so nothing stale is restored.
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
// Deep copy / move of the pimpl; copies duplicate the saved jit_state.
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
*impl = *ctx.impl;
return *this;
}
Context& Context::operator=(Context&& ctx) noexcept {
impl = std::move(ctx.impl);
return *this;
}
// Accessors mirroring Jit's, but operating on the saved snapshot.
std::array<std::uint32_t, 16>& Context::Regs() {
return impl->jit_state.Reg;
}
const std::array<std::uint32_t, 16>& Context::Regs() const {
return impl->jit_state.Reg;
}
std::array<std::uint32_t, 64>& Context::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
return impl->jit_state.ExtReg;
}
std::uint32_t Context::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Context::SetCpsr(std::uint32_t value) {
impl->jit_state.SetCpsr(value);
}
std::uint32_t Context::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Context::SetFpscr(std::uint32_t value) {
return impl->jit_state.SetFpscr(value);
}
// Copies live state into `ctx`, tagging it with the current cache generation.
void Jit::SaveContext(Context& ctx) const {
ctx.impl->jit_state.TransferJitState(impl->jit_state, false);
ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}
// Restores state from `ctx`; the RSB is reset if the cache was invalidated
// since the context was saved (its code pointers would be dangling).
void Jit::LoadContext(const Context& ctx) {
bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
impl->jit_state.TransferJitState(ctx.impl->jit_state, reset_rsb);
}
// Disassembles the compiled block for `descriptor` (see Impl::Disassemble).
std::string Jit::Disassemble(const IR::LocationDescriptor& descriptor) {
return impl->Disassemble(descriptor);
}
} // namespace Dynarmic::A32

View File

@@ -0,0 +1,172 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/A32/location_descriptor.h"
namespace Dynarmic::BackendA64 {
/**
* CPSR Bits
* =========
*
* ARM CPSR flags
* --------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
* Q bit 27 Saturation flag
* IT[1:0] bits 25-26 If-Then execution state (lower 2 bits)
* J bit 24 Jazelle instruction set flag
* GE bits 16-19 Greater than or Equal flags
* IT[7:2] bits 10-15 If-Then execution state (upper 6 bits)
* E bit 9 Data Endianness flag
* A bit 8 Disable imprecise Aborts
* I bit 7 Disable IRQ interrupts
* F bit 6 Disable FIQ interrupts
* T bit 5 Thumb instruction set flag
* M bits 0-4 Processor Mode bits
*
* A64 flags
* -------------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
*/
// Reassembles the guest CPSR from the split host-side representation
// (cpsr_nzcv / cpsr_q / cpsr_ge / upper_location_descriptor / cpsr_jaifm).
// See the bit-layout table at the top of this file.
u32 A32JitState::Cpsr() const {
    DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
    DEBUG_ASSERT((cpsr_q & ~1) == 0);
    DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);

    // N, Z, C, V are already stored in guest layout (bits 28-31).
    u32 result = cpsr_nzcv;

    // Q (sticky saturation) flag, bit 27; stored host-side as 0/1.
    if (cpsr_q) {
        result |= 1 << 27;
    }

    // GE flags: cpsr_ge keeps one byte-wide mask per flag; the top bit of
    // each byte carries the flag value (bits 19 down to 16).
    if (Common::Bit<31>(cpsr_ge)) {
        result |= 1 << 19;
    }
    if (Common::Bit<23>(cpsr_ge)) {
        result |= 1 << 18;
    }
    if (Common::Bit<15>(cpsr_ge)) {
        result |= 1 << 17;
    }
    if (Common::Bit<7>(cpsr_ge)) {
        result |= 1 << 16;
    }

    // E (endianness, CPSR bit 9) and T (Thumb, CPSR bit 5) live in the two
    // lowest bits of upper_location_descriptor.
    if (Common::Bit<1>(upper_location_descriptor)) {
        result |= 1 << 9;
    }
    if (Common::Bit<0>(upper_location_descriptor)) {
        result |= 1 << 5;
    }

    // IT state: descriptor bits 10-15 map straight to CPSR bits 10-15;
    // descriptor bits 8-9 land in CPSR bits 25-26.
    result |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
    result |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;

    // J, A, I, F, M (and related) bits are stored verbatim.
    result |= cpsr_jaifm;

    return result;
}
void A32JitState::SetCpsr(u32 cpsr) {
    // Split the guest CPSR into the separate fields tracked by the jit state.
    cpsr_nzcv = cpsr & 0xF0000000;
    cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;

    // Each GE flag expands to a full byte so it can be used directly as a mask.
    cpsr_ge = 0;
    if (Common::Bit<19>(cpsr)) {
        cpsr_ge |= 0xFF000000;
    }
    if (Common::Bit<18>(cpsr)) {
        cpsr_ge |= 0x00FF0000;
    }
    if (Common::Bit<17>(cpsr)) {
        cpsr_ge |= 0x0000FF00;
    }
    if (Common::Bit<16>(cpsr)) {
        cpsr_ge |= 0x000000FF;
    }

    // Keep the FPSCR-related upper half; rebuild the lower half from cpsr.
    upper_location_descriptor &= 0xFFFF0000;
    // E flag and T flag.
    if (Common::Bit<9>(cpsr)) {
        upper_location_descriptor |= 2;
    }
    if (Common::Bit<5>(cpsr)) {
        upper_location_descriptor |= 1;
    }
    // IT state: bits 10-15 stay in place; bits 25-26 move down to bits 8-9.
    upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
    upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;

    // Remaining bits (J, A, I, F, M, ...).
    cpsr_jaifm = cpsr & 0x010001DF;
}
void A32JitState::ResetRSB() {
rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
rsb_codeptrs.fill(0);
}
/**
* FPSCR
* =========================
*
* VFP FPSCR cumulative exception bits
* -----------------------------------
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1
* IXC bit 4 Inexact cumulative exception bit
* UFC bit 3 Underflow cumulative exception bit
* OFC bit 2 Overflow cumulative exception bit
* DZC bit 1 Division by Zero cumulative exception bit
* IOC bit 0 Invalid Operation cumulative exception bit
*
* VFP FPSCR exception trap enables
* --------------------------------
* IDE bit 15 Input Denormal exception trap enable
* IXE bit 12 Inexact exception trap enable
* UFE bit 11 Underflow exception trap enable
* OFE bit 10 Overflow exception trap enable
* DZE bit 9 Division by Zero exception trap enable
* IOE bit 8 Invalid Operation exception trap enable
*
* VFP FPSCR mode bits
* -------------------
* AHP bit 26 Alternate half-precision
* DN bit 25 Default NaN
* FZ bit 24 Flush to Zero
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
* Stride bits 20-21 Vector stride
* Len bits 16-18 Vector length
*/
// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
// Bits 28-31 of FPSCR hold the floating-point comparison flags.
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
u32 A32JitState::Fpscr() const {
    DEBUG_ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);

    // Reassemble the guest FPSCR from: the mode bits cached in the upper
    // location descriptor, the NZCV flags, the low exception bits of the
    // guest FPSR, and the accumulated cumulative-exception bits.
    const u32 mode_bits = static_cast<u32>(upper_location_descriptor) & FPSCR_MODE_MASK;
    const u32 exception_bits = static_cast<u32>(guest_fpsr & 0x1F) | fpsr_exc;
    return mode_bits | fpsr_nzcv | exception_bits;
}
void A32JitState::SetFpscr(u32 FPSCR) {
    // Ensure that only upper half of upper_location_descriptor is used for FPSCR bits.
    static_assert((FPSCR_MODE_MASK & 0xFFFF0000) == FPSCR_MODE_MASK);

    // Cache the mode bits in the upper half of the location descriptor,
    // preserving the (CPSR-derived) lower half.
    upper_location_descriptor = (upper_location_descriptor & 0x0000FFFF) | (FPSCR & FPSCR_MODE_MASK);

    fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;

    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC.
    fpsr_exc = FPSCR & 0x9F;

    // Rebuild the host control/status register images from scratch.
    guest_fpcr = FPSCR & 0x07C09F00; // mode bits and trap enables
    guest_fpsr = FPSCR & 0x9F;       // exception bits
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,111 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
#endif
// Guest CPU state as seen by JIT-compiled code. Emitted code addresses the
// members of this struct directly (see GetSpillLocationOffsetFromIndex and
// JitStateInfo offsets), so the member layout is load-bearing.
struct A32JitState {
    using ProgramCounterType = u32;

    A32JitState() { ResetRSB(); }

    std::array<u32, 16> Reg{}; // Current register file.
    // TODO: Mode-specific register sets unimplemented.

    // Upper 32 bits of the location descriptor: FPSCR mode bits in the top
    // half; CPSR.E/T and IT state in the bottom half (see SetCpsr/SetFpscr).
    u32 upper_location_descriptor = 0;

    // CPSR is kept split into separate fields; see Cpsr()/SetCpsr().
    u32 cpsr_ge = 0;    // each GE flag expanded to a full byte
    u32 cpsr_q = 0;     // 0 or 1
    u32 cpsr_nzcv = 0;  // bits 28-31; remaining bits zero
    u32 cpsr_jaifm = 0; // J, A, I, F, M and related bits
    u32 Cpsr() const;
    void SetCpsr(u32 cpsr);

    alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.

    static constexpr size_t SpillCount = 64;
    std::array<u64, SpillCount> Spill{}; // Spill.
    // Byte offset of spill slot i within this struct; used by emitted code
    // to load/store spilled values.
    static size_t GetSpillLocationOffsetFromIndex(size_t i) {
        return static_cast<u64>(offsetof(A32JitState, Spill) + i * sizeof(u64));
    }

    // For internal use (See: BlockOfCode::RunCode)
    u64 guest_fpcr = 0;
    u64 guest_fpsr = 0;
    u64 save_host_FPCR = 0;
    s64 cycles_to_run = 0;
    s64 cycles_remaining = 0;
    bool halt_requested = false;
    bool check_bit = false;

    // Exclusive state
    static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
    u32 exclusive_state = 0;
    u32 exclusive_address = 0;

    // Return stack buffer: small cache mapping recently-seen locations to
    // their compiled code pointers.
    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
    static constexpr size_t RSBPtrMask = RSBSize - 1;
    u32 rsb_ptr = 0;
    std::array<u64, RSBSize> rsb_location_descriptors;
    std::array<u64, RSBSize> rsb_codeptrs;
    void ResetRSB();

    // FPSCR is kept split; see Fpscr()/SetFpscr().
    u32 fpsr_exc = 0;
    u32 fpsr_qc = 0; // Dummy value
    u32 fpsr_nzcv = 0;
    u32 Fpscr() const;
    void SetFpscr(u32 FPSCR);

    // Unique key for the current location: upper location descriptor in the
    // high 32 bits, PC (R15) in the low 32 bits.
    u64 GetUniqueHash() const noexcept {
        return (static_cast<u64>(upper_location_descriptor) << 32) | (static_cast<u64>(Reg[15]));
    }

    // Copies guest-visible state from src. The exclusive monitor is always
    // cleared; the RSB is either copied or reset depending on reset_rsb.
    void TransferJitState(const A32JitState& src, bool reset_rsb) {
        Reg = src.Reg;
        upper_location_descriptor = src.upper_location_descriptor;
        cpsr_ge = src.cpsr_ge;
        cpsr_q = src.cpsr_q;
        cpsr_nzcv = src.cpsr_nzcv;
        cpsr_jaifm = src.cpsr_jaifm;
        ExtReg = src.ExtReg;
        guest_fpcr = src.guest_fpcr;
        guest_fpsr = src.guest_fpsr;
        fpsr_exc = src.fpsr_exc;
        fpsr_qc = src.fpsr_qc;
        fpsr_nzcv = src.fpsr_nzcv;
        exclusive_state = 0;
        exclusive_address = 0;
        if (reset_rsb) {
            ResetRSB();
        } else {
            rsb_ptr = src.rsb_ptr;
            rsb_location_descriptors = src.rsb_location_descriptors;
            rsb_codeptrs = src.rsb_codeptrs;
        }
    }
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
using CodePtr = const void*;
} // namespace Dynarmic::BackendA64

87
src/backend/A64/abi.cpp Normal file
View File

@ -0,0 +1,87 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// 20th Sep 2018: This code was modified for Dynarmic.
#include <algorithm>
#include <vector>
#include "backend/A64/abi.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/iterator_util.h"
namespace Dynarmic::BackendA64 {
// Builds GPR and FPR bitmasks from the given register list and pushes those
// registers (FPRs first, then GPRs) onto the stack.
template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
    u32 gprs = 0, fprs = 0;
    for (HostLoc reg : regs) {
        if (HostLocIsGPR(reg)) {
            // Use an unsigned literal: shifting a signed 1 into bit 31
            // (e.g. for Q31) is not portable.
            gprs |= u32(1) << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
        } else if (HostLocIsFPR(reg)) {
            fprs |= u32(1) << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
        }
    }
    code.fp_emitter.ABI_PushRegisters(fprs);
    code.ABI_PushRegisters(gprs);
}
// Mirror of ABI_PushRegistersAndAdjustStack: pops GPRs first, then FPRs,
// restoring the registers in the reverse of push order.
template<typename RegisterArrayT>
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
    u32 gprs = 0, fprs = 0;
    for (HostLoc reg : regs) {
        if (HostLocIsGPR(reg)) {
            // Use an unsigned literal: shifting a signed 1 into bit 31
            // (e.g. for Q31) is not portable.
            gprs |= u32(1) << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
        } else if (HostLocIsFPR(reg)) {
            fprs |= u32(1) << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
        }
    }
    code.ABI_PopRegisters(gprs);
    code.fp_emitter.ABI_PopRegisters(fprs);
}
// Saves every callee-saved register; used in the dispatcher prologue.
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}

// Restores every callee-saved register; used in the dispatcher epilogue.
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}

// Saves every caller-saved register around a call out of JITed code.
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}

// Restores every caller-saved register after a call out of JITed code.
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
    ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}
// Saves every caller-saved register except `exception` (typically the
// register that will receive the call's result).
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
    std::vector<HostLoc> locations;
    std::copy_if(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(locations),
                 [exception](HostLoc loc) { return loc != exception; });
    ABI_PushRegistersAndAdjustStack(code, locations);
}

// Restores every caller-saved register except `exception`.
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
    std::vector<HostLoc> locations;
    std::copy_if(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(locations),
                 [exception](HostLoc loc) { return loc != exception; });
    ABI_PopRegistersAndAdjustStack(code, locations);
}
} // namespace Dynarmic::BackendA64

110
src/backend/A64/abi.h Normal file
View File

@ -0,0 +1,110 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
// AAPCS64 calling convention: return value and the eight integer argument
// registers.
constexpr HostLoc ABI_RETURN = HostLoc::X0;

constexpr HostLoc ABI_PARAM1 = HostLoc::X0;
constexpr HostLoc ABI_PARAM2 = HostLoc::X1;
constexpr HostLoc ABI_PARAM3 = HostLoc::X2;
constexpr HostLoc ABI_PARAM4 = HostLoc::X3;
constexpr HostLoc ABI_PARAM5 = HostLoc::X4;
constexpr HostLoc ABI_PARAM6 = HostLoc::X5;
constexpr HostLoc ABI_PARAM7 = HostLoc::X6;
constexpr HostLoc ABI_PARAM8 = HostLoc::X7;

// Caller-saved (volatile) registers: X0-X18 and Q0-Q7, Q16-Q31.
// NOTE(review): X18 is a platform-reserved register on some AArch64 ABIs
// (e.g. Windows, Apple) — confirm it is safe to treat as allocatable here.
constexpr std::array<HostLoc, 43> ABI_ALL_CALLER_SAVE = {
    HostLoc::X0,
    HostLoc::X1,
    HostLoc::X2,
    HostLoc::X3,
    HostLoc::X4,
    HostLoc::X5,
    HostLoc::X6,
    HostLoc::X7,
    HostLoc::X8,
    HostLoc::X9,
    HostLoc::X10,
    HostLoc::X11,
    HostLoc::X12,
    HostLoc::X13,
    HostLoc::X14,
    HostLoc::X15,
    HostLoc::X16,
    HostLoc::X17,
    HostLoc::X18,
    HostLoc::Q0,
    HostLoc::Q1,
    HostLoc::Q2,
    HostLoc::Q3,
    HostLoc::Q4,
    HostLoc::Q5,
    HostLoc::Q6,
    HostLoc::Q7,
    HostLoc::Q16,
    HostLoc::Q17,
    HostLoc::Q18,
    HostLoc::Q19,
    HostLoc::Q20,
    HostLoc::Q21,
    HostLoc::Q22,
    HostLoc::Q23,
    HostLoc::Q24,
    HostLoc::Q25,
    HostLoc::Q26,
    HostLoc::Q27,
    HostLoc::Q28,
    HostLoc::Q29,
    HostLoc::Q30,
    HostLoc::Q31,
};

// Callee-saved (non-volatile) registers: X19-X30 and Q8-Q15.
constexpr std::array<HostLoc, 20> ABI_ALL_CALLEE_SAVE = {
    HostLoc::X19,
    HostLoc::X20,
    HostLoc::X21,
    HostLoc::X22,
    HostLoc::X23,
    HostLoc::X24,
    HostLoc::X25,
    HostLoc::X26,
    HostLoc::X27,
    HostLoc::X28,
    HostLoc::X29,
    HostLoc::X30,
    HostLoc::Q8,
    HostLoc::Q9,
    HostLoc::Q10,
    HostLoc::Q11,
    HostLoc::Q12,
    HostLoc::Q13,
    HostLoc::Q14,
    HostLoc::Q15,
};

// AAPCS64 has no shadow/home space (unlike the Windows x64 ABI).
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes

// 19 caller-save GPRs + 24 caller-save FPRs + 12 callee-save GPRs +
// 8 callee-save FPRs = 63 registers accounted for.
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 63, "Invalid total number of registers");

void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code);

// Variants that skip one register (typically the result register).
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,330 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <cstring>
#include <limits>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
namespace Dynarmic::BackendA64 {
// Out-of-line definitions of the AAPCS64 register aliases declared in
// block_of_code.h.
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1;

const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7;

// X30 (LR) is used as a scratch register; its value is saved/restored by the
// dispatcher prologue/epilogue.
const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30;

const std::array<Arm64Gen::ARM64Reg, 8> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2,
                                                                   BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4,
                                                                   BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6,
                                                                   BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8};
namespace {

constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;  // whole allocation (near + far code)
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;  // far code begins at this offset

#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
// Toggles the protection of [base, base+size) between read-write and
// read-execute, for platforms that enforce W^X.
void ProtectMemory(const void* base, size_t size, bool is_executable) {
#ifdef _WIN32
    DWORD oldProtect = 0;
    VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#else
    // NOTE(review): sysconf is declared in <unistd.h>, which this file does
    // not include directly — presumably pulled in transitively; confirm.
    static const size_t pageSize = sysconf(_SC_PAGESIZE);
    // mprotect requires a page-aligned address: round base down to the
    // containing page and extend the length accordingly.
    const size_t iaddr = reinterpret_cast<size_t>(base);
    const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
    const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
    mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif

} // anonymous namespace
// Allocates the code region, makes it writable, and emits the dispatcher
// prologue/epilogue (run_code / step_code and the return stubs).
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
    : fp_emitter(this)
    , cb(std::move(cb))
    , jsi(jsi)
    , constant_pool(*this) {
    AllocCodeSpace(TOTAL_CODE_SIZE);
    EnableWriting();
    GenRunCode();
}
// Marks the end of the prelude emitted by external emitters: everything
// after this point is cacheable block code, split into near and far halves.
void BlockOfCode::PreludeComplete() {
    prelude_complete = true;
    near_code_begin = GetCodePtr();
    far_code_begin = GetCodePtr() + FAR_CODE_OFFSET;
    FlushIcache();
    ClearCache();
    DisableWriting();
}
// Makes the code region writable (RW) on W^X platforms; a no-op otherwise.
// NOTE(review): TOTAL_CODE_SIZE is counted from GetCodePtr(), not from the
// region base — once code has been emitted these differ; confirm intended.
void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, false);
#endif
}

// Makes the code region executable (RX) on W^X platforms; a no-op otherwise.
void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, true);
#endif
}
// Discards all emitted block code (but not the prelude) and rewinds both the
// near and far code pointers to their starting positions.
void BlockOfCode::ClearCache() {
    ASSERT(prelude_complete);
    in_far_code = false;
    near_code_ptr = near_code_begin;
    far_code_ptr = far_code_begin;
    SetCodePtr(near_code_begin);
    constant_pool.Clear();
}
// Returns the smaller of the remaining near-code and far-code space.
size_t BlockOfCode::SpaceRemaining() const {
    ASSERT(prelude_complete);
    // This function provides an underestimate of near-code-size but that's okay.
    // (Why? The maximum size of near code should be measured from near_code_begin,
    // not from the start of the region.)
    // These are offsets from the start of the allocated code region.
    std::size_t far_code_offset, near_code_offset;
    if (in_far_code) {
        near_code_offset = static_cast<const u8*>(near_code_ptr) - static_cast<const u8*>(region);
        far_code_offset = GetCodePtr() - static_cast<const u8*>(region);
    } else {
        near_code_offset = GetCodePtr() - static_cast<const u8*>(region);
        far_code_offset = static_cast<const u8*>(far_code_ptr) - static_cast<const u8*>(region);
    }
    if (far_code_offset > TOTAL_CODE_SIZE)
        return 0;
    if (near_code_offset > FAR_CODE_OFFSET)
        return 0;
    return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}
// Enters the dispatcher and executes emulated code starting at code_ptr.
void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
    run_code(jit_state, code_ptr);
}

// As RunCode, but with a cycle budget of exactly one (single stepping).
void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
    step_code(jit_state, code_ptr);
}
void BlockOfCode::ReturnFromRunCode(bool fpscr_already_exited) {
size_t index = 0;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) {
size_t index = FORCE_RETURN;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
// Emits the dispatcher: the run_code/step_code entry points, the block
// lookup loop, and the four return-stub variants.
void BlockOfCode::GenRunCode() {
    const u8* loop, *enter_fpscr_then_loop;

    AlignCode16();
    run_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());

    // This serves two purposes:
    // 1. It saves all the registers we as a callee need to save.
    // 2. It aligns the stack so that the code the JIT emits can assume
    //    that the stack is appropriately aligned for CALLs.
    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    MOV(Arm64Gen::X28, ABI_PARAM1); // X28 = jit state pointer
    MOVI2R(Arm64Gen::X27, cb.value_in_X27);
    MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register

    cb.GetTicksRemaining->EmitCall(*this);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    MOV(Arm64Gen::X26, ABI_RETURN); // X26 = cycles remaining

    SwitchFpscrOnEntry();
    BR(Arm64Gen::X25); // jump to the requested block

    // Single-step entry point: identical, but with a budget of one cycle.
    AlignCode16();
    step_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    MOV(Arm64Gen::X28, ABI_PARAM1);
    MOVI2R(Arm64Gen::X26, 1);
    STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::X26, Arm64Gen::X28, jsi.offsetof_cycles_to_run);

    SwitchFpscrOnEntry();
    BR(ABI_PARAM2);

    // Dispatcher loop: look up the next block and branch into it.
    enter_fpscr_then_loop = GetCodePtr();
    SwitchFpscrOnEntry();
    loop = GetCodePtr();
    cb.LookupBlock->EmitCall(*this);
    BR(ABI_RETURN);

    // Return from run code variants
    const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){
        if (!force_return) {
            // Keep dispatching while cycles remain (X26 > 0).
            CMP(Arm64Gen::X26, Arm64Gen::ZR);
            B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop);
        }
        if (!fpscr_already_exited) {
            SwitchFpscrOnExit();
        }
        // AddTicks(cycles_to_run - cycles_remaining)
        cb.AddTicks->EmitCall(*this, [this](RegList param) {
            LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
            SUB(param[0], param[0], Arm64Gen::X26);
        });
        ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
        RET();
    };

    return_from_run_code[0] = AlignCode16();
    emit_return_from_run_code(false, false);

    return_from_run_code[FPSCR_ALREADY_EXITED] = AlignCode16();
    emit_return_from_run_code(true, false);

    return_from_run_code[FORCE_RETURN] = AlignCode16();
    emit_return_from_run_code(false, true);

    return_from_run_code[FPSCR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
    emit_return_from_run_code(true, true);

    PerfMapRegister(run_code, GetCodePtr(), "dynarmic_dispatcher");
}
// Emits code that saves the host FPCR into the jit state and installs the
// guest FPCR/FPSR as the live system registers.
void BlockOfCode::SwitchFpscrOnEntry() {
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);

    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
    _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
    _MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1);
}

// Emits code that writes the live FPCR/FPSR back into the guest state and
// restores the saved host FPCR.
void BlockOfCode::SwitchFpscrOnExit() {
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
    MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPSR);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);

    LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
    _MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}
// Emits code that reports consumed cycles via AddTicks and refreshes the
// cycle budget (X26) from GetTicksRemaining. Clobbers caller-save registers.
void BlockOfCode::UpdateTicks() {
    // AddTicks(cycles_to_run - cycles_remaining)
    cb.AddTicks->EmitCall(*this, [this](RegList param) {
        LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
        SUB(param[0], param[0], Arm64Gen::X26);
    });

    cb.GetTicksRemaining->EmitCall(*this);
    STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
    MOV(Arm64Gen::X26, ABI_RETURN);
}

// Emits a call to the block-lookup callback. Clobbers caller-save registers.
void BlockOfCode::LookupBlock() {
    cb.LookupBlock->EmitCall(*this);
}
// Emits a (possibly placeholder) literal load of the 128-bit constant
// {lower, upper} into Rt; resolved later by PatchConstPool.
void BlockOfCode::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
    ASSERT_MSG(!in_far_code, "Can't patch when in far code, yet!");
    constant_pool.EmitPatchLDR(Rt, lower, upper);
}

// Flushes pending constants and rewrites the placeholder LDRs to point at them.
void BlockOfCode::PatchConstPool() {
    constant_pool.PatchPool();
}
// Redirects emission to the far-code area, remembering the current near-code
// position so SwitchToNearCode can resume there.
void BlockOfCode::SwitchToFarCode() {
    ASSERT(prelude_complete);
    ASSERT(!in_far_code);
    in_far_code = true;
    near_code_ptr = GetCodePtr();
    SetCodePtr(far_code_ptr);

    ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}

// Resumes emission at the saved near-code position.
void BlockOfCode::SwitchToNearCode() {
    ASSERT(prelude_complete);
    ASSERT(in_far_code);
    in_far_code = false;
    far_code_ptr = GetCodePtr();
    SetCodePtr(near_code_ptr);
}
// Start of the cacheable (post-prelude) code area.
CodePtr BlockOfCode::GetCodeBegin() const {
    return near_code_begin;
}

// Base address of the whole allocated code region.
u8* BlockOfCode::GetRegion() const {
    return region;
}

// Total size of the allocated code region in bytes.
std::size_t BlockOfCode::GetRegionSize() const {
    return total_region_size;
}
// Carves alloc_size zero-initialized bytes out of the code region at the
// current emission point; used for data that must live near emitted code.
void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
    ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");

    void* ret = GetWritableCodePtr();
    region_size += alloc_size;
    SetCodePtr(GetCodePtr() + alloc_size);
    memset(ret, 0, alloc_size);
    return ret;
}
// Moves the emitter's write position. CodePtr is const void*, while the
// underlying emitter wants a mutable u8*.
void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
    ARM64XEmitter::SetCodePtr(reinterpret_cast<u8*>(const_cast<void*>(code_ptr)));
}
// Pads the code emitted since `begin` with NOPs so the patchable region is
// exactly `size` bytes long.
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
    const size_t emitted = GetCodePtr() - reinterpret_cast<const u8*>(begin);
    ASSERT(emitted <= size);
    size_t nops_needed = (size - emitted) / 4; // one A64 instruction = 4 bytes
    while (nops_needed-- > 0) {
        HINT(Arm64Gen::HINT_NOP);
    }
}
//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
// return cpu_info.has(type);
//#else
// (void)type;
// return false;
//#endif
//}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,147 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <type_traits>
#include "backend/A64/callback.h"
#include "backend/A64/constant_pool.h"
#include "backend/A64/jitstate_info.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using CodePtr = const void*;
// Callbacks the dispatcher needs from the owning JIT: block lookup, cycle
// accounting, and a constant to preload into X27 on entry.
struct RunCodeCallbacks {
    std::unique_ptr<Callback> LookupBlock;
    std::unique_ptr<Callback> AddTicks;
    std::unique_ptr<Callback> GetTicksRemaining;
    u64 value_in_X27;
};
// Owns the executable memory region and the dispatcher code that enters and
// leaves emulated code. All emitters in this backend write through this class.
class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
public:
    BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
    BlockOfCode(const BlockOfCode&) = delete;

    /// Call when external emitters have finished emitting their preludes.
    void PreludeComplete();

    /// Change permissions to RW. This is required to support systems with W^X enforced.
    void EnableWriting();
    /// Change permissions to RX. This is required to support systems with W^X enforced.
    void DisableWriting();

    /// Clears this block of code and resets code pointer to beginning.
    void ClearCache();
    /// Calculates how much space is remaining to use. This is the minimum of near code and far code.
    size_t SpaceRemaining() const;

    /// Runs emulated code from code_ptr.
    void RunCode(void* jit_state, CodePtr code_ptr) const;
    /// Runs emulated code from code_ptr for a single cycle.
    void StepCode(void* jit_state, CodePtr code_ptr) const;

    /// Code emitter: Returns to dispatcher
    void ReturnFromRunCode(bool fpscr_already_exited = false);
    /// Code emitter: Returns to dispatcher, forces return to host
    void ForceReturnFromRunCode(bool fpscr_already_exited = false);
    /// Code emitter: Makes guest FPSR and FPCR the current FPSR and FPCR
    void SwitchFpscrOnEntry();
    /// Code emitter: Makes saved host FPCR the current FPCR
    void SwitchFpscrOnExit();
    /// Code emitter: Updates cycles remaining my calling cb.AddTicks and cb.GetTicksRemaining
    /// @note this clobbers ABI caller-save registers
    void UpdateTicks();
    /// Code emitter: Performs a block lookup based on current state
    /// @note this clobbers ABI caller-save registers
    void LookupBlock();

    /// Returns the location of the constant {lower, upper} in the pool.
    u64 MConst(u64 lower, u64 upper = 0);
    /// Emits a literal load of {lower, upper} into Rt, patched later.
    void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
    /// Resolves all pending constant-pool loads.
    void PatchConstPool();

    /// Far code sits far away from the near code. Execution remains primarily in near code.
    /// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
    void SwitchToFarCode();
    void SwitchToNearCode();

    CodePtr GetCodeBegin() const;
    u8* GetRegion() const;
    std::size_t GetRegionSize() const;

    const void* GetReturnFromRunCodeAddress() const {
        return return_from_run_code[0];
    }

    const void* GetForceReturnFromRunCodeAddress() const {
        return return_from_run_code[FORCE_RETURN];
    }

    /// Allocate memory of `size` bytes from the same block of memory the code is in.
    /// This is useful for objects that need to be placed close to or within code.
    /// The lifetime of this memory is the same as the code around it.
    void* AllocateFromCodeSpace(size_t size);

    void SetCodePtr(CodePtr code_ptr);
    void EnsurePatchLocationSize(CodePtr begin, size_t size);

    // Emitter for floating-point/SIMD instructions, writing into this block.
    Arm64Gen::ARM64FloatEmitter fp_emitter;

    // ABI registers
    static const Arm64Gen::ARM64Reg ABI_RETURN;
    static const Arm64Gen::ARM64Reg ABI_RETURN2;
    static const Arm64Gen::ARM64Reg ABI_PARAM1;
    static const Arm64Gen::ARM64Reg ABI_PARAM2;
    static const Arm64Gen::ARM64Reg ABI_PARAM3;
    static const Arm64Gen::ARM64Reg ABI_PARAM4;
    static const Arm64Gen::ARM64Reg ABI_PARAM5;
    static const Arm64Gen::ARM64Reg ABI_PARAM6;
    static const Arm64Gen::ARM64Reg ABI_PARAM7;
    static const Arm64Gen::ARM64Reg ABI_PARAM8;
    static const Arm64Gen::ARM64Reg ABI_SCRATCH1;
    static const std::array<Arm64Gen::ARM64Reg, 8> ABI_PARAMS;

    // bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;

    JitStateInfo GetJitStateInfo() const { return jsi; }

private:
    RunCodeCallbacks cb;
    JitStateInfo jsi;

    bool prelude_complete = false;
    CodePtr near_code_begin;
    CodePtr far_code_begin;

    ConstantPool constant_pool;

    // Current near/far emission state; see SwitchToFarCode/SwitchToNearCode.
    bool in_far_code = false;
    CodePtr near_code_ptr;
    CodePtr far_code_ptr;

    using RunCodeFuncType = void(*)(void*, CodePtr);
    RunCodeFuncType run_code = nullptr;
    RunCodeFuncType step_code = nullptr;
    // Index bits selecting a return stub in return_from_run_code.
    static constexpr size_t FPSCR_ALREADY_EXITED = 1 << 0;
    static constexpr size_t FORCE_RETURN = 1 << 1;
    std::array<const void*, 4> return_from_run_code;
    void GenRunCode();

    //Xbyak::util::Cpu cpu_info;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,45 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "backend/A64/block_range_information.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
// Records that the compiled block `location` covers the guest addresses in
// `range`, so a later memory invalidation can find it.
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
    block_ranges.add({range, std::set<IR::LocationDescriptor>{location}});
}

// Forgets every recorded range.
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
    block_ranges.clear();
}
// Collects the location descriptors of every block overlapping any of the
// given address ranges. The ranges themselves are left registered.
template <typename ProgramCounterType>
std::unordered_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
    std::unordered_set<IR::LocationDescriptor> result;
    for (const auto& invalidate_interval : ranges) {
        const auto matches = block_ranges.equal_range(invalidate_interval);
        for (auto it = matches.first; it != matches.second; ++it) {
            // Every descriptor mapped to this overlapping interval is stale.
            result.insert(it->second.begin(), it->second.end());
        }
    }
    // TODO: EFFICIENCY: Remove ranges that are to be erased.
    return result;
}
// Explicit instantiations for 32-bit and 64-bit program counters.
template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>;
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "frontend/ir/location_descriptor.h"
namespace Dynarmic::BackendA64 {
template <typename ProgramCounterType>
class BlockRangeInformation {
public:
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
void ClearCache();
std::unordered_set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);
private:
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,41 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/callback.h"
#include "backend/A64/block_of_code.h"
namespace Dynarmic::BackendA64 {
Callback::~Callback() = default;

// Emits a call to fn. `l` receives the argument registers so the caller can
// fill them before the call is made.
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    l({code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.QuickCallFunction(fn);
}

// As EmitCall, but the first register passed to `l` is reserved for a
// pointer to return-value storage.
void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
    l(code.ABI_PARAM1, {code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.QuickCallFunction(fn);
}
// Emits a call to fn with the stored `arg` as the first argument; `l` fills
// the remaining argument registers.
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    l({code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.MOVI2R(code.ABI_PARAM1, arg);
    code.QuickCallFunction(fn);
}

// As EmitCall, but one register is reserved for a return-value pointer; the
// register assignment differs between Windows and other ABIs.
// NOTE(review): this tests WIN32 rather than the compiler-defined _WIN32 —
// matches the x64 backend's pattern, but confirm it is defined here.
void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
#if defined(WIN32) && !defined(__MINGW64__)
    l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
    code.MOVI2R(code.ABI_PARAM1, arg);
#else
    l(code.ABI_PARAM1, {code.ABI_PARAM3, code.ABI_PARAM4});
    code.MOVI2R(code.ABI_PARAM2, arg);
#endif
    code.QuickCallFunction(fn);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,54 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <functional>
#include <vector>
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using RegList = std::vector<Arm64Gen::ARM64Reg>;
class BlockOfCode;
// Interface for emitting a call out of JITed code into host C++. The functor
// argument lets the call site fill the argument registers before the call.
class Callback {
public:
    virtual ~Callback();

    virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const = 0;
    virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const = 0;
};

// Callback to a plain function; all argument registers are available to the
// call site.
class SimpleCallback final : public Callback {
public:
    template <typename Function>
    SimpleCallback(Function fn) : fn(reinterpret_cast<void (*)()>(fn)) {}

    void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
    void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;

private:
    void (*fn)();
};

// Callback that passes a fixed 64-bit value (e.g. a `this` pointer) as the
// function's first argument; the call site fills the remaining registers.
class ArgCallback final : public Callback {
public:
    template <typename Function>
    ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}

    void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
    void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;

private:
    void (*fn)();
    u64 arg;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,65 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstring>
#include "backend/A64/block_of_code.h"
#include "backend/A64/constant_pool.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
// Binds the pool to the BlockOfCode whose buffer it emits literals into.
ConstantPool::ConstantPool(BlockOfCode& code) : code(code) {}
/// Emits a PC-relative LDR (literal) of the 128-bit constant {lower, upper}
/// into Rt. If the constant is not yet in the pool (or is out of LDR range),
/// a BRK placeholder is emitted instead and recorded for PatchPool() to fix up.
void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
    const auto constant = std::make_tuple(lower, upper);

    const auto iter = constant_info.find(constant);
    if (iter != constant_info.end()) {
        const s32 offset = static_cast<s32>(reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr()));
        // LDR (literal) takes a signed 19-bit word offset; this byte-range
        // check is conservative (offset/4 always fits when offset does).
        if (offset >= -0x40000 && offset <= 0x3FFFF) {
            DEBUG_ASSERT((offset & 3) == 0);
            code.LDR(Rt, offset / 4);
            return;
        }
        // Too far from the pooled copy: drop the stale entry via the iterator
        // (O(1), no second lookup by key) and re-pool it near the new code.
        constant_info.erase(iter);
    }

    // Placeholder; PatchPool() overwrites this BRK with the real LDR once the
    // constant has been appended to the pool.
    patch_info.push_back(PatchInfo{code.GetCodePtr(), Rt, constant});
    code.BRK(0);
}
void ConstantPool::PatchPool() {
u8* pool_ptr = code.GetWritableCodePtr();
for (PatchInfo patch : patch_info) {
auto iter = constant_info.find(patch.constant);
if (iter == constant_info.end()) {
std::memcpy(pool_ptr, &std::get<0>(patch.constant), sizeof(u64));
std::memcpy(pool_ptr + sizeof(u64), &std::get<1>(patch.constant), sizeof(u64));
iter = constant_info.emplace(patch.constant, pool_ptr).first;
pool_ptr += align_size;
}
code.SetCodePtr(patch.ptr);
const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
DEBUG_ASSERT((offset & 3) == 0);
code.LDR(patch.Rt, offset / 4);
}
patch_info.clear();
code.SetCodePtr(pool_ptr);
}
/// Forgets every pooled constant and any placeholder sites still awaiting
/// PatchPool(); the pool starts from scratch afterwards.
void ConstantPool::Clear() {
    patch_info.clear();
    constant_info.clear();
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,47 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <map>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
/// ConstantPool allocates a block of memory from BlockOfCode.
/// It places constants into this block of memory, returning the address
/// of the memory location where the constant is placed. If the constant
/// already exists, its memory location is reused.
class ConstantPool final {
public:
    ConstantPool(BlockOfCode& code);

    /// Emits an LDR (literal) of the 128-bit constant {lower, upper} into Rt,
    /// or a placeholder that PatchPool() later rewrites.
    void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);

    /// Appends pending constants at the current code position and fixes up
    /// all recorded placeholder sites.
    void PatchPool();

    /// Drops all pooled constants and pending patch sites.
    void Clear();

private:
    static constexpr size_t align_size = 16; // bytes — one slot holds a u64 pair

    std::map<std::tuple<u64, u64>, void*> constant_info;  // constant -> address in pool
    BlockOfCode& code;

    // One placeholder emission awaiting PatchPool().
    struct PatchInfo {
        const void* ptr;                // patch site in the code buffer
        Arm64Gen::ARM64Reg Rt;          // destination register of the LDR
        std::tuple<u64, u64> constant;  // value to be loaded
    };
    std::vector<PatchInfo> patch_info;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,77 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstring>
#include <memory>
#include <mp/traits/function_info.h>
#include "backend/A64/callback.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace impl {
/// Primary template; only the member-function-pointer specialization below
/// is ever defined.
template <typename FunctionType, FunctionType mfp>
struct ThunkBuilder;

/// Wraps the member function pointer `mfp` in a static function whose first
/// parameter is the object pointer, so it is callable through a plain
/// C-style function pointer.
template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> {
    static R Thunk(C* this_, Args... args) {
        return (this_->*mfp)(std::forward<Args>(args)...);
    }
};
} // namespace impl
/// Portable fallback: builds an ArgCallback that calls `mfp` through a
/// generated static thunk, binding the object pointer as the first argument.
template<auto mfp>
ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
    return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
}
/// On Windows a (single-inheritance) member function pointer is a plain
/// 8-byte code pointer, so it can be bit-cast directly.
/// NOTE(review): not referenced by Devirtualize() below — presumably kept for
/// parity with the x64 backend; confirm before removing.
template<auto mfp>
ArgCallback DevirtualizeWindows(mp::class_type<decltype(mfp)>* this_) {
    static_assert(sizeof(mfp) == 8);
    return ArgCallback{Common::BitCast<u64>(mfp), reinterpret_cast<u64>(this_)};
}
/// Resolves a member function pointer using the Itanium C++ ABI layout for
/// AArch64: `adj` holds twice the required `this` adjustment, with the low
/// bit set when the function is virtual (in which case `ptr` is a vtable
/// offset in bytes rather than a code address).
template<auto mfp>
ArgCallback DevirtualizeAarch64(mp::class_type<decltype(mfp)>* this_) {
    struct MemberFunctionPointer {
        /// For a non-virtual function, this is a simple function pointer.
        /// For a virtual function, it is virtual table offset in bytes.
        u64 ptr;
        /// Twice the required adjustment to `this`, plus 1 if the member function is virtual.
        u64 adj;
    } mfp_struct = Common::BitCast<MemberFunctionPointer>(mfp);

    static_assert(sizeof(MemberFunctionPointer) == 16);
    static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));

    u64 fn_ptr = mfp_struct.ptr;
    u64 this_ptr = reinterpret_cast<u64>(this_) + mfp_struct.adj / 2;
    if (mfp_struct.adj & 1) {
        // Virtual: read the vtable pointer from the adjusted object, then
        // load the code address from the vtable slot.
        u64 vtable = Common::BitCastPointee<u64>(this_ptr);
        fn_ptr = Common::BitCastPointee<u64>(vtable + fn_ptr);
    }
    return ArgCallback{fn_ptr, this_ptr};
}
/// Produces an ArgCallback for a member function pointer. Linux builds decode
/// the Itanium AArch64 representation directly; every other platform goes
/// through the generic thunk.
/// NOTE(review): other AArch64 Itanium-ABI platforms fall back to the generic
/// path here — confirm that is intentional.
template<auto mfp>
ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
#if defined(linux) || defined(__linux) || defined(__linux__)
    return DevirtualizeAarch64<mfp>(this_);
#else
    return DevirtualizeGeneric<mfp>(this_);
#endif
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,286 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_map>
#include <unordered_set>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/hostloc.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "common/variant_util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
namespace Dynarmic::BackendA64 {
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
    : reg_alloc(reg_alloc), block(block) {}

/// Removes `inst` from the IR block and releases its argument uses.
void EmitContext::EraseInstruction(IR::Inst* inst) {
    block.Instructions().erase(inst);
    inst->ClearArgs();
}
EmitA64::EmitA64(BlockOfCode& code)
    : code(code) {}

EmitA64::~EmitA64() = default;
/// Returns the descriptor of a previously-registered host block, or
/// std::nullopt when no block has been emitted for this location.
std::optional<typename EmitA64::BlockDescriptor> EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
    const auto found = block_descriptors.find(descriptor);
    return found != block_descriptors.end() ? std::optional<BlockDescriptor>{found->second} : std::nullopt;
}
/// Void IR instructions generate no code.
void EmitA64::EmitVoid(EmitContext&, IR::Inst*) {
}

/// Emits a hard breakpoint (BRK #0).
void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) {
    code.BRK(0);
}

/// Identity forwards its argument's value as the result; immediates need no
/// register definition.
void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    if (!args[0].IsImmediate()) {
        ctx.reg_alloc.DefineValue(inst, args[0]);
    }
}
/// Pushes an entry onto the return stack buffer in JIT state (based at X28):
/// the target's location descriptor and its predicted code pointer — the
/// compiled entrypoint if known, otherwise the return-from-run-code address.
void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) {
    auto iter = block_descriptors.find(target);
    CodePtr target_code_ptr = iter != block_descriptors.end()
                              ? iter->second.entrypoint
                              : code.GetReturnFromRunCodeAddress();

    code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);

    code.MOVI2R(loc_desc_reg, target.Value());

    // Record this site so the MOV into X0 can be repointed when the target
    // block is compiled later (or unlinked on invalidation).
    patch_information[target].mov_x0.emplace_back(code.GetCodePtr());
    EmitPatchMovX0(target_code_ptr);

    // Store descriptor and code pointer at rsb[index] (index scaled by 8).
    code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3});
    code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors);
    code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs);

    // Advance the RSB index modulo its size (mask from jit-state info).
    code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1);
    code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1);
    code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
}
/// IR PushRSB: pushes a statically-known target (immediate location
/// descriptor) onto the RSB. X0 is reserved as scratch because
/// PushRSBHelper stores X0 as the predicted code pointer.
void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(args[0].IsImmediate());
    u64 unique_hash_of_target = args[0].GetImmediateU64();

    ctx.reg_alloc.ScratchGpr({HostLoc::X0});
    Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr();
    Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr();

    PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
// These flag/part-extraction pseudo-ops are expected to be consumed while
// emitting the instruction that defines them; reaching a standalone emitter
// for one indicates a logic error elsewhere in the emitter.
void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("should never happen");
}

void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("should never happen");
}

void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("should never happen");
}

void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("should never happen");
}

void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("should never happen");
}
/// Produces host-format NZCV from a value: CMP against zero sets the flags,
/// then MRS copies the NZCV field into a GPR.
void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
    Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]);
    code.CMP(value, ZR);
    code.MRS(nzcv, FIELD_NZCV);
    ctx.reg_alloc.DefineValue(inst, nzcv);
}
/// Converts guest packed flags (NZCV in bits 31..28) into the backend's
/// packed representation: N at bit 15, Z at bit 14, C at bit 8, V at bit 0.
void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (args[0].IsImmediate()) {
        Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
        u32 value = 0;
        value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
        value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
        value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
        value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
        code.MOVI2R(nzcv, value);
        ctx.reg_alloc.DefineValue(inst, nzcv);
    } else {
        Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
        Arm64Gen::ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
        // TODO: Optimize
        // Shift NZCV down to bits 3..0, then spread via multiply:
        // x * 0b00010000'10000001 == x | x << 7 | x << 12, which places
        // N at bit 15, Z at bit 14, C at bit 8 and V at bit 0 (plus junk bits).
        code.LSR(nzcv, nzcv, 28);
        code.MOVI2R(scratch, 0b00010000'10000001);
        code.MUL(nzcv, nzcv, scratch);
        // BUGFIX: mask must keep bits 15, 14, 8 and 0 to match the immediate
        // path above; the previous mask of 1 discarded N, Z and C entirely.
        code.ANDI2R(nzcv, nzcv, 0b11000001'00000001, scratch);
        ctx.reg_alloc.DefineValue(inst, nzcv);
    }
}
/// Decrements the remaining-cycles counter (kept in X26) by `cycles`.
/// NOTE(review): std::numeric_limits relies on <limits> being included
/// transitively — confirm.
void EmitA64::EmitAddCycles(size_t cycles) {
    ASSERT(cycles < std::numeric_limits<u32>::max());
    code.SUBI2R(X26, X26, static_cast<u32>(cycles));
}
/// Loads the guest's stored NZCV from JIT state into the host flags, then
/// emits a conditional branch taken when `cond` holds. Returns the fixup to
/// be bound to the branch target.
FixupBranch EmitA64::EmitCond(IR::Cond cond) {
    FixupBranch label;

    const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
    code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_cpsr_nzcv);
    code._MSR(FIELD_NZCV, cpsr);

    // Guest condition codes map 1:1 onto host condition codes.
    switch (cond) {
    case IR::Cond::EQ: //z
        label = code.B(CC_EQ);
        break;
    case IR::Cond::NE: //!z
        label = code.B(CC_NEQ);
        break;
    case IR::Cond::CS: //c
        label = code.B(CC_CS);
        break;
    case IR::Cond::CC: //!c
        label = code.B(CC_CC);
        break;
    case IR::Cond::MI: //n
        label = code.B(CC_MI);
        break;
    case IR::Cond::PL: //!n
        label = code.B(CC_PL);
        break;
    case IR::Cond::VS: //v
        label = code.B(CC_VS);
        break;
    case IR::Cond::VC: //!v
        label = code.B(CC_VC);
        break;
    case IR::Cond::HI: //c & !z
        label = code.B(CC_HI);
        break;
    case IR::Cond::LS: //!c | z
        label = code.B(CC_LS);
        break;
    case IR::Cond::GE: // n == v
        label = code.B(CC_GE);
        break;
    case IR::Cond::LT: // n != v
        label = code.B(CC_LT);
        break;
    case IR::Cond::GT: // !z & (n == v)
        label = code.B(CC_GT);
        break;
    case IR::Cond::LE: // z | (n != v)
        label = code.B(CC_LE);
        break;
    default:
        ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
        break;
    }

    return label;
}
/// Records a freshly-emitted block in the cache and repoints all existing
/// patch sites (links from other blocks) at its entrypoint.
EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
    PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor));
    Patch(descriptor, entrypoint);
    BlockDescriptor block_desc{entrypoint, size};
    block_descriptors.emplace(descriptor.Value(), block_desc);
    return block_desc;
}
/// Dispatches on the variant held in `terminal` to the matching
/// EmitTerminalImpl overload; an Invalid terminal is a hard error.
void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
        using T = std::decay_t<decltype(x)>;
        if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
            this->EmitTerminalImpl(x, initial_location, is_single_step);
        } else {
            ASSERT_MSG(false, "Invalid terminal");
        }
    });
}
/// Rewrites every recorded patch site targeting `desc` to point at `bb`
/// (nullptr restores the not-yet-compiled fallback; see Unpatch).
void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
    // Patching seeks backwards in the code buffer; save and restore the
    // emission cursor around the rewrites.
    const CodePtr save_code_ptr = code.GetCodePtr();
    // Note: operator[] creates an empty entry when none exists yet.
    const PatchInformation& patch_info = patch_information[desc];

    for (CodePtr location : patch_info.jg) {
        code.SetCodePtr(location);
        EmitPatchJg(desc, bb);
        code.FlushIcache();
    }

    for (CodePtr location : patch_info.jmp) {
        code.SetCodePtr(location);
        EmitPatchJmp(desc, bb);
        code.FlushIcache();
    }

    for (CodePtr location : patch_info.mov_x0) {
        code.SetCodePtr(location);
        EmitPatchMovX0(bb);
        code.FlushIcache();
    }

    code.SetCodePtr(save_code_ptr);
}
/// Reverts all links into `desc` to their not-yet-compiled fallbacks.
void EmitA64::Unpatch(const IR::LocationDescriptor& desc) {
    Patch(desc, nullptr);
}
/// Discards every emitted block, all patch bookkeeping and perf-map entries.
void EmitA64::ClearCache() {
    PerfMapClear();
    patch_information.clear();
    block_descriptors.clear();
}
/// Removes the given blocks from the cache, first unlinking any jumps into
/// them. The code buffer is made writable for the duration.
void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
    code.EnableWriting();
    SCOPE_EXIT { code.DisableWriting(); };

    for (const auto &descriptor : locations) {
        auto it = block_descriptors.find(descriptor);
        if (it == block_descriptors.end()) {
            continue;
        }
        if (patch_information.count(descriptor)) {
            Unpatch(descriptor);
        }
        block_descriptors.erase(it);
    }
}
} // namespace Dynarmic::BackendA64

124
src/backend/A64/emit_a64.h Normal file
View File

@ -0,0 +1,124 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "backend/A64/reg_alloc.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/bit_util.h"
#include "common/fp/rounding_mode.h"
#include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::IR {
class Block;
class Inst;
} // namespace Dynarmic::IR
namespace Dynarmic::BackendA64 {
class BlockOfCode;
using namespace Arm64Gen;
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
/// Per-block emission state. The frontend-specific subclass supplies the
/// FP-configuration queries declared pure here.
struct EmitContext {
    EmitContext(RegAlloc& reg_alloc, IR::Block& block);

    /// Removes `inst` from the block and releases its argument uses.
    void EraseInstruction(IR::Inst* inst);

    virtual FP::RoundingMode FPSCR_RMode() const = 0;
    virtual u32 FPCR() const = 0;
    virtual bool FPSCR_FTZ() const = 0;
    virtual bool FPSCR_DN() const = 0;
    virtual bool AccurateNaN() const { return true; }

    RegAlloc& reg_alloc;
    IR::Block& block;
};
/// Backend-generic A64 emitter: owns the cache of emitted blocks and the
/// cross-block patching machinery. Frontend-specific emitters derive from it.
class EmitA64 {
public:
    struct BlockDescriptor {
        CodePtr entrypoint;  // Entrypoint of emitted code
        size_t size;         // Length in bytes of emitted code
    };

    EmitA64(BlockOfCode& code);
    virtual ~EmitA64();

    /// Looks up an emitted host block in the cache.
    std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;

    /// Empties the entire cache.
    virtual void ClearCache();

    /// Invalidates a selection of basic blocks.
    void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);

protected:
    // Microinstruction emitters
    // One Emit<Name> declaration is generated per shared opcode.
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...)
#define A64OPC(...)
#include "backend/A64/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC

    // Helpers
    virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
    void EmitAddCycles(size_t cycles);
    FixupBranch EmitCond(IR::Cond cond);
    BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
    void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target);

    // Terminal instruction emitters
    void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
    virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;

    // Patching
    // Code locations awaiting (re)pointing at a block's entrypoint.
    struct PatchInformation {
        std::vector<CodePtr> jg;
        std::vector<CodePtr> jmp;
        std::vector<CodePtr> mov_x0;
    };
    void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
    virtual void Unpatch(const IR::LocationDescriptor& target_desc);
    virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
    virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
    virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;

    // State
    BlockOfCode& code;
    std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
    std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};
} // namespace Dynarmic::BackendA64

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,471 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <optional>
#include <type_traits>
#include <utility>
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
/// Maps a guest FP::RoundingMode onto the equivalent A64 rounding encoding.
Arm64Gen::RoundingMode ConvertRoundingModeToA64RoundingMode(FP::RoundingMode rounding_mode) {
    using FPRM = FP::RoundingMode;
    switch (rounding_mode) {
    case FPRM::ToNearest_TieEven:         return RoundingMode::ROUND_N;
    case FPRM::TowardsPlusInfinity:       return RoundingMode::ROUND_P;
    case FPRM::TowardsMinusInfinity:      return RoundingMode::ROUND_M;
    case FPRM::TowardsZero:               return RoundingMode::ROUND_Z;
    case FPRM::ToNearest_TieAwayFromZero: return RoundingMode::ROUND_A;
    default:
        UNREACHABLE();
    }
}
/// Emits a unary scalar FP operation in place on args[0].
/// `fn` is either an ARM64FloatEmitter member taking (dst, src), or a
/// callable invoked with the encoded register.
template <size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
    result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
    if constexpr (std::is_member_function_pointer_v<Function>) {
        (code.fp_emitter.*fn)(result, result);
    } else {
        fn(result);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
/// Emits a binary scalar FP operation: result = fn(args[0], args[1]),
/// accumulated into args[0]'s register.
/// `fn` is either an ARM64FloatEmitter member taking (dst, src1, src2), or a
/// callable invoked with the encoded registers.
template <size_t fsize, typename Function>
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
    ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(args[1]);
    result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
    operand = fsize == 32 ? EncodeRegToSingle(operand) : EncodeRegToDouble(operand);

    if constexpr (std::is_member_function_pointer_v<Function>) {
        (code.fp_emitter.*fn)(result, result, operand);
    } else {
        fn(result, result, operand);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
//void EmitA64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pand(result, code.MConst(xword, f16_non_sign_mask));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
/// Scalar single-precision absolute value, computed in place via FABS.
void EmitA64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
    auto arguments = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(arguments[0]);
    const ARM64Reg value = EncodeRegToSingle(operand);

    code.fp_emitter.FABS(value, value);

    ctx.reg_alloc.DefineValue(inst, value);
}

/// Scalar double-precision absolute value, computed in place via FABS.
void EmitA64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
    auto arguments = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(arguments[0]);
    const ARM64Reg value = EncodeRegToDouble(operand);

    code.fp_emitter.FABS(value, value);

    ctx.reg_alloc.DefineValue(inst, value);
}
//void EmitA64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pxor(result, code.MConst(xword, f16_negative_zero));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
/// Scalar single-precision negation, computed in place via FNEG.
void EmitA64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
    auto arguments = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(arguments[0]);
    const ARM64Reg value = EncodeRegToSingle(operand);

    code.fp_emitter.FNEG(value, value);

    ctx.reg_alloc.DefineValue(inst, value);
}

/// Scalar double-precision negation, computed in place via FNEG.
void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
    auto arguments = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(arguments[0]);
    const ARM64Reg value = EncodeRegToDouble(operand);

    code.fp_emitter.FNEG(value, value);

    ctx.reg_alloc.DefineValue(inst, value);
}
// Scalar FP arithmetic: each op forwards to FPThreeOp/FPTwoOp with the
// matching ARM64FloatEmitter member function.
void EmitA64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}

void EmitA64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}

void EmitA64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}

void EmitA64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}

void EmitA64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}

void EmitA64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}

void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
    FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}

void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
    FPTwoOp<64>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}

void EmitA64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}

void EmitA64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
/// Copies the host NZCV flags (set by a preceding FCMP/FCMPE) into a fresh GPR.
/// NOTE(review): despite the name, this only reads the flags into a register;
/// it does not write the guest FPSCR here.
static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
    ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
    // Fpsr's nzcv is copied across integer nzcv
    code.MRS(nzcv, FIELD_NZCV);
    return nzcv;
}
/// Single-precision compare; args[2] selects FCMPE (signals on quiet NaNs)
/// versus FCMP. The resulting NZCV is returned packed in a GPR.
void EmitA64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ARM64Reg reg_a = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[0]));
    ARM64Reg reg_b = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[1]));
    bool exc_on_qnan = args[2].GetImmediateU1();

    if (exc_on_qnan) {
        code.fp_emitter.FCMPE(reg_a, reg_b);
    } else {
        code.fp_emitter.FCMP(reg_a, reg_b);
    }

    ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
    ctx.reg_alloc.DefineValue(inst, nzcv);
}

/// Double-precision variant of EmitFPCompare32.
void EmitA64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg reg_a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[0]));
    const ARM64Reg reg_b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    bool exc_on_qnan = args[2].GetImmediateU1();

    if (exc_on_qnan) {
        code.fp_emitter.FCMPE(reg_a, reg_b);
    } else {
        code.fp_emitter.FCMP(reg_a, reg_b);
    }

    ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
    ctx.reg_alloc.DefineValue(inst, nzcv);
}
// Scalar FP width conversions via FCVT(dest_bits, src_bits, dst, src).
// The register operand is encoded for the SOURCE width; FCVT receives the
// widths explicitly.
void EmitA64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));

    code.fp_emitter.FCVT(64, 16, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));

    code.fp_emitter.FCVT(32, 16, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));

    code.fp_emitter.FCVT(64, 32, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));

    code.fp_emitter.FCVT(16, 32, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));

    code.fp_emitter.FCVT(16, 64, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitA64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));

    code.fp_emitter.FCVT(32, 64, result, result);

    ctx.reg_alloc.DefineValue(inst, result);
}
/// Converts a scalar FP value (fsize bits) to an integer (isize bits) using
/// the explicit rounding mode from the IR. Fractional-bit (fixed-point)
/// conversions are not yet implemented.
template<size_t fsize, bool unsigned_, size_t isize>
static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t fbits = args[1].GetImmediateU8();
    const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    const auto round_imm = ConvertRoundingModeToA64RoundingMode(rounding_mode);

    ASSERT_MSG(fbits == 0, "fixed point conversions are not supported yet");

    ARM64Reg src = ctx.reg_alloc.UseScratchFpr(args[0]);
    ARM64Reg result = ctx.reg_alloc.ScratchGpr();
    src = fsize == 64 ? EncodeRegToDouble(src) : EncodeRegToSingle(src);
    result = isize == 64 ? result : DecodeReg(result);

    if constexpr (unsigned_) {
        code.fp_emitter.FCVTU(result, src, round_imm);
    } else {
        code.fp_emitter.FCVTS(result, src, round_imm);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// FP -> integer conversions, dispatched to EmitFPToFixed<fsize, unsigned_, isize>.
void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, false, 32>(code, ctx, inst);
}

void EmitA64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, false, 64>(code, ctx, inst);
}

void EmitA64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, true, 32>(code, ctx, inst);
}

void EmitA64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, true, 64>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, false, 32>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, false, 64>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, true, 32>(code, ctx, inst);
}

void EmitA64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<32, true, 64>(code, ctx, inst);
}
void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
// Unsigned fixed-point/integer to floating-point conversions, mirroring the
// signed variants above but using UCVTF.
void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg src = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg dest = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
    const size_t fractional_bits = args[1].GetImmediateU8();
    const FP::RoundingMode rmode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    // Only the rounding mode currently active in the guest FPSCR is supported.
    ASSERT(rmode == ctx.FPSCR_RMode());
    if (fractional_bits == 0) {
        code.fp_emitter.UCVTF(dest, src);
    } else {
        code.fp_emitter.UCVTF(dest, src, fractional_bits);
    }
    ctx.reg_alloc.DefineValue(inst, dest);
}

// 64-bit unsigned source to double-precision destination.
void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg src = ctx.reg_alloc.UseGpr(args[0]);
    const ARM64Reg dest = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
    const size_t fractional_bits = args[1].GetImmediateU8();
    const FP::RoundingMode rmode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rmode == ctx.FPSCR_RMode());
    if (fractional_bits == 0) {
        code.fp_emitter.UCVTF(dest, src);
    } else {
        code.fp_emitter.UCVTF(dest, src, fractional_bits);
    }
    ctx.reg_alloc.DefineValue(inst, dest);
}

// 64-bit unsigned source to single-precision destination.
void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg src = ctx.reg_alloc.UseGpr(args[0]);
    const ARM64Reg dest = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
    const size_t fractional_bits = args[1].GetImmediateU8();
    const FP::RoundingMode rmode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    ASSERT(rmode == ctx.FPSCR_RMode());
    if (fractional_bits == 0) {
        code.fp_emitter.UCVTF(dest, src);
    } else {
        code.fp_emitter.UCVTF(dest, src, fractional_bits);
    }
    ctx.reg_alloc.DefineValue(inst, dest);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,469 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
// Packed (parallel lane-wise) byte addition, unsigned. The packed value lives
// in the low 64 bits of a vector register. When a GetGEFromOp pseudo-op is
// attached, each lane of the GE result is an all-ones mask when that lane's
// addition wrapped (unsigned carry-out).
void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.ADD(B, sum, sum, b);
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        // Unsigned overflow occurred in a lane iff the wrapped sum is below an
        // addend: b >u sum.
        code.fp_emitter.CMHI(B, ge, b, sum);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    ctx.reg_alloc.DefineValue(inst, sum);
}

// Packed byte addition, signed. The GE mask (if requested) is computed before
// the wrapping ADD clobbers `a`: a lane is all-ones when the saturated sum is
// non-negative (SQADD followed by a compare-greater-or-equal-zero).
void EmitA64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQADD(B, ge, a, b);
        code.fp_emitter.CMGE_zero(B, ge, ge);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    code.fp_emitter.ADD(B, a, a, b);
    ctx.reg_alloc.DefineValue(inst, a);
}

// Packed halfword addition, unsigned (same scheme as the U8 variant).
void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.ADD(H, sum, sum, b);
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        // Carry-out detection: b >u wrapped sum.
        code.fp_emitter.CMHI(H, ge, b, sum);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    ctx.reg_alloc.DefineValue(inst, sum);
}

// Packed halfword addition, signed (same scheme as the S8 variant).
void EmitA64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQADD(H, ge, a, b);
        code.fp_emitter.CMGE_zero(H, ge, ge);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    code.fp_emitter.ADD(H, a, a, b);
    ctx.reg_alloc.DefineValue(inst, a);
}
// Packed byte subtraction, unsigned. The GE mask (if requested) is computed
// from the original operands before SUB clobbers `a`: a lane is all-ones when
// no borrow occurs, i.e. a >=u b.
void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.CMHS(B, ge, a, b);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    code.fp_emitter.SUB(B, a, a, b);
    ctx.reg_alloc.DefineValue(inst, a);
}

// Packed byte subtraction, signed. GE lane mask is all-ones when the
// saturated difference is non-negative (SQSUB then compare >= 0).
void EmitA64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQSUB(B, ge, a, b);
        code.fp_emitter.CMGE_zero(B, ge, ge);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    code.fp_emitter.SUB(B, a, a, b);
    ctx.reg_alloc.DefineValue(inst, a);
}

// Packed halfword subtraction, unsigned (same scheme as the U8 variant).
void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        // No borrow iff a >=u b.
        code.fp_emitter.CMHS(H, ge, a, b);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    code.fp_emitter.SUB(H, a, a, b);
    ctx.reg_alloc.DefineValue(inst, a);
}

// Packed halfword subtraction, signed (same scheme as the S8 variant).
void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    if (ge_inst) {
        const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
        code.fp_emitter.SQSUB(H, ge, a, b);
        code.fp_emitter.CMGE_zero(H, ge, ge);
        ctx.reg_alloc.DefineValue(ge_inst, ge);
        ctx.EraseInstruction(ge_inst);
    }
    code.fp_emitter.SUB(H, a, a, b);
    ctx.reg_alloc.DefineValue(inst, a);
}
// Packed halving arithmetic: lane-wise (x op y) / 2, computed without
// intermediate overflow by the NEON halving instructions. No GE flags are
// produced by the halving forms.
void EmitA64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UHADD(B, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}

void EmitA64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UHADD(H, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}

void EmitA64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SHADD(B, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}

void EmitA64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SHADD(H, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}

void EmitA64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UHSUB(B, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}

void EmitA64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SHSUB(B, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}

void EmitA64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UHSUB(H, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}

void EmitA64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg x = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg y = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SHSUB(H, x, x, y);
    ctx.reg_alloc.DefineValue(inst, x);
}
// Shared emitter for the halfword exchange add/sub family (ASX/SAX and their
// signed/halving variants). Each operand is two packed 16-bit halfwords in a
// 32-bit GPR; b's halfwords are crossed, then one pair is summed and the
// other subtracted.
//   hi_is_sum  - the high halfword of the result holds the sum, else the difference
//   is_signed  - treat halfwords as signed
//   is_halving - halve both intermediate results before merging
void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const ARM64Reg reg_a_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
    const ARM64Reg reg_b_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[1]));
    const ARM64Reg reg_a_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_b_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
    ARM64Reg reg_sum, reg_diff;

    // Split each operand into its extended low halfword (*_lo) and its high
    // halfword shifted into the low bits (*_hi).
    if (is_signed) {
        code.SXTH(reg_a_lo, reg_a_hi);
        code.SXTH(reg_b_lo, reg_b_hi);
        code.ASR(reg_a_hi, reg_a_hi, 16);
        code.ASR(reg_b_hi, reg_b_hi, 16);
    } else {
        code.UXTH(reg_a_lo, reg_a_hi);
        code.UXTH(reg_b_lo, reg_b_hi);
        code.LSR(reg_a_hi, reg_a_hi, 16);
        code.LSR(reg_b_hi, reg_b_hi, 16);
    }

    // Cross-combine: a's low pairs with b's high and vice versa.
    if (hi_is_sum) {
        code.SUB(reg_a_lo, reg_a_lo, reg_b_hi);
        code.ADD(reg_a_hi, reg_a_hi, reg_b_lo);
        reg_diff = reg_a_lo;
        reg_sum = reg_a_hi;
    } else {
        code.ADD(reg_a_lo, reg_a_lo, reg_b_hi);
        code.SUB(reg_a_hi, reg_a_hi, reg_b_lo);
        reg_diff = reg_a_hi;
        reg_sum = reg_a_lo;
    }

    if (ge_inst) {
        // The reg_b registers are no longer required.
        const ARM64Reg ge_sum = reg_b_hi;
        const ARM64Reg ge_diff = reg_b_lo;

        if (!is_signed) {
            // Unsigned GE for the sum is the carry out of the 16-bit addition:
            // move bit 16 into bit 31, then broadcast it across the register.
            code.LSL(ge_sum, reg_sum, 15);
            code.ASR(ge_sum, ge_sum, 31);
        } else {
            // Signed GE is "sum >= 0": broadcast the inverted sign bit.
            code.MVN(ge_sum, reg_sum);
            code.ASR(ge_sum, ge_sum, 31);
        }
        // GE for the difference is "diff >= 0" in both signednesses.
        code.MVN(ge_diff, reg_diff);
        code.ASR(ge_diff, ge_diff, 31);

        // Keep each mask only in the halfword its lane occupies, then combine.
        code.ANDI2R(ge_sum, ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
        code.ANDI2R(ge_diff, ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
        code.ORR(ge_sum, ge_sum, ge_diff);

        ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
        ctx.EraseInstruction(ge_inst);
    }

    if (is_halving) {
        // Logical shift suffices even for signed values: only the low 16 bits
        // of each intermediate survive the merge below, and those bits are the
        // same under LSR and ASR by 1.
        code.LSR(reg_a_hi, reg_a_hi, 1);
        code.LSR(reg_a_lo, reg_a_lo, 1);
    }

    // reg_a_lo now contains the low word and reg_a_hi now contains the high word.
    // Merge them.
    code.BFM(reg_a_lo, reg_a_hi, 16, 15);
    ctx.reg_alloc.DefineValue(inst, reg_a_lo);
}
// Thin dispatchers into EmitPackedSubAdd. Flag order: hi_is_sum, is_signed,
// is_halving.
void EmitA64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/true, /*is_signed=*/false, /*is_halving=*/false);
}

void EmitA64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/true, /*is_signed=*/true, /*is_halving=*/false);
}

void EmitA64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/false, /*is_signed=*/false, /*is_halving=*/false);
}

void EmitA64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/false, /*is_signed=*/true, /*is_halving=*/false);
}

void EmitA64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/true, /*is_signed=*/false, /*is_halving=*/true);
}

void EmitA64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/true, /*is_signed=*/true, /*is_halving=*/true);
}

void EmitA64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/false, /*is_signed=*/false, /*is_halving=*/true);
}

void EmitA64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, /*hi_is_sum=*/false, /*is_signed=*/true, /*is_halving=*/true);
}
// Packed saturating arithmetic: lane-wise add/sub clamped to the lane's
// numeric range by the NEON saturating instructions.
void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UQADD(B, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}

void EmitA64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SQADD(B, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}

void EmitA64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UQSUB(B, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}

void EmitA64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SQSUB(B, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}

void EmitA64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UQADD(H, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}

void EmitA64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SQADD(H, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}

void EmitA64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.UQSUB(H, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}

void EmitA64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg lhs = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg rhs = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    code.fp_emitter.SQSUB(H, lhs, lhs, rhs);
    ctx.reg_alloc.DefineValue(inst, lhs);
}
// Sum of absolute differences of the eight byte lanes.
// NOTE(review): lanes are treated as unsigned here (UABD/UADDLV) despite the
// "S8" in the opcode name - confirm this matches the IR contract (ARM's USAD8
// is likewise unsigned).
void EmitA64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    // Per-lane absolute difference, then horizontally accumulate the lanes.
    code.fp_emitter.UABD(B, a, a, b);
    code.fp_emitter.UADDLV(B, a, a);
    ctx.reg_alloc.DefineValue(inst, a);
}
// Bitwise select driven by the GE mask (cf. ARM SEL): where a bit of `ge` is
// set the result bit comes from `b` (args[2]); where clear, from `a`
// (args[1]), since BSL computes ge = (ge & b) | (~ge & a).
// NOTE(review): operand order follows the fp emitter's BSL(dest, set, clear)
// convention - confirm against the IR's intended selection direction.
void EmitA64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[2]));
    code.fp_emitter.BSL(ge, b, a);
    ctx.reg_alloc.DefineValue(inst, ge);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <limits>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
// Selects between the saturating-add and saturating-sub forms in the shared
// emitter below.
enum class Op {
    Add,
    Sub,
};

// Emits a signed saturating add/sub of element width `size` bits. When a
// GetOverflowFromOp pseudo-op is attached, materialises whether saturation
// occurred by reading the FPSR QC flag afterwards.
template<Op op, size_t size>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
    ARM64Reg addend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
    if constexpr (op == Op::Add) {
        code.fp_emitter.SQADD(size, result, result, addend);
    }
    else {
        code.fp_emitter.SQSUB(size, result, result, addend);
    }
    if (overflow_inst) {
        ARM64Reg overflow = ctx.reg_alloc.ScratchGpr();
        // FPSR bit 27 is QC (cumulative saturation); extract it as 0/1.
        // NOTE(review): QC is sticky - this assumes it was clear before the
        // saturating instruction above. Confirm the surrounding code resets it.
        code.MRS(overflow, FIELD_FPSR);
        code.UBFX(overflow, overflow, 27, 1);
        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
    }
    ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
// Thin dispatchers into the shared saturating-arithmetic template above; the
// integer template argument is the element width in bits.
void EmitA64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}

void EmitA64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}

void EmitA64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}

void EmitA64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}

void EmitA64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}

void EmitA64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}

void EmitA64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}

void EmitA64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
// Saturates a signed 32-bit value into an N-bit signed range, N in [1, 32];
// optionally materialises whether saturation occurred (for the Q flag).
void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N >= 1 && N <= 32);

    if (N == 32) {
        // Saturating to the full 32-bit range is the identity; overflow is
        // impossible.
        if (overflow_inst) {
            const auto no_overflow = IR::Value(false);
            overflow_inst->ReplaceUsesWith(no_overflow);
        }
        ctx.reg_alloc.DefineValue(inst, args[0]);
        return;
    }

    const u32 mask = (1u << N) - 1;
    const u32 positive_saturated_value = (1u << (N - 1)) - 1;
    const u32 negative_saturated_value = 1u << (N - 1);
    const u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value);

    const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg tmp = DecodeReg(ctx.reg_alloc.ScratchGpr());

    // Bias by 2^(N-1): the input is in range iff the biased value fits in N
    // bits, i.e. is unsigned <= mask.
    // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
    code.ADDI2R(overflow, reg_a, negative_saturated_value, overflow);

    // Put the appropriate saturated value in result:
    // result = (reg_a >s positive limit) ? positive limit : sign-extended negative limit.
    code.MOVI2R(tmp, positive_saturated_value);
    code.CMP(reg_a, tmp);
    code.MOVI2R(result, sext_negative_satured_value);
    code.CSEL(result, tmp, result, CC_GT);

    // Do the saturation: in-range inputs (biased value unsigned <= mask) pass
    // through unchanged.
    code.CMPI2R(overflow, mask, tmp);
    code.CSEL(result, reg_a, result, CC_LS);

    if (overflow_inst) {
        // HI from the comparison above: biased value exceeded mask, i.e. the
        // input was out of range.
        code.CSET(overflow, CC_HI);
        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Saturates a signed 32-bit value into the unsigned range [0, 2^N - 1] with
// N <= 31; optionally materialises whether clamping occurred.
void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);
    const u32 saturated_value = (1u << N) - 1;

    const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
    const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
    const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());

    // Pseudocode: result = clamp(reg_a, 0, saturated_value);
    code.MOVI2R(result, saturated_value);
    code.CMP(reg_a, result);
    // Signed <=: negative inputs (and in-range ones) tentatively select 0.
    code.CSEL(result, WZR, result, CC_LE);
    // Unsigned <=: non-negative in-range inputs select reg_a itself; negative
    // inputs are unsigned-huge, so they keep the previous choice (0).
    code.CSEL(result, reg_a, result, CC_LS);

    if (overflow_inst) {
        // HI (unsigned >) holds both for values above the maximum and for
        // negative values - exactly the cases where clamping happened.
        code.CSET(overflow, CC_HI);
        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,28 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
// AArch64 condition codes as encoded in the instruction's cond field.
enum CCFlags {
    CC_EQ = 0,      // Equal
    CC_NEQ,         // Not equal
    CC_CS,          // Carry Set
    CC_CC,          // Carry Clear
    CC_MI,          // Minus (Negative)
    CC_PL,          // Plus
    CC_VS,          // Overflow
    CC_VC,          // No Overflow
    CC_HI,          // Unsigned higher
    CC_LS,          // Unsigned lower or same
    CC_GE,          // Signed greater than or equal
    CC_LT,          // Signed less than
    CC_GT,          // Signed greater than
    CC_LE,          // Signed less than or equal
    CC_AL,          // Always (unconditional), encoding 14
    CC_HS = CC_CS,  // Alias of CC_CS: unsigned higher or same
    CC_LO = CC_CC,  // Alias of CC_CC: unsigned lower
};
// The AL (always) condition value pre-shifted into bits 31-28 of an
// instruction word.
const u32 NO_COND = 0xE0000000;
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,135 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions
// without having to prefix them with gen-> or something similar. Example
// implementation: class JIT : public CodeBlock<ARMXEmitter> {}
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions
// without having to prefix them with gen-> or something similar. Example
// implementation: class JIT : public CodeBlock<ARMXEmitter> {}
template <class T>
class CodeBlock : public T {
private:
    // A privately used function to set the executable RAM space to something
    // invalid. For debugging usefulness it should be used to set the RAM to a
    // host specific breakpoint instruction
    virtual void PoisonMemory() = 0;

protected:
    // Start of the RWX region; nullptr when nothing is allocated.
    u8* region = nullptr;
    // Size of region we can use.
    size_t region_size = 0;
    // Original size of the region we allocated.
    size_t total_region_size = 0;

    bool m_is_child = false;
    std::vector<CodeBlock*> m_children;

public:
    CodeBlock() = default;
    virtual ~CodeBlock() {
        if (region)
            FreeCodeSpace();
    }

    CodeBlock(const CodeBlock&) = delete;
    CodeBlock& operator=(const CodeBlock&) = delete;
    CodeBlock(CodeBlock&&) = delete;
    CodeBlock& operator=(CodeBlock&&) = delete;

    // Call this before you generate any code.
    void AllocCodeSpace(size_t size) {
        region_size = size;
        total_region_size = size;
#if defined(_WIN32)
        void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
        void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (ptr == MAP_FAILED)
            ptr = nullptr;
#endif
        ASSERT_MSG(ptr != nullptr, "Failed to allocate executable memory");
        region = static_cast<u8*>(ptr);
        T::SetCodePtr(region);
    }

    // Always clear code space with breakpoints, so that if someone accidentally
    // executes uninitialized, it just breaks into the debugger.
    void ClearCodeSpace() {
        PoisonMemory();
        ResetCodePtr();
    }

    // Call this when shutting down. Don't rely on the destructor, even though
    // it'll do the job.
    void FreeCodeSpace() {
        ASSERT(!m_is_child);
#if defined(_WIN32)
        // BUGFIX: the region is allocated with VirtualAlloc on Windows, so it
        // must be released with VirtualFree (which returns non-zero on
        // success), not munmap.
        ASSERT(VirtualFree(region, 0, MEM_RELEASE) != 0);
#else
        ASSERT(munmap(region, total_region_size) == 0);
#endif
        region = nullptr;
        region_size = 0;
        total_region_size = 0;
        // Children share this allocation; mark them as freed as well so their
        // destructors do not attempt a second release.
        for (CodeBlock* child : m_children) {
            child->region = nullptr;
            child->region_size = 0;
            child->total_region_size = 0;
        }
    }

    // Whether ptr points inside the currently usable part of the region.
    bool IsInSpace(const u8* ptr) const {
        return ptr >= region && ptr < (region + region_size);
    }

    // Cannot currently be undone. Will write protect the entire code region.
    // Start over if you need to change the code (call FreeCodeSpace(),
    // AllocCodeSpace()).
    void WriteProtect() {
#if defined(_WIN32)
        DWORD old_protect = 0;
        // VirtualProtect returns non-zero on success.
        ASSERT(VirtualProtect(region, region_size, PAGE_EXECUTE_READ, &old_protect) != 0);
#else
        // BUGFIX: mprotect returns 0 on success and -1 on failure; the
        // previous assertion (`!= 0`) was inverted and fired on success.
        ASSERT(mprotect(region, region_size, PROT_READ | PROT_EXEC) == 0);
#endif
    }

    // Rewind the emitter to the start of the region.
    void ResetCodePtr() {
        T::SetCodePtr(region);
    }

    // Bytes still available between the emitter's current position and the end
    // of the usable region.
    size_t GetSpaceLeft() const {
        ASSERT(static_cast<size_t>(T::GetCodePtr() - region) < region_size);
        return region_size - (T::GetCodePtr() - region);
    }

    bool IsAlmostFull() const {
        // This should be bigger than the biggest block ever.
        return GetSpaceLeft() < 0x10000;
    }

    bool HasChildren() const {
        return region_size != total_region_size;
    }

    // Carves child_size bytes off the end of the usable region and returns a
    // pointer to the carved-out space.
    u8* AllocChildCodeSpace(size_t child_size) {
        ASSERT_MSG(child_size < GetSpaceLeft(), "Insufficient space for child allocation.");
        u8* child_region = region + region_size - child_size;
        region_size -= child_size;
        return child_region;
    }

    // Hands a carved-out sub-region to `child`, which from then on emits into
    // it. The parent remains responsible for freeing the underlying memory.
    void AddChildCodeSpace(CodeBlock* child, size_t child_size) {
        u8* child_region = AllocChildCodeSpace(child_size);
        child->m_is_child = true;
        child->region = child_region;
        child->region_size = child_size;
        child->total_region_size = child_size;
        child->ResetCodePtr();
        m_children.emplace_back(child);
    }
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <functional>
#include "backend/A64/a32_jitstate.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
// Host register snapshot used by the exception-handling machinery.
// NOTE(review): field meaning inferred from names - X presumably the general
// purpose registers and Q the first 16 128-bit vector registers; confirm
// against the code that fills this in.
struct A64State {
    std::array<u64, 32> X;
    std::array<std::array<u64, 2>, 16> Q;
};
// The two arrays must sit back to back with no padding.
static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));
// Installs (via Register) and removes (on destruction) a platform-specific
// fault handler covering a BlockOfCode's JIT region.
class ExceptionHandler final {
public:
    ExceptionHandler();
    ~ExceptionHandler();

    // Start watching `code`'s region; `segv_callback` receives the faulting PC
    // when a fault occurs inside it. Platform implementations may ignore a
    // null callback and stay unregistered.
    void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
    // Whether faults will actually be caught (and thus fastmem is usable).
    bool SupportsFastmem() const;

private:
    struct Impl;                 // platform-specific implementation (pimpl)
    std::unique_ptr<Impl> impl;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,25 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/exception_handler.h"
namespace Dynarmic::BackendA64 {
// Fallback implementation for platforms without fault-handler support:
// registration is a no-op and fastmem is reported as unavailable.
struct ExceptionHandler::Impl final {
};

ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;

void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
    // Do nothing
}

bool ExceptionHandler::SupportsFastmem() const {
    // Fastmem needs a working fault handler, which this stub never provides.
    return false;
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,159 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <mutex>
#include <vector>
#include <csignal>
#include <ucontext.h>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/exception_handler.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace {
// Associates a JIT code region with the callback to invoke when a fault
// occurs inside it.
struct CodeBlockInfo {
    BlockOfCode* block;
    std::function<void(CodePtr)> callback;
};

// Owns the process-wide POSIX signal handler and the registry of JIT regions.
class SigHandler {
public:
    SigHandler();
    ~SigHandler();

    void AddCodeBlock(CodeBlockInfo info);
    void RemoveCodeBlock(CodePtr PC);

private:
    // Returns an iterator to the entry whose [region, region + size) range
    // contains PC, or end() if no registered block contains it.
    // Caller must hold code_block_infos_mutex.
    auto FindCodeBlockInfo(CodePtr PC) {
        return std::find_if(code_block_infos.begin(), code_block_infos.end(),
                            [&](const CodeBlockInfo& x) { return x.block->GetRegion() <= PC && x.block->GetRegion() + x.block->GetRegionSize() > PC; });
    }

    std::vector<CodeBlockInfo> code_block_infos;  // guarded by code_block_infos_mutex
    std::mutex code_block_infos_mutex;

    // Previously-installed handlers, used to forward unrelated faults.
    struct sigaction old_sa_segv;
    struct sigaction old_sa_bus;

    static void SigAction(int sig, siginfo_t* info, void* raw_context);
};
// Process-wide instance; constructed during static initialisation so the
// signal handler is installed exactly once.
SigHandler sig_handler;

SigHandler::SigHandler() {
    // Method below from dolphin.
    // Run the handler on its own, generously sized stack: the fault may occur
    // while the guest/JIT stack is unusable.
    // NOTE(review): on newer glibc SIGSTKSZ is no longer a constant
    // expression; this constexpr may not compile there - confirm target libcs.
    constexpr std::size_t signal_stack_size =
        static_cast<std::size_t>(std::max(SIGSTKSZ, 2 * 1024 * 1024));

    stack_t signal_stack;
    // Intentionally never freed: the handler lives for the whole process.
    signal_stack.ss_sp = malloc(signal_stack_size);
    signal_stack.ss_size = signal_stack_size;
    signal_stack.ss_flags = 0;
    ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
               "dynarmic: POSIX SigHandler: init failure at sigaltstack");

    struct sigaction sa;
    sa.sa_handler = nullptr;
    sa.sa_sigaction = &SigHandler::SigAction;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
    sigemptyset(&sa.sa_mask);
    // NOTE(review): only SIGSEGV is registered although SigAction also handles
    // SIGBUS; old_sa_bus is left uninitialized. Confirm SIGBUS never needs
    // handling on the supported platforms (e.g. macOS raises SIGBUS for some
    // memory faults).
    sigaction(SIGSEGV, &sa, &old_sa_segv);
}

SigHandler::~SigHandler() {
    // No cleanup required.
}
// Registers a code region with the handler; a region must not be registered
// twice (looked up here by its start address).
void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
    code_block_infos.push_back(std::move(cb));
}

// Removes the registration of the region containing PC; it must exist.
void SigHandler::RemoveCodeBlock(CodePtr PC) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    const auto iter = FindCodeBlockInfo(PC);
    ASSERT(iter != code_block_infos.end());
    code_block_infos.erase(iter);
}
// Signal handler entry point. If the faulting PC lies inside a registered JIT
// region, that region's callback handles the fault; otherwise the signal is
// forwarded to the previously-installed handler as faithfully as possible.
// NOTE(review): locking a std::mutex and calling fmt::print here are not
// async-signal-safe; this presumably relies on faults only originating from
// JIT code under our control - confirm.
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
    ASSERT(sig == SIGSEGV || sig == SIGBUS);
    std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
    // uc_mcontext.pc follows the Linux/AArch64 mcontext layout; other POSIX
    // platforms lay this structure out differently.
    auto PC = reinterpret_cast<CodePtr>(((ucontext_t*)raw_context)->uc_mcontext.pc);
    const auto iter = sig_handler.FindCodeBlockInfo(PC);
    if (iter != sig_handler.code_block_infos.end()) {
        iter->callback(PC);
        return;
    }

    fmt::print(
        stderr,
        "dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
        PC);

    // Fault was not ours: chain to whatever handler was installed before.
    // NOTE(review): old_sa_bus is only initialized if a SIGBUS handler was
    // registered, which the constructor currently does not do - confirm the
    // SIGBUS path is unreachable.
    struct sigaction* retry_sa =
        sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
    if (retry_sa->sa_flags & SA_SIGINFO) {
        retry_sa->sa_sigaction(sig, info, raw_context);
        return;
    }
    if (retry_sa->sa_handler == SIG_DFL) {
        // Restore default disposition; the re-raised signal then performs the
        // default action (normally process termination).
        signal(sig, SIG_DFL);
        return;
    }
    if (retry_sa->sa_handler == SIG_IGN) {
        return;
    }
    retry_sa->sa_handler(sig);
}
} // anonymous namespace
// Ties a BlockOfCode's lifetime to its registration with the global handler:
// registered on construction, removed (keyed by the region start) on
// destruction.
struct ExceptionHandler::Impl final {
    Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
        code_begin = code.GetRegion();
        sig_handler.AddCodeBlock({&code, std::move(cb)});
    }

    ~Impl() {
        sig_handler.RemoveCodeBlock(code_begin);
    }

private:
    CodePtr code_begin;  // start of the registered region; used as removal key
};

ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;

// A null callback leaves the handler unregistered, so SupportsFastmem()
// reports false.
void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
    if (cb)
        impl = std::make_unique<Impl>(code, std::move(cb));
}

bool ExceptionHandler::SupportsFastmem() const {
    return static_cast<bool>(impl);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,21 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
// Maps a GPR HostLoc onto the corresponding emitter X-register.
// Relies on HostLoc::X0..X30 being laid out contiguously from 0.
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc) {
    ASSERT(HostLocIsGPR(loc));
    const int gpr_index = static_cast<int>(loc);
    return static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(Arm64Gen::X0) + gpr_index);
}
// Maps an FPR HostLoc onto the corresponding emitter register, encoded as
// a quad (128-bit) register.
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc) {
    ASSERT(HostLocIsFPR(loc));
    const int fpr_index = static_cast<int>(loc) - static_cast<int>(HostLoc::Q0);
    return EncodeRegToQuad(static_cast<Arm64Gen::ARM64Reg>(fpr_index));
}
} // namespace Dynarmic::BackendA64

176
src/backend/A64/hostloc.h Normal file
View File

@ -0,0 +1,176 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "backend/A64/emitter/a64_emitter.h"
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
// Every location the register allocator can place a value in: the AArch64
// general-purpose registers, the stack pointer, the 128-bit vector registers,
// and (from FirstSpill upwards) spill slots in the JIT state.
enum class HostLoc {
    // Ordering of the registers is intentional. See also: HostLocToA64.
    // 64bit GPR registers
    X0,
    X1,
    X2,
    X3,
    X4,
    X5,
    X6,
    X7,
    X8,
    X9,
    X10,
    X11,
    X12,
    X13,
    X14,
    X15,
    X16,
    X17,
    X18,
    X19,
    X20,
    X21,
    X22,
    X23,
    X24,
    X25,
    X26,
    X27,
    X28,
    X29,
    X30,
    SP, // 64bit stack pointer
    // Qword FPR registers
    Q0,
    Q1,
    Q2,
    Q3,
    Q4,
    Q5,
    Q6,
    Q7,
    Q8,
    Q9,
    Q10,
    Q11,
    Q12,
    Q13,
    Q14,
    Q15,
    Q16,
    Q17,
    Q18,
    Q19,
    Q20,
    Q21,
    Q22,
    Q23,
    Q24,
    Q25,
    Q26,
    Q27,
    Q28,
    Q29,
    Q30,
    Q31,
    // Values from here upwards are indices into spill storage rather than
    // real registers (see HostLocSpill / SpillToOpArg).
    FirstSpill,
};

// Number of locations backed by actual registers (everything before FirstSpill).
constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
// True for X0..X30. SP is deliberately excluded from the GPR range.
inline bool HostLocIsGPR(HostLoc reg) {
    return HostLoc::X0 <= reg && reg <= HostLoc::X30;
}
// True for the vector registers Q0..Q31.
inline bool HostLocIsFPR(HostLoc reg) {
    return HostLoc::Q0 <= reg && reg <= HostLoc::Q31;
}
// True when the location is any real register (GPR or FPR); false for SP
// handled separately and for spill slots.
inline bool HostLocIsRegister(HostLoc reg) {
    return HostLocIsFPR(reg) || HostLocIsGPR(reg);
}
// Converts a GPR number (0..30) to its HostLoc; X0..X30 map directly onto
// the first 31 enumerators.
inline HostLoc HostLocRegIdx(int idx) {
    ASSERT(0 <= idx && idx <= 30);
    return static_cast<HostLoc>(idx);
}
// Converts an FPR number (0..31) to its HostLoc, offset from Q0.
inline HostLoc HostLocFprIdx(int idx) {
    ASSERT(0 <= idx && idx <= 31);
    return static_cast<HostLoc>(static_cast<size_t>(HostLoc::Q0) + static_cast<size_t>(idx));
}
// Converts a spill-slot index to its HostLoc, offset from FirstSpill.
inline HostLoc HostLocSpill(size_t i) {
    const size_t underlying = static_cast<size_t>(HostLoc::FirstSpill) + i;
    return static_cast<HostLoc>(underlying);
}
// True when the location is a spill slot rather than a register.
inline bool HostLocIsSpill(HostLoc reg) {
    return !(reg < HostLoc::FirstSpill);
}
// Width in bits of a value held at the given location: GPRs are 64-bit,
// while FPRs and spill slots are both 128-bit.
inline size_t HostLocBitWidth(HostLoc loc) {
    if (HostLocIsGPR(loc))
        return 64;
    if (HostLocIsFPR(loc) || HostLocIsSpill(loc))
        return 128;
    UNREACHABLE();
}
using HostLocList = std::initializer_list<HostLoc>;

// Registers deliberately excluded from allocation:
// X18 may be reserved.(Windows and iOS)
// X26 holds the cycle counter
// X27 contains an emulated memory relate pointer
// X28 used for holding the JitState.
// X30 is the link register.
// In order of desirability based first on ABI
constexpr HostLocList any_gpr = {
    HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
    HostLoc::X24, HostLoc::X25,

    HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12,
    HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,

    HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3,
    HostLoc::X2, HostLoc::X1, HostLoc::X0,
};
// FPR allocation order. Q8-Q15 come first — presumably preferred because
// their lower halves are callee-saved under AAPCS64; TODO confirm intent.
constexpr HostLocList any_fpr = {
    HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
    HostLoc::Q14, HostLoc::Q15,

    HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
    HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
    HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,

    HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2,
    HostLoc::Q1, HostLoc::Q0,
};
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc);
// Translates a spill HostLoc into a byte offset within JitStateType's spill
// storage, bounds-checked against JitStateType::SpillCount.
template <typename JitStateType>
size_t SpillToOpArg(HostLoc loc) {
    ASSERT(HostLocIsSpill(loc));

    const size_t spill_index = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
    ASSERT_MSG(spill_index < JitStateType::SpillCount,
               "Spill index greater than number of available spill locations");

    return JitStateType::GetSpillLocationOffsetFromIndex(spill_index);
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,44 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
namespace Dynarmic::BackendA64 {
// Type-erased bundle of byte offsets into a concrete JIT state struct.
// Captured once at construction from a JitStateType instance so that code
// holding only a JitStateInfo can address jit-state fields without being
// templated on the state type.
struct JitStateInfo {
    template <typename JitStateType>
    JitStateInfo(const JitStateType&)
        : offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
        , offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
        , offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR))
        , offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr))
        , offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr))
        , offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
        , rsb_ptr_mask(JitStateType::RSBPtrMask)
        , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
        , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
        , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
        , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
    {}

    // Byte offsets of the corresponding JitStateType members.
    const size_t offsetof_cycles_remaining;
    const size_t offsetof_cycles_to_run;
    const size_t offsetof_save_host_FPCR;
    const size_t offsetof_guest_fpcr;
    const size_t offsetof_guest_fpsr;
    const size_t offsetof_rsb_ptr;
    // Mask applied to rsb_ptr (value of JitStateType::RSBPtrMask), not an offset.
    const size_t rsb_ptr_mask;
    const size_t offsetof_rsb_location_descriptors;
    const size_t offsetof_rsb_codeptrs;
    const size_t offsetof_cpsr_nzcv;
    const size_t offsetof_fpsr_exc;
    const size_t offsetof_fpsr_qc;
};
} // namespace Dynarmic::BackendA64

651
src/backend/A64/opcodes.inc Normal file
View File

@ -0,0 +1,651 @@
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
OPCODE(Void, Void, )
OPCODE(Identity, Opaque, Opaque )
OPCODE(Breakpoint, Void, )
// A32 Context getters/setters
A32OPC(SetCheckBit, Void, U1 )
A32OPC(GetRegister, U32, A32Reg )
A32OPC(GetExtendedRegister32, U32, A32ExtReg )
A32OPC(GetExtendedRegister64, U64, A32ExtReg )
A32OPC(SetRegister, Void, A32Reg, U32 )
A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 )
A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 )
A32OPC(GetCpsr, U32, )
A32OPC(SetCpsr, Void, U32 )
A32OPC(SetCpsrNZCVRaw, Void, U32 )
A32OPC(SetCpsrNZCV, Void, NZCV )
A32OPC(SetCpsrNZCVQ, Void, U32 )
A32OPC(GetNFlag, U1, )
A32OPC(SetNFlag, Void, U1 )
A32OPC(GetZFlag, U1, )
A32OPC(SetZFlag, Void, U1 )
A32OPC(GetCFlag, U1, )
A32OPC(SetCFlag, Void, U1 )
A32OPC(GetVFlag, U1, )
A32OPC(SetVFlag, Void, U1 )
A32OPC(OrQFlag, Void, U1 )
A32OPC(GetGEFlags, U32, )
A32OPC(SetGEFlags, Void, U32 )
A32OPC(SetGEFlagsCompressed, Void, U32 )
A32OPC(BXWritePC, Void, U32 )
A32OPC(CallSupervisor, Void, U32 )
A32OPC(ExceptionRaised, Void, U32, U64 )
A32OPC(GetFpscr, U32, )
// Fix: drop the stray trailing comma, which passed an extra empty macro
// argument and was inconsistent with every other single-argument opcode
// in this table (e.g. SetCpsr above).
A32OPC(SetFpscr, Void, U32 )
A32OPC(GetFpscrNZCV, U32, )
A32OPC(SetFpscrNZCV, Void, NZCV )
// A64 Context getters/setters
//A64OPC(SetCheckBit, Void, U1 )
//A64OPC(GetCFlag, U1, )
//A64OPC(GetNZCVRaw, U32, )
//A64OPC(SetNZCVRaw, Void, U32 )
//A64OPC(SetNZCV, Void, NZCV )
//A64OPC(GetW, U32, A64Reg )
//A64OPC(GetX, U64, A64Reg )
//A64OPC(GetS, U128, A64Vec )
//A64OPC(GetD, U128, A64Vec )
//A64OPC(GetQ, U128, A64Vec )
//A64OPC(GetSP, U64, )
//A64OPC(GetFPCR, U32, )
//A64OPC(GetFPSR, U32, )
//A64OPC(SetW, Void, A64Reg, U32 )
//A64OPC(SetX, Void, A64Reg, U64 )
//A64OPC(SetS, Void, A64Vec, U128 )
//A64OPC(SetD, Void, A64Vec, U128 )
//A64OPC(SetQ, Void, A64Vec, U128 )
//A64OPC(SetSP, Void, U64 )
//A64OPC(SetFPCR, Void, U32 )
//A64OPC(SetFPSR, Void, U32 )
//A64OPC(OrQC, Void, U1 )
//A64OPC(SetPC, Void, U64 )
//A64OPC(CallSupervisor, Void, U32 )
//A64OPC(ExceptionRaised, Void, U64, U64 )
//A64OPC(DataCacheOperationRaised, Void, U64, U64 )
//A64OPC(DataSynchronizationBarrier, Void, )
//A64OPC(DataMemoryBarrier, Void, )
//A64OPC(InstructionSynchronizationBarrier, Void, )
//A64OPC(GetCNTFRQ, U32, )
//A64OPC(GetCNTPCT, U64, )
//A64OPC(GetCTR, U32, )
//A64OPC(GetDCZID, U32, )
//A64OPC(GetTPIDR, U64, )
//A64OPC(GetTPIDRRO, U64, )
//A64OPC(SetTPIDR, Void, U64 )
// Hints
OPCODE(PushRSB, Void, U64 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, U1, Opaque )
OPCODE(GetOverflowFromOp, U1, Opaque )
OPCODE(GetGEFromOp, U32, Opaque )
OPCODE(GetNZCVFromOp, NZCV, Opaque )
OPCODE(GetUpperFromOp, U128, Opaque )
OPCODE(GetLowerFromOp, U128, Opaque )
OPCODE(NZCVFromPackedFlags, NZCV, U32 )
// Calculations
OPCODE(Pack2x32To1x64, U64, U32, U32 )
//OPCODE(Pack2x64To1x128, U128, U64, U64 )
OPCODE(LeastSignificantWord, U32, U64 )
OPCODE(MostSignificantWord, U32, U64 )
OPCODE(LeastSignificantHalf, U16, U32 )
OPCODE(LeastSignificantByte, U8, U32 )
OPCODE(MostSignificantBit, U1, U32 )
OPCODE(IsZero32, U1, U32 )
OPCODE(IsZero64, U1, U64 )
OPCODE(TestBit, U1, U64, U8 )
OPCODE(ConditionalSelect32, U32, Cond, U32, U32 )
OPCODE(ConditionalSelect64, U64, Cond, U64, U64 )
OPCODE(ConditionalSelectNZCV, NZCV, Cond, NZCV, NZCV )
OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 )
OPCODE(LogicalShiftLeft64, U64, U64, U8 )
OPCODE(LogicalShiftRight32, U32, U32, U8, U1 )
OPCODE(LogicalShiftRight64, U64, U64, U8 )
OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 )
//OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
OPCODE(RotateRight32, U32, U32, U8, U1 )
OPCODE(RotateRight64, U64, U64, U8 )
OPCODE(RotateRightExtended, U32, U32, U1 )
OPCODE(Add32, U32, U32, U32, U1 )
OPCODE(Add64, U64, U64, U64, U1 )
OPCODE(Sub32, U32, U32, U32, U1 )
OPCODE(Sub64, U64, U64, U64, U1 )
OPCODE(Mul32, U32, U32, U32 )
OPCODE(Mul64, U64, U64, U64 )
//OPCODE(SignedMultiplyHigh64, U64, U64, U64 )
//OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 )
OPCODE(UnsignedDiv32, U32, U32, U32 )
OPCODE(UnsignedDiv64, U64, U64, U64 )
OPCODE(SignedDiv32, U32, U32, U32 )
OPCODE(SignedDiv64, U64, U64, U64 )
OPCODE(And32, U32, U32, U32 )
OPCODE(And64, U64, U64, U64 )
OPCODE(Eor32, U32, U32, U32 )
OPCODE(Eor64, U64, U64, U64 )
OPCODE(Or32, U32, U32, U32 )
OPCODE(Or64, U64, U64, U64 )
OPCODE(Not32, U32, U32 )
OPCODE(Not64, U64, U64 )
OPCODE(SignExtendByteToWord, U32, U8 )
OPCODE(SignExtendHalfToWord, U32, U16 )
OPCODE(SignExtendByteToLong, U64, U8 )
OPCODE(SignExtendHalfToLong, U64, U16 )
OPCODE(SignExtendWordToLong, U64, U32 )
OPCODE(ZeroExtendByteToWord, U32, U8 )
OPCODE(ZeroExtendHalfToWord, U32, U16 )
OPCODE(ZeroExtendByteToLong, U64, U8 )
OPCODE(ZeroExtendHalfToLong, U64, U16 )
OPCODE(ZeroExtendWordToLong, U64, U32 )
//OPCODE(ZeroExtendLongToQuad, U128, U64 )
//OPCODE(ByteReverseDual, U64, U64 )
OPCODE(ByteReverseWord, U32, U32 )
OPCODE(ByteReverseHalf, U16, U16 )
OPCODE(CountLeadingZeros32, U32, U32 )
OPCODE(CountLeadingZeros64, U64, U64 )
//OPCODE(ExtractRegister32, U32, U32, U32, U8 )
//OPCODE(ExtractRegister64, U64, U64, U64, U8 )
//OPCODE(MaxSigned32, U32, U32, U32 )
//OPCODE(MaxSigned64, U64, U64, U64 )
//OPCODE(MaxUnsigned32, U32, U32, U32 )
//OPCODE(MaxUnsigned64, U64, U64, U64 )
//OPCODE(MinSigned32, U32, U32, U32 )
//OPCODE(MinSigned64, U64, U64, U64 )
//OPCODE(MinUnsigned32, U32, U32, U32 )
//OPCODE(MinUnsigned64, U64, U64, U64 )
// Saturated instructions
OPCODE(SignedSaturatedAdd8, U8, U8, U8 )
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 )
OPCODE(SignedSaturatedSub8, U8, U8, U8 )
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
OPCODE(SignedSaturatedSub64, U64, U64, U64 )
OPCODE(SignedSaturation, U32, U32, U8 )
//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(UnsignedSaturatedSub8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
OPCODE(UnsignedSaturation, U32, U32, U8 )
// Packed instructions
OPCODE(PackedAddU8, U32, U32, U32 )
OPCODE(PackedAddS8, U32, U32, U32 )
OPCODE(PackedSubU8, U32, U32, U32 )
OPCODE(PackedSubS8, U32, U32, U32 )
OPCODE(PackedAddU16, U32, U32, U32 )
OPCODE(PackedAddS16, U32, U32, U32 )
OPCODE(PackedSubU16, U32, U32, U32 )
OPCODE(PackedSubS16, U32, U32, U32 )
OPCODE(PackedAddSubU16, U32, U32, U32 )
OPCODE(PackedAddSubS16, U32, U32, U32 )
OPCODE(PackedSubAddU16, U32, U32, U32 )
OPCODE(PackedSubAddS16, U32, U32, U32 )
OPCODE(PackedHalvingAddU8, U32, U32, U32 )
OPCODE(PackedHalvingAddS8, U32, U32, U32 )
OPCODE(PackedHalvingSubU8, U32, U32, U32 )
OPCODE(PackedHalvingSubS8, U32, U32, U32 )
OPCODE(PackedHalvingAddU16, U32, U32, U32 )
OPCODE(PackedHalvingAddS16, U32, U32, U32 )
OPCODE(PackedHalvingSubU16, U32, U32, U32 )
OPCODE(PackedHalvingSubS16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubU16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
OPCODE(PackedSaturatedSubS8, U32, U32, U32 )
OPCODE(PackedSaturatedAddU16, U32, U32, U32 )
OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
OPCODE(PackedAbsDiffSumS8, U32, U32, U32 )
OPCODE(PackedSelect, U32, U32, U32, U32 )
// CRC instructions
//OPCODE(CRC32Castagnoli8, U32, U32, U32 )
//OPCODE(CRC32Castagnoli16, U32, U32, U32 )
//OPCODE(CRC32Castagnoli32, U32, U32, U32 )
//OPCODE(CRC32Castagnoli64, U32, U32, U64 )
//OPCODE(CRC32ISO8, U32, U32, U32 )
//OPCODE(CRC32ISO16, U32, U32, U32 )
//OPCODE(CRC32ISO32, U32, U32, U32 )
//OPCODE(CRC32ISO64, U32, U32, U64 )
// AES instructions
//OPCODE(AESDecryptSingleRound, U128, U128 )
//OPCODE(AESEncryptSingleRound, U128, U128 )
//OPCODE(AESInverseMixColumns, U128, U128 )
//OPCODE(AESMixColumns, U128, U128 )
// SM4 instructions
//OPCODE(SM4AccessSubstitutionBox, U8, U8 )
// Vector instructions
//OPCODE(VectorGetElement8, U8, U128, U8 )
//OPCODE(VectorGetElement16, U16, U128, U8 )
//OPCODE(VectorGetElement32, U32, U128, U8 )
//OPCODE(VectorGetElement64, U64, U128, U8 )
//OPCODE(VectorSetElement8, U128, U128, U8, U8 )
//OPCODE(VectorSetElement16, U128, U128, U8, U16 )
//OPCODE(VectorSetElement32, U128, U128, U8, U32 )
//OPCODE(VectorSetElement64, U128, U128, U8, U64 )
//OPCODE(VectorAbs8, U128, U128 )
//OPCODE(VectorAbs16, U128, U128 )
//OPCODE(VectorAbs32, U128, U128 )
//OPCODE(VectorAbs64, U128, U128 )
//OPCODE(VectorAdd8, U128, U128, U128 )
//OPCODE(VectorAdd16, U128, U128, U128 )
//OPCODE(VectorAdd32, U128, U128, U128 )
//OPCODE(VectorAdd64, U128, U128, U128 )
//OPCODE(VectorAnd, U128, U128, U128 )
//OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 )
//OPCODE(VectorArithmeticVShift8, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift16, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift32, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift64, U128, U128, U128 )
//OPCODE(VectorBroadcastLower8, U128, U8 )
//OPCODE(VectorBroadcastLower16, U128, U16 )
//OPCODE(VectorBroadcastLower32, U128, U32 )
//OPCODE(VectorBroadcast8, U128, U8 )
//OPCODE(VectorBroadcast16, U128, U16 )
//OPCODE(VectorBroadcast32, U128, U32 )
//OPCODE(VectorBroadcast64, U128, U64 )
//OPCODE(VectorCountLeadingZeros8, U128, U128 )
//OPCODE(VectorCountLeadingZeros16, U128, U128 )
//OPCODE(VectorCountLeadingZeros32, U128, U128 )
//OPCODE(VectorDeinterleaveEven8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven64, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 )
//OPCODE(VectorEor, U128, U128, U128 )
//OPCODE(VectorEqual8, U128, U128, U128 )
//OPCODE(VectorEqual16, U128, U128, U128 )
//OPCODE(VectorEqual32, U128, U128, U128 )
//OPCODE(VectorEqual64, U128, U128, U128 )
//OPCODE(VectorEqual128, U128, U128, U128 )
//OPCODE(VectorExtract, U128, U128, U128, U8 )
//OPCODE(VectorExtractLower, U128, U128, U128, U8 )
//OPCODE(VectorGreaterS8, U128, U128, U128 )
//OPCODE(VectorGreaterS16, U128, U128, U128 )
//OPCODE(VectorGreaterS32, U128, U128, U128 )
//OPCODE(VectorGreaterS64, U128, U128, U128 )
//OPCODE(VectorHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorHalvingSubS8, U128, U128, U128 )
//OPCODE(VectorHalvingSubS16, U128, U128, U128 )
//OPCODE(VectorHalvingSubS32, U128, U128, U128 )
//OPCODE(VectorHalvingSubU8, U128, U128, U128 )
//OPCODE(VectorHalvingSubU16, U128, U128, U128 )
//OPCODE(VectorHalvingSubU32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower8, U128, U128, U128 )
//OPCODE(VectorInterleaveLower16, U128, U128, U128 )
//OPCODE(VectorInterleaveLower32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower64, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper8, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper16, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper32, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper64, U128, U128, U128 )
//OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight64, U128, U128, U8 )
//OPCODE(VectorLogicalVShift8, U128, U128, U128 )
//OPCODE(VectorLogicalVShift16, U128, U128, U128 )
//OPCODE(VectorLogicalVShift32, U128, U128, U128 )
//OPCODE(VectorLogicalVShift64, U128, U128, U128 )
//OPCODE(VectorMaxS8, U128, U128, U128 )
//OPCODE(VectorMaxS16, U128, U128, U128 )
//OPCODE(VectorMaxS32, U128, U128, U128 )
//OPCODE(VectorMaxS64, U128, U128, U128 )
//OPCODE(VectorMaxU8, U128, U128, U128 )
//OPCODE(VectorMaxU16, U128, U128, U128 )
//OPCODE(VectorMaxU32, U128, U128, U128 )
//OPCODE(VectorMaxU64, U128, U128, U128 )
//OPCODE(VectorMinS8, U128, U128, U128 )
//OPCODE(VectorMinS16, U128, U128, U128 )
//OPCODE(VectorMinS32, U128, U128, U128 )
//OPCODE(VectorMinS64, U128, U128, U128 )
//OPCODE(VectorMinU8, U128, U128, U128 )
//OPCODE(VectorMinU16, U128, U128, U128 )
//OPCODE(VectorMinU32, U128, U128, U128 )
//OPCODE(VectorMinU64, U128, U128, U128 )
//OPCODE(VectorMultiply8, U128, U128, U128 )
//OPCODE(VectorMultiply16, U128, U128, U128 )
//OPCODE(VectorMultiply32, U128, U128, U128 )
//OPCODE(VectorMultiply64, U128, U128, U128 )
//OPCODE(VectorNarrow16, U128, U128 )
//OPCODE(VectorNarrow32, U128, U128 )
//OPCODE(VectorNarrow64, U128, U128 )
//OPCODE(VectorNot, U128, U128 )
//OPCODE(VectorOr, U128, U128, U128 )
//OPCODE(VectorPairedAddLower8, U128, U128, U128 )
//OPCODE(VectorPairedAddLower16, U128, U128, U128 )
//OPCODE(VectorPairedAddLower32, U128, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden32, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 )
//OPCODE(VectorPairedAdd8, U128, U128, U128 )
//OPCODE(VectorPairedAdd16, U128, U128, U128 )
//OPCODE(VectorPairedAdd32, U128, U128, U128 )
//OPCODE(VectorPairedAdd64, U128, U128, U128 )
//OPCODE(VectorPairedMaxS8, U128, U128, U128 )
//OPCODE(VectorPairedMaxS16, U128, U128, U128 )
//OPCODE(VectorPairedMaxS32, U128, U128, U128 )
//OPCODE(VectorPairedMaxU8, U128, U128, U128 )
//OPCODE(VectorPairedMaxU16, U128, U128, U128 )
//OPCODE(VectorPairedMaxU32, U128, U128, U128 )
//OPCODE(VectorPairedMinS8, U128, U128, U128 )
//OPCODE(VectorPairedMinS16, U128, U128, U128 )
//OPCODE(VectorPairedMinS32, U128, U128, U128 )
//OPCODE(VectorPairedMinU8, U128, U128, U128 )
//OPCODE(VectorPairedMinU16, U128, U128, U128 )
//OPCODE(VectorPairedMinU32, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiply8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )
//OPCODE(VectorPopulationCount, U128, U128 )
//OPCODE(VectorReverseBits, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
//OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleWords, U128, U128, U8 )
//OPCODE(VectorSignExtend8, U128, U128 )
//OPCODE(VectorSignExtend16, U128, U128 )
//OPCODE(VectorSignExtend32, U128, U128 )
//OPCODE(VectorSignExtend64, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorSignedMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs8, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs16, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs32, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs64, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg8, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned64, U128, U128, U128 )
//OPCODE(VectorSub8, U128, U128, U128 )
//OPCODE(VectorSub16, U128, U128, U128 )
//OPCODE(VectorSub32, U128, U128, U128 )
//OPCODE(VectorSub64, U128, U128, U128 )
//OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
//OPCODE(VectorTableLookup, U128, U128, Table, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorUnsignedMultiply16, Void, U128, U128 )
//OPCODE(VectorUnsignedMultiply32, Void, U128, U128 )
//OPCODE(VectorUnsignedRecipEstimate, U128, U128 )
//OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorZeroExtend8, U128, U128 )
//OPCODE(VectorZeroExtend16, U128, U128 )
//OPCODE(VectorZeroExtend32, U128, U128 )
//OPCODE(VectorZeroExtend64, U128, U128 )
//OPCODE(VectorZeroUpper, U128, U128 )
//OPCODE(ZeroVector, U128, )
// Floating-point operations
//OPCODE(FPAbs16, U16, U16 )
OPCODE(FPAbs32, U32, U32 )
OPCODE(FPAbs64, U64, U64 )
OPCODE(FPAdd32, U32, U32, U32 )
OPCODE(FPAdd64, U64, U64, U64 )
OPCODE(FPCompare32, NZCV, U32, U32, U1 )
OPCODE(FPCompare64, NZCV, U64, U64, U1 )
OPCODE(FPDiv32, U32, U32, U32 )
OPCODE(FPDiv64, U64, U64, U64 )
//OPCODE(FPMax32, U32, U32, U32 )
//OPCODE(FPMax64, U64, U64, U64 )
//OPCODE(FPMaxNumeric32, U32, U32, U32 )
//OPCODE(FPMaxNumeric64, U64, U64, U64 )
//OPCODE(FPMin32, U32, U32, U32 )
//OPCODE(FPMin64, U64, U64, U64 )
//OPCODE(FPMinNumeric32, U32, U32, U32 )
//OPCODE(FPMinNumeric64, U64, U64, U64 )
OPCODE(FPMul32, U32, U32, U32 )
OPCODE(FPMul64, U64, U64, U64 )
//OPCODE(FPMulAdd16, U16, U16, U16, U16 )
//OPCODE(FPMulAdd32, U32, U32, U32, U32 )
//OPCODE(FPMulAdd64, U64, U64, U64, U64 )
//OPCODE(FPMulX32, U32, U32, U32 )
//OPCODE(FPMulX64, U64, U64, U64 )
//OPCODE(FPNeg16, U16, U16 )
OPCODE(FPNeg32, U32, U32 )
OPCODE(FPNeg64, U64, U64 )
//OPCODE(FPRecipEstimate16, U16, U16 )
//OPCODE(FPRecipEstimate32, U32, U32 )
//OPCODE(FPRecipEstimate64, U64, U64 )
//OPCODE(FPRecipExponent16, U16, U16 )
//OPCODE(FPRecipExponent32, U32, U32 )
//OPCODE(FPRecipExponent64, U64, U64 )
//OPCODE(FPRecipStepFused16, U16, U16, U16 )
//OPCODE(FPRecipStepFused32, U32, U32, U32 )
//OPCODE(FPRecipStepFused64, U64, U64, U64 )
//OPCODE(FPRoundInt16, U16, U16, U8, U1 )
//OPCODE(FPRoundInt32, U32, U32, U8, U1 )
//OPCODE(FPRoundInt64, U64, U64, U8, U1 )
//OPCODE(FPRSqrtEstimate16, U16, U16 )
//OPCODE(FPRSqrtEstimate32, U32, U32 )
//OPCODE(FPRSqrtEstimate64, U64, U64 )
//OPCODE(FPRSqrtStepFused16, U16, U16, U16 )
//OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
//OPCODE(FPRSqrtStepFused64, U64, U64, U64 )
OPCODE(FPSqrt32, U32, U32 )
OPCODE(FPSqrt64, U64, U64 )
OPCODE(FPSub32, U32, U32, U32 )
OPCODE(FPSub64, U64, U64, U64 )
// Floating-point conversions
OPCODE(FPHalfToDouble, U64, U16, U8 )
OPCODE(FPHalfToSingle, U32, U16, U8 )
OPCODE(FPSingleToDouble, U64, U32, U8 )
OPCODE(FPSingleToHalf, U16, U32, U8 )
OPCODE(FPDoubleToHalf, U16, U64, U8 )
OPCODE(FPDoubleToSingle, U32, U64, U8 )
OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
//OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
//OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 )
OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 )
OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 )
// Floating-point vector instructions
//OPCODE(FPVectorAbs16, U128, U128 )
//OPCODE(FPVectorAbs32, U128, U128 )
//OPCODE(FPVectorAbs64, U128, U128 )
//OPCODE(FPVectorAdd32, U128, U128, U128 )
//OPCODE(FPVectorAdd64, U128, U128, U128 )
//OPCODE(FPVectorDiv32, U128, U128, U128 )
//OPCODE(FPVectorDiv64, U128, U128, U128 )
//OPCODE(FPVectorEqual32, U128, U128, U128 )
//OPCODE(FPVectorEqual64, U128, U128, U128 )
//OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorGreater32, U128, U128, U128 )
//OPCODE(FPVectorGreater64, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual32, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual64, U128, U128, U128 )
//OPCODE(FPVectorMax32, U128, U128, U128 )
//OPCODE(FPVectorMax64, U128, U128, U128 )
//OPCODE(FPVectorMin32, U128, U128, U128 )
//OPCODE(FPVectorMin64, U128, U128, U128 )
//OPCODE(FPVectorMul32, U128, U128, U128 )
//OPCODE(FPVectorMul64, U128, U128, U128 )
//OPCODE(FPVectorMulAdd16, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 )
//OPCODE(FPVectorMulX32, U128, U128, U128 )
//OPCODE(FPVectorMulX64, U128, U128, U128 )
//OPCODE(FPVectorNeg16, U128, U128 )
//OPCODE(FPVectorNeg32, U128, U128 )
//OPCODE(FPVectorNeg64, U128, U128 )
//OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
//OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
//OPCODE(FPVectorRecipEstimate16, U128, U128 )
//OPCODE(FPVectorRecipEstimate32, U128, U128 )
//OPCODE(FPVectorRecipEstimate64, U128, U128 )
//OPCODE(FPVectorRecipStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused64, U128, U128, U128 )
//OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
//OPCODE(FPVectorRSqrtEstimate16, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 )
//OPCODE(FPVectorSqrt32, U128, U128 )
//OPCODE(FPVectorSqrt64, U128, U128 )
//OPCODE(FPVectorSub32, U128, U128, U128 )
//OPCODE(FPVectorSub64, U128, U128, U128 )
//OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 )
// A32 Memory access
A32OPC(ClearExclusive, Void, )
A32OPC(SetExclusive, Void, U32, U8 )
A32OPC(ReadMemory8, U8, U32 )
A32OPC(ReadMemory16, U16, U32 )
A32OPC(ReadMemory32, U32, U32 )
A32OPC(ReadMemory64, U64, U32 )
A32OPC(WriteMemory8, Void, U32, U8 )
A32OPC(WriteMemory16, Void, U32, U16 )
A32OPC(WriteMemory32, Void, U32, U32 )
A32OPC(WriteMemory64, Void, U32, U64 )
A32OPC(ExclusiveWriteMemory8, U32, U32, U8 )
A32OPC(ExclusiveWriteMemory16, U32, U32, U16 )
A32OPC(ExclusiveWriteMemory32, U32, U32, U32 )
A32OPC(ExclusiveWriteMemory64, U32, U32, U32, U32 )
// A64 Memory access
//A64OPC(ClearExclusive, Void, )
//A64OPC(SetExclusive, Void, U64, U8 )
//A64OPC(ReadMemory8, U8, U64 )
//A64OPC(ReadMemory16, U16, U64 )
//A64OPC(ReadMemory32, U32, U64 )
//A64OPC(ReadMemory64, U64, U64 )
//A64OPC(ReadMemory128, U128, U64 )
//A64OPC(WriteMemory8, Void, U64, U8 )
//A64OPC(WriteMemory16, Void, U64, U16 )
//A64OPC(WriteMemory32, Void, U64, U32 )
//A64OPC(WriteMemory64, Void, U64, U64 )
//A64OPC(WriteMemory128, Void, U64, U128 )
//A64OPC(ExclusiveWriteMemory8, U32, U64, U8 )
//A64OPC(ExclusiveWriteMemory16, U32, U64, U16 )
//A64OPC(ExclusiveWriteMemory32, U32, U64, U32 )
//A64OPC(ExclusiveWriteMemory64, U32, U64, U64 )
//A64OPC(ExclusiveWriteMemory128, U32, U64, U128 )
// Coprocessor
A32OPC(CoprocInternalOperation, Void, CoprocInfo )
A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 )
A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 )
A32OPC(CoprocGetOneWord, U32, CoprocInfo )
A32OPC(CoprocGetTwoWords, U64, CoprocInfo )
A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 )
A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )

View File

@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstddef>
#include <string>
#ifdef __linux__
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace {
std::mutex mutex;
std::FILE* file = nullptr;
// Lazily opens the perf-<pid>.map file that `perf` reads to symbolize JIT code.
// Leaves `file` as nullptr (output disabled) when PERF_BUILDID_DIR is unset or
// the file cannot be created.
void OpenFile() {
    const char* const dir = std::getenv("PERF_BUILDID_DIR");
    if (dir == nullptr) {
        file = nullptr;
        return;
    }

    const std::string map_path = fmt::format("{:s}/perf-{:d}.map", dir, getpid());
    file = std::fopen(map_path.c_str(), "w");
    if (file == nullptr) {
        return;
    }

    // Unbuffered so each entry is visible to perf immediately, even if we crash.
    std::setvbuf(file, nullptr, _IONBF, 0);
}
} // anonymous namespace
namespace detail {
// Appends one "<start> <size> <name>" line to the perf map file, creating the
// file lazily on first use. Serialized by the file-scope mutex.
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) {
    std::lock_guard guard{mutex};

    if (!file) {
        OpenFile();
        if (!file) {
            return;  // Output disabled (no PERF_BUILDID_DIR or open failed).
        }
    }

    const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
    std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
}
} // namespace detail
// Discards all recorded entries: closing and reopening with mode "w"
// truncates the map file, leaving it empty and ready for new entries.
void PerfMapClear() {
    std::lock_guard guard{mutex};

    if (!file) {
        return;
    }

    std::fclose(file);
    file = nullptr;
    OpenFile();
}
} // namespace Dynarmic::BackendA64
#else
namespace Dynarmic::BackendA64 {
namespace detail {
void PerfMapRegister(const void*, const void*, const std::string&) {}
} // namespace detail
void PerfMapClear() {}
} // namespace Dynarmic::BackendA64
#endif

View File

@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
#include <string>
#include "common/cast_util.h"
namespace Dynarmic::BackendA64 {
namespace detail {
// Writes a "<start> <size> <name>" entry to the perf-<pid>.map file.
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name);
} // namespace detail

// Type-safe front end: accepts any pointer-like `start` (e.g. a function
// pointer) and bit-casts it to const void* before registering.
template<typename T>
void PerfMapRegister(T start, const void* end, const std::string& friendly_name) {
    detail::PerfMapRegister(Common::BitCast<const void*>(start), end, friendly_name);
}

// Truncates the perf map file, discarding all previously registered entries.
void PerfMapClear();
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,650 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <algorithm>
#include <numeric>
#include <utility>
#include <fmt/ostream.h>
#include "backend/A64/abi.h"
#include "backend/A64/reg_alloc.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
// Zero-extends an IR immediate of any supported integral width into a u64.
static u64 ImmediateToU64(const IR::Value& imm) {
    switch (imm.GetType()) {
    case IR::Type::U1:
        return u64(imm.GetU1());
    case IR::Type::U8:
        return u64(imm.GetU8());
    case IR::Type::U16:
        return u64(imm.GetU16());
    case IR::Type::U32:
        return u64(imm.GetU32());
    case IR::Type::U64:
        return u64(imm.GetU64());
    default:
        ASSERT_FALSE("This should never happen.");
    }
}

// Values can only be exchanged in-place between two GPRs; all other
// combinations go through Move/CopyToScratch instead.
static bool CanExchange(HostLoc a, HostLoc b) {
    return HostLocIsGPR(a) && HostLocIsGPR(b);
}

// Minimum number of bits required to represent a type
static size_t GetBitWidth(IR::Type type) {
    switch (type) {
    case IR::Type::A32Reg:
    case IR::Type::A32ExtReg:
    case IR::Type::A64Reg:
    case IR::Type::A64Vec:
    case IR::Type::CoprocInfo:
    case IR::Type::Cond:
    case IR::Type::Void:
    case IR::Type::Table:
        // Compile-time-only types: they never occupy a host register.
        ASSERT_FALSE("Type {} cannot be represented at runtime", type);
        return 0;
    case IR::Type::Opaque:
        ASSERT_FALSE("Not a concrete type");
        return 0;
    case IR::Type::U1:
        return 8;  // U1 is stored widened to a byte.
    case IR::Type::U8:
        return 8;
    case IR::Type::U16:
        return 16;
    case IR::Type::U32:
        return 32;
    case IR::Type::U64:
        return 64;
    case IR::Type::U128:
        return 128;
    case IR::Type::NZCVFlags:
        return 32; // TODO: Update to 16 when flags optimization is done
    }
    UNREACHABLE();
    return 0;
}

// Valueless types occupy an IR argument slot but have no runtime
// representation, so the allocator never tracks a location for them.
static bool IsValuelessType(IR::Type type) {
    switch (type) {
    case IR::Type::Table:
        return true;
    default:
        return false;
    }
}
// A location is locked while any in-flight Use*/Scratch* call holds it for
// the instruction currently being emitted.
bool HostLocInfo::IsLocked() const {
    return is_being_used_count > 0;
}

// Empty: not locked and holding no values, i.e. free for allocation.
bool HostLocInfo::IsEmpty() const {
    return is_being_used_count == 0 && values.empty();
}

// True when the one remaining argument reference would consume the final
// outstanding use of the values stored here; the location can then be
// recycled in place rather than preserved.
bool HostLocInfo::IsLastUse() const {
    return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
}

void HostLocInfo::ReadLock() {
    // A location already handed out as scratch must not also be read-locked.
    ASSERT(!is_scratch);
    is_being_used_count++;
}

void HostLocInfo::WriteLock() {
    // Scratch (write) access requires exclusive ownership.
    ASSERT(is_being_used_count == 0);
    is_being_used_count++;
    is_scratch = true;
}

// Called once per instruction argument that refers to a value stored here.
void HostLocInfo::AddArgReference() {
    current_references++;
    ASSERT(accumulated_uses + current_references <= total_uses);
}

// Drops one lock and consumes one pending argument reference, if any;
// frees the location when the last reference is consumed.
void HostLocInfo::ReleaseOne() {
    is_being_used_count--;
    is_scratch = false;

    if (current_references == 0)
        return;

    accumulated_uses++;
    current_references--;

    if (current_references == 0)
        ReleaseAll();
}

// Consumes every pending argument reference and, once all uses of the stored
// values are accounted for, clears the location entirely.
void HostLocInfo::ReleaseAll() {
    accumulated_uses += current_references;
    current_references = 0;

    // Invariant: total_uses always equals the sum of UseCount() over values.
    ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));

    if (total_uses == accumulated_uses) {
        values.clear();
        accumulated_uses = 0;
        total_uses = 0;
        max_bit_width = 0;
    }

    is_being_used_count = 0;
    is_scratch = false;
}

bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
    return std::find(values.begin(), values.end(), inst) != values.end();
}

// Width in bits of the widest value currently stored in this location.
size_t HostLocInfo::GetMaxBitWidth() const {
    return max_bit_width;
}

void HostLocInfo::AddValue(IR::Inst* inst) {
    values.push_back(inst);
    total_uses += inst->UseCount();
    max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
}
IR::Type Argument::GetType() const {
    return value.GetType();
}

bool Argument::IsImmediate() const {
    return value.IsImmediate();
}

bool Argument::IsVoid() const {
    return GetType() == IR::Type::Void;
}

// True if this is an immediate whose zero-extended value fits in 32 bits.
bool Argument::FitsInImmediateU32() const {
    if (!IsImmediate())
        return false;
    u64 imm = ImmediateToU64(value);
    return imm < 0x100000000;
}

// True if this is an immediate that fits in a signed 32-bit range.
bool Argument::FitsInImmediateS32() const {
    if (!IsImmediate())
        return false;
    s64 imm = static_cast<s64>(ImmediateToU64(value));
    return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
}

bool Argument::GetImmediateU1() const {
    return value.GetU1();
}

// The narrowing accessors below assert the value actually fits the
// requested width before truncating.
u8 Argument::GetImmediateU8() const {
    u64 imm = ImmediateToU64(value);
    ASSERT(imm < 0x100);
    return u8(imm);
}

u16 Argument::GetImmediateU16() const {
    u64 imm = ImmediateToU64(value);
    ASSERT(imm < 0x10000);
    return u16(imm);
}

u32 Argument::GetImmediateU32() const {
    u64 imm = ImmediateToU64(value);
    ASSERT(imm < 0x100000000);
    return u32(imm);
}

// Returns the raw 64-bit pattern of an immediate known to fit in s32,
// preserving any sign-extension bits verbatim.
u64 Argument::GetImmediateS32() const {
    ASSERT(FitsInImmediateS32());
    u64 imm = ImmediateToU64(value);
    return imm;
}

u64 Argument::GetImmediateU64() const {
    return ImmediateToU64(value);
}

IR::Cond Argument::GetImmediateCond() const {
    ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
    return value.GetCond();
}

// Is this value currently in a general-purpose register?
bool Argument::IsInGpr() const {
    if (IsImmediate())
        return false;
    return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
}

// Is this value currently in a floating-point/vector register?
bool Argument::IsInFpr() const {
    if (IsImmediate())
        return false;
    return HostLocIsFPR(*reg_alloc.ValueLocation(value.GetInst()));
}

// Is this value currently spilled to memory?
bool Argument::IsInMemory() const {
    if (IsImmediate())
        return false;
    return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
// Gathers the arguments of inst, registering one pending reference to the
// host location of each value-producing, non-immediate argument. The
// references are released via ReleaseOne/ReleaseAll as arguments are consumed.
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
    ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
    for (size_t i = 0; i < inst->NumArgs(); i++) {
        const IR::Value& arg = inst->GetArg(i);
        ret[i].value = arg;
        // Immediates have no host location; valueless types (e.g. Table) have
        // no runtime representation. Neither is reference-counted.
        if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
            // Fixed assert message grammar ("must already been" -> "must
            // already be"), matching the wording used by DefineValueImpl.
            ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already be defined");
            LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
        }
    }
    return ret;
}
// Use*: make the argument's value available in a register of the requested
// class without invalidating it for later uses.
Arm64Gen::ARM64Reg RegAlloc::UseGpr(Argument& arg) {
    ASSERT(!arg.allocated);  // Each Argument may be allocated at most once.
    arg.allocated = true;
    return HostLocToReg64(UseImpl(arg.value, any_gpr));
}

Arm64Gen::ARM64Reg RegAlloc::UseFpr(Argument& arg) {
    ASSERT(!arg.allocated);
    arg.allocated = true;
    return HostLocToFpr(UseImpl(arg.value, any_fpr));
}

//OpArg RegAlloc::UseOpArg(Argument& arg) {
//    return UseGpr(arg);
//}

// Pin the argument's value into a specific host location.
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
    ASSERT(!arg.allocated);
    arg.allocated = true;
    UseImpl(arg.value, {host_loc});
}

// UseScratch*: like Use*, but the returned register may be freely clobbered.
Arm64Gen::ARM64Reg RegAlloc::UseScratchGpr(Argument& arg) {
    ASSERT(!arg.allocated);
    arg.allocated = true;
    return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
}

Arm64Gen::ARM64Reg RegAlloc::UseScratchFpr(Argument& arg) {
    ASSERT(!arg.allocated);
    arg.allocated = true;
    return HostLocToFpr(UseScratchImpl(arg.value, any_fpr));
}

void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
    ASSERT(!arg.allocated);
    arg.allocated = true;
    UseScratchImpl(arg.value, {host_loc});
}

// Record that inst's result now lives in the given host register.
void RegAlloc::DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg) {
    ASSERT(IsVector(reg) || IsGPR(reg));
    // Translate the encoded register number back to a HostLoc index.
    HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
    DefineValueImpl(inst, hostloc);
}

// Record that inst's result is the same value as an existing argument.
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
    ASSERT(!arg.allocated);
    arg.allocated = true;
    DefineValueImpl(inst, arg.value);
}

// Drop one lock on the location backing the given register.
void RegAlloc::Release(const Arm64Gen::ARM64Reg& reg) {
    ASSERT(IsVector(reg) || IsGPR(reg));
    const HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
    LocInfo(hostloc).ReleaseOne();
}

// Allocate a clobberable register that holds no particular value.
Arm64Gen::ARM64Reg RegAlloc::ScratchGpr(HostLocList desired_locations) {
    return HostLocToReg64(ScratchImpl(desired_locations));
}

Arm64Gen::ARM64Reg RegAlloc::ScratchFpr(HostLocList desired_locations) {
    return HostLocToFpr(ScratchImpl(desired_locations));
}
// Places use_value into one of desired_locations for reading. Prefers leaving
// the value where it already is; otherwise exchanges with, or evicts, the
// occupant of a selected destination register.
HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
    if (use_value.IsImmediate()) {
        // Immediates are materialized into a scratch register on demand.
        return LoadImmediate(use_value, ScratchImpl(desired_locations));
    }

    const IR::Inst* use_inst = use_value.GetInst();
    const HostLoc current_location = *ValueLocation(use_inst);
    const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();

    const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
    if (can_use_current_location) {
        LocInfo(current_location).ReadLock();
        return current_location;
    }

    // A locked location cannot be moved from; copy into a scratch instead.
    if (LocInfo(current_location).IsLocked()) {
        return UseScratchImpl(use_value, desired_locations);
    }

    const HostLoc destination_location = SelectARegister(desired_locations);
    if (max_bit_width > HostLocBitWidth(destination_location)) {
        // The location's widest value would not fit in the destination;
        // copy just the needed value into a scratch instead.
        return UseScratchImpl(use_value, desired_locations);
    } else if (CanExchange(destination_location, current_location)) {
        Exchange(destination_location, current_location);
    } else {
        MoveOutOfTheWay(destination_location);
        Move(destination_location, current_location);
    }
    LocInfo(destination_location).ReadLock();
    return destination_location;
}

// Places use_value into one of desired_locations and write-locks it so the
// caller may clobber the register without corrupting the original value.
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
    if (use_value.IsImmediate()) {
        return LoadImmediate(use_value, ScratchImpl(desired_locations));
    }

    const IR::Inst* use_inst = use_value.GetInst();
    const HostLoc current_location = *ValueLocation(use_inst);
    const size_t bit_width = GetBitWidth(use_inst->GetType());

    const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
    if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
        if (!LocInfo(current_location).IsLastUse()) {
            // Other uses remain: preserve the original elsewhere before
            // handing this copy out to be clobbered.
            MoveOutOfTheWay(current_location);
        }
        LocInfo(current_location).WriteLock();
        return current_location;
    }

    const HostLoc destination_location = SelectARegister(desired_locations);
    MoveOutOfTheWay(destination_location);
    CopyToScratch(bit_width, destination_location, current_location);
    LocInfo(destination_location).WriteLock();
    return destination_location;
}

// Allocates an empty register from desired_locations, evicting if necessary.
HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
    HostLoc location = SelectARegister(desired_locations);
    MoveOutOfTheWay(location);
    LocInfo(location).WriteLock();
    return location;
}
// Prepares a call to a host function: each provided argument is forced into
// its parameter register (ABI_PARAM1..ABI_PARAM8), the unused parameter
// registers and all remaining caller-saved registers are evicted, and
// result_def (if non-null) is defined to live in ABI_RETURN.
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
                        std::optional<Argument::copyable_reference> arg1,
                        std::optional<Argument::copyable_reference> arg2,
                        std::optional<Argument::copyable_reference> arg3,
                        std::optional<Argument::copyable_reference> arg4,
                        std::optional<Argument::copyable_reference> arg5,
                        std::optional<Argument::copyable_reference> arg6,
                        std::optional<Argument::copyable_reference> arg7) {
    constexpr size_t args_count = 8;
    constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8 };
    const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7};

    // Caller-saved registers that are not parameter registers; computed once.
    static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
        std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());

        for (auto hostloc : args_hostloc)
            ret.erase(std::find(ret.begin(), ret.end(), hostloc));

        return ret;
    }();

    // Move the supplied arguments into their parameter registers first...
    for (size_t i = 0; i < args_count; i++) {
        if (args[i]) {
            UseScratch(*args[i], args_hostloc[i]);
        }
    }

    // ...then claim the unused parameter registers, which the callee may clobber.
    for (size_t i = 0; i < args_count; i++) {
        if (!args[i]) {
            // TODO: Force spill
            ScratchGpr({args_hostloc[i]});
        }
    }

    // Evict everything else the callee is allowed to clobber.
    for (HostLoc caller_saved : other_caller_save) {
        ScratchImpl({caller_saved});
    }

    if (result_def) {
        DefineValueImpl(result_def, ABI_RETURN);
    }
}
// Called after each IR instruction is emitted: drops every outstanding
// argument reference accumulated during that instruction.
void RegAlloc::EndOfAllocScope() {
    for (auto& iter : hostloc_info) {
        iter.ReleaseAll();
    }
}

// End-of-block sanity check: every host location must be free.
void RegAlloc::AssertNoMoreUses() {
    ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
}

// Picks an unlocked location from the candidates, preferring an empty one.
HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
    std::vector<HostLoc> candidates = desired_locations;

    // Find all locations that have not been allocated..
    const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
        return !this->LocInfo(loc).IsLocked();
    });
    candidates.erase(allocated_locs, candidates.end());
    ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");

    // Selects the best location out of the available locations.
    // TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
    std::partition(candidates.begin(), candidates.end(), [this](auto loc){
        return this->LocInfo(loc).IsEmpty();
    });

    return candidates.front();
}

// Linear scan over every host location for the one holding this value.
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
    for (size_t i = 0; i < hostloc_info.size(); i++)
        if (hostloc_info[i].ContainsValue(value))
            return static_cast<HostLoc>(i);

    return std::nullopt;
}

void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
    ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
    LocInfo(host_loc).AddValue(def_inst);
}

// Defines def_inst as an alias of use_inst's value (or of an immediate,
// which is materialized into a fresh GPR first).
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
    ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");

    if (use_inst.IsImmediate()) {
        HostLoc location = ScratchImpl(any_gpr);
        DefineValueImpl(def_inst, location);
        LoadImmediate(use_inst, location);
        return;
    }

    ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
    HostLoc location = *ValueLocation(use_inst.GetInst());
    DefineValueImpl(def_inst, location);
}

// Materializes an immediate into host_loc: MOVI2R for a GPR; for an FPR,
// FMOV for zero, otherwise a patched literal load. Returns host_loc.
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
    ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");

    if (HostLocIsGPR(host_loc)) {
        Arm64Gen::ARM64Reg reg = HostLocToReg64(host_loc);
        u64 imm_value = ImmediateToU64(imm);
        code.MOVI2R(reg, imm_value);
        return host_loc;
    }

    if (HostLocIsFPR(host_loc)) {
        Arm64Gen::ARM64Reg reg = Arm64Gen::EncodeRegToDouble(HostLocToFpr(host_loc));
        u64 imm_value = ImmediateToU64(imm);
        if (imm_value == 0)
            code.fp_emitter.FMOV(reg, 0);
        else {
            code.EmitPatchLDR(reg, imm_value);
        }
        return host_loc;
    }

    UNREACHABLE();
}
// Transfers the contents (and the bookkeeping) of `from` into the empty
// location `to`; `from` becomes empty.
void RegAlloc::Move(HostLoc to, HostLoc from) {
    const size_t bit_width = LocInfo(from).GetMaxBitWidth();

    ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
    ASSERT(bit_width <= HostLocBitWidth(to));

    if (LocInfo(from).IsEmpty()) {
        return;  // Nothing to move.
    }

    EmitMove(bit_width, to, from);

    LocInfo(to) = std::exchange(LocInfo(from), {});
}

// Copies the value only; `from` keeps ownership of it.
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
    ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());

    EmitMove(bit_width, to, from);
}

// Swaps two locations; degenerates to a Move when either side is empty.
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
    ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
    ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
    ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));

    if (LocInfo(a).IsEmpty()) {
        Move(a, b);
        return;
    }

    if (LocInfo(b).IsEmpty()) {
        Move(b, a);
        return;
    }

    EmitExchange(a, b);

    std::swap(LocInfo(a), LocInfo(b));
}

// Ensures reg is empty, spilling its current contents if necessary.
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
    ASSERT(!LocInfo(reg).IsLocked());
    if (!LocInfo(reg).IsEmpty()) {
        SpillRegister(reg);
    }
}

void RegAlloc::SpillRegister(HostLoc loc) {
    ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
    ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
    ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");

    HostLoc new_loc = FindFreeSpill();
    Move(new_loc, loc);
}

// First-fit search through the spill slots at the end of hostloc_info.
HostLoc RegAlloc::FindFreeSpill() const {
    for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
        HostLoc loc = static_cast<HostLoc>(i);
        if (LocInfo(loc).IsEmpty())
            return loc;
    }

    ASSERT_FALSE("All spill locations are full");
}

// SP, X28, X29 and X30 are reserved (X28 is the spill base register used by
// EmitMove) and must never be allocated or queried.
HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
    ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
    return hostloc_info[static_cast<size_t>(loc)];
}

const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
    ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
    return hostloc_info[static_cast<size_t>(loc)];
}
// Emits the load/store/mov instruction for a bit_width-sized transfer
// between two host locations (GPR/FPR/spill in any supported combination).
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
    if (HostLocIsFPR(to) && HostLocIsFPR(from)) {
        // bit_width == 128
        //mov(HostLocToFpr(to), HostLocToFpr(from));

        ASSERT_FALSE("Unimplemented");
    } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.MOV(HostLocToReg64(to), HostLocToReg64(from));
        } else {
            // DecodeReg yields the 32-bit (W) view of the register.
            code.MOV(DecodeReg(HostLocToReg64(to)), DecodeReg(HostLocToReg64(from)));
        }
    } else if (HostLocIsFPR(to) && HostLocIsGPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.fp_emitter.FMOV(EncodeRegToDouble(HostLocToFpr(to)), HostLocToReg64(from));
        } else {
            code.fp_emitter.FMOV(EncodeRegToSingle(HostLocToFpr(to)), DecodeReg(HostLocToReg64(from)));
        }
    } else if (HostLocIsGPR(to) && HostLocIsFPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.fp_emitter.FMOV(HostLocToReg64(to), EncodeRegToDouble(HostLocToFpr(from)));
        } else {
            code.fp_emitter.FMOV(DecodeReg(HostLocToReg64(to)), EncodeRegToSingle(HostLocToFpr(from)));
        }
    } else if (HostLocIsFPR(to) && HostLocIsSpill(from)) {
        // Spill slots are addressed as unsigned offsets from X28.
        s32 spill_addr = spill_to_addr(from);
        // ASSERT(spill_addr.getBit() >= bit_width);
        code.fp_emitter.LDR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(to), Arm64Gen::X28, spill_addr);
    } else if (HostLocIsSpill(to) && HostLocIsFPR(from)) {
        s32 spill_addr = spill_to_addr(to);
        // ASSERT(spill_addr.getBit() >= bit_width);
        code.fp_emitter.STR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(from), Arm64Gen::X28, spill_addr);
    } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.LDR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(to), Arm64Gen::X28, spill_to_addr(from));
        } else {
            code.LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(to)), Arm64Gen::X28, spill_to_addr(from));
        }
    } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
        ASSERT(bit_width != 128);
        if (bit_width == 64) {
            code.STR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(from), Arm64Gen::X28, spill_to_addr(to));
        } else {
            code.STR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(from)), Arm64Gen::X28, spill_to_addr(to));
        }
    } else {
        ASSERT_FALSE("Invalid RegAlloc::EmitMove");
    }
}

// Swaps two GPRs without a temporary register via the XOR-swap trick.
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
    if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
        // Is this the best way to do it?
        code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
        code.EOR(HostLocToReg64(b), HostLocToReg64(a), HostLocToReg64(b));
        code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
    } else if (HostLocIsFPR(a) && HostLocIsFPR(b)) {
        ASSERT_FALSE("Check your code: Exchanging XMM registers is unnecessary");
    } else {
        ASSERT_FALSE("Invalid RegAlloc::EmitExchange");
    }
}
} // namespace Dynarmic::BackendA64

167
src/backend/A64/reg_alloc.h Normal file
View File

@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <utility>
#include <vector>
#include <optional>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
//#include "backend/A64/oparg.h"
#include "common/common_types.h"
#include "frontend/ir/cond.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/value.h"
namespace Dynarmic::BackendA64 {
class RegAlloc;
// Per-host-location bookkeeping: which IR values live in a register/spill
// slot, how many uses remain, and whether it is locked for the current
// instruction.
struct HostLocInfo {
public:
    bool IsLocked() const;
    bool IsEmpty() const;
    bool IsLastUse() const;

    void ReadLock();
    void WriteLock();
    void AddArgReference();
    void ReleaseOne();
    void ReleaseAll();

    bool ContainsValue(const IR::Inst* inst) const;
    size_t GetMaxBitWidth() const;

    void AddValue(IR::Inst* inst);

private:
    // Current instruction state
    size_t is_being_used_count = 0;  // Outstanding Use*/Scratch* locks.
    bool is_scratch = false;         // Handed out as a clobberable register.

    // Block state
    size_t current_references = 0;   // Pending argument references this instruction.
    size_t accumulated_uses = 0;     // Uses consumed so far.
    size_t total_uses = 0;           // Sum of UseCount() over all stored values.

    // Value state
    std::vector<IR::Inst*> values;   // IR values whose result lives here.
    size_t max_bit_width = 0;        // Widest stored value, in bits.
};
// A handle to one argument of the IR instruction currently being emitted.
// Created by RegAlloc::GetArgumentInfo; consumed at most once by one of the
// RegAlloc Use*/DefineValue calls.
struct Argument {
public:
    using copyable_reference = std::reference_wrapper<Argument>;

    IR::Type GetType() const;
    bool IsImmediate() const;
    bool IsVoid() const;

    bool FitsInImmediateU32() const;
    bool FitsInImmediateS32() const;

    // Immediate accessors; each asserts the value fits the requested width.
    bool GetImmediateU1() const;
    u8 GetImmediateU8() const;
    u16 GetImmediateU16() const;
    u32 GetImmediateU32() const;
    u64 GetImmediateS32() const;
    u64 GetImmediateU64() const;
    IR::Cond GetImmediateCond() const;

    /// Is this value currently in a GPR?
    bool IsInGpr() const;
    /// Is this value currently in a FPR?
    bool IsInFpr() const;
    /// Is this value currently in memory?
    bool IsInMemory() const;

private:
    friend class RegAlloc;
    explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}

    bool allocated = false;  // Set once the argument has been handed to a Use*/DefineValue call.
    RegAlloc& reg_alloc;
    IR::Value value;
};
// Register allocator for the A64 backend: maps IR values onto host GPRs,
// FPRs and spill slots while JIT code is emitted.
class RegAlloc final {
public:
    using ArgumentInfo = std::array<Argument, IR::max_arg_count>;

    // num_spills: number of spill slots available beyond the host registers.
    // spill_to_addr: maps a spill HostLoc to its byte offset used when
    // emitting spill loads/stores.
    explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<u64(HostLoc)> spill_to_addr)
        : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}

    // Snapshot the arguments of inst and reference-count their locations.
    ArgumentInfo GetArgumentInfo(IR::Inst* inst);

    // Read access: value stays valid for later uses.
    Arm64Gen::ARM64Reg UseGpr(Argument& arg);
    Arm64Gen::ARM64Reg UseFpr(Argument& arg);
    //OpArg UseOpArg(Argument& arg);
    void Use(Argument& arg, HostLoc host_loc);

    // Write access: the returned register may be clobbered by the caller.
    Arm64Gen::ARM64Reg UseScratchGpr(Argument& arg);
    Arm64Gen::ARM64Reg UseScratchFpr(Argument& arg);
    void UseScratch(Argument& arg, HostLoc host_loc);

    void DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg);
    void DefineValue(IR::Inst* inst, Argument& arg);

    void Release(const Arm64Gen::ARM64Reg& reg);

    Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr);
    Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr);

    // Marshals arguments into ABI parameter registers and evicts all
    // caller-saved registers ahead of a call into host code.
    void HostCall(IR::Inst* result_def = nullptr, std::optional<Argument::copyable_reference> arg0 = {},
                  std::optional<Argument::copyable_reference> arg1 = {},
                  std::optional<Argument::copyable_reference> arg2 = {},
                  std::optional<Argument::copyable_reference> arg3 = {},
                  std::optional<Argument::copyable_reference> arg4 = {},
                  std::optional<Argument::copyable_reference> arg5 = {},
                  std::optional<Argument::copyable_reference> arg6 = {},
                  std::optional<Argument::copyable_reference> arg7 = {});

    // TODO: Values in host flags

    void EndOfAllocScope();

    void AssertNoMoreUses();

private:
    friend struct Argument;

    HostLoc SelectARegister(HostLocList desired_locations) const;
    std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;

    HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
    HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
    HostLoc ScratchImpl(HostLocList desired_locations);
    void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
    void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);

    HostLoc LoadImmediate(IR::Value imm, HostLoc reg);
    void Move(HostLoc to, HostLoc from);
    void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
    void Exchange(HostLoc a, HostLoc b);
    void MoveOutOfTheWay(HostLoc reg);

    void SpillRegister(HostLoc loc);
    HostLoc FindFreeSpill() const;

    std::vector<HostLocInfo> hostloc_info;
    HostLocInfo& LocInfo(HostLoc loc);
    const HostLocInfo& LocInfo(HostLoc loc) const;

    BlockOfCode& code;
    // Fixed: was std::function<u32(HostLoc)>, which silently truncated the
    // u64-returning callable accepted by the constructor. The return type now
    // matches the constructor parameter.
    std::function<u64(HostLoc)> spill_to_addr;
    void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
    void EmitExchange(HostLoc a, HostLoc b);
};
} // namespace Dynarmic::BackendA64

View File

@ -479,7 +479,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
}
}
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
const u32 imm = args[0].GetImmediateU32();
@ -503,6 +503,17 @@ void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
}
}
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code.and_(to_store, 0b11000001'00000001);
code.imul(to_store, to_store, 0b00010000'00100001);
code.shl(to_store, 16);
code.and_(to_store, 0xF0000000);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
}
void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {

View File

@ -44,4 +44,9 @@ u8 RecipEstimate(u64 a);
*/
u8 RecipSqrtEstimate(u64 a);
// Returns true when `value` is an exact power of two.
// Zero and negative values are never powers of two; for a positive power of
// two exactly one bit is set, so clearing the lowest set bit yields zero.
template <typename T>
constexpr bool IsPow2(T value) {
    if (value <= 0) {
        return false;
    }
    return (value & (value - 1)) == 0;
}
} // namespace Dynarmic::Common

View File

@ -27,7 +27,11 @@ std::vector<ArmMatcher<V>> GetArmDecodeTable() {
std::vector<ArmMatcher<V>> table = {
#define INST(fn, name, bitstring) Decoder::detail::detail<ArmMatcher<V>>::GetMatcher(&V::fn, name, bitstring),
#ifdef ARCHITECTURE_Aarch64
#include "arm_a64.inc"
#else
#include "arm.inc"
#endif
#undef INST
};

View File

@ -0,0 +1,301 @@
// Barrier instructions
//INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") // v7
//INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") // v7
//INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") // v7
// Branch instructions
INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") // v5
INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") // v5
INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") // all
INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") // all
INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") // v4T
INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J
// CRC32 instructions
//INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8
//INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8
// Coprocessor instructions
INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5)
INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5)
INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5)
INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6)
INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5)
INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6)
INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5)
// Data Processing instructions
INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") // all
INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") // all
INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") // all
INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") // all
INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") // all
INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") // all
INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") // all
INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") // all
INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") // all
INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") // all
INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") // all
INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") // all
INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") // all
INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") // all
INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") // all
INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") // all
INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") // all
INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") // all
INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") // all
INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") // all
// A32/ARM decoder table fragment: INST(handler, display-name, bitstring).
// In a bitstring, '0'/'1' are fixed bits, '-' is a don't-care bit, and each
// letter marks a field bit captured for the handler (the leading "cccc" is
// presumably the ARM condition field and 'S' the flag-setting bit — this
// matches the arm_* handler names; confirm against the decoder's matcher).
// The trailing comment records the architecture version introducing the
// instruction ("all" = every supported version).
INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") // all
INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") // all
INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") // all
INST(arm_MOV_rsr, "MOV (rsr)", "cccc0001101S0000ddddssss0rr1mmmm") // all
INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") // all
INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") // all
INST(arm_MVN_rsr, "MVN (rsr)", "cccc0001111S0000ddddssss0rr1mmmm") // all
INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") // all
INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") // all
INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") // all
INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") // all
INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") // all
INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") // all
INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") // all
INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") // all
INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") // all
INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") // all
INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") // all
INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") // all
INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") // all
INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") // all
INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") // all
INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") // all
INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") // all
INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") // all
INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") // all
INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") // all
INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") // all
// Exception Generating instructions
INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") // v5
INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") // all
INST(arm_UDF, "UDF", "111001111111------------1111----") // all
// Extension instructions
INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") // v6
INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") // v6
INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") // v6
INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") // v6
INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") // v6
INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") // v6
INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") // v6
INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") // v6
INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") // v6
INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") // v6
INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") // v6
INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") // v6
// Hint instructions
INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") // v5E for PLD; v7 for PLDW
INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") // v5E for PLD; v7 for PLDW
INST(arm_SEV, "SEV", "----0011001000001111000000000100") // v6K
INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") // v8
INST(arm_WFE, "WFE", "----0011001000001111000000000010") // v6K
INST(arm_WFI, "WFI", "----0011001000001111000000000011") // v6K
INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") // v6K
// Both reserved-hint patterns decode to NOP; the specific hints listed above
// them match their exact encodings first.
INST(arm_NOP, "Reserved Hint", "----0011001000001111------------")
INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----")
// Synchronization Primitive instructions
INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") // v6K
INST(arm_LDREX, "LDREX", "cccc00011001nnnndddd111110011111") // v6
INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") // v6K
INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") // v6K
INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") // v6K
INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") // v6
INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") // v6K
INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") // v6K
INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") // v6K
INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") // v2S (v6: Deprecated)
INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") // v2S (v6: Deprecated)
// Load/Store instructions
INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------")
INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----")
INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----")
INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----")
INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----")
INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----")
INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----")
INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----")
INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------")
INST(arm_LDRT, "LDRT (A2)", "----0110-011---------------0----")
INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------")
INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----")
INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----")
INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----")
INST(arm_STRT, "STRT (A1)", "----0100-010--------------------")
INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----")
INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv")
INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv")
INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm")
INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv")
INST(arm_LDRB_imm, "LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv")
INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm")
INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv")
INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") // v5E
INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") // v5E
INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv")
INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv")
INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm")
INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv")
INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv")
INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm")
INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv")
INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv")
INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm")
INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv")
INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm")
INST(arm_STRB_imm, "STRB (imm)", "cccc010pu1w0nnnnttttvvvvvvvvvvvv")
INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm")
INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") // v5E
INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") // v5E
INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv")
INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm")
// Load/Store Multiple instructions
INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDM_usr, "LDM (usr reg)", "----100--101--------------------") // all
INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // all
INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") // all
// Miscellaneous instructions
INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") // v6T2
INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") // v6T2
INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5
INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2
INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2
INST(arm_NOP, "NOP", "----0011001000001111000000000000") // v6K
INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2
INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6
INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2
// Unsigned Sum of Absolute Differences instructions
INST(arm_USAD8, "USAD8", "cccc01111000dddd1111mmmm0001nnnn") // v6
INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") // v6
// Packing instructions
INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") // v6K
INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") // v6K
// Reversal instructions
INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") // v6T2
INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") // v6
INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") // v6
INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6
// Saturation instructions
INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6
INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6
INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6
INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6
// Divide instructions
INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a
INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") // v7a
// Multiply (Normal) instructions
INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") // v2
INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") // v6T2
INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") // v2
// Multiply (Long) instructions
INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") // v3M
INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") // v3M
INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") // v6
INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") // v3M
INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") // v3M
// Multiply (Halfword) instructions
INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") // v5xP
INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") // v5xP
INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") // v5xP
// Multiply (Word by Halfword) instructions
INST(arm_SMLAWy, "SMLAWY", "cccc00010010ddddaaaammmm1y00nnnn") // v5xP
INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") // v5xP
// Multiply (Most Significant Word) instructions
INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") // v6
INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") // v6
INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6
// Multiply (Dual) instructions
INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6
INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6
INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6
INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6
INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6
INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6
// Parallel Add/Subtract (Modulo) instructions
INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") // v6
INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6
INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6
INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6
INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6
INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6
INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6
INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6
INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6
INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6
INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6
INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Saturating) instructions
INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6
INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6
INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6
INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6
INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6
INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6
INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6
INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6
INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6
INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6
INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6
INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Halving) instructions
INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // v6
INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6
INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6
INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6
INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6
INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6
INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6
INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6
INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6
INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6
INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6
INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6
// Saturated Add/Subtract instructions
INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP
INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP
INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP
INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP
// Status Register Access instructions
INST(arm_CPS, "CPS", "111100010000---00000000---0-----") // v6
INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") // v6
INST(arm_MRS, "MRS", "cccc000100001111dddd000000000000") // v3
INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3
INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3
INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") // v6
INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") // v6

View File

@ -99,6 +99,10 @@ void IREmitter::SetCpsr(const IR::U32& value) {
}
// Writes the N, Z, C and V flags of the emulated CPSR from a packed u32.
// NOTE(review): the exact bit packing is defined by the backend's
// implementation of A32SetCpsrNZCVRaw — confirm before relying on it.
void IREmitter::SetCpsrNZCV(const IR::U32& value) {
Inst(Opcode::A32SetCpsrNZCVRaw, value);
}
// Overload taking an IR NZCV pseudo-value; emits the non-raw opcode,
// presumably so the backend can keep the flags in its native representation
// without repacking — confirm against the backend emitters.
void IREmitter::SetCpsrNZCV(const IR::NZCV& value) {
Inst(Opcode::A32SetCpsrNZCV, value);
}

View File

@ -47,6 +47,7 @@ public:
IR::U32 GetCpsr();
void SetCpsr(const IR::U32& value);
void SetCpsrNZCV(const IR::U32& value);
void SetCpsrNZCV(const IR::NZCV& value);
void SetCpsrNZCVQ(const IR::U32& value);
void SetCheckBit(const IR::U1& value);
IR::U1 GetCFlag();

View File

@ -41,7 +41,11 @@ bool ArmTranslatorVisitor::arm_SVC(Cond cond, Imm<24> imm24) {
// UDF<c> #<imm16>
// On the x86-64 backend a UDF raises an Undefined Instruction; other
// backends fall back to interpreting the instruction instead.
// NOTE(review): `#if` (not `#ifdef`) assumes ARCHITECTURE_x86_64 is defined
// to a nonzero value by the build system when targeting x86-64 — confirm,
// otherwise the x86-64 branch is never taken.
bool ArmTranslatorVisitor::arm_UDF() {
#if ARCHITECTURE_x86_64
return UndefinedInstruction();
#else
return InterpretThisInstruction();
#endif
}
} // namespace Dynarmic::A32

View File

@ -3,9 +3,9 @@
* SPDX-License-Identifier: 0BSD
*/
#include "frontend/A64/translate/impl/impl.h"
#include "common/bit_util.h"
#include "frontend/ir/terminal.h"
#include "frontend/A64/translate/impl/impl.h"
namespace Dynarmic::A64 {

View File

@ -166,6 +166,7 @@ bool Inst::ReadsFromCPSR() const {
bool Inst::WritesToCPSR() const {
switch (op) {
case Opcode::A32SetCpsr:
case Opcode::A32SetCpsrNZCVRaw:
case Opcode::A32SetCpsrNZCV:
case Opcode::A32SetCpsrNZCVQ:
case Opcode::A32SetNFlag:

View File

@ -14,7 +14,8 @@ A32OPC(SetExtendedRegister32, Void, A32E
A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 )
A32OPC(GetCpsr, U32, )
A32OPC(SetCpsr, Void, U32 )
A32OPC(SetCpsrNZCV, Void, U32 )
A32OPC(SetCpsrNZCVRaw, Void, U32 )
A32OPC(SetCpsrNZCV, Void, NZCV )
A32OPC(SetCpsrNZCVQ, Void, U32 )
A32OPC(GetNFlag, U1, )
A32OPC(SetNFlag, Void, U1 )

View File

@ -0,0 +1,56 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <boost/variant/get.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "ir_opt/passes.h"
namespace Dynarmic::Optimization {
// Optimization pass: when a block terminates in an Interpret terminal, scan
// the instructions that follow and fold every consecutive interpret-only
// instruction into that single terminal, so the interpreter is entered once
// for the whole run rather than once per instruction.
void A32MergeInterpretBlocksPass(IR::Block& block, A32::UserCallbacks* cb) {
// True if the instruction at `location` translates to an empty IR block
// whose terminal is Interpret at that same location, i.e. an instruction
// the translator punts entirely to the interpreter.
const auto is_interpret_instruction = [cb](A32::LocationDescriptor location) {
const u32 instruction = cb->MemoryReadCode(location.PC());
IR::Block new_block{location};
A32::TranslateSingleInstruction(new_block, location, instruction);
if (!new_block.Instructions().empty())
return false;
const IR::Terminal terminal = new_block.GetTerminal();
if (auto term = boost::get<IR::Term::Interpret>(&terminal)) {
return term->next == location;
}
return false;
};
// Only blocks already ending in an Interpret terminal are candidates.
IR::Terminal terminal = block.GetTerminal();
auto term = boost::get<IR::Term::Interpret>(&terminal);
if (!term)
return;
// Walk forward from the terminal's target in 4-byte steps.
// NOTE(review): assumes fixed 4-byte (ARM-mode) instructions — Thumb would
// need different advancement; confirm this pass only runs on ARM blocks.
A32::LocationDescriptor location{term->next};
size_t num_instructions = 1;
while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) {
num_instructions++;
}
// `term` points into our local copy of the terminal: mutate the copy first,
// then write the whole copy back into the block.
term->num_instructions = num_instructions;
block.ReplaceTerminal(terminal);
// Extend the cycle count by the extra merged instructions (the first one is
// presumably already accounted for in the block's existing count — confirm).
block.CycleCount() += num_instructions - 1;
}
} // namespace Dynarmic::Optimization

View File

@ -22,6 +22,7 @@ namespace Dynarmic::Optimization {
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
void A32GetSetElimination(IR::Block& block);
void A32MergeInterpretBlocksPass(IR::Block& block, A32::UserCallbacks* cb);
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
void A64GetSetElimination(IR::Block& block);
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);

View File

@ -76,8 +76,12 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
} instructions = []{
const std::vector<std::tuple<std::string, const char*>> list {
#define INST(fn, name, bitstring) {#fn, bitstring},
#ifdef ARCHITECTURE_Aarch64
#include "frontend/A32/decoder/arm_a64.inc"
#else
#include "frontend/A32/decoder/arm.inc"
#include "frontend/A32/decoder/asimd.inc"
#endif
#include "frontend/A32/decoder/vfp.inc"
#undef INST
};

View File

@ -199,7 +199,7 @@ void FuzzJitThumb(const size_t instruction_count, const size_t instructions_to_e
}
}
TEST_CASE("Fuzz Thumb instructions set 1", "[JitX64][Thumb]") {
TEST_CASE("Fuzz Thumb instructions set 1", "[JitX64][JitA64][Thumb]") {
const std::array instructions = {
ThumbInstGen("00000xxxxxxxxxxx"), // LSL <Rd>, <Rm>, #<imm5>
ThumbInstGen("00001xxxxxxxxxxx"), // LSR <Rd>, <Rm>, #<imm5>
@ -267,7 +267,7 @@ TEST_CASE("Fuzz Thumb instructions set 1", "[JitX64][Thumb]") {
#endif
}
TEST_CASE("Fuzz Thumb instructions set 2 (affects PC)", "[JitX64][Thumb]") {
TEST_CASE("Fuzz Thumb instructions set 2 (affects PC)", "[JitX64][JitA64][Thumb]") {
const std::array instructions = {
// TODO: We currently can't test BX/BLX as we have
// no way of preventing the unpredictable

View File

@ -425,3 +425,75 @@ TEST_CASE("arm: Test stepping 3", "[arm]") {
REQUIRE(jit.Regs()[15] == 20);
REQUIRE(jit.Cpsr() == 0x000001d0);
}
// Verifies that MSR APSR_nzcvq actually clears the sticky Q flag: the first
// QADD saturates (setting Q), MSR clears it, and the second QADD does not
// saturate, so the final CPSR must show Q == 0.
TEST_CASE("arm: Cleared Q flag", "[arm][A32][JitA64]") {
ArmTestEnv test_env;
A32::Jit jit{GetUserConfig(&test_env)};
// qadd r1, r0, r0    ; 0x7FFFFFFF + 0x7FFFFFFF saturates to 0x7FFFFFFF, sets Q
// msr APSR_nzcvq, #0 ; clears N, Z, C, V and Q
// qadd r3, r2, r2    ; 0x00008000 + 0x00008000 = 0x00010000, no saturation
// b +#0 (infinite loop)
test_env.code_mem = {
0xe1001050,
0xe328f000,
0xe1023052,
0xeafffffe,
};
jit.Regs() = {
0x7FFFFFFF, // R0
0x80008000, // R1 (overwritten by the first qadd)
0x00008000, // R2
0x7f7f7f7f, // R3 (overwritten by the second qadd)
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
jit.SetCpsr(0x000001d0); // User-mode
test_env.ticks_left = 4;
jit.Run();
REQUIRE(jit.Regs()[0] == 0x7FFFFFFF);
REQUIRE(jit.Regs()[1] == 0x7FFFFFFF); // saturated result
REQUIRE(jit.Regs()[2] == 0x00008000);
REQUIRE(jit.Regs()[3] == 0x00010000); // unsaturated result
// CPSR must equal the initial value — in particular Q (bit 27) must be 0.
REQUIRE(jit.Cpsr() == 0x000001d0);
}
// Regression test: a preceding SADD8 must not leave the guest Q flag set.
TEST_CASE("arm: Cleared Q flag 2", "[arm][A32][JitA64]") {
ArmTestEnv test_env;
A32::Jit jit{GetUserConfig(&test_env)};
// Because of how we calculate the ge-flag in (A64 backend) sadd8
// and similar instructions, the host's Q flag may be set,
// tainting our results in subsequent instructions.
// sadd8 r1, r0, r0 ; must not set the guest Q flag
// qadd r3, r2, r2  ; 0x00008000 + 0x00008000 = 0x00010000, no saturation
// b +#0 (infinite loop)
test_env.code_mem = {
0xe6101f90,
0xe1023052,
0xeafffffe,
};
jit.Regs() = {
0x7F007F00, // R0
0x80008000, // R1 (overwritten by sadd8)
0x00008000, // R2
0x7f7f7f7f, // R3 (overwritten by qadd)
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
jit.SetCpsr(0x000001d0); // User-mode
test_env.ticks_left = 4;
jit.Run();
// Neither instruction saturated, so Q (CPSR bit 27) must read clear.
REQUIRE((jit.Cpsr() & (1 << 27)) == 0);
}

View File

@ -7,9 +7,9 @@ add_executable(dynarmic_tests
A32/test_arm_instructions.cpp
A32/test_thumb_instructions.cpp
A32/testenv.h
A64/a64.cpp
A64/testenv.h
cpu_info.cpp
# A64/a64.cpp
# A64/testenv.h
# cpu_info.cpp
fp/FPToFixed.cpp
fp/FPValue.cpp
fp/mantissa_util_tests.cpp
@ -18,20 +18,35 @@ add_executable(dynarmic_tests
rand_int.h
)
if (ARCHITECTURE_x86_64)
target_sources(dynarmic_tests PRIVATE
A64/a64.cpp
A64/testenv.h
cpu_info.cpp
)
endif()
if (DYNARMIC_TESTS_USE_UNICORN)
target_sources(dynarmic_tests PRIVATE
A32/fuzz_arm.cpp
A32/fuzz_thumb.cpp
A64/fuzz_with_unicorn.cpp
A64/misaligned_page_table.cpp
A64/verify_unicorn.cpp
#A64/fuzz_with_unicorn.cpp
#A64/verify_unicorn.cpp
fuzz_util.cpp
fuzz_util.h
unicorn_emu/a32_unicorn.cpp
unicorn_emu/a32_unicorn.h
unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h
#unicorn_emu/a64_unicorn.cpp
#unicorn_emu/a64_unicorn.h
)
if (ARCHITECTURE_x86_64)
target_sources(dynarmic_tests PRIVATE
A64/fuzz_with_unicorn.cpp
A64/verify_unicorn.cpp
unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h
)
endif()
target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
endif()
@ -43,10 +58,16 @@ include(CreateDirectoryGroups)
create_target_directory_groups(dynarmic_tests)
create_target_directory_groups(dynarmic_print_info)
target_link_libraries(dynarmic_tests PRIVATE dynarmic boost catch fmt mp xbyak)
target_link_libraries(dynarmic_tests PRIVATE dynarmic boost catch fmt mp)
if (ARCHITECTURE_x86_64)
target_link_libraries(dynarmic_tests PRIVATE xbyak)
endif()
target_include_directories(dynarmic_tests PRIVATE . ../src)
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=0)
target_compile_options(dynarmic_tests PRIVATE -DCATCH_CONFIG_NO_WINDOWS_SEH -DCATCH_CONFIG_NO_POSIX_SIGNALS)
target_link_libraries(dynarmic_print_info PRIVATE dynarmic boost catch fmt mp)
target_include_directories(dynarmic_print_info PRIVATE . ../src)