Compare commits

...

116 Commits

Author SHA1 Message Date
SachinVin
2a209226f9 backend\A64\reg_alloc.cpp: Fix assert 2020-05-16 18:18:29 +05:30
SachinVin
c2a877611c CMakeLists: DYNARMIC_FRONTENDS opt-in for A64 backend 2020-05-16 18:17:56 +05:30
SachinVin
bdf484be62 frontend/A32: remove decoder hack vfp instructions 2020-05-16 17:27:26 +05:30
SachinVin
6a41c5d0ef a64_emitter: CountLeadingZeros intrinsic shortcuts 2020-05-16 17:18:43 +05:30
BreadFish64
2c94eea72e emit_a64: get rid of useless NOP generation
We don't actually patch anything in those locations besides a jump.
2020-05-16 17:18:43 +05:30
SachinVin
0885ffdc50 emit_a64: Do not clear fast_dispatch_table unnecessarily
port 4305c74 - emit_x64: Do not clear fast_dispatch_table unnecessarily
2020-05-16 17:18:43 +05:30
SachinVin
fe229b4a8e backend/A64/block_of_code.cpp: Clean up C style casts 2020-05-16 17:18:42 +05:30
SachinVin
4c27fb78a8 backend/A64/a32_emit_a64.cpp: EmitA32{Get,Set}Fpscr, set the guest_fpcr to host fpcr 2020-05-16 17:18:42 +05:30
SachinVin
b07864321e backend/A64: Add Step 2020-05-16 17:18:42 +05:30
SachinVin
6f95f6d311 backend/A64/block_of_code: Always specify codeptr to run from 2020-05-16 17:18:42 +05:30
BreadFish64
7d7b8edf31 backend/A64: fix mp 2020-05-16 17:18:41 +05:30
SachinVin
0b81c5a3c1 backend/A64: Move SP to FP in GenMemoryAccessors + Minor cleanup and 2020-05-16 17:18:41 +05:30
SachinVin
941a6ba808 backend/A64: Use X26 for storing remaining cycles. 2020-05-16 17:18:41 +05:30
BreadFish64
eeb7c609fc backend/A64: add fastmem support
fix crash on game close

fix generic exception handler

reorder hostloc gpr list

use temp register instead of X0 for writes

go back to regular std::partition
2020-05-16 17:18:41 +05:30
BreadFish64
c4b62bb22e merge fastmem 2020-05-16 17:18:40 +05:30
SachinVin
2d7f2b11b2 backend\A64\constant_pool.cpp: Correct offset calculation 2020-05-16 17:18:40 +05:30
SachinVin
6a3c3579d1 backend/A64/a32_jitstate: Upstream changes from x64 backend 2020-05-16 17:18:40 +05:30
SachinVin
3db06be313 backend/A64: Add test for q flag being incorrectly set 2020-05-16 17:18:39 +05:30
SachinVin
d3e5bd4b43 backend/A64/a32_emit_a64.cpp: Use unused HostCall registers 2020-05-16 17:18:39 +05:30
SachinVin
5aa7b3cbed backend/A64/a32_emit_a64.cpp: Use MOVP2R instead of MOVI2R. 2020-05-16 17:18:39 +05:30
SachinVin
8fd3c5c4f3 backend/A64/abi: Fix FP caller and callee save registers 2020-05-16 17:18:39 +05:30
SachinVin
0f22688948 a64/block_of_code: use GetWritableCodePtr() instead of const_cast<...>(GetCodePtr()) 2020-05-16 17:18:38 +05:30
SachinVin
58450e7b42 backend/A64/constant_pool: Clean up unused stuff 2020-05-16 17:18:38 +05:30
SachinVin
c3dab59e46 emit_a64_data_processing.cpp: remove pointless DoNZCV. 2020-05-16 17:18:38 +05:30
SachinVin
351a557618 IR + backend/*: add SetCpsrNZCVRaw and change arg1 type of SetCpsrNZCV to IR::NZCV 2020-05-16 17:18:37 +05:30
SachinVin
75ed09b939 backend/A64: Fix ASR impl 2020-05-16 17:18:37 +05:30
SachinVin
fd4a8f277d a64_emitter: Use Correct alias for ZR and WZR in CMP 2020-05-16 17:18:37 +05:30
SachinVin
722e76f75f backend/A64: Use CSEL instead of branches for LSL LSR and ASR + minor cleanup 2020-05-16 17:18:36 +05:30
SachinVin
40463bde01 backend/A64: Use correct register size for EmitNot64 2020-05-16 17:18:36 +05:30
SachinVin
203e8326fc tests/A32: Check if Q flag is cleared properly 2020-05-16 17:18:35 +05:30
SachinVin
5a54320fea backend/A64: SignedSaturatedSub and SignedSaturatedAdd 2020-05-16 17:15:37 +05:30
SachinVin
571d3c49c9 backend/A64/emit_a64_saturation.cpp: Implement EmitSignedSaturation and EmitUnsignedSaturation
Implements SSAT SSAT16 USAT USAT16 QASX QSAX UQASX UQSAX
2020-05-16 17:15:37 +05:30
SachinVin
d61d21593f backend/A64: add emit_a64_saturation.cpp 2020-05-16 17:15:36 +05:30
SachinVin
1a295642fb backend/A64: Fix EmitA32SetCpsr 2020-05-16 17:15:36 +05:30
SachinVin
631274453a backend/A64/devirtualize: remove unused DevirtualizeItanium 2020-05-16 17:15:35 +05:30
SachinVin
d815a9bd08 backend/A64: refactor to fpscr from mxcsr 2020-05-16 17:15:35 +05:30
SachinVin
e06008a530 backend/A64: Use ScratchGpr() instead of ABI_SCRATCH1 where possible 2020-05-16 17:15:35 +05:30
SachinVin
3c30758dca backend/A64: support for always_little_endian 2020-05-16 17:15:35 +05:30
SachinVin
1a32b5501c backend/a64: Add hook_hint_instructions option
534eb0f
2020-05-16 17:15:34 +05:30
SachinVin
0d05eeb90a backend/A64: cleanup 2020-05-16 17:15:34 +05:30
SachinVin
14b94212a8 gitignore: add .vs dir 2020-05-16 17:15:34 +05:30
SachinVin
6faf2816bc Minor style fix 2020-05-16 17:15:34 +05:30
SachinVin
e45461ef9f backend\A64\emit_a64_packed.cpp: Implement AddSub halving and non halving 2020-05-16 17:15:33 +05:30
SachinVin
4880a6cfa7 backend\A64: Instructions that got implemented on the way 2020-05-16 17:15:33 +05:30
SachinVin
04a59768c6 backend\A64\emit_a64_packed.cpp: Implement Unsigned Sum of Absolute Differences 2020-05-16 17:15:32 +05:30
SachinVin
6ba3bbf7d4 a64 emitter: Absolute Difference and add across vector instructions 2020-05-16 17:15:32 +05:30
SachinVin
8216b2f7aa backend\A64\emit_a64_packed.cpp: Implement Packed Select 2020-05-16 17:15:32 +05:30
SachinVin
f889ecaf4d Backend/a64: Fix assert when falling back to interpreter 2020-05-16 17:15:31 +05:30
SachinVin
340e772c1f backend\A64\emit_a64_packed.cpp: Implement Packed Halving Add/Sub instructions 2020-05-16 17:15:31 +05:30
SachinVin
9e0a3e7aa0 backend\A64\emit_a64_packed.cpp: Implement Packed Saturating instructions 2020-05-16 17:15:30 +05:30
SachinVin
79157ef109 backend\A64\emit_a64_packed.cpp: Implement SignedPacked*- ADD and SUB 2020-05-16 17:15:30 +05:30
SachinVin
3ed0a9a593 a64 emitter: Vector Halving and Saturation instructions 2020-05-16 17:15:30 +05:30
SachinVin
42d5e1bc0e backend\A64\emit_a64_packed.cpp: Implement UnsignedPacked*- ADD and SUB...
with a few others in the emitter
2020-05-16 17:15:29 +05:30
SachinVin
c78aa47c00 a64 emitter: fix Scalar Saturating Instructions 2020-05-16 17:15:29 +05:30
SachinVin
cc19981999 A64 Emitter: Implement Saturating Add and Sub 2020-05-16 17:15:29 +05:30
SachinVin
45a6d5d025 backend\A64\emit_a64_data_processing.cpp: Implement Division 2020-05-16 17:15:28 +05:30
SachinVin
3910b7b1bb backend\A64\emit_a64_data_processing.cpp: Implement 64bit CLZ 2020-05-16 17:15:28 +05:30
SachinVin
33f0c18ea4 backend\A64\emit_a64_data_processing.cpp: Implement 64bit LSL and ROR Instructions
Also EmitTestBit
2020-05-16 17:15:28 +05:30
SachinVin
69295c4918 backend\A64\emit_a64_data_processing.cpp: Implement 64bit Logical Instructions 2020-05-16 17:15:27 +05:30
SachinVin
745a924106 backend/a64: implement CheckBit 2020-05-16 17:15:27 +05:30
SachinVin
e27809706a backend/a64: Redesign Const Pool 2020-05-16 17:15:27 +05:30
SachinVin
42873f0825 backend\A64\emit_a64_floating_point.cpp: Fix include paths 2020-05-16 17:15:26 +05:30
SachinVin
07f648d906 backend\A64\a32_emit_a64.cpp: Fix Coproc* after rebase 2020-05-16 17:15:26 +05:30
SachinVin
1de1bdb6d4 backend/a64/opcodes.inc: Coproc instructions 2020-05-16 17:15:26 +05:30
SachinVin
47a2441640 a64 emitter: Fix LDR literal 2020-05-16 17:15:25 +05:30
SachinVin
1a9bdd41ea a64 emitter: Move IsInRange* and MaskImm* into anon namespace 2020-05-16 17:15:25 +05:30
SachinVin
be3ba643cc backend\A64\emit_a64_floating_point.cpp: Implement VADD VSUB VMUL and other stuff 2020-05-16 17:15:25 +05:30
SachinVin
9c789ded58 backend\A64\emit_a64_floating_point.cpp: Implement VABS VNEG VCMP and a few others 2020-05-16 17:15:24 +05:30
SachinVin
bb9ed1c4ec frontend/A32/Decoder: (backend/a64) VMOV 2020-05-16 17:15:24 +05:30
SachinVin
967c4e93b7 backend\A64\emit_a64_floating_point.cpp: Implement VCVT instructions 2020-05-16 17:15:24 +05:30
SachinVin
86e0ab0836 backend\A64\emit_a64_floating_point.cpp: part 1 2020-05-16 17:15:23 +05:30
SachinVin
dda7b5013a backend/a64/reg_alloc: Fix EmitMove for FPRs 2020-05-16 17:15:23 +05:30
SachinVin
7bfc973efe A64 emitter: Support for 64bit FMOV 2020-05-16 17:15:22 +05:30
SachinVin
c97c18f64b a64 backend: Load "guest_FPSR" 2020-05-16 17:15:22 +05:30
SachinVin
32eba73e1e A64 backend: Add Get/SetExtendedRegister and Get/SetGEFlags 2020-05-16 17:15:22 +05:30
SachinVin
c64e2812a8 tests: Don't compile A64 tests for non-x64 backend 2020-05-16 17:15:22 +05:30
SachinVin
ea2be0b7ef travis a64: unicorn 2020-05-16 17:15:21 +05:30
SachinVin
a00248bd27 travis a64 backend 2020-05-16 17:15:21 +05:30
SachinVin
54dbfe86da Frontend/A32: a64 backend; Interpret SEL 2020-05-16 17:15:20 +05:30
SachinVin
cafe0c8d65 frontend/A32: A64 Backend implemented instructions 2020-05-16 17:15:20 +05:30
SachinVin
563dfded57 backend\A64\emit_a64_data_processing.cpp: Implement REV and CLZ ops 2020-05-16 17:15:19 +05:30
SachinVin
dddba6b9f5 backend\A64\emit_a64_data_processing.cpp: Implement Sext and Zext ops 2020-05-16 17:15:19 +05:30
SachinVin
401432b922 backend\A64\emit_a64_data_processing.cpp: Implement Logical ops 2020-05-16 17:15:19 +05:30
SachinVin
96a7171126 backend\A64\emit_a64_data_processing.cpp: Implement Arithmetic ops 2020-05-16 17:15:19 +05:30
SachinVin
21e59707ed backend\A64\emit_a64_data_processing.cpp: Implement Shift and Rotate ops 2020-05-16 17:15:18 +05:30
SachinVin
1fa8c36ab1 backend\A64\emit_a64_data_processing.cpp: Implement ops 2020-05-16 17:15:18 +05:30
SachinVin
a2c44e9a27 backend\A64\emit_a64_data_processing.cpp: Mostly empty file 2020-05-16 17:15:18 +05:30
SachinVin
9301cf2273 backend/a64: Add a32_interface 2020-05-16 17:15:17 +05:30
SachinVin
b4513f152a backend/a64: Port a32_emit_a64 2020-05-16 17:15:17 +05:30
SachinVin
3e655508b5 backend/a64: Port block_of_code and emit_a64 2020-05-16 17:15:17 +05:30
SachinVin
544988c1f4 backend/a64: Port callback functions 2020-05-16 17:15:17 +05:30
SachinVin
4b53c90bfb backend/a64: Port exception handler 2020-05-16 17:15:16 +05:30
SachinVin
53056f0a95 backend/a64: Port const pool 2020-05-16 17:15:16 +05:30
SachinVin
0e5e9759b6 backend/a64: Port reg_alloc 2020-05-16 17:15:16 +05:30
SachinVin
b06c8acce4 backend/a64: Port ABI functions 2020-05-16 17:15:15 +05:30
SachinVin
5f1209dc11 backend/a64: Port perfmap 2020-05-16 17:15:15 +05:30
SachinVin
69610e6ee9 backend/a64: Port hostloc 2020-05-16 17:15:15 +05:30
SachinVin
87f1181293 backend/a64: Devirtualize functions for a64 2020-05-16 17:15:14 +05:30
SachinVin
bfeb8d5356 backend/a64: Port block_range_info 2020-05-16 17:15:14 +05:30
SachinVin
642dd7607b CMakeModules\DetectArchitecture.cmake: Refactor ARCHITECTURE to DYNARMIC_ARCHITECTURE
Don't rely on super-project's definition of ARCHITECTURE
2020-05-16 17:15:14 +05:30
SachinVin
beecfca9f9 [HACK] A32/exception_generating: Interpret undefined instructions 2020-05-16 17:15:14 +05:30
SachinVin
df2bb10f33 [HACK] CMakeLists: Do not build A64 tests on AArch64 2020-05-16 17:15:13 +05:30
MerryMage
fba55874d2 fuzz_thumb: Add [JitA64] tag to supported instructions 2020-05-16 17:15:13 +05:30
SachinVin
ccef3889b4 backend/A64: Port a32_jitstate 2020-05-16 17:15:13 +05:30
MerryMage
1cc82ddff5 code_block: Support Windows and fix munmap check 2020-05-16 17:15:12 +05:30
SachinVin
56d43156f9 ir_opt: Port a32_merge_interpreter_blocks 2020-05-16 17:15:12 +05:30
SachinVin
235b6d2288 assert: Use __android_log_print on Android 2020-05-16 17:15:12 +05:30
SachinVin
2a5792bfbb CMakeLists: xbyak should only be linked on x64 2020-05-16 17:14:23 +05:30
SachinVin
d49816d794 a64_emitter: Fix ABI push and pop 2020-05-16 17:14:23 +05:30
SachinVin
6dc4c262f2 a64_emitter: More style cleanup 2020-05-16 17:14:23 +05:30
SachinVin
9bbc4d5353 a64_emitter: Style cleanup 2020-05-16 17:14:22 +05:30
BreadFish64
daf74884a2 Backend/A64: add jitstate_info.h 2020-05-16 17:14:22 +05:30
BreadFish64
cb07edb006 Backend/A64: Add Dolphin's ARM emitter 2020-05-16 17:14:22 +05:30
BreadFish64
21febaab9f Add aarch64 CI 2020-05-16 17:14:21 +05:30
MerryMage
1e291059e2 a64_emit_x64: Invalid regalloc code for EmitA64ExclusiveReadMemory128
Attempted to allocate args[0] after end of allocation scope
2020-05-16 12:31:12 +01:00
MerryMage
f4c75dbc38 A32/ASIMD: ARMv8: Implement VLD{1-4} (multiple) 2020-05-16 12:30:09 +01:00
67 changed files with 14096 additions and 20 deletions

.gitignore (vendored, 1 line changed)

@@ -1,5 +1,6 @@
# Built files
build/
docs/Doxygen/
.vs/
# Generated files
src/backend/x64/mig/


@@ -30,6 +30,16 @@ matrix:
- ninja-build
install: ./.travis/build-x86_64-linux/deps.sh
script: ./.travis/build-x86_64-linux/build.sh
- env: NAME="Linux aarch64 Build"
os: linux
dist: trusty
sudo: required
services: docker
addons:
apt:
sources:
- ubuntu-toolchain-r-test
script: ./.travis/build-aarch64-linux/run.sh
- env: NAME="macOS Build"
os: osx
sudo: false


@@ -0,0 +1,14 @@
#!/bin/sh
set -e
set -x
export PKG_CONFIG_PATH=$HOME/.local/lib/pkgconfig:$PKG_CONFIG_PATH
export UNICORNDIR=$(pwd)/dynarmic/externals/unicorn
cd dynarmic
mkdir build && cd build
cmake .. -DBoost_INCLUDE_DIRS=${PWD}/../externals/ext-boost -DCMAKE_BUILD_TYPE=Release -DDYNARMIC_TESTS_USE_UNICORN=1 -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc-8 -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++-8 -G Ninja
ninja
qemu-aarch64 -L /usr/aarch64-linux-gnu ./tests/dynarmic_tests -d yes


@@ -0,0 +1,16 @@
#!/bin/sh
set -e
set -x
apt-get update
apt-get install -y git cmake gcc python ninja-build g++-8-aarch64-linux-gnu qemu-user
# TODO: This isn't ideal.
cd dynarmic/externals
git clone https://github.com/MerryMage/ext-boost
git clone https://github.com/unicorn-engine/unicorn.git
cd unicorn
UNICORN_ARCHS="arm aarch64" CC=aarch64-linux-gnu-gcc-8 ./make.sh
cd ../..


@@ -0,0 +1,4 @@
#!/bin/sh
dynarmic/.travis/build-aarch64-linux/deps.sh
dynarmic/.travis/build-aarch64-linux/build.sh


@@ -0,0 +1,4 @@
#!/bin/sh
docker pull ubuntu:18.04
docker run -v $(pwd):/dynarmic ubuntu:18.04 dynarmic/.travis/build-aarch64-linux/docker.sh


@@ -105,10 +105,10 @@ else()
detect_architecture("__x86_64__" x86_64)
detect_architecture("__aarch64__" Aarch64)
endif()
if (NOT DEFINED ARCHITECTURE)
if (NOT DEFINED DYNARMIC_ARCHITECTURE)
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
message(STATUS "Target architecture: ${ARCHITECTURE}")
message(STATUS "Target architecture: ${DYNARMIC_ARCHITECTURE}")
# Include Boost
if (NOT TARGET boost)


@@ -1,6 +1,6 @@
include(CheckSymbolExists)
function(detect_architecture symbol arch)
if (NOT DEFINED ARCHITECTURE)
if (NOT DEFINED DYNARMIC_ARCHITECTURE)
set(CMAKE_REQUIRED_QUIET 1)
check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch})
unset(CMAKE_REQUIRED_QUIET)
@@ -8,7 +8,7 @@ function(detect_architecture symbol arch)
# The output variable needs to be unique across invocations otherwise
# CMake's crazy scope rules will keep it defined
if (ARCHITECTURE_${arch})
set(ARCHITECTURE "${arch}" PARENT_SCOPE)
set(DYNARMIC_ARCHITECTURE "${arch}" PARENT_SCOPE)
set(ARCHITECTURE_${arch} 1 PARENT_SCOPE)
add_definitions(-DARCHITECTURE_${arch}=1)
endif()


@@ -122,6 +122,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
frontend/A32/location_descriptor.cpp
frontend/A32/location_descriptor.h
frontend/A32/PSR.h
frontend/A32/translate/impl/asimd_load_store_structures.cpp
frontend/A32/translate/impl/barrier.cpp
frontend/A32/translate/impl/branch.cpp
frontend/A32/translate/impl/coprocessor.cpp
@@ -151,6 +152,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
frontend/A32/translate/translate_thumb.cpp
ir_opt/a32_constant_memory_reads_pass.cpp
ir_opt/a32_get_set_elimination_pass.cpp
ir_opt/a32_merge_interpret_blocks.cpp
)
endif()
@@ -319,6 +321,68 @@ if (ARCHITECTURE_x86_64)
else()
target_sources(dynarmic PRIVATE backend/x64/exception_handler_generic.cpp)
endif()
target_link_libraries(dynarmic PRIVATE xbyak)
elseif(ARCHITECTURE_Aarch64)
target_sources(dynarmic PRIVATE
backend/A64/emitter/a64_emitter.cpp
backend/A64/emitter/a64_emitter.h
backend/A64/emitter/arm_common.h
backend/A64/emitter/code_block.h
# backend/A64/a64_emit_a64.cpp
# backend/A64/a64_emit_a64.h
# backend/A64/a64_exclusive_monitor.cpp
# backend/A64/a64_interface.cpp
# backend/A64/a64_jitstate.cpp
# backend/A64/a64_jitstate.h
backend/A64/abi.cpp
backend/A64/abi.h
backend/A64/block_of_code.cpp
backend/A64/block_of_code.h
backend/A64/block_range_information.cpp
backend/A64/block_range_information.h
backend/A64/callback.cpp
backend/A64/callback.h
backend/A64/constant_pool.cpp
backend/A64/constant_pool.h
backend/A64/devirtualize.h
backend/A64/emit_a64.cpp
backend/A64/emit_a64.h
# backend/A64/emit_a64_aes.cpp
# backend/A64/emit_a64_crc32.cpp
backend/A64/emit_a64_data_processing.cpp
backend/A64/emit_a64_floating_point.cpp
backend/A64/emit_a64_packed.cpp
backend/A64/emit_a64_saturation.cpp
# backend/A64/emit_a64_sm4.cpp
# backend/A64/emit_a64_vector.cpp
# backend/A64/emit_a64_vector_floating_point.cpp
backend/A64/exception_handler.h
backend/A64/hostloc.cpp
backend/A64/hostloc.h
backend/A64/jitstate_info.h
backend/A64/opcodes.inc
backend/A64/perf_map.cpp
backend/A64/perf_map.h
backend/A64/reg_alloc.cpp
backend/A64/reg_alloc.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
backend/A64/a32_emit_a64.cpp
backend/A64/a32_emit_a64.h
backend/A64/a32_interface.cpp
backend/A64/a32_jitstate.cpp
backend/A64/a32_jitstate.h
)
endif()
if (ANDROID)
target_sources(dynarmic PRIVATE backend/A64/exception_handler_posix.cpp)
else()
target_sources(dynarmic PRIVATE backend/A64/exception_handler_generic.cpp)
endif()
else()
message(FATAL_ERROR "Unsupported architecture")
endif()
@@ -335,9 +399,12 @@ target_link_libraries(dynarmic
boost
fmt::fmt
mp
xbyak
$<$<BOOL:DYNARMIC_USE_LLVM>:${llvm_libs}>
)
if(ANDROID)
target_link_libraries(dynarmic PRIVATE log)
endif()
if (DYNARMIC_ENABLE_CPU_FEATURE_DETECTION)
target_compile_definitions(dynarmic PRIVATE DYNARMIC_ENABLE_CPU_FEATURE_DETECTION=1)
endif()

File diff suppressed because it is too large.


@@ -0,0 +1,135 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <set>
#include <tuple>
#include <unordered_map>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_range_information.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/exception_handler.h"
#include "dynarmic/A32/a32.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::BackendA64 {
struct A64State;
class RegAlloc;
struct A32EmitContext final : public EmitContext {
A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
A32::LocationDescriptor Location() const;
FP::RoundingMode FPSCR_RMode() const override;
u32 FPCR() const override;
bool FPSCR_FTZ() const override;
bool FPSCR_DN() const override;
std::ptrdiff_t GetInstOffset(IR::Inst* inst) const;
};
class A32EmitA64 final : public EmitA64 {
public:
A32EmitA64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
~A32EmitA64() override;
/**
* Emit host machine code for a basic block with intermediate representation `ir`.
* @note ir is modified.
*/
BlockDescriptor Emit(IR::Block& ir);
void ClearCache() override;
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
void FastmemCallback(CodePtr PC);
protected:
const A32::UserConfig config;
A32::Jit* jit_interface;
BlockRangeInformation<u32> block_ranges;
ExceptionHandler exception_handler;
struct FastDispatchEntry {
u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
const void* code_ptr = nullptr;
};
static_assert(sizeof(FastDispatchEntry) == 0x10);
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
static constexpr size_t fast_dispatch_table_size = 0x10000;
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable();
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
std::set<DoNotFastmemMarker> do_not_fastmem;
DoNotFastmemMarker GenerateDoNotFastmemMarker(A32EmitContext& ctx, IR::Inst* inst);
void DoNotFastmem(const DoNotFastmemMarker& marker);
bool ShouldFastmem(const DoNotFastmemMarker& marker) const;
const void* read_memory_8;
const void* read_memory_16;
const void* read_memory_32;
const void* read_memory_64;
const void* write_memory_8;
const void* write_memory_16;
const void* write_memory_32;
const void* write_memory_64;
void GenMemoryAccessors();
template<typename T>
void ReadMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
template<typename T>
void WriteMemory(A32EmitContext& ctx, IR::Inst* inst, const CodePtr callback_fn);
const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
void GenTerminalHandlers();
// Microinstruction emitters
#define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
#define A64OPC(...)
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Fastmem
struct FastmemPatchInfo {
std::function<void()> callback;
};
std::unordered_map<CodePtr, FastmemPatchInfo> fastmem_patch_info;
// Terminal instruction emitters
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override;
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) override;
};
} // namespace Dynarmic::BackendA64
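The fast-dispatch members declared above suggest a direct-mapped cache keyed on the block's 64-bit location descriptor: fast_dispatch_table_mask keeps bits [4:19] of the hash, which is exactly a 16-byte-aligned byte offset into the 0x10000-entry table. A minimal standalone sketch of such a lookup follows; the constants mirror the declarations, but the helper itself and the direct-mapped behaviour are an inference from this header, not code taken from the diff.

#include <array>
#include <cstddef>
#include <cstdint>

using u64 = std::uint64_t;

// Values copied from the declarations above.
struct FastDispatchEntry {
    u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
    const void* code_ptr = nullptr;
};
constexpr u64 fast_dispatch_table_mask = 0xFFFF0;         // bits [4:19] of the hash
constexpr std::size_t fast_dispatch_table_size = 0x10000;

// The mask already yields a multiple of sizeof(FastDispatchEntry) == 0x10,
// so dividing by the entry size turns the byte offset into a table index.
inline FastDispatchEntry& Lookup(std::array<FastDispatchEntry, fast_dispatch_table_size>& table, u64 hash) {
    return table[(hash & fast_dispatch_table_mask) / sizeof(FastDispatchEntry)];
}

A cached entry only counts as a hit when its stored descriptor matches the full hash; otherwise the dispatcher falls back to the slower LookupBlock path and can overwrite the slot.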


@@ -0,0 +1,312 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <memory>
#include <boost/icl/interval_set.hpp>
#include <fmt/format.h>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>
#include "backend/A64/a32_emit_a64.h"
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/callback.h"
#include "backend/A64/devirtualize.h"
#include "backend/A64/jitstate_info.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
#include "ir_opt/passes.h"
namespace Dynarmic::A32 {
using namespace BackendA64;
static RunCodeCallbacks GenRunCodeCallbacks(const A32::UserConfig& config, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(config.callbacks)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(config.callbacks)),
reinterpret_cast<u64>(config.fastmem_pointer),
};
}
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig config)
: block_of_code(GenRunCodeCallbacks(config, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
, emitter(block_of_code, config, jit)
, config(std::move(config))
, jit_interface(jit)
{}
A32JitState jit_state;
BlockOfCode block_of_code;
A32EmitA64 emitter;
const A32::UserConfig config;
// Requests made during execution to invalidate the cache are queued up here.
size_t invalid_cache_generation = 0;
boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false;
void Execute() {
const CodePtr current_codeptr = [this]{
// RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
jit_state.rsb_ptr = new_rsb_ptr;
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
}
return GetCurrentBlock();
}();
block_of_code.RunCode(&jit_state, current_codeptr);
}
void Step() {
block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
}
std::string Disassemble(const IR::LocationDescriptor& descriptor) {
auto block = GetBasicBlock(descriptor);
std::string result = fmt::format("address: {}\nsize: {} bytes\n", block.entrypoint, block.size);
#ifdef DYNARMIC_USE_LLVM
for (const u32* pos = reinterpret_cast<const u32*>(block.entrypoint);
reinterpret_cast<const u8*>(pos) < reinterpret_cast<const u8*>(block.entrypoint) + block.size; pos += 1) {
fmt::print("0x{:02x} 0x{:02x} ", reinterpret_cast<u64>(pos), *pos);
fmt::print("{}", Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos)));
result += Common::DisassembleAArch64(*pos, reinterpret_cast<u64>(pos));
}
#endif
return result;
}
void PerformCacheInvalidation() {
if (invalidate_entire_cache) {
jit_state.ResetRSB();
block_of_code.ClearCache();
emitter.ClearCache();
invalid_cache_ranges.clear();
invalidate_entire_cache = false;
invalid_cache_generation++;
return;
}
if (invalid_cache_ranges.empty()) {
return;
}
jit_state.ResetRSB();
emitter.InvalidateCacheRanges(invalid_cache_ranges);
invalid_cache_ranges.clear();
invalid_cache_generation++;
}
void RequestCacheInvalidation() {
if (jit_interface->is_executing) {
jit_state.halt_requested = true;
return;
}
PerformCacheInvalidation();
}
private:
Jit* jit_interface;
static CodePtr GetCurrentBlockThunk(void* this_voidptr) {
Jit::Impl& this_ = *static_cast<Jit::Impl*>(this_voidptr);
return this_.GetCurrentBlock();
}
IR::LocationDescriptor GetCurrentLocation() const {
return IR::LocationDescriptor{jit_state.GetUniqueHash()};
}
CodePtr GetCurrentBlock() {
return GetBasicBlock(GetCurrentLocation()).entrypoint;
}
CodePtr GetCurrentSingleStep() {
return GetBasicBlock(A32::LocationDescriptor{GetCurrentLocation()}.SetSingleStepping(true)).entrypoint;
}
A32EmitA64::BlockDescriptor GetBasicBlock(IR::LocationDescriptor descriptor) {
auto block = emitter.GetBasicBlock(descriptor);
if (block)
return *block;
constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
invalidate_entire_cache = true;
PerformCacheInvalidation();
}
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); }, {config.define_unpredictable_behaviour, config.hook_hint_instructions});
Optimization::A32GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::A32MergeInterpretBlocksPass(ir_block, config.callbacks);
Optimization::VerificationPass(ir_block);
return emitter.Emit(ir_block);
}
};
Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, std::move(config))) {}
Jit::~Jit() = default;
void Jit::Run() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = false;
impl->Execute();
impl->PerformCacheInvalidation();
}
void Jit::Step() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = true;
impl->Step();
impl->PerformCacheInvalidation();
}
void Jit::ClearCache() {
impl->invalidate_entire_cache = true;
impl->RequestCacheInvalidation();
}
void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
impl->invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
impl->RequestCacheInvalidation();
}
void Jit::Reset() {
ASSERT(!is_executing);
impl->jit_state = {};
}
void Jit::HaltExecution() {
impl->jit_state.halt_requested = true;
}
std::array<u32, 16>& Jit::Regs() {
return impl->jit_state.Reg;
}
const std::array<u32, 16>& Jit::Regs() const {
return impl->jit_state.Reg;
}
std::array<u32, 64>& Jit::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<u32, 64>& Jit::ExtRegs() const {
return impl->jit_state.ExtReg;
}
u32 Jit::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Jit::SetCpsr(u32 value) {
return impl->jit_state.SetCpsr(value);
}
u32 Jit::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Jit::SetFpscr(u32 value) {
return impl->jit_state.SetFpscr(value);
}
Context Jit::SaveContext() const {
Context ctx;
SaveContext(ctx);
return ctx;
}
struct Context::Impl {
A32JitState jit_state;
size_t invalid_cache_generation;
};
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
*impl = *ctx.impl;
return *this;
}
Context& Context::operator=(Context&& ctx) noexcept {
impl = std::move(ctx.impl);
return *this;
}
std::array<std::uint32_t, 16>& Context::Regs() {
return impl->jit_state.Reg;
}
const std::array<std::uint32_t, 16>& Context::Regs() const {
return impl->jit_state.Reg;
}
std::array<std::uint32_t, 64>& Context::ExtRegs() {
return impl->jit_state.ExtReg;
}
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
return impl->jit_state.ExtReg;
}
std::uint32_t Context::Cpsr() const {
return impl->jit_state.Cpsr();
}
void Context::SetCpsr(std::uint32_t value) {
impl->jit_state.SetCpsr(value);
}
std::uint32_t Context::Fpscr() const {
return impl->jit_state.Fpscr();
}
void Context::SetFpscr(std::uint32_t value) {
return impl->jit_state.SetFpscr(value);
}
void Jit::SaveContext(Context& ctx) const {
ctx.impl->jit_state.TransferJitState(impl->jit_state, false);
ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}
void Jit::LoadContext(const Context& ctx) {
bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
impl->jit_state.TransferJitState(ctx.impl->jit_state, reset_rsb);
}
std::string Jit::Disassemble(const IR::LocationDescriptor& descriptor) {
return impl->Disassemble(descriptor);
}
} // namespace Dynarmic::A32


@@ -0,0 +1,172 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/A32/location_descriptor.h"
namespace Dynarmic::BackendA64 {
/**
* CPSR Bits
* =========
*
* ARM CPSR flags
* --------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
* Q bit 27 Saturation flag
* IT[1:0] bits 25-26 If-Then execution state (lower 2 bits)
* J bit 24 Jazelle instruction set flag
* GE bits 16-19 Greater than or Equal flags
* IT[7:2] bits 10-15 If-Then execution state (upper 6 bits)
* E bit 9 Data Endianness flag
* A bit 8 Disable imprecise Aborts
* I bit 7 Disable IRQ interrupts
* F bit 6 Disable FIQ interrupts
* T bit 5 Thumb instruction set flag
* M bits 0-4 Processor Mode bits
*
* A64 flags
* -------------------
* N bit 31 Negative flag
* Z bit 30 Zero flag
* C bit 29 Carry flag
* V bit 28 oVerflow flag
*/
u32 A32JitState::Cpsr() const {
DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
DEBUG_ASSERT((cpsr_q & ~1) == 0);
DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);
u32 cpsr = 0;
// NZCV flags
cpsr |= cpsr_nzcv;
// Q flag
cpsr |= cpsr_q ? 1 << 27 : 0;
// GE flags
cpsr |= Common::Bit<31>(cpsr_ge) ? 1 << 19 : 0;
cpsr |= Common::Bit<23>(cpsr_ge) ? 1 << 18 : 0;
cpsr |= Common::Bit<15>(cpsr_ge) ? 1 << 17 : 0;
cpsr |= Common::Bit<7>(cpsr_ge) ? 1 << 16 : 0;
// E flag, T flag
cpsr |= Common::Bit<1>(upper_location_descriptor) ? 1 << 9 : 0;
cpsr |= Common::Bit<0>(upper_location_descriptor) ? 1 << 5 : 0;
// IT state
cpsr |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
cpsr |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;
// Other flags
cpsr |= cpsr_jaifm;
return cpsr;
}
void A32JitState::SetCpsr(u32 cpsr) {
// NZCV flags
cpsr_nzcv = cpsr & 0xF0000000;
// Q flag
cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
// GE flags
cpsr_ge = 0;
cpsr_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
cpsr_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
cpsr_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
cpsr_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
upper_location_descriptor &= 0xFFFF0000;
// E flag, T flag
upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
// IT state
upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;
// Other flags
cpsr_jaifm = cpsr & 0x010001DF;
}
void A32JitState::ResetRSB() {
rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
rsb_codeptrs.fill(0);
}
/**
* FPSCR
* =========================
*
* VFP FPSCR cumulative exception bits
* -----------------------------------
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1
* IXC bit 4 Inexact cumulative exception bit
* UFC bit 3 Underflow cumulative exception bit
* OFC bit 2 Overflow cumulative exception bit
* DZC bit 1 Division by Zero cumulative exception bit
* IOC bit 0 Invalid Operation cumulative exception bit
*
* VFP FPSCR exception trap enables
* --------------------------------
* IDE bit 15 Input Denormal exception trap enable
* IXE bit 12 Inexact exception trap enable
* UFE bit 11 Underflow exception trap enable
* OFE bit 10 Overflow exception trap enable
* DZE bit 9 Division by Zero exception trap enable
* IOE bit 8 Invalid Operation exception trap enable
*
* VFP FPSCR mode bits
* -------------------
* AHP bit 26 Alternate half-precision
* DN bit 25 Default NaN
* FZ bit 24 Flush to Zero
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
* Stride bits 20-21 Vector stride
* Len bits 16-18 Vector length
*/
// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
u32 A32JitState::Fpscr() const {
DEBUG_ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);
const u32 fpcr_mode = static_cast<u32>(upper_location_descriptor) & FPSCR_MODE_MASK;
u32 FPSCR = fpcr_mode | fpsr_nzcv;
FPSCR |= (guest_fpsr & 0x1F);
FPSCR |= fpsr_exc;
return FPSCR;
}
void A32JitState::SetFpscr(u32 FPSCR) {
// Ensure that only upper half of upper_location_descriptor is used for FPSCR bits.
static_assert((FPSCR_MODE_MASK & 0xFFFF0000) == FPSCR_MODE_MASK);
upper_location_descriptor &= 0x0000FFFF;
upper_location_descriptor |= FPSCR & FPSCR_MODE_MASK;
fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;
guest_fpcr = 0;
guest_fpsr = 0;
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
fpsr_exc = FPSCR & 0x9F;
// Mode Bits
guest_fpcr |= FPSCR & 0x07C09F00;
// Exceptions
guest_fpsr |= FPSCR & 0x9F;
}
} // namespace Dynarmic::BackendA64
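Cpsr() and SetCpsr() above are exact inverses: the guest CPSR is stored unpacked across cpsr_nzcv, cpsr_q, cpsr_ge, cpsr_jaifm and the E/T/IT bits folded into upper_location_descriptor, so emitted code can update each group independently. A hedged round-trip example against those two functions (the value is chosen for illustration; the include path assumes the dynarmic source tree):

#include <cassert>
#include <cstdint>
#include "backend/A64/a32_jitstate.h"

void CpsrRoundTripExample() {
    Dynarmic::BackendA64::A32JitState state;

    // N=1, C=1, Q=1, all four GE flags, E (big-endian data) and T (Thumb) set.
    const std::uint32_t cpsr = 0xA0000000 | (1u << 27) | 0x000F0000 | (1u << 9) | (1u << 5);
    state.SetCpsr(cpsr);

    assert(state.cpsr_nzcv == 0xA0000000);               // NZCV stays in the top nibble
    assert(state.cpsr_q == 1);                           // Q unpacked into its own word
    assert(state.cpsr_ge == 0xFFFFFFFF);                 // each GE flag expanded to a full byte
    assert((state.upper_location_descriptor & 3) == 3);  // E -> bit 1, T -> bit 0
    assert(state.Cpsr() == cpsr);                        // repacking is lossless
}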


@@ -0,0 +1,111 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
#endif
struct A32JitState {
using ProgramCounterType = u32;
A32JitState() { ResetRSB(); }
std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented.
u32 upper_location_descriptor = 0;
u32 cpsr_ge = 0;
u32 cpsr_q = 0;
u32 cpsr_nzcv = 0;
u32 cpsr_jaifm = 0;
u32 Cpsr() const;
void SetCpsr(u32 cpsr);
alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.
static constexpr size_t SpillCount = 64;
std::array<u64, SpillCount> Spill{}; // Spill.
static size_t GetSpillLocationOffsetFromIndex(size_t i) {
return static_cast<u64>(offsetof(A32JitState, Spill) + i * sizeof(u64));
}
// For internal use (See: BlockOfCode::RunCode)
u64 guest_fpcr = 0;
u64 guest_fpsr = 0;
u64 save_host_FPCR = 0;
s64 cycles_to_run = 0;
s64 cycles_remaining = 0;
bool halt_requested = false;
bool check_bit = false;
// Exclusive state
static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
u32 exclusive_state = 0;
u32 exclusive_address = 0;
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
static constexpr size_t RSBPtrMask = RSBSize - 1;
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();
u32 fpsr_exc = 0;
u32 fpsr_qc = 0; // Dummy value
u32 fpsr_nzcv = 0;
u32 Fpscr() const;
void SetFpscr(u32 FPSCR);
u64 GetUniqueHash() const noexcept {
return (static_cast<u64>(upper_location_descriptor) << 32) | (static_cast<u64>(Reg[15]));
}
void TransferJitState(const A32JitState& src, bool reset_rsb) {
Reg = src.Reg;
upper_location_descriptor = src.upper_location_descriptor;
cpsr_ge = src.cpsr_ge;
cpsr_q = src.cpsr_q;
cpsr_nzcv = src.cpsr_nzcv;
cpsr_jaifm = src.cpsr_jaifm;
ExtReg = src.ExtReg;
guest_fpcr = src.guest_fpcr;
guest_fpsr = src.guest_fpsr;
fpsr_exc = src.fpsr_exc;
fpsr_qc = src.fpsr_qc;
fpsr_nzcv = src.fpsr_nzcv;
exclusive_state = 0;
exclusive_address = 0;
if (reset_rsb) {
ResetRSB();
} else {
rsb_ptr = src.rsb_ptr;
rsb_location_descriptors = src.rsb_location_descriptors;
rsb_codeptrs = src.rsb_codeptrs;
}
}
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
using CodePtr = const void*;
} // namespace Dynarmic::BackendA64

src/backend/A64/abi.cpp (new file, 87 lines)

@@ -0,0 +1,87 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// 20th Sep 2018: This code was modified for Dynarmic.
#include <algorithm>
#include <vector>
#include "backend/A64/abi.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/iterator_util.h"
namespace Dynarmic::BackendA64 {
template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
u32 gprs = 0 , fprs = 0;
for (HostLoc reg : regs) {
if (HostLocIsGPR(reg)) {
gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
} else if (HostLocIsFPR(reg)) {
fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
}
}
code.fp_emitter.ABI_PushRegisters(fprs);
code.ABI_PushRegisters(gprs);
}
template<typename RegisterArrayT>
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const RegisterArrayT& regs) {
u32 gprs = 0, fprs = 0;
for (HostLoc reg : regs) {
if (HostLocIsGPR(reg)) {
gprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToReg64(reg)));
} else if (HostLocIsFPR(reg)) {
fprs |= 0x1 << static_cast<u32>(DecodeReg(HostLocToFpr(reg)));
}
}
code.ABI_PopRegisters(gprs);
code.fp_emitter.ABI_PopRegisters(fprs);
}
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLEE_SAVE);
}
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PushRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code) {
ABI_PopRegistersAndAdjustStack(code, ABI_ALL_CALLER_SAVE);
}
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
std::vector<HostLoc> regs;
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
ABI_PushRegistersAndAdjustStack(code, regs);
}
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
std::vector<HostLoc> regs;
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
ABI_PopRegistersAndAdjustStack(code, regs);
}
} // namespace Dynarmic::BackendA64
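The push/pop helpers above fold the register list into two bitmasks, one bit per GPR index and one per FPR index, before handing them to the emitter. A standalone worked sketch of that encoding (the enum and helpers here are simplified stand-ins for HostLoc/DecodeReg, which are assumed to map X19 to bit 19, X20 to bit 20 and Q8 to bit 8):

#include <cstdint>
#include <initializer_list>

// Simplified stand-ins for HostLoc / DecodeReg from the real code.
enum class Reg { X19 = 19, X20 = 20, Q8 = 100 + 8 };
constexpr bool IsGpr(Reg r) { return static_cast<int>(r) < 100; }
constexpr int Index(Reg r) { return static_cast<int>(r) % 100; }

// Mirrors the loop in ABI_PushRegistersAndAdjustStack: one bit per register index.
inline void BuildMasks(std::initializer_list<Reg> regs, std::uint32_t& gprs, std::uint32_t& fprs) {
    gprs = fprs = 0;
    for (Reg r : regs) {
        (IsGpr(r) ? gprs : fprs) |= 1u << Index(r);
    }
}

// BuildMasks({Reg::X19, Reg::X20, Reg::Q8}, gprs, fprs)
// leaves gprs == 0x00180000 and fprs == 0x00000100.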

src/backend/A64/abi.h (new file, 110 lines)

@@ -0,0 +1,110 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
constexpr HostLoc ABI_RETURN = HostLoc::X0;
constexpr HostLoc ABI_PARAM1 = HostLoc::X0;
constexpr HostLoc ABI_PARAM2 = HostLoc::X1;
constexpr HostLoc ABI_PARAM3 = HostLoc::X2;
constexpr HostLoc ABI_PARAM4 = HostLoc::X3;
constexpr HostLoc ABI_PARAM5 = HostLoc::X4;
constexpr HostLoc ABI_PARAM6 = HostLoc::X5;
constexpr HostLoc ABI_PARAM7 = HostLoc::X6;
constexpr HostLoc ABI_PARAM8 = HostLoc::X7;
constexpr std::array<HostLoc, 43> ABI_ALL_CALLER_SAVE = {
HostLoc::X0,
HostLoc::X1,
HostLoc::X2,
HostLoc::X3,
HostLoc::X4,
HostLoc::X5,
HostLoc::X6,
HostLoc::X7,
HostLoc::X8,
HostLoc::X9,
HostLoc::X10,
HostLoc::X11,
HostLoc::X12,
HostLoc::X13,
HostLoc::X14,
HostLoc::X15,
HostLoc::X16,
HostLoc::X17,
HostLoc::X18,
HostLoc::Q0,
HostLoc::Q1,
HostLoc::Q2,
HostLoc::Q3,
HostLoc::Q4,
HostLoc::Q5,
HostLoc::Q6,
HostLoc::Q7,
HostLoc::Q16,
HostLoc::Q17,
HostLoc::Q18,
HostLoc::Q19,
HostLoc::Q20,
HostLoc::Q21,
HostLoc::Q22,
HostLoc::Q23,
HostLoc::Q24,
HostLoc::Q25,
HostLoc::Q26,
HostLoc::Q27,
HostLoc::Q28,
HostLoc::Q29,
HostLoc::Q30,
HostLoc::Q31,
};
constexpr std::array<HostLoc, 20> ABI_ALL_CALLEE_SAVE = {
HostLoc::X19,
HostLoc::X20,
HostLoc::X21,
HostLoc::X22,
HostLoc::X23,
HostLoc::X24,
HostLoc::X25,
HostLoc::X26,
HostLoc::X27,
HostLoc::X28,
HostLoc::X29,
HostLoc::X30,
HostLoc::Q8,
HostLoc::Q9,
HostLoc::Q10,
HostLoc::Q11,
HostLoc::Q12,
HostLoc::Q13,
HostLoc::Q14,
HostLoc::Q15,
};
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 63, "Invalid total number of registers");
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code);
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
} // namespace Dynarmic::BackendA64


@@ -0,0 +1,330 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <cstring>
#include <limits>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
namespace Dynarmic::BackendA64 {
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_RETURN2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM1 = Arm64Gen::ARM64Reg::X0;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM2 = Arm64Gen::ARM64Reg::X1;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM3 = Arm64Gen::ARM64Reg::X2;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM4 = Arm64Gen::ARM64Reg::X3;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM5 = Arm64Gen::ARM64Reg::X4;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM6 = Arm64Gen::ARM64Reg::X5;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM7 = Arm64Gen::ARM64Reg::X6;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_PARAM8 = Arm64Gen::ARM64Reg::X7;
const Arm64Gen::ARM64Reg BlockOfCode::ABI_SCRATCH1 = Arm64Gen::ARM64Reg::X30;
const std::array<Arm64Gen::ARM64Reg, 8> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2,
BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4,
BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6,
BlockOfCode::ABI_PARAM7, BlockOfCode::ABI_PARAM8};
namespace {
constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory(const void* base, size_t size, bool is_executable) {
#ifdef _WIN32
DWORD oldProtect = 0;
VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#else
static const size_t pageSize = sysconf(_SC_PAGESIZE);
const size_t iaddr = reinterpret_cast<size_t>(base);
const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif
} // anonymous namespace
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
: fp_emitter(this)
, cb(std::move(cb))
, jsi(jsi)
, constant_pool(*this) {
AllocCodeSpace(TOTAL_CODE_SIZE);
EnableWriting();
GenRunCode();
}
void BlockOfCode::PreludeComplete() {
prelude_complete = true;
near_code_begin = GetCodePtr();
far_code_begin = GetCodePtr() + FAR_CODE_OFFSET;
FlushIcache();
ClearCache();
DisableWriting();
}
void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, false);
#endif
}
void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
ProtectMemory(GetCodePtr(), TOTAL_CODE_SIZE, true);
#endif
}
void BlockOfCode::ClearCache() {
ASSERT(prelude_complete);
in_far_code = false;
near_code_ptr = near_code_begin;
far_code_ptr = far_code_begin;
SetCodePtr(near_code_begin);
constant_pool.Clear();
}
size_t BlockOfCode::SpaceRemaining() const {
ASSERT(prelude_complete);
// This function provides an underestimate of near-code-size but that's okay.
// (Why? The maximum size of near code should be measured from near_code_begin, not top_.)
// These are offsets from Xbyak::CodeArray::top_.
std::size_t far_code_offset, near_code_offset;
if (in_far_code) {
near_code_offset = static_cast<const u8*>(near_code_ptr) - static_cast<const u8*>(region);
far_code_offset = GetCodePtr() - static_cast<const u8*>(region);
} else {
near_code_offset = GetCodePtr() - static_cast<const u8*>(region);
far_code_offset = static_cast<const u8*>(far_code_ptr) - static_cast<const u8*>(region);
}
if (far_code_offset > TOTAL_CODE_SIZE)
return 0;
if (near_code_offset > FAR_CODE_OFFSET)
return 0;
return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}
void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
run_code(jit_state, code_ptr);
}
void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
step_code(jit_state, code_ptr);
}
void BlockOfCode::ReturnFromRunCode(bool fpscr_already_exited) {
size_t index = 0;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::ForceReturnFromRunCode(bool fpscr_already_exited) {
size_t index = FORCE_RETURN;
if (fpscr_already_exited)
index |= FPSCR_ALREADY_EXITED;
B(return_from_run_code[index]);
}
void BlockOfCode::GenRunCode() {
const u8* loop, *enter_fpscr_then_loop;
AlignCode16();
run_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
// This serves two purposes:
// 1. It saves all the registers we as a callee need to save.
// 2. It aligns the stack so that the code the JIT emits can assume
// that the stack is appropriately aligned for CALLs.
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X27, cb.value_in_X27);
MOV(Arm64Gen::X25, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
MOV(Arm64Gen::X26, ABI_RETURN);
SwitchFpscrOnEntry();
BR(Arm64Gen::X25);
AlignCode16();
step_code = reinterpret_cast<RunCodeFuncType>(GetWritableCodePtr());
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
MOV(Arm64Gen::X28, ABI_PARAM1);
MOVI2R(Arm64Gen::X26, 1);
STR(Arm64Gen::INDEX_UNSIGNED, Arm64Gen::X26, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SwitchFpscrOnEntry();
BR(ABI_PARAM2);
enter_fpscr_then_loop = GetCodePtr();
SwitchFpscrOnEntry();
loop = GetCodePtr();
cb.LookupBlock->EmitCall(*this);
BR(ABI_RETURN);
// Return from run code variants
const auto emit_return_from_run_code = [this, &loop, &enter_fpscr_then_loop](bool fpscr_already_exited, bool force_return){
if (!force_return) {
CMP(Arm64Gen::X26, Arm64Gen::ZR);
B(CC_GT, fpscr_already_exited ? enter_fpscr_then_loop : loop);
}
if (!fpscr_already_exited) {
SwitchFpscrOnExit();
}
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SUB(param[0], param[0], Arm64Gen::X26);
});
ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
RET();
};
return_from_run_code[0] = AlignCode16();
emit_return_from_run_code(false, false);
return_from_run_code[FPSCR_ALREADY_EXITED] = AlignCode16();
emit_return_from_run_code(true, false);
return_from_run_code[FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(false, true);
return_from_run_code[FPSCR_ALREADY_EXITED | FORCE_RETURN] = AlignCode16();
emit_return_from_run_code(true, true);
PerfMapRegister(run_code, GetCodePtr(), "dynarmic_dispatcher");
}
void BlockOfCode::SwitchFpscrOnEntry() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
_MSR(Arm64Gen::FIELD_FPSR, ABI_SCRATCH1);
}
void BlockOfCode::SwitchFpscrOnExit() {
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPCR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpcr);
MRS(ABI_SCRATCH1, Arm64Gen::FIELD_FPSR);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_guest_fpsr);
LDR(Arm64Gen::INDEX_UNSIGNED, ABI_SCRATCH1, Arm64Gen::X28, jsi.offsetof_save_host_FPCR);
_MSR(Arm64Gen::FIELD_FPCR, ABI_SCRATCH1);
}
void BlockOfCode::UpdateTicks() {
cb.AddTicks->EmitCall(*this, [this](RegList param) {
LDR(Arm64Gen::INDEX_UNSIGNED, param[0], Arm64Gen::X28, jsi.offsetof_cycles_to_run);
SUB(param[0], param[0], Arm64Gen::X26);
});
cb.GetTicksRemaining->EmitCall(*this);
STR(Arm64Gen::INDEX_UNSIGNED, ABI_RETURN, Arm64Gen::X28, jsi.offsetof_cycles_to_run);
MOV(Arm64Gen::X26, ABI_RETURN);
}
void BlockOfCode::LookupBlock() {
cb.LookupBlock->EmitCall(*this);
}
void BlockOfCode::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
ASSERT_MSG(!in_far_code, "Can't patch when in far code, yet!");
constant_pool.EmitPatchLDR(Rt, lower, upper);
}
void BlockOfCode::PatchConstPool() {
constant_pool.PatchPool();
}
void BlockOfCode::SwitchToFarCode() {
ASSERT(prelude_complete);
ASSERT(!in_far_code);
in_far_code = true;
near_code_ptr = GetCodePtr();
SetCodePtr(far_code_ptr);
ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}
void BlockOfCode::SwitchToNearCode() {
ASSERT(prelude_complete);
ASSERT(in_far_code);
in_far_code = false;
far_code_ptr = GetCodePtr();
SetCodePtr(near_code_ptr);
}
CodePtr BlockOfCode::GetCodeBegin() const {
return near_code_begin;
}
u8* BlockOfCode::GetRegion() const {
return region;
}
std::size_t BlockOfCode::GetRegionSize() const {
return total_region_size;
};
void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
ASSERT_MSG(GetSpaceLeft() >= alloc_size, "ERR_CODE_IS_TOO_BIG");
void* ret = GetWritableCodePtr();
region_size += alloc_size;
SetCodePtr(GetCodePtr() + alloc_size);
memset(ret, 0, alloc_size);
return ret;
}
void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
u8* ptr = const_cast<u8*>(reinterpret_cast<const u8*>(code_ptr));
ARM64XEmitter::SetCodePtr(ptr);
}
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
size_t current_size = GetCodePtr() - reinterpret_cast<const u8*>(begin);
ASSERT(current_size <= size);
for (u32 i = 0; i < (size - current_size) / 4; i++) {
HINT(Arm64Gen::HINT_NOP);
}
}
//bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
//#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
// return cpu_info.has(type);
//#else
// (void)type;
// return false;
//#endif
//}
} // namespace Dynarmic::BackendA64


@@ -0,0 +1,147 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <type_traits>
#include "backend/A64/callback.h"
#include "backend/A64/constant_pool.h"
#include "backend/A64/jitstate_info.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using CodePtr = const void*;
struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining;
u64 value_in_X27;
};
class BlockOfCode final : public Arm64Gen::ARM64CodeBlock {
public:
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
BlockOfCode(const BlockOfCode&) = delete;
/// Call when external emitters have finished emitting their preludes.
void PreludeComplete();
/// Change permissions to RW. This is required to support systems with W^X enforced.
void EnableWriting();
/// Change permissions to RX. This is required to support systems with W^X enforced.
void DisableWriting();
/// Clears this block of code and resets code pointer to beginning.
void ClearCache();
/// Calculates how much space is remaining to use. This is the minimum of near code and far code.
size_t SpaceRemaining() const;
/// Runs emulated code from code_ptr.
void RunCode(void* jit_state, CodePtr code_ptr) const;
/// Runs emulated code from code_ptr for a single cycle.
void StepCode(void* jit_state, CodePtr code_ptr) const;
/// Code emitter: Returns to dispatcher
void ReturnFromRunCode(bool fpscr_already_exited = false);
/// Code emitter: Returns to dispatcher, forces return to host
void ForceReturnFromRunCode(bool fpscr_already_exited = false);
/// Code emitter: Makes guest FPSR and FPCR the current FPSR and FPCR
void SwitchFpscrOnEntry();
/// Code emitter: Makes saved host FPCR the current FPCR
void SwitchFpscrOnExit();
/// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
/// @note this clobbers ABI caller-save registers
void UpdateTicks();
/// Code emitter: Performs a block lookup based on current state
/// @note this clobbers ABI caller-save registers
void LookupBlock();
u64 MConst(u64 lower, u64 upper = 0);
void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
void PatchConstPool();
/// Far code sits far away from the near code. Execution remains primarily in near code.
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
void SwitchToFarCode();
void SwitchToNearCode();
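// Illustrative use (a sketch, not from the original header): emit the unlikely path out
// of line, then continue in near code.
//     FixupBranch abort = code.B(CC_NEQ);
//     code.SwitchToFarCode();
//     code.SetJumpTarget(abort);
//     ... emit the rarely-taken path, typically ending in a return to the dispatcher ...
//     code.SwitchToNearCode();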
CodePtr GetCodeBegin() const;
u8* GetRegion() const;
std::size_t GetRegionSize() const;
const void* GetReturnFromRunCodeAddress() const {
return return_from_run_code[0];
}
const void* GetForceReturnFromRunCodeAddress() const {
return return_from_run_code[FORCE_RETURN];
}
/// Allocate memory of `size` bytes from the same block of memory the code is in.
/// This is useful for objects that need to be placed close to or within code.
/// The lifetime of this memory is the same as the code around it.
void* AllocateFromCodeSpace(size_t size);
void SetCodePtr(CodePtr code_ptr);
void EnsurePatchLocationSize(CodePtr begin, size_t size);
Arm64Gen::ARM64FloatEmitter fp_emitter;
// ABI registers
static const Arm64Gen::ARM64Reg ABI_RETURN;
static const Arm64Gen::ARM64Reg ABI_RETURN2;
static const Arm64Gen::ARM64Reg ABI_PARAM1;
static const Arm64Gen::ARM64Reg ABI_PARAM2;
static const Arm64Gen::ARM64Reg ABI_PARAM3;
static const Arm64Gen::ARM64Reg ABI_PARAM4;
static const Arm64Gen::ARM64Reg ABI_PARAM5;
static const Arm64Gen::ARM64Reg ABI_PARAM6;
static const Arm64Gen::ARM64Reg ABI_PARAM7;
static const Arm64Gen::ARM64Reg ABI_PARAM8;
static const Arm64Gen::ARM64Reg ABI_SCRATCH1;
static const std::array<Arm64Gen::ARM64Reg, 8> ABI_PARAMS;
// bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;
JitStateInfo GetJitStateInfo() const { return jsi; }
private:
RunCodeCallbacks cb;
JitStateInfo jsi;
bool prelude_complete = false;
CodePtr near_code_begin;
CodePtr far_code_begin;
ConstantPool constant_pool;
bool in_far_code = false;
CodePtr near_code_ptr;
CodePtr far_code_ptr;
using RunCodeFuncType = void(*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr;
static constexpr size_t FPSCR_ALREADY_EXITED = 1 << 0;
static constexpr size_t FORCE_RETURN = 1 << 1;
std::array<const void*, 4> return_from_run_code;
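// The entries of return_from_run_code appear to be indexed by combinations of the two
// flags above (see GenRunCode), selecting the epilogue that matches the FPSCR state and
// whether a forced return was requested.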
void GenRunCode();
//Xbyak::util::Cpu cpu_info;
};
} // namespace Dynarmic::BackendA64


@ -0,0 +1,45 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "backend/A64/block_range_information.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
}
template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
block_ranges.clear();
}
template <typename ProgramCounterType>
std::unordered_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
std::unordered_set<IR::LocationDescriptor> erase_locations;
for (auto invalidate_interval : ranges) {
auto pair = block_ranges.equal_range(invalidate_interval);
for (auto it = pair.first; it != pair.second; ++it) {
for (const auto &descriptor : it->second) {
erase_locations.insert(descriptor);
}
}
}
// TODO: EFFICIENCY: Remove ranges that are to be erased.
return erase_locations;
}
template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>;
} // namespace Dynarmic::BackendA64


@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "frontend/ir/location_descriptor.h"
namespace Dynarmic::BackendA64 {
template <typename ProgramCounterType>
class BlockRangeInformation {
public:
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
void ClearCache();
std::unordered_set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);
private:
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
};
} // namespace Dynarmic::BackendA64


@ -0,0 +1,41 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/callback.h"
#include "backend/A64/block_of_code.h"
namespace Dynarmic::BackendA64 {
Callback::~Callback() = default;
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
l({code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.QuickCallFunction(fn);
}
void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
l(code.ABI_PARAM1, {code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.QuickCallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
l({code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM1, arg);
code.QuickCallFunction(fn);
}
void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> l) const {
#if defined(WIN32) && !defined(__MINGW64__)
l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM1, arg);
#else
l(code.ABI_PARAM1, {code.ABI_PARAM3, code.ABI_PARAM4});
code.MOVI2R(code.ABI_PARAM2, arg);
#endif
code.QuickCallFunction(fn);
}
} // namespace Dynarmic::BackendA64


@ -0,0 +1,54 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <functional>
#include <vector>
#include "backend/A64/emitter/a64_emitter.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
using RegList = std::vector<Arm64Gen::ARM64Reg>;
class BlockOfCode;
class Callback {
public:
virtual ~Callback();
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const = 0;
};
class SimpleCallback final : public Callback {
public:
template <typename Function>
SimpleCallback(Function fn) : fn(reinterpret_cast<void (*)()>(fn)) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:
void (*fn)();
};
class ArgCallback final : public Callback {
public:
template <typename Function>
ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void (*)()>(fn)), arg(arg) {}
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList) {}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Arm64Gen::ARM64Reg, RegList)> fn) const override;
private:
void (*fn)();
u64 arg;
};
} // namespace Dynarmic::BackendA64


@ -0,0 +1,65 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstring>
#include "backend/A64/block_of_code.h"
#include "backend/A64/constant_pool.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
ConstantPool::ConstantPool(BlockOfCode& code) : code(code) {}
void ConstantPool::EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper) {
const auto constant = std::make_tuple(lower, upper);
auto iter = constant_info.find(constant);
if (iter == constant_info.end()) {
struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
patch_info.emplace_back(p);
code.BRK(0);
return;
}
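// The constant is already in the pool: use a PC-relative LDR (literal) if it is within
// range, otherwise drop the stale entry and emit a fresh placeholder to be patched later.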
const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
if (!(offset >= -0x40000 && offset <= 0x3FFFF)) {
constant_info.erase(constant);
struct PatchInfo p = { code.GetCodePtr(), Rt, constant };
patch_info.emplace_back(p);
code.BRK(0x42);
return;
}
DEBUG_ASSERT((offset & 3) == 0);
code.LDR(Rt, offset / 4);
}
void ConstantPool::PatchPool() {
u8* pool_ptr = code.GetWritableCodePtr();
for (PatchInfo patch : patch_info) {
auto iter = constant_info.find(patch.constant);
if (iter == constant_info.end()) {
std::memcpy(pool_ptr, &std::get<0>(patch.constant), sizeof(u64));
std::memcpy(pool_ptr + sizeof(u64), &std::get<1>(patch.constant), sizeof(u64));
iter = constant_info.emplace(patch.constant, pool_ptr).first;
pool_ptr += align_size;
}
code.SetCodePtr(patch.ptr);
const s32 offset = reinterpret_cast<size_t>(iter->second) - reinterpret_cast<size_t>(code.GetCodePtr());
DEBUG_ASSERT((offset & 3) == 0);
code.LDR(patch.Rt, offset / 4);
}
patch_info.clear();
code.SetCodePtr(pool_ptr);
}
void ConstantPool::Clear() {
constant_info.clear();
patch_info.clear();
}
} // namespace Dynarmic::BackendA64


@ -0,0 +1,47 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <map>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
/// ConstantPool allocates a block of memory from BlockOfCode.
/// It places constants into this block of memory, returning the address
/// of the memory location where the constant is placed. If the constant
/// already exists, its memory location is reused.
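/// Rough flow (a sketch of how the pieces fit together): the emitter calls EmitPatchLDR()
/// while generating code, which either emits an LDR (literal) directly or leaves a
/// placeholder; PatchPool() then writes any new constants into the pool and rewrites the
/// placeholders to load from them.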
class ConstantPool final {
public:
ConstantPool(BlockOfCode& code);
void EmitPatchLDR(Arm64Gen::ARM64Reg Rt, u64 lower, u64 upper = 0);
void PatchPool();
void Clear();
private:
static constexpr size_t align_size = 16; // bytes
std::map<std::tuple<u64, u64>, void*> constant_info;
BlockOfCode& code;
struct PatchInfo {
const void* ptr;
Arm64Gen::ARM64Reg Rt;
std::tuple<u64, u64> constant;
};
std::vector<PatchInfo> patch_info;
};
} // namespace Dynarmic::BackendA64


@ -0,0 +1,77 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstring>
#include <memory>
#include <utility>
#include <mp/traits/function_info.h>
#include "backend/A64/callback.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace impl {
template <typename FunctionType, FunctionType mfp>
struct ThunkBuilder;
template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> {
static R Thunk(C* this_, Args... args) {
return (this_->*mfp)(std::forward<Args>(args)...);
}
};
} // namespace impl
template<auto mfp>
ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
}
template<auto mfp>
ArgCallback DevirtualizeWindows(mp::class_type<decltype(mfp)>* this_) {
static_assert(sizeof(mfp) == 8);
return ArgCallback{Common::BitCast<u64>(mfp), reinterpret_cast<u64>(this_)};
}
template<auto mfp>
ArgCallback DevirtualizeAarch64(mp::class_type<decltype(mfp)>* this_) {
struct MemberFunctionPointer {
/// For a non-virtual function, this is a simple function pointer.
/// For a virtual function, it is the virtual table offset in bytes.
u64 ptr;
/// Twice the required adjustment to `this`, plus 1 if the member function is virtual.
u64 adj;
} mfp_struct = Common::BitCast<MemberFunctionPointer>(mfp);
static_assert(sizeof(MemberFunctionPointer) == 16);
static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));
u64 fn_ptr = mfp_struct.ptr;
u64 this_ptr = reinterpret_cast<u64>(this_) + mfp_struct.adj / 2;
if (mfp_struct.adj & 1) {
u64 vtable = Common::BitCastPointee<u64>(this_ptr);
fn_ptr = Common::BitCastPointee<u64>(vtable + fn_ptr);
}
return ArgCallback{fn_ptr, this_ptr};
}
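// Worked example (assuming the AArch64 Itanium C++ ABI layout described above): for a
// virtual member function, `ptr` holds the byte offset of the entry inside the vtable and
// the low bit of `adj` is set, so the code loads the vtable pointer from the adjusted
// `this` and then loads the real function address from vtable + ptr. For a non-virtual
// function the low bit is clear and `ptr` is already the function address.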
template<auto mfp>
ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
#if defined(linux) || defined(__linux) || defined(__linux__)
return DevirtualizeAarch64<mfp>(this_);
#else
return DevirtualizeGeneric<mfp>(this_);
#endif
}
} // namespace Dynarmic::BackendA64


@ -0,0 +1,300 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <unordered_map>
#include <unordered_set>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "backend/A64/hostloc.h"
#include "backend/A64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "common/variant_util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
namespace Dynarmic::BackendA64 {
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: reg_alloc(reg_alloc), block(block) {}
void EmitContext::EraseInstruction(IR::Inst* inst) {
block.Instructions().erase(inst);
inst->ClearArgs();
}
EmitA64::EmitA64(BlockOfCode& code)
: code(code) {}
EmitA64::~EmitA64() = default;
std::optional<typename EmitA64::BlockDescriptor> EmitA64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
auto iter = block_descriptors.find(descriptor);
if (iter == block_descriptors.end())
return std::nullopt;
return iter->second;
}
void EmitA64::EmitVoid(EmitContext&, IR::Inst*) {
}
void EmitA64::EmitBreakpoint(EmitContext&, IR::Inst*) {
code.BRK(0);
}
void EmitA64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!args[0].IsImmediate()) {
ctx.reg_alloc.DefineValue(inst, args[0]);
}
}
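// Pushes a (location descriptor, code pointer) pair onto the return stack buffer (RSB),
// a small ring buffer in the JIT state used to predict return targets; the index is
// advanced and wrapped with rsb_ptr_mask after each push.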
void EmitA64::PushRSBHelper(ARM64Reg loc_desc_reg, ARM64Reg index_reg, IR::LocationDescriptor target) {
auto iter = block_descriptors.find(target);
CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint
: code.GetReturnFromRunCodeAddress();
code.LDR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
code.MOVI2R(loc_desc_reg, target.Value());
patch_information[target].mov_x0.emplace_back(code.GetCodePtr());
EmitPatchMovX0(target_code_ptr);
code.ADD(code.ABI_SCRATCH1, X28, DecodeReg(index_reg), ArithOption{index_reg, ST_LSL, 3});
code.STR(INDEX_UNSIGNED, loc_desc_reg, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_location_descriptors);
code.STR(INDEX_UNSIGNED, X0, code.ABI_SCRATCH1, code.GetJitStateInfo().offsetof_rsb_codeptrs);
code.ADDI2R(DecodeReg(index_reg), DecodeReg(index_reg), 1);
code.ANDI2R(DecodeReg(index_reg), DecodeReg(index_reg), code.GetJitStateInfo().rsb_ptr_mask, code.ABI_SCRATCH1);
code.STR(INDEX_UNSIGNED, DecodeReg(index_reg), X28, code.GetJitStateInfo().offsetof_rsb_ptr);
}
void EmitA64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
ctx.reg_alloc.ScratchGpr({HostLoc::X0});
Arm64Gen::ARM64Reg loc_desc_reg = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg index_reg = ctx.reg_alloc.ScratchGpr();
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
void EmitA64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
ASSERT_MSG(false, "should never happen");
}
void EmitA64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Arm64Gen::ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
Arm64Gen::ARM64Reg value = ctx.reg_alloc.UseGpr(args[0]);
code.CMP(value, ZR);
code.MRS(nzcv, FIELD_NZCV);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
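// Repack NZCV from bits 31..28 of the guest value into the backend's internal flag
// layout: N -> bit 15, Z -> bit 14, C -> bit 8, V -> bit 0.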
if (args[0].IsImmediate()) {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.ScratchGpr());
u32 value = 0;
value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code.MOVI2R(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
} else {
Arm64Gen::ARM64Reg nzcv = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
Arm64Gen::ARM64Reg scratch = DecodeReg(ctx.reg_alloc.ScratchGpr());
// TODO: Optimize
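// The LSR below leaves N,Z,C,V in bits 3..0; multiplying by 0b00010000'10000001
// (1 + 2^7 + 2^12) creates shifted copies so that N lands in bit 15, Z in bit 14 and
// C in bit 8, while V stays in bit 0.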
code.LSR(nzcv, nzcv, 28);
code.MOVI2R(scratch, 0b00010000'10000001);
code.MUL(nzcv, nzcv, scratch);
code.ANDI2R(nzcv, nzcv, 1, scratch);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
}
void EmitA64::EmitAddCycles(size_t cycles) {
ASSERT(cycles < std::numeric_limits<u32>::max());
code.SUBI2R(X26, X26, static_cast<u32>(cycles));
}
FixupBranch EmitA64::EmitCond(IR::Cond cond) {
FixupBranch label;
const Arm64Gen::ARM64Reg cpsr = code.ABI_SCRATCH1;
code.LDR(INDEX_UNSIGNED, DecodeReg(cpsr), X28, code.GetJitStateInfo().offsetof_cpsr_nzcv);
code._MSR(FIELD_NZCV, cpsr);
switch (cond) {
case IR::Cond::EQ: //z
label = code.B(CC_EQ);
break;
case IR::Cond::NE: //!z
label = code.B(CC_NEQ);
break;
case IR::Cond::CS: //c
label = code.B(CC_CS);
break;
case IR::Cond::CC: //!c
label = code.B(CC_CC);
break;
case IR::Cond::MI: //n
label = code.B(CC_MI);
break;
case IR::Cond::PL: //!n
label = code.B(CC_PL);
break;
case IR::Cond::VS: //v
label = code.B(CC_VS);
break;
case IR::Cond::VC: //!v
label = code.B(CC_VC);
break;
case IR::Cond::HI: //c & !z
label = code.B(CC_HI);
break;
case IR::Cond::LS: //!c | z
label = code.B(CC_LS);
break;
case IR::Cond::GE: // n == v
label = code.B(CC_GE);
break;
case IR::Cond::LT: // n != v
label = code.B(CC_LT);
break;
case IR::Cond::GT: // !z & (n == v)
label = code.B(CC_GT);
break;
case IR::Cond::LE: // z | (n != v)
label = code.B(CC_LE);
break;
default:
ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
break;
}
return label;
}
void EmitA64::EmitCondPrelude(const IR::Block& block) {
if (block.GetCondition() == IR::Cond::AL) {
ASSERT(!block.HasConditionFailedLocation());
return;
}
ASSERT(block.HasConditionFailedLocation());
FixupBranch pass = EmitCond(block.GetCondition());
EmitAddCycles(block.ConditionFailedCycleCount());
EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());
code.SetJumpTarget(pass);
}
EmitA64::BlockDescriptor EmitA64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
PerfMapRegister(entrypoint, code.GetCodePtr(), LocationDescriptorToFriendlyName(descriptor));
Patch(descriptor, entrypoint);
BlockDescriptor block_desc{entrypoint, size};
block_descriptors.emplace(descriptor.Value(), block_desc);
return block_desc;
}
void EmitA64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
Common::VisitVariant<void>(terminal, [this, &initial_location](auto x) {
using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location);
} else {
ASSERT_MSG(false, "Invalid terminal");
}
});
}
void EmitA64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
const CodePtr save_code_ptr = code.GetCodePtr();
const PatchInformation& patch_info = patch_information[desc];
for (CodePtr location : patch_info.jg) {
code.SetCodePtr(location);
EmitPatchJg(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.jmp) {
code.SetCodePtr(location);
EmitPatchJmp(desc, bb);
code.FlushIcache();
}
for (CodePtr location : patch_info.mov_x0) {
code.SetCodePtr(location);
EmitPatchMovX0(bb);
code.FlushIcache();
}
code.SetCodePtr(save_code_ptr);
}
void EmitA64::Unpatch(const IR::LocationDescriptor& desc) {
Patch(desc, nullptr);
}
void EmitA64::ClearCache() {
block_descriptors.clear();
patch_information.clear();
PerfMapClear();
}
void EmitA64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); };
for (const auto &descriptor : locations) {
auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) {
continue;
}
if (patch_information.count(descriptor)) {
Unpatch(descriptor);
}
block_descriptors.erase(it);
}
}
} // namespace Dynarmic::BackendA64

src/backend/A64/emit_a64.h

@ -0,0 +1,125 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "backend/A64/reg_alloc.h"
#include "backend/A64/emitter/a64_emitter.h"
#include "common/bit_util.h"
#include "common/fp/rounding_mode.h"
#include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h"
namespace Dynarmic::IR {
class Block;
class Inst;
} // namespace Dynarmic::IR
namespace Dynarmic::BackendA64 {
class BlockOfCode;
using namespace Arm64Gen;
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
void EraseInstruction(IR::Inst* inst);
virtual FP::RoundingMode FPSCR_RMode() const = 0;
virtual u32 FPCR() const = 0;
virtual bool FPSCR_FTZ() const = 0;
virtual bool FPSCR_DN() const = 0;
virtual bool AccurateNaN() const { return true; }
RegAlloc& reg_alloc;
IR::Block& block;
};
class EmitA64 {
public:
struct BlockDescriptor {
CodePtr entrypoint; // Entrypoint of emitted code
size_t size; // Length in bytes of emitted code
};
EmitA64(BlockOfCode& code);
virtual ~EmitA64();
/// Looks up an emitted host block in the cache.
std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;
/// Empties the entire cache.
virtual void ClearCache();
/// Invalidates a selection of basic blocks.
void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);
protected:
// Microinstruction emitters
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...)
#define A64OPC(...)
#include "backend/A64/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
// Helpers
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
void EmitAddCycles(size_t cycles);
FixupBranch EmitCond(IR::Cond cond);
void EmitCondPrelude(const IR::Block& block);
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
void PushRSBHelper(Arm64Gen::ARM64Reg loc_desc_reg, Arm64Gen::ARM64Reg index_reg, IR::LocationDescriptor target);
// Terminal instruction emitters
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location);
virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) = 0;
// Patching
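// Per-target lists of code locations that may need re-pointing: Patch() rewrites them
// when a target block is compiled; Unpatch() re-patches them with a null target when it
// is invalidated.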
struct PatchInformation {
std::vector<CodePtr> jg;
std::vector<CodePtr> jmp;
std::vector<CodePtr> mov_x0;
};
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
virtual void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovX0(CodePtr target_code_ptr = nullptr) = 0;
// State
BlockOfCode& code;
std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
};
} // namespace Dynarmic::BackendA64

File diff suppressed because it is too large.


@ -0,0 +1,471 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <optional>
#include <type_traits>
#include <utility>
#include "backend/A64/abi.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
Arm64Gen::RoundingMode ConvertRoundingModeToA64RoundingMode(FP::RoundingMode rounding_mode) {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:
return RoundingMode::ROUND_N;
case FP::RoundingMode::TowardsPlusInfinity:
return RoundingMode::ROUND_P;
case FP::RoundingMode::TowardsMinusInfinity:
return RoundingMode::ROUND_M;
case FP::RoundingMode::TowardsZero:
return RoundingMode::ROUND_Z;
case FP::RoundingMode::ToNearest_TieAwayFromZero:
return RoundingMode::ROUND_A;
default:
UNREACHABLE();
}
}
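// Shared helpers: FPTwoOp/FPThreeOp fetch the operands, apply `fn` (either an
// ARM64FloatEmitter member function or a plain callable over the encoded registers) and
// define the result register for the instruction.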
template <size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.fp_emitter.*fn)(result, result);
} else {
fn(result);
}
ctx.reg_alloc.DefineValue(inst, result);
}
template <size_t fsize, typename Function>
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = ctx.reg_alloc.UseScratchFpr(args[0]);
ARM64Reg operand = ctx.reg_alloc.UseScratchFpr(args[1]);
result = fsize == 32 ? EncodeRegToSingle(result) : EncodeRegToDouble(result);
operand = fsize == 32 ? EncodeRegToSingle(operand) : EncodeRegToDouble(operand);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.fp_emitter.*fn)(result, result, operand);
}
else {
fn(result, result, operand);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
//void EmitA64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pand(result, code.MConst(xword, f16_non_sign_mask));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
void EmitA64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FABS(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FABS(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
//void EmitA64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
// auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// const ARM64Reg result = ctx.reg_alloc.UseScratchXmm(args[0]);
//
// code.pxor(result, code.MConst(xword, f16_negative_zero));
//
// ctx.reg_alloc.DefineValue(inst, result);
//}
void EmitA64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FNEG(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FNEG(result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FADD);
}
void EmitA64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FDIV);
}
void EmitA64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FMUL);
}
void EmitA64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
FPTwoOp<32>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}
void EmitA64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
FPTwoOp<64>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSQRT);
}
void EmitA64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<32, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
void EmitA64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp<64, void(Arm64Gen::ARM64FloatEmitter::*)(ARM64Reg, ARM64Reg, ARM64Reg)>(code, ctx, inst, &Arm64Gen::ARM64FloatEmitter::FSUB);
}
static ARM64Reg SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
ARM64Reg nzcv = ctx.reg_alloc.ScratchGpr();
// On AArch64, FCMP/FCMPE write their result straight to the integer NZCV flags, so read it back from there.
code.MRS(nzcv, FIELD_NZCV);
return nzcv;
}
void EmitA64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg reg_a = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[0]));
ARM64Reg reg_b = EncodeRegToSingle(ctx.reg_alloc.UseFpr(args[1]));
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code.fp_emitter.FCMPE(reg_a, reg_b);
} else {
code.fp_emitter.FCMP(reg_a, reg_b);
}
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg reg_a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[0]));
const ARM64Reg reg_b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
bool exc_on_qnan = args[2].GetImmediateU1();
if (exc_on_qnan) {
code.fp_emitter.FCMPE(reg_a, reg_b);
} else {
code.fp_emitter.FCMP(reg_a, reg_b);
}
ARM64Reg nzcv = SetFpscrNzcvFromFlags(code, ctx);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
void EmitA64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(64, 16, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(32, 16, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(64, 32, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(16, 32, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(16, 64, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
code.fp_emitter.FCVT(32, 64, result, result);
ctx.reg_alloc.DefineValue(inst, result);
}
template<size_t fsize, bool unsigned_, size_t isize>
static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t fbits = args[1].GetImmediateU8();
const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const auto round_imm = ConvertRoundingModeToA64RoundingMode(rounding_mode);
ASSERT_MSG(fbits == 0, "fixed point conversions are not supported yet");
ARM64Reg src = ctx.reg_alloc.UseScratchFpr(args[0]);
ARM64Reg result = ctx.reg_alloc.ScratchGpr();
src = fsize == 64 ? EncodeRegToDouble(src) : EncodeRegToSingle(src);
result = isize == 64 ? result : DecodeReg(result);
if constexpr (unsigned_) {
code.fp_emitter.FCVTU(result, src, round_imm);
}
else {
code.fp_emitter.FCVTS(result, src, round_imm);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, false, 32>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, false, 64>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, true, 32>(code, ctx, inst);
}
void EmitA64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<64, true, 64>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, false, 32>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, false, 64>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, true, 32>(code, ctx, inst);
}
void EmitA64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
EmitFPToFixed<32, true, 64>(code, ctx, inst);
}
void EmitA64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.SCVTF(result, from, fbits);
}
else {
code.fp_emitter.SCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg from = ctx.reg_alloc.UseGpr(args[0]);
const ARM64Reg result = EncodeRegToSingle(ctx.reg_alloc.ScratchFpr());
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
ASSERT(rounding_mode == ctx.FPSCR_RMode());
if (fbits != 0) {
code.fp_emitter.UCVTF(result, from, fbits);
}
else {
code.fp_emitter.UCVTF(result, from);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64


@ -0,0 +1,469 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
void EmitA64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(B, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHI(B, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQADD(B, ge, a, b);
code.fp_emitter.CMGE_zero(B, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.ADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg sum = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.ADD(H, sum, sum, b);
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHI(H, ge, b, sum);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
ctx.reg_alloc.DefineValue(inst, sum);
}
void EmitA64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQADD(H, ge, a, b);
code.fp_emitter.CMGE_zero(H, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.ADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHS(B, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQSUB(B, ge, a, b);
code.fp_emitter.CMGE_zero(B, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.CMHS(H, ge, a, b);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if (ge_inst) {
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.ScratchFpr());
code.fp_emitter.SQSUB(H, ge, a, b);
code.fp_emitter.CMGE_zero(H, ge, ge);
ctx.reg_alloc.DefineValue(ge_inst, ge);
ctx.EraseInstruction(ge_inst);
}
code.fp_emitter.SUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SHSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const ARM64Reg reg_a_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[0]));
const ARM64Reg reg_b_hi = DecodeReg(ctx.reg_alloc.UseScratchGpr(args[1]));
const ARM64Reg reg_a_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_b_lo = DecodeReg(ctx.reg_alloc.ScratchGpr());
ARM64Reg reg_sum, reg_diff;
if (is_signed) {
code.SXTH(reg_a_lo, reg_a_hi);
code.SXTH(reg_b_lo, reg_b_hi);
code.ASR(reg_a_hi, reg_a_hi, 16);
code.ASR(reg_b_hi, reg_b_hi, 16);
} else {
code.UXTH(reg_a_lo, reg_a_hi);
code.UXTH(reg_b_lo, reg_b_hi);
code.LSR(reg_a_hi, reg_a_hi, 16);
code.LSR(reg_b_hi, reg_b_hi, 16);
}
if (hi_is_sum) {
code.SUB(reg_a_lo, reg_a_lo, reg_b_hi);
code.ADD(reg_a_hi, reg_a_hi, reg_b_lo);
reg_diff = reg_a_lo;
reg_sum = reg_a_hi;
} else {
code.ADD(reg_a_lo, reg_a_lo, reg_b_hi);
code.SUB(reg_a_hi, reg_a_hi, reg_b_lo);
reg_diff = reg_a_hi;
reg_sum = reg_a_lo;
}
if (ge_inst) {
// The reg_b registers are no longer required.
const ARM64Reg ge_sum = reg_b_hi;
const ARM64Reg ge_diff = reg_b_lo;
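// GE computation: for the sum, unsigned GE is the carry out of the 16-bit addition
// (bit 16 of the result), while signed GE is set when the sum is non-negative; for the
// difference, GE is set when the result is non-negative (no borrow in the unsigned case).
// The bit is broadcast across the register with an arithmetic shift and masked into the
// correct halfword below.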
if (!is_signed) {
code.LSL(ge_sum, reg_sum, 15);
code.ASR(ge_sum, ge_sum, 31);
} else {
code.MVN(ge_sum, reg_sum);
code.ASR(ge_sum, ge_sum, 31);
}
code.MVN(ge_diff, reg_diff);
code.ASR(ge_diff, ge_diff, 31);
code.ANDI2R(ge_sum, ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
code.ANDI2R(ge_diff, ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
code.ORR(ge_sum, ge_sum, ge_diff);
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
ctx.EraseInstruction(ge_inst);
}
if (is_halving) {
code.LSR(reg_a_hi, reg_a_hi, 1);
code.LSR(reg_a_lo, reg_a_lo, 1);
}
// reg_a_lo now contains the low halfword and reg_a_hi the high halfword.
// Merge them.
code.BFM(reg_a_lo, reg_a_hi, 16, 15);
ctx.reg_alloc.DefineValue(inst, reg_a_lo);
}
void EmitA64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, false, false);
}
void EmitA64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, true, false);
}
void EmitA64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, false, false);
}
void EmitA64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, true, false);
}
void EmitA64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, false, true);
}
void EmitA64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, true, true, true);
}
void EmitA64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, false, true);
}
void EmitA64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
EmitPackedSubAdd(code, ctx, inst, false, true, true);
}
void EmitA64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQADD(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQSUB(B, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQADD(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UQSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.SQSUB(H, a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
code.fp_emitter.UABD(B, a, a, b);
code.fp_emitter.UADDLV(B, a, a);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitA64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const ARM64Reg ge = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
const ARM64Reg a = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
const ARM64Reg b = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[2]));
code.fp_emitter.BSL(ge, b, a);
ctx.reg_alloc.DefineValue(inst, ge);
}
} // namespace Dynarmic::BackendA64


@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <limits>
#include "backend/A64/block_of_code.h"
#include "backend/A64/emit_a64.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
namespace Dynarmic::BackendA64 {
namespace {
enum class Op {
Add,
Sub,
};
template<Op op, size_t size>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ARM64Reg result = EncodeRegToDouble(ctx.reg_alloc.UseScratchFpr(args[0]));
ARM64Reg addend = EncodeRegToDouble(ctx.reg_alloc.UseFpr(args[1]));
if constexpr (op == Op::Add) {
code.fp_emitter.SQADD(size, result, result, addend);
}
else {
code.fp_emitter.SQSUB(size, result, result, addend);
}
if (overflow_inst) {
ARM64Reg overflow = ctx.reg_alloc.ScratchGpr();
code.MRS(overflow, FIELD_FPSR);
code.UBFX(overflow, overflow, 27, 1);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // anonymous namespace
void EmitA64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
void EmitA64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N >= 1 && N <= 32);
if (N == 32) {
if (overflow_inst) {
const auto no_overflow = IR::Value(false);
overflow_inst->ReplaceUsesWith(no_overflow);
}
ctx.reg_alloc.DefineValue(inst, args[0]);
return;
}
const u32 mask = (1u << N) - 1;
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = 1u << (N - 1);
const u32 sext_negative_saturated_value = Common::SignExtend(N, negative_saturated_value);
const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg tmp = DecodeReg(ctx.reg_alloc.ScratchGpr());
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
code.ADDI2R(overflow, reg_a, negative_saturated_value, overflow);
// Put the appropriate saturated value in result
code.MOVI2R(tmp, positive_saturated_value);
code.CMP(reg_a, tmp);
code.MOVI2R(result, sext_negative_saturated_value);
code.CSEL(result, tmp, result, CC_GT);
// Do the saturation
code.CMPI2R(overflow, mask, tmp);
code.CSEL(result, reg_a, result, CC_LS);
if (overflow_inst) {
code.CSET(overflow, CC_HI);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitA64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t N = args[1].GetImmediateU8();
ASSERT(N <= 31);
const u32 saturated_value = (1u << N) - 1;
const ARM64Reg result = DecodeReg(ctx.reg_alloc.ScratchGpr());
const ARM64Reg reg_a = DecodeReg(ctx.reg_alloc.UseGpr(args[0]));
const ARM64Reg overflow = DecodeReg(ctx.reg_alloc.ScratchGpr());
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code.MOVI2R(result, saturated_value);
code.CMP(reg_a, result);
code.CSEL(result, WZR, result, CC_LE);
code.CSEL(result, reg_a, result, CC_LS);
if (overflow_inst) {
code.CSET(overflow, CC_HI);
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendA64
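As a cross-check of the CSEL sequences above, here is a scalar model of what EmitSignedSaturation and EmitUnsignedSaturation compute: saturate a signed 32-bit value to an N-bit signed or unsigned range and report whether clamping happened. Illustrative only, not part of the backend:

#include <cstdint>
#include <utility>

// Signed saturation to N bits, 1 <= N <= 32. The second element of the pair is the overflow flag.
std::pair<std::int32_t, bool> model_signed_saturation(std::int32_t x, unsigned N) {
    const std::int64_t max = (std::int64_t(1) << (N - 1)) - 1;
    const std::int64_t min = -(std::int64_t(1) << (N - 1));
    if (x > max) return {static_cast<std::int32_t>(max), true};
    if (x < min) return {static_cast<std::int32_t>(min), true};
    return {x, false};
}

// Unsigned saturation to N bits, N <= 31. Negative inputs clamp to zero.
std::pair<std::uint32_t, bool> model_unsigned_saturation(std::int32_t x, unsigned N) {
    const std::int64_t max = (std::int64_t(1) << N) - 1;
    if (x > max) return {static_cast<std::uint32_t>(max), true};
    if (x < 0) return {0u, true};
    return {static_cast<std::uint32_t>(x), false};
}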

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,28 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
enum CCFlags {
CC_EQ = 0, // Equal
CC_NEQ, // Not equal
CC_CS, // Carry Set
CC_CC, // Carry Clear
CC_MI, // Minus (Negative)
CC_PL, // Plus
CC_VS, // Overflow
CC_VC, // No Overflow
CC_HI, // Unsigned higher
CC_LS, // Unsigned lower or same
CC_GE, // Signed greater than or equal
CC_LT, // Signed less than
CC_GT, // Signed greater than
CC_LE, // Signed less than or equal
CC_AL, // Always (unconditional); condition code 14
CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same
CC_LO = CC_CC, // Alias of CC_CC Unsigned lower
};
const u32 NO_COND = 0xE0000000;
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,135 @@
// Copyright 2014 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions
// without having to prefix them with gen-> or something similar. Example
// implementation: class JIT : public CodeBlock<ARMXEmitter> {}
template <class T>
class CodeBlock : public T {
private:
// A privately used function to set the executable RAM space to something
// invalid. For debugging usefulness it should be used to set the RAM to a
// host specific breakpoint instruction
virtual void PoisonMemory() = 0;
protected:
u8* region = nullptr;
// Size of region we can use.
size_t region_size = 0;
// Original size of the region we allocated.
size_t total_region_size = 0;
bool m_is_child = false;
std::vector<CodeBlock*> m_children;
public:
CodeBlock() = default;
virtual ~CodeBlock() {
if (region)
FreeCodeSpace();
}
CodeBlock(const CodeBlock&) = delete;
CodeBlock& operator=(const CodeBlock&) = delete;
CodeBlock(CodeBlock&&) = delete;
CodeBlock& operator=(CodeBlock&&) = delete;
// Call this before you generate any code.
void AllocCodeSpace(size_t size) {
region_size = size;
total_region_size = size;
#if defined(_WIN32)
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
if (ptr == MAP_FAILED)
ptr = nullptr;
#endif
ASSERT_MSG(ptr != nullptr, "Failed to allocate executable memory");
region = static_cast<u8*>(ptr);
T::SetCodePtr(region);
}
// Always clear code space with breakpoints, so that if someone accidentally
// executes uninitialized, it just breaks into the debugger.
void ClearCodeSpace() {
PoisonMemory();
ResetCodePtr();
}
// Call this when shutting down. Don't rely on the destructor, even though
// it'll do the job.
void FreeCodeSpace() {
ASSERT(!m_is_child);
ASSERT(munmap(region, total_region_size) == 0);
region = nullptr;
region_size = 0;
total_region_size = 0;
for (CodeBlock* child : m_children) {
child->region = nullptr;
child->region_size = 0;
child->total_region_size = 0;
}
}
bool IsInSpace(const u8* ptr) const {
return ptr >= region && ptr < (region + region_size);
}
// Cannot currently be undone. Will write protect the entire code region.
// Start over if you need to change the code (call FreeCodeSpace(),
// AllocCodeSpace()).
void WriteProtect() {
ASSERT(mprotect(region, region_size, PROT_READ | PROT_EXEC) == 0);
}
void ResetCodePtr() {
T::SetCodePtr(region);
}
size_t GetSpaceLeft() const {
ASSERT(static_cast<size_t>(T::GetCodePtr() - region) < region_size);
return region_size - (T::GetCodePtr() - region);
}
bool IsAlmostFull() const {
// This should be bigger than the biggest block ever.
return GetSpaceLeft() < 0x10000;
}
bool HasChildren() const {
return region_size != total_region_size;
}
u8* AllocChildCodeSpace(size_t child_size) {
ASSERT_MSG(child_size < GetSpaceLeft(), "Insufficient space for child allocation.");
u8* child_region = region + region_size - child_size;
region_size -= child_size;
return child_region;
}
void AddChildCodeSpace(CodeBlock* child, size_t child_size) {
u8* child_region = AllocChildCodeSpace(child_size);
child->m_is_child = true;
child->region = child_region;
child->region_size = child_size;
child->total_region_size = child_size;
child->ResetCodePtr();
m_children.emplace_back(child);
}
};
} // namespace Dynarmic::BackendA64
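The comment at the top of the class describes the intended usage pattern; a hedged, self-contained sketch follows. DummyEmitter and DummyBlock are hypothetical, and the include path is assumed from the backend layout:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include "backend/A64/emitter/code_block.h"  // assumed path

// Hypothetical minimal emitter exposing the interface CodeBlock<T> relies on.
class DummyEmitter {
public:
    void SetCodePtr(std::uint8_t* ptr) { code = ptr; }
    const std::uint8_t* GetCodePtr() const { return code; }
protected:
    std::uint8_t* code = nullptr;
};

// In the spirit of `class JIT : public CodeBlock<ARMXEmitter>`.
class DummyBlock final : public Dynarmic::BackendA64::CodeBlock<DummyEmitter> {
    void PoisonMemory() override {
        // Fill the region with A64 BRK #0 (0xD4200000) so stray execution traps.
        const std::uint32_t brk = 0xD4200000;
        for (std::size_t i = 0; i + sizeof(brk) <= region_size; i += sizeof(brk))
            std::memcpy(region + i, &brk, sizeof(brk));
    }
};

// Usage: DummyBlock block; block.AllocCodeSpace(64 * 1024); /* emit code */ block.FreeCodeSpace();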

View File

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <memory>
#include <functional>
#include "backend/A64/a32_jitstate.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
class BlockOfCode;
struct A64State {
std::array<u64, 32> X;
std::array<std::array<u64, 2>, 16> Q;
};
static_assert(sizeof(A64State) == sizeof(A64State::X) + sizeof(A64State::Q));
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode& code, std::function<void(CodePtr)> segv_callback = nullptr);
bool SupportsFastmem() const;
private:
struct Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,25 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/exception_handler.h"
namespace Dynarmic::BackendA64 {
struct ExceptionHandler::Impl final {
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void ExceptionHandler::Register(BlockOfCode&, std::function<void(CodePtr)>) {
// Do nothing
}
bool ExceptionHandler::SupportsFastmem() const {
return false;
}
} // namespace Dynarmic::BackendA64

View File

@ -0,0 +1,161 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include <csignal>
#include <cstdlib>
#include <mutex>
#include <vector>
#include <ucontext.h>
#include <fmt/format.h>
#include "backend/A64/a32_jitstate.h"
#include "backend/A64/block_of_code.h"
#include "backend/A64/exception_handler.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
#include "jni.h"
namespace Dynarmic::BackendA64 {
namespace {
struct CodeBlockInfo {
BlockOfCode* block;
std::function<void(CodePtr)> callback;
};
class SigHandler {
public:
SigHandler();
~SigHandler();
void AddCodeBlock(CodeBlockInfo info);
void RemoveCodeBlock(CodePtr PC);
private:
auto FindCodeBlockInfo(CodePtr PC) {
return std::find_if(code_block_infos.begin(), code_block_infos.end(),
[&](const CodeBlockInfo& x) { return x.block->GetRegion() <= PC && PC < x.block->GetRegion() + x.block->GetRegionSize(); });
}
std::vector<CodeBlockInfo> code_block_infos;
std::mutex code_block_infos_mutex;
struct sigaction old_sa_segv;
struct sigaction old_sa_bus;
static void SigAction(int sig, siginfo_t* info, void* raw_context);
};
SigHandler sig_handler;
SigHandler::SigHandler() {
// Method below from dolphin.
constexpr std::size_t signal_stack_size =
static_cast<std::size_t>(std::max(SIGSTKSZ, 2 * 1024 * 1024));
stack_t signal_stack;
signal_stack.ss_sp = malloc(signal_stack_size);
signal_stack.ss_size = signal_stack_size;
signal_stack.ss_flags = 0;
ASSERT_MSG(sigaltstack(&signal_stack, nullptr) == 0,
"dynarmic: POSIX SigHandler: init failure at sigaltstack");
struct sigaction sa;
sa.sa_handler = nullptr;
sa.sa_sigaction = &SigHandler::SigAction;
sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, &old_sa_segv);
}
SigHandler::~SigHandler() {
// No cleanup required.
}
void SigHandler::AddCodeBlock(CodeBlockInfo cb) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
ASSERT(FindCodeBlockInfo(cb.block->GetRegion()) == code_block_infos.end());
code_block_infos.push_back(std::move(cb));
}
void SigHandler::RemoveCodeBlock(CodePtr PC) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
const auto iter = FindCodeBlockInfo(PC);
ASSERT(iter != code_block_infos.end());
code_block_infos.erase(iter);
}
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
ASSERT(sig == SIGSEGV || sig == SIGBUS);
std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
const auto PC = reinterpret_cast<CodePtr>(static_cast<ucontext_t*>(raw_context)->uc_mcontext.pc);
const auto iter = sig_handler.FindCodeBlockInfo(PC);
if (iter != sig_handler.code_block_infos.end()) {
iter->callback(PC);
return;
}
fmt::print(
stderr,
"dynarmic: POSIX SigHandler: Exception was not in registered code blocks (PC {})\n",
PC);
struct sigaction* retry_sa =
sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
if (retry_sa->sa_flags & SA_SIGINFO) {
retry_sa->sa_sigaction(sig, info, raw_context);
return;
}
if (retry_sa->sa_handler == SIG_DFL) {
signal(sig, SIG_DFL);
return;
}
if (retry_sa->sa_handler == SIG_IGN) {
return;
}
retry_sa->sa_handler(sig);
}
} // anonymous namespace
struct ExceptionHandler::Impl final {
Impl(BlockOfCode& code, std::function<void(CodePtr)> cb) {
code_begin = code.GetRegion();
sig_handler.AddCodeBlock({&code, std::move(cb)});
}
~Impl() {
sig_handler.RemoveCodeBlock(code_begin);
}
private:
CodePtr code_begin;
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
void ExceptionHandler::Register(BlockOfCode& code, std::function<void(CodePtr)> cb) {
if (cb)
impl = std::make_unique<Impl>(code, std::move(cb));
}
bool ExceptionHandler::SupportsFastmem() const {
return static_cast<bool>(impl);
}
} // namespace Dynarmic::BackendA64
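For context, a hedged sketch of how this handler is driven from the rest of the backend: the emitter registers a recovery callback for its code region, and any SIGSEGV whose PC falls inside that region is routed to it. The wrapper function name is hypothetical; the real wiring lives in the fastmem support of a32_emit_a64.cpp.

#include "backend/A64/block_of_code.h"
#include "backend/A64/exception_handler.h"

namespace Dynarmic::BackendA64 {

// Hypothetical helper: install a fastmem fault callback for an existing BlockOfCode.
void ExampleInstallHandler(BlockOfCode& code, ExceptionHandler& handler) {
    handler.Register(code, [](auto faulting_pc) {
        // A real callback would locate the faulting load/store and patch or
        // redirect it; this sketch intentionally does nothing.
        (void)faulting_pc;
    });
}

} // namespace Dynarmic::BackendA64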

View File

@ -0,0 +1,21 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend/A64/hostloc.h"
namespace Dynarmic::BackendA64 {
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc) {
ASSERT(HostLocIsGPR(loc));
return static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(Arm64Gen::X0) + static_cast<int>(loc));
}
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc) {
ASSERT(HostLocIsFPR(loc));
return EncodeRegToQuad(static_cast<Arm64Gen::ARM64Reg>(static_cast<int>(loc) - static_cast<int>(HostLoc::Q0)));
}
} // namespace Dynarmic::BackendA64

src/backend/A64/hostloc.h (new file, 176 lines)
View File

@ -0,0 +1,176 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include "backend/A64/emitter/a64_emitter.h"
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
enum class HostLoc {
// Ordering of the registers is intentional. See also: HostLocToA64.
// 64bit GPR registers
X0,
X1,
X2,
X3,
X4,
X5,
X6,
X7,
X8,
X9,
X10,
X11,
X12,
X13,
X14,
X15,
X16,
X17,
X18,
X19,
X20,
X21,
X22,
X23,
X24,
X25,
X26,
X27,
X28,
X29,
X30,
SP, // 64bit stack pointer
// Qword FPR registers
Q0,
Q1,
Q2,
Q3,
Q4,
Q5,
Q6,
Q7,
Q8,
Q9,
Q10,
Q11,
Q12,
Q13,
Q14,
Q15,
Q16,
Q17,
Q18,
Q19,
Q20,
Q21,
Q22,
Q23,
Q24,
Q25,
Q26,
Q27,
Q28,
Q29,
Q30,
Q31,
FirstSpill,
};
constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
inline bool HostLocIsGPR(HostLoc reg) {
return reg >= HostLoc::X0 && reg <= HostLoc::X30;
}
inline bool HostLocIsFPR(HostLoc reg) {
return reg >= HostLoc::Q0 && reg <= HostLoc::Q31;
}
inline bool HostLocIsRegister(HostLoc reg) {
return HostLocIsGPR(reg) || HostLocIsFPR(reg);
}
inline HostLoc HostLocRegIdx(int idx) {
ASSERT(idx >= 0 && idx <= 30);
return static_cast<HostLoc>(idx);
}
inline HostLoc HostLocFprIdx(int idx) {
ASSERT(idx >= 0 && idx <= 31);
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::Q0) + idx);
}
inline HostLoc HostLocSpill(size_t i) {
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
}
inline bool HostLocIsSpill(HostLoc reg) {
return reg >= HostLoc::FirstSpill;
}
inline size_t HostLocBitWidth(HostLoc loc) {
if (HostLocIsGPR(loc))
return 64;
if (HostLocIsFPR(loc))
return 128;
if (HostLocIsSpill(loc))
return 128;
UNREACHABLE();
}
using HostLocList = std::initializer_list<HostLoc>;
// X18 may be reserved (Windows and iOS).
// X26 holds the cycle counter.
// X27 holds an emulated memory related pointer.
// X28 is used for holding the JitState.
// X30 is the link register.
// In order of desirability, based first on the ABI:
constexpr HostLocList any_gpr = {
HostLoc::X19, HostLoc::X20, HostLoc::X21, HostLoc::X22, HostLoc::X23,
HostLoc::X24, HostLoc::X25,
HostLoc::X8, HostLoc::X9, HostLoc::X10, HostLoc::X11, HostLoc::X12,
HostLoc::X13, HostLoc::X14, HostLoc::X15, HostLoc::X16, HostLoc::X17,
HostLoc::X7, HostLoc::X6, HostLoc::X5, HostLoc::X4, HostLoc::X3,
HostLoc::X2, HostLoc::X1, HostLoc::X0,
};
constexpr HostLocList any_fpr = {
HostLoc::Q8, HostLoc::Q9, HostLoc::Q10, HostLoc::Q11, HostLoc::Q12, HostLoc::Q13,
HostLoc::Q14, HostLoc::Q15,
HostLoc::Q16, HostLoc::Q17, HostLoc::Q18, HostLoc::Q19, HostLoc::Q20, HostLoc::Q21,
HostLoc::Q22, HostLoc::Q23, HostLoc::Q24, HostLoc::Q25, HostLoc::Q26, HostLoc::Q27,
HostLoc::Q28, HostLoc::Q29, HostLoc::Q30, HostLoc::Q31,
HostLoc::Q7, HostLoc::Q6, HostLoc::Q5, HostLoc::Q4, HostLoc::Q3, HostLoc::Q2,
HostLoc::Q1, HostLoc::Q0,
};
Arm64Gen::ARM64Reg HostLocToReg64(HostLoc loc);
Arm64Gen::ARM64Reg HostLocToFpr(HostLoc loc);
template <typename JitStateType>
size_t SpillToOpArg(HostLoc loc) {
ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < JitStateType::SpillCount,
"Spill index greater than number of available spill locations");
return JitStateType::GetSpillLocationOffsetFromIndex(i);
}
} // namespace Dynarmic::BackendA64
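The conversions in hostloc.cpp depend on this exact ordering: a GPR HostLoc's integer value is its Xn index, and Q0 starts immediately after SP. Two illustrative compile-time checks (not present in the source) make that invariant explicit:

#include "backend/A64/hostloc.h"

// HostLoc::Xn maps to index n, so HostLocToReg64 can add a plain offset.
static_assert(static_cast<int>(Dynarmic::BackendA64::HostLoc::X5) == 5);
// FPR entries begin right after SP, which HostLocToFpr relies on.
static_assert(static_cast<int>(Dynarmic::BackendA64::HostLoc::Q0) ==
              static_cast<int>(Dynarmic::BackendA64::HostLoc::SP) + 1);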

View File

@ -0,0 +1,44 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
namespace Dynarmic::BackendA64 {
struct JitStateInfo {
template <typename JitStateType>
JitStateInfo(const JitStateType&)
: offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
, offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
, offsetof_save_host_FPCR(offsetof(JitStateType, save_host_FPCR))
, offsetof_guest_fpcr(offsetof(JitStateType, guest_fpcr))
, offsetof_guest_fpsr(offsetof(JitStateType, guest_fpsr))
, offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
, rsb_ptr_mask(JitStateType::RSBPtrMask)
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
{}
const size_t offsetof_cycles_remaining;
const size_t offsetof_cycles_to_run;
const size_t offsetof_save_host_FPCR;
const size_t offsetof_guest_fpcr;
const size_t offsetof_guest_fpsr;
const size_t offsetof_rsb_ptr;
const size_t rsb_ptr_mask;
const size_t offsetof_rsb_location_descriptors;
const size_t offsetof_rsb_codeptrs;
const size_t offsetof_cpsr_nzcv;
const size_t offsetof_fpsr_exc;
const size_t offsetof_fpsr_qc;
};
} // namespace Dynarmic::BackendA64
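JitStateInfo only records offsetof() values, so any struct exposing these members can be passed to it. A hedged sketch with a hypothetical state type (the real one is A32JitState; the field sizes here are illustrative and the header path is assumed):

#include <array>
#include <cstddef>
#include "backend/A64/jitstate_info.h"  // assumed path
#include "common/common_types.h"

// Hypothetical jit-state layout; member names follow what JitStateInfo expects.
struct ExampleJitState {
    static constexpr std::size_t RSBSize = 8;
    static constexpr u64 RSBPtrMask = RSBSize - 1;
    u64 cycles_remaining = 0;
    u64 cycles_to_run = 0;
    u32 save_host_FPCR = 0;
    u32 guest_fpcr = 0;
    u32 guest_fpsr = 0;
    u64 rsb_ptr = 0;
    std::array<u64, RSBSize> rsb_location_descriptors{};
    std::array<u64, RSBSize> rsb_codeptrs{};
    u32 cpsr_nzcv = 0;
    u32 fpsr_exc = 0;
    u32 fpsr_qc = 0;
};

// The emitter keeps one of these around so field offsets never have to be hard-coded.
const Dynarmic::BackendA64::JitStateInfo example_jsi(ExampleJitState{});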

src/backend/A64/opcodes.inc (new file, 651 lines)
View File

@ -0,0 +1,651 @@
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
OPCODE(Void, Void, )
OPCODE(Identity, Opaque, Opaque )
OPCODE(Breakpoint, Void, )
// A32 Context getters/setters
A32OPC(SetCheckBit, Void, U1 )
A32OPC(GetRegister, U32, A32Reg )
A32OPC(GetExtendedRegister32, U32, A32ExtReg )
A32OPC(GetExtendedRegister64, U64, A32ExtReg )
A32OPC(SetRegister, Void, A32Reg, U32 )
A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 )
A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 )
A32OPC(GetCpsr, U32, )
A32OPC(SetCpsr, Void, U32 )
A32OPC(SetCpsrNZCVRaw, Void, U32 )
A32OPC(SetCpsrNZCV, Void, NZCV )
A32OPC(SetCpsrNZCVQ, Void, U32 )
A32OPC(GetNFlag, U1, )
A32OPC(SetNFlag, Void, U1 )
A32OPC(GetZFlag, U1, )
A32OPC(SetZFlag, Void, U1 )
A32OPC(GetCFlag, U1, )
A32OPC(SetCFlag, Void, U1 )
A32OPC(GetVFlag, U1, )
A32OPC(SetVFlag, Void, U1 )
A32OPC(OrQFlag, Void, U1 )
A32OPC(GetGEFlags, U32, )
A32OPC(SetGEFlags, Void, U32 )
A32OPC(SetGEFlagsCompressed, Void, U32 )
A32OPC(BXWritePC, Void, U32 )
A32OPC(CallSupervisor, Void, U32 )
A32OPC(ExceptionRaised, Void, U32, U64 )
A32OPC(GetFpscr, U32, )
A32OPC(SetFpscr, Void, U32 )
A32OPC(GetFpscrNZCV, U32, )
A32OPC(SetFpscrNZCV, Void, NZCV )
// A64 Context getters/setters
//A64OPC(SetCheckBit, Void, U1 )
//A64OPC(GetCFlag, U1, )
//A64OPC(GetNZCVRaw, U32, )
//A64OPC(SetNZCVRaw, Void, U32 )
//A64OPC(SetNZCV, Void, NZCV )
//A64OPC(GetW, U32, A64Reg )
//A64OPC(GetX, U64, A64Reg )
//A64OPC(GetS, U128, A64Vec )
//A64OPC(GetD, U128, A64Vec )
//A64OPC(GetQ, U128, A64Vec )
//A64OPC(GetSP, U64, )
//A64OPC(GetFPCR, U32, )
//A64OPC(GetFPSR, U32, )
//A64OPC(SetW, Void, A64Reg, U32 )
//A64OPC(SetX, Void, A64Reg, U64 )
//A64OPC(SetS, Void, A64Vec, U128 )
//A64OPC(SetD, Void, A64Vec, U128 )
//A64OPC(SetQ, Void, A64Vec, U128 )
//A64OPC(SetSP, Void, U64 )
//A64OPC(SetFPCR, Void, U32 )
//A64OPC(SetFPSR, Void, U32 )
//A64OPC(OrQC, Void, U1 )
//A64OPC(SetPC, Void, U64 )
//A64OPC(CallSupervisor, Void, U32 )
//A64OPC(ExceptionRaised, Void, U64, U64 )
//A64OPC(DataCacheOperationRaised, Void, U64, U64 )
//A64OPC(DataSynchronizationBarrier, Void, )
//A64OPC(DataMemoryBarrier, Void, )
//A64OPC(InstructionSynchronizationBarrier, Void, )
//A64OPC(GetCNTFRQ, U32, )
//A64OPC(GetCNTPCT, U64, )
//A64OPC(GetCTR, U32, )
//A64OPC(GetDCZID, U32, )
//A64OPC(GetTPIDR, U64, )
//A64OPC(GetTPIDRRO, U64, )
//A64OPC(SetTPIDR, Void, U64 )
// Hints
OPCODE(PushRSB, Void, U64 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, U1, Opaque )
OPCODE(GetOverflowFromOp, U1, Opaque )
OPCODE(GetGEFromOp, U32, Opaque )
OPCODE(GetNZCVFromOp, NZCV, Opaque )
OPCODE(GetUpperFromOp, U128, Opaque )
OPCODE(GetLowerFromOp, U128, Opaque )
OPCODE(NZCVFromPackedFlags, NZCV, U32 )
// Calculations
OPCODE(Pack2x32To1x64, U64, U32, U32 )
//OPCODE(Pack2x64To1x128, U128, U64, U64 )
OPCODE(LeastSignificantWord, U32, U64 )
OPCODE(MostSignificantWord, U32, U64 )
OPCODE(LeastSignificantHalf, U16, U32 )
OPCODE(LeastSignificantByte, U8, U32 )
OPCODE(MostSignificantBit, U1, U32 )
OPCODE(IsZero32, U1, U32 )
OPCODE(IsZero64, U1, U64 )
OPCODE(TestBit, U1, U64, U8 )
OPCODE(ConditionalSelect32, U32, Cond, U32, U32 )
OPCODE(ConditionalSelect64, U64, Cond, U64, U64 )
OPCODE(ConditionalSelectNZCV, NZCV, Cond, NZCV, NZCV )
OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 )
OPCODE(LogicalShiftLeft64, U64, U64, U8 )
OPCODE(LogicalShiftRight32, U32, U32, U8, U1 )
OPCODE(LogicalShiftRight64, U64, U64, U8 )
OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 )
//OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
OPCODE(RotateRight32, U32, U32, U8, U1 )
OPCODE(RotateRight64, U64, U64, U8 )
OPCODE(RotateRightExtended, U32, U32, U1 )
OPCODE(Add32, U32, U32, U32, U1 )
OPCODE(Add64, U64, U64, U64, U1 )
OPCODE(Sub32, U32, U32, U32, U1 )
OPCODE(Sub64, U64, U64, U64, U1 )
OPCODE(Mul32, U32, U32, U32 )
OPCODE(Mul64, U64, U64, U64 )
//OPCODE(SignedMultiplyHigh64, U64, U64, U64 )
//OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 )
OPCODE(UnsignedDiv32, U32, U32, U32 )
OPCODE(UnsignedDiv64, U64, U64, U64 )
OPCODE(SignedDiv32, U32, U32, U32 )
OPCODE(SignedDiv64, U64, U64, U64 )
OPCODE(And32, U32, U32, U32 )
OPCODE(And64, U64, U64, U64 )
OPCODE(Eor32, U32, U32, U32 )
OPCODE(Eor64, U64, U64, U64 )
OPCODE(Or32, U32, U32, U32 )
OPCODE(Or64, U64, U64, U64 )
OPCODE(Not32, U32, U32 )
OPCODE(Not64, U64, U64 )
OPCODE(SignExtendByteToWord, U32, U8 )
OPCODE(SignExtendHalfToWord, U32, U16 )
OPCODE(SignExtendByteToLong, U64, U8 )
OPCODE(SignExtendHalfToLong, U64, U16 )
OPCODE(SignExtendWordToLong, U64, U32 )
OPCODE(ZeroExtendByteToWord, U32, U8 )
OPCODE(ZeroExtendHalfToWord, U32, U16 )
OPCODE(ZeroExtendByteToLong, U64, U8 )
OPCODE(ZeroExtendHalfToLong, U64, U16 )
OPCODE(ZeroExtendWordToLong, U64, U32 )
//OPCODE(ZeroExtendLongToQuad, U128, U64 )
//OPCODE(ByteReverseDual, U64, U64 )
OPCODE(ByteReverseWord, U32, U32 )
OPCODE(ByteReverseHalf, U16, U16 )
OPCODE(CountLeadingZeros32, U32, U32 )
OPCODE(CountLeadingZeros64, U64, U64 )
//OPCODE(ExtractRegister32, U32, U32, U32, U8 )
//OPCODE(ExtractRegister64, U64, U64, U64, U8 )
//OPCODE(MaxSigned32, U32, U32, U32 )
//OPCODE(MaxSigned64, U64, U64, U64 )
//OPCODE(MaxUnsigned32, U32, U32, U32 )
//OPCODE(MaxUnsigned64, U64, U64, U64 )
//OPCODE(MinSigned32, U32, U32, U32 )
//OPCODE(MinSigned64, U64, U64, U64 )
//OPCODE(MinUnsigned32, U32, U32, U32 )
//OPCODE(MinUnsigned64, U64, U64, U64 )
// Saturated instructions
OPCODE(SignedSaturatedAdd8, U8, U8, U8 )
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 )
//OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 )
OPCODE(SignedSaturatedSub8, U8, U8, U8 )
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
OPCODE(SignedSaturatedSub64, U64, U64, U64 )
OPCODE(SignedSaturation, U32, U32, U8 )
//OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 )
//OPCODE(UnsignedSaturatedSub8, U8, U8, U8 )
//OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
//OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
//OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
OPCODE(UnsignedSaturation, U32, U32, U8 )
// Packed instructions
OPCODE(PackedAddU8, U32, U32, U32 )
OPCODE(PackedAddS8, U32, U32, U32 )
OPCODE(PackedSubU8, U32, U32, U32 )
OPCODE(PackedSubS8, U32, U32, U32 )
OPCODE(PackedAddU16, U32, U32, U32 )
OPCODE(PackedAddS16, U32, U32, U32 )
OPCODE(PackedSubU16, U32, U32, U32 )
OPCODE(PackedSubS16, U32, U32, U32 )
OPCODE(PackedAddSubU16, U32, U32, U32 )
OPCODE(PackedAddSubS16, U32, U32, U32 )
OPCODE(PackedSubAddU16, U32, U32, U32 )
OPCODE(PackedSubAddS16, U32, U32, U32 )
OPCODE(PackedHalvingAddU8, U32, U32, U32 )
OPCODE(PackedHalvingAddS8, U32, U32, U32 )
OPCODE(PackedHalvingSubU8, U32, U32, U32 )
OPCODE(PackedHalvingSubS8, U32, U32, U32 )
OPCODE(PackedHalvingAddU16, U32, U32, U32 )
OPCODE(PackedHalvingAddS16, U32, U32, U32 )
OPCODE(PackedHalvingSubU16, U32, U32, U32 )
OPCODE(PackedHalvingSubS16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubU16, U32, U32, U32 )
OPCODE(PackedHalvingAddSubS16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddU16, U32, U32, U32 )
OPCODE(PackedHalvingSubAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedAddU8, U32, U32, U32 )
OPCODE(PackedSaturatedAddS8, U32, U32, U32 )
OPCODE(PackedSaturatedSubU8, U32, U32, U32 )
OPCODE(PackedSaturatedSubS8, U32, U32, U32 )
OPCODE(PackedSaturatedAddU16, U32, U32, U32 )
OPCODE(PackedSaturatedAddS16, U32, U32, U32 )
OPCODE(PackedSaturatedSubU16, U32, U32, U32 )
OPCODE(PackedSaturatedSubS16, U32, U32, U32 )
OPCODE(PackedAbsDiffSumS8, U32, U32, U32 )
OPCODE(PackedSelect, U32, U32, U32, U32 )
// CRC instructions
//OPCODE(CRC32Castagnoli8, U32, U32, U32 )
//OPCODE(CRC32Castagnoli16, U32, U32, U32 )
//OPCODE(CRC32Castagnoli32, U32, U32, U32 )
//OPCODE(CRC32Castagnoli64, U32, U32, U64 )
//OPCODE(CRC32ISO8, U32, U32, U32 )
//OPCODE(CRC32ISO16, U32, U32, U32 )
//OPCODE(CRC32ISO32, U32, U32, U32 )
//OPCODE(CRC32ISO64, U32, U32, U64 )
// AES instructions
//OPCODE(AESDecryptSingleRound, U128, U128 )
//OPCODE(AESEncryptSingleRound, U128, U128 )
//OPCODE(AESInverseMixColumns, U128, U128 )
//OPCODE(AESMixColumns, U128, U128 )
// SM4 instructions
//OPCODE(SM4AccessSubstitutionBox, U8, U8 )
// Vector instructions
//OPCODE(VectorGetElement8, U8, U128, U8 )
//OPCODE(VectorGetElement16, U16, U128, U8 )
//OPCODE(VectorGetElement32, U32, U128, U8 )
//OPCODE(VectorGetElement64, U64, U128, U8 )
//OPCODE(VectorSetElement8, U128, U128, U8, U8 )
//OPCODE(VectorSetElement16, U128, U128, U8, U16 )
//OPCODE(VectorSetElement32, U128, U128, U8, U32 )
//OPCODE(VectorSetElement64, U128, U128, U8, U64 )
//OPCODE(VectorAbs8, U128, U128 )
//OPCODE(VectorAbs16, U128, U128 )
//OPCODE(VectorAbs32, U128, U128 )
//OPCODE(VectorAbs64, U128, U128 )
//OPCODE(VectorAdd8, U128, U128, U128 )
//OPCODE(VectorAdd16, U128, U128, U128 )
//OPCODE(VectorAdd32, U128, U128, U128 )
//OPCODE(VectorAdd64, U128, U128, U128 )
//OPCODE(VectorAnd, U128, U128, U128 )
//OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )
//OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 )
//OPCODE(VectorArithmeticVShift8, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift16, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift32, U128, U128, U128 )
//OPCODE(VectorArithmeticVShift64, U128, U128, U128 )
//OPCODE(VectorBroadcastLower8, U128, U8 )
//OPCODE(VectorBroadcastLower16, U128, U16 )
//OPCODE(VectorBroadcastLower32, U128, U32 )
//OPCODE(VectorBroadcast8, U128, U8 )
//OPCODE(VectorBroadcast16, U128, U16 )
//OPCODE(VectorBroadcast32, U128, U32 )
//OPCODE(VectorBroadcast64, U128, U64 )
//OPCODE(VectorCountLeadingZeros8, U128, U128 )
//OPCODE(VectorCountLeadingZeros16, U128, U128 )
//OPCODE(VectorCountLeadingZeros32, U128, U128 )
//OPCODE(VectorDeinterleaveEven8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveEven64, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 )
//OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 )
//OPCODE(VectorEor, U128, U128, U128 )
//OPCODE(VectorEqual8, U128, U128, U128 )
//OPCODE(VectorEqual16, U128, U128, U128 )
//OPCODE(VectorEqual32, U128, U128, U128 )
//OPCODE(VectorEqual64, U128, U128, U128 )
//OPCODE(VectorEqual128, U128, U128, U128 )
//OPCODE(VectorExtract, U128, U128, U128, U8 )
//OPCODE(VectorExtractLower, U128, U128, U128, U8 )
//OPCODE(VectorGreaterS8, U128, U128, U128 )
//OPCODE(VectorGreaterS16, U128, U128, U128 )
//OPCODE(VectorGreaterS32, U128, U128, U128 )
//OPCODE(VectorGreaterS64, U128, U128, U128 )
//OPCODE(VectorHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorHalvingSubS8, U128, U128, U128 )
//OPCODE(VectorHalvingSubS16, U128, U128, U128 )
//OPCODE(VectorHalvingSubS32, U128, U128, U128 )
//OPCODE(VectorHalvingSubU8, U128, U128, U128 )
//OPCODE(VectorHalvingSubU16, U128, U128, U128 )
//OPCODE(VectorHalvingSubU32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower8, U128, U128, U128 )
//OPCODE(VectorInterleaveLower16, U128, U128, U128 )
//OPCODE(VectorInterleaveLower32, U128, U128, U128 )
//OPCODE(VectorInterleaveLower64, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper8, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper16, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper32, U128, U128, U128 )
//OPCODE(VectorInterleaveUpper64, U128, U128, U128 )
//OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight8, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight16, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight32, U128, U128, U8 )
//OPCODE(VectorLogicalShiftRight64, U128, U128, U8 )
//OPCODE(VectorLogicalVShift8, U128, U128, U128 )
//OPCODE(VectorLogicalVShift16, U128, U128, U128 )
//OPCODE(VectorLogicalVShift32, U128, U128, U128 )
//OPCODE(VectorLogicalVShift64, U128, U128, U128 )
//OPCODE(VectorMaxS8, U128, U128, U128 )
//OPCODE(VectorMaxS16, U128, U128, U128 )
//OPCODE(VectorMaxS32, U128, U128, U128 )
//OPCODE(VectorMaxS64, U128, U128, U128 )
//OPCODE(VectorMaxU8, U128, U128, U128 )
//OPCODE(VectorMaxU16, U128, U128, U128 )
//OPCODE(VectorMaxU32, U128, U128, U128 )
//OPCODE(VectorMaxU64, U128, U128, U128 )
//OPCODE(VectorMinS8, U128, U128, U128 )
//OPCODE(VectorMinS16, U128, U128, U128 )
//OPCODE(VectorMinS32, U128, U128, U128 )
//OPCODE(VectorMinS64, U128, U128, U128 )
//OPCODE(VectorMinU8, U128, U128, U128 )
//OPCODE(VectorMinU16, U128, U128, U128 )
//OPCODE(VectorMinU32, U128, U128, U128 )
//OPCODE(VectorMinU64, U128, U128, U128 )
//OPCODE(VectorMultiply8, U128, U128, U128 )
//OPCODE(VectorMultiply16, U128, U128, U128 )
//OPCODE(VectorMultiply32, U128, U128, U128 )
//OPCODE(VectorMultiply64, U128, U128, U128 )
//OPCODE(VectorNarrow16, U128, U128 )
//OPCODE(VectorNarrow32, U128, U128 )
//OPCODE(VectorNarrow64, U128, U128 )
//OPCODE(VectorNot, U128, U128 )
//OPCODE(VectorOr, U128, U128, U128 )
//OPCODE(VectorPairedAddLower8, U128, U128, U128 )
//OPCODE(VectorPairedAddLower16, U128, U128, U128 )
//OPCODE(VectorPairedAddLower32, U128, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddSignedWiden32, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 )
//OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 )
//OPCODE(VectorPairedAdd8, U128, U128, U128 )
//OPCODE(VectorPairedAdd16, U128, U128, U128 )
//OPCODE(VectorPairedAdd32, U128, U128, U128 )
//OPCODE(VectorPairedAdd64, U128, U128, U128 )
//OPCODE(VectorPairedMaxS8, U128, U128, U128 )
//OPCODE(VectorPairedMaxS16, U128, U128, U128 )
//OPCODE(VectorPairedMaxS32, U128, U128, U128 )
//OPCODE(VectorPairedMaxU8, U128, U128, U128 )
//OPCODE(VectorPairedMaxU16, U128, U128, U128 )
//OPCODE(VectorPairedMaxU32, U128, U128, U128 )
//OPCODE(VectorPairedMinS8, U128, U128, U128 )
//OPCODE(VectorPairedMinS16, U128, U128, U128 )
//OPCODE(VectorPairedMinS32, U128, U128, U128 )
//OPCODE(VectorPairedMinU8, U128, U128, U128 )
//OPCODE(VectorPairedMinU16, U128, U128, U128 )
//OPCODE(VectorPairedMinU32, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiply8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 )
//OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )
//OPCODE(VectorPopulationCount, U128, U128 )
//OPCODE(VectorReverseBits, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 )
//OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
//OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
//OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 )
//OPCODE(VectorShuffleWords, U128, U128, U8 )
//OPCODE(VectorSignExtend8, U128, U128 )
//OPCODE(VectorSignExtend16, U128, U128 )
//OPCODE(VectorSignExtend32, U128, U128 )
//OPCODE(VectorSignExtend64, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorSignedMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs8, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs16, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs32, U128, U128 )
//OPCODE(VectorSignedSaturatedAbs64, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 )
//OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg8, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
//OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned8, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned16, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned32, U128, U128, U128 )
//OPCODE(VectorSignedSaturatedShiftLeftUnsigned64, U128, U128, U128 )
//OPCODE(VectorSub8, U128, U128, U128 )
//OPCODE(VectorSub16, U128, U128, U128 )
//OPCODE(VectorSub32, U128, U128, U128 )
//OPCODE(VectorSub64, U128, U128, U128 )
//OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
//OPCODE(VectorTableLookup, U128, U128, Table, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
//OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
//OPCODE(VectorUnsignedMultiply16, Void, U128, U128 )
//OPCODE(VectorUnsignedMultiply32, Void, U128, U128 )
//OPCODE(VectorUnsignedRecipEstimate, U128, U128 )
//OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 )
//OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft8, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft16, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft32, U128, U128, U128 )
//OPCODE(VectorUnsignedSaturatedShiftLeft64, U128, U128, U128 )
//OPCODE(VectorZeroExtend8, U128, U128 )
//OPCODE(VectorZeroExtend16, U128, U128 )
//OPCODE(VectorZeroExtend32, U128, U128 )
//OPCODE(VectorZeroExtend64, U128, U128 )
//OPCODE(VectorZeroUpper, U128, U128 )
//OPCODE(ZeroVector, U128, )
// Floating-point operations
//OPCODE(FPAbs16, U16, U16 )
OPCODE(FPAbs32, U32, U32 )
OPCODE(FPAbs64, U64, U64 )
OPCODE(FPAdd32, U32, U32, U32 )
OPCODE(FPAdd64, U64, U64, U64 )
OPCODE(FPCompare32, NZCV, U32, U32, U1 )
OPCODE(FPCompare64, NZCV, U64, U64, U1 )
OPCODE(FPDiv32, U32, U32, U32 )
OPCODE(FPDiv64, U64, U64, U64 )
//OPCODE(FPMax32, U32, U32, U32 )
//OPCODE(FPMax64, U64, U64, U64 )
//OPCODE(FPMaxNumeric32, U32, U32, U32 )
//OPCODE(FPMaxNumeric64, U64, U64, U64 )
//OPCODE(FPMin32, U32, U32, U32 )
//OPCODE(FPMin64, U64, U64, U64 )
//OPCODE(FPMinNumeric32, U32, U32, U32 )
//OPCODE(FPMinNumeric64, U64, U64, U64 )
OPCODE(FPMul32, U32, U32, U32 )
OPCODE(FPMul64, U64, U64, U64 )
//OPCODE(FPMulAdd16, U16, U16, U16, U16 )
//OPCODE(FPMulAdd32, U32, U32, U32, U32 )
//OPCODE(FPMulAdd64, U64, U64, U64, U64 )
//OPCODE(FPMulX32, U32, U32, U32 )
//OPCODE(FPMulX64, U64, U64, U64 )
//OPCODE(FPNeg16, U16, U16 )
OPCODE(FPNeg32, U32, U32 )
OPCODE(FPNeg64, U64, U64 )
//OPCODE(FPRecipEstimate16, U16, U16 )
//OPCODE(FPRecipEstimate32, U32, U32 )
//OPCODE(FPRecipEstimate64, U64, U64 )
//OPCODE(FPRecipExponent16, U16, U16 )
//OPCODE(FPRecipExponent32, U32, U32 )
//OPCODE(FPRecipExponent64, U64, U64 )
//OPCODE(FPRecipStepFused16, U16, U16, U16 )
//OPCODE(FPRecipStepFused32, U32, U32, U32 )
//OPCODE(FPRecipStepFused64, U64, U64, U64 )
//OPCODE(FPRoundInt16, U16, U16, U8, U1 )
//OPCODE(FPRoundInt32, U32, U32, U8, U1 )
//OPCODE(FPRoundInt64, U64, U64, U8, U1 )
//OPCODE(FPRSqrtEstimate16, U16, U16 )
//OPCODE(FPRSqrtEstimate32, U32, U32 )
//OPCODE(FPRSqrtEstimate64, U64, U64 )
//OPCODE(FPRSqrtStepFused16, U16, U16, U16 )
//OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
//OPCODE(FPRSqrtStepFused64, U64, U64, U64 )
OPCODE(FPSqrt32, U32, U32 )
OPCODE(FPSqrt64, U64, U64 )
OPCODE(FPSub32, U32, U32, U32 )
OPCODE(FPSub64, U64, U64, U64 )
// Floating-point conversions
OPCODE(FPHalfToDouble, U64, U16, U8 )
OPCODE(FPHalfToSingle, U32, U16, U8 )
OPCODE(FPSingleToDouble, U64, U32, U8 )
OPCODE(FPSingleToHalf, U16, U32, U8 )
OPCODE(FPDoubleToHalf, U16, U64, U8 )
OPCODE(FPDoubleToSingle, U32, U64, U8 )
OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 )
OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 )
OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 )
//OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 )
//OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 )
//OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 )
OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 )
OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 )
OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 )
OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 )
OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 )
OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 )
OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 )
OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 )
// Floating-point vector instructions
//OPCODE(FPVectorAbs16, U128, U128 )
//OPCODE(FPVectorAbs32, U128, U128 )
//OPCODE(FPVectorAbs64, U128, U128 )
//OPCODE(FPVectorAdd32, U128, U128, U128 )
//OPCODE(FPVectorAdd64, U128, U128, U128 )
//OPCODE(FPVectorDiv32, U128, U128, U128 )
//OPCODE(FPVectorDiv64, U128, U128, U128 )
//OPCODE(FPVectorEqual32, U128, U128, U128 )
//OPCODE(FPVectorEqual64, U128, U128, U128 )
//OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorGreater32, U128, U128, U128 )
//OPCODE(FPVectorGreater64, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual32, U128, U128, U128 )
//OPCODE(FPVectorGreaterEqual64, U128, U128, U128 )
//OPCODE(FPVectorMax32, U128, U128, U128 )
//OPCODE(FPVectorMax64, U128, U128, U128 )
//OPCODE(FPVectorMin32, U128, U128, U128 )
//OPCODE(FPVectorMin64, U128, U128, U128 )
//OPCODE(FPVectorMul32, U128, U128, U128 )
//OPCODE(FPVectorMul64, U128, U128, U128 )
//OPCODE(FPVectorMulAdd16, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 )
//OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 )
//OPCODE(FPVectorMulX32, U128, U128, U128 )
//OPCODE(FPVectorMulX64, U128, U128, U128 )
//OPCODE(FPVectorNeg16, U128, U128 )
//OPCODE(FPVectorNeg32, U128, U128 )
//OPCODE(FPVectorNeg64, U128, U128 )
//OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
//OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
//OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
//OPCODE(FPVectorRecipEstimate16, U128, U128 )
//OPCODE(FPVectorRecipEstimate32, U128, U128 )
//OPCODE(FPVectorRecipEstimate64, U128, U128 )
//OPCODE(FPVectorRecipStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRecipStepFused64, U128, U128, U128 )
//OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
//OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
//OPCODE(FPVectorRSqrtEstimate16, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
//OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused16, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )
//OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 )
//OPCODE(FPVectorSqrt32, U128, U128 )
//OPCODE(FPVectorSqrt64, U128, U128 )
//OPCODE(FPVectorSub32, U128, U128, U128 )
//OPCODE(FPVectorSub64, U128, U128, U128 )
//OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed16, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 )
//OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 )
// A32 Memory access
A32OPC(ClearExclusive, Void, )
A32OPC(SetExclusive, Void, U32, U8 )
A32OPC(ReadMemory8, U8, U32 )
A32OPC(ReadMemory16, U16, U32 )
A32OPC(ReadMemory32, U32, U32 )
A32OPC(ReadMemory64, U64, U32 )
A32OPC(WriteMemory8, Void, U32, U8 )
A32OPC(WriteMemory16, Void, U32, U16 )
A32OPC(WriteMemory32, Void, U32, U32 )
A32OPC(WriteMemory64, Void, U32, U64 )
A32OPC(ExclusiveWriteMemory8, U32, U32, U8 )
A32OPC(ExclusiveWriteMemory16, U32, U32, U16 )
A32OPC(ExclusiveWriteMemory32, U32, U32, U32 )
A32OPC(ExclusiveWriteMemory64, U32, U32, U32, U32 )
// A64 Memory access
//A64OPC(ClearExclusive, Void, )
//A64OPC(SetExclusive, Void, U64, U8 )
//A64OPC(ReadMemory8, U8, U64 )
//A64OPC(ReadMemory16, U16, U64 )
//A64OPC(ReadMemory32, U32, U64 )
//A64OPC(ReadMemory64, U64, U64 )
//A64OPC(ReadMemory128, U128, U64 )
//A64OPC(WriteMemory8, Void, U64, U8 )
//A64OPC(WriteMemory16, Void, U64, U16 )
//A64OPC(WriteMemory32, Void, U64, U32 )
//A64OPC(WriteMemory64, Void, U64, U64 )
//A64OPC(WriteMemory128, Void, U64, U128 )
//A64OPC(ExclusiveWriteMemory8, U32, U64, U8 )
//A64OPC(ExclusiveWriteMemory16, U32, U64, U16 )
//A64OPC(ExclusiveWriteMemory32, U32, U64, U32 )
//A64OPC(ExclusiveWriteMemory64, U32, U64, U64 )
//A64OPC(ExclusiveWriteMemory128, U32, U64, U128 )
// Coprocessor
A32OPC(CoprocInternalOperation, Void, CoprocInfo )
A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 )
A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 )
A32OPC(CoprocGetOneWord, U32, CoprocInfo )
A32OPC(CoprocGetTwoWords, U64, CoprocInfo )
A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 )
A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )
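opcodes.inc is an X-macro list: the includer defines OPCODE/A32OPC/A64OPC before including it and gets one expansion per active line (the commented-out entries simply do not expand). A hedged sketch of a consumer that just counts the enabled opcodes, mirroring how the IR layer builds its tables:

#include <cstdio>

// Build an enum with one entry per opcode currently enabled in the A64 port.
enum class ExampleOpcode {
#define OPCODE(name, type, ...) name,
#define A32OPC(name, type, ...) A32_##name,
#define A64OPC(name, type, ...) A64_##name,
#include "backend/A64/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
    NUM_OPCODE,
};

int main() {
    std::printf("enabled opcodes: %d\n", static_cast<int>(ExampleOpcode::NUM_OPCODE));
    return 0;
}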

View File

@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstddef>
#include <string>
#include "backend/A64/perf_map.h"
#ifdef __linux__
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h>
#include "common/common_types.h"
namespace Dynarmic::BackendA64 {
namespace {
std::mutex mutex;
std::FILE* file = nullptr;
void OpenFile() {
const char* perf_dir = std::getenv("PERF_BUILDID_DIR");
if (!perf_dir) {
file = nullptr;
return;
}
const pid_t pid = getpid();
const std::string filename = fmt::format("{:s}/perf-{:d}.map", perf_dir, pid);
file = std::fopen(filename.c_str(), "w");
if (!file) {
return;
}
std::setvbuf(file, nullptr, _IONBF, 0);
}
} // anonymous namespace
namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) {
std::lock_guard guard{mutex};
if (!file) {
OpenFile();
if (!file) {
return;
}
}
const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
}
} // namespace detail
void PerfMapClear() {
std::lock_guard guard{mutex};
if (!file) {
return;
}
std::fclose(file);
file = nullptr;
OpenFile();
}
} // namespace Dynarmic::BackendA64
#else
namespace Dynarmic::BackendA64 {
namespace detail {
void PerfMapRegister(const void*, const void*, const std::string&) {}
} // namespace detail
void PerfMapClear() {}
} // namespace Dynarmic::BackendA64
#endif

View File

@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
#include <string>
#include "common/cast_util.h"
namespace Dynarmic::BackendA64 {
namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name);
} // namespace detail
template<typename T>
void PerfMapRegister(T start, const void* end, const std::string& friendly_name) {
detail::PerfMapRegister(Common::BitCast<const void*>(start), end, friendly_name);
}
void PerfMapClear();
} // namespace Dynarmic::BackendA64
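A short illustrative use of the helper above (the wrapper function name is hypothetical): after emitting a block, register its address range so Linux perf can attribute samples to the JIT-ed code.

#include "backend/A64/perf_map.h"

namespace Dynarmic::BackendA64 {

// Hypothetical call site: start/end delimit a freshly emitted block of code.
void ExampleRegisterBlock(const void* start, const void* end) {
    PerfMapRegister(start, end, "dynarmic_a64_block");
}

} // namespace Dynarmic::BackendA64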

View File

@ -0,0 +1,650 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <algorithm>
#include <numeric>
#include <utility>
#include <fmt/ostream.h>
#include "backend/A64/abi.h"
#include "backend/A64/reg_alloc.h"
#include "common/assert.h"
namespace Dynarmic::BackendA64 {
static u64 ImmediateToU64(const IR::Value& imm) {
switch (imm.GetType()) {
case IR::Type::U1:
return u64(imm.GetU1());
case IR::Type::U8:
return u64(imm.GetU8());
case IR::Type::U16:
return u64(imm.GetU16());
case IR::Type::U32:
return u64(imm.GetU32());
case IR::Type::U64:
return u64(imm.GetU64());
default:
ASSERT_MSG(false, "This should never happen.");
return 0;
}
}
static bool CanExchange(HostLoc a, HostLoc b) {
return HostLocIsGPR(a) && HostLocIsGPR(b);
}
// Minimum number of bits required to represent a type
static size_t GetBitWidth(IR::Type type) {
switch (type) {
case IR::Type::A32Reg:
case IR::Type::A32ExtReg:
case IR::Type::A64Reg:
case IR::Type::A64Vec:
case IR::Type::CoprocInfo:
case IR::Type::Cond:
case IR::Type::Void:
case IR::Type::Table:
ASSERT_MSG(false, "Type {} cannot be represented at runtime", type);
return 0;
case IR::Type::Opaque:
ASSERT_MSG(false, "Not a concrete type");
return 0;
case IR::Type::U1:
return 8;
case IR::Type::U8:
return 8;
case IR::Type::U16:
return 16;
case IR::Type::U32:
return 32;
case IR::Type::U64:
return 64;
case IR::Type::U128:
return 128;
case IR::Type::NZCVFlags:
return 32; // TODO: Update to 16 when flags optimization is done
}
UNREACHABLE();
return 0;
}
static bool IsValuelessType(IR::Type type) {
switch (type) {
case IR::Type::Table:
return true;
default:
return false;
}
}
bool HostLocInfo::IsLocked() const {
return is_being_used_count > 0;
}
bool HostLocInfo::IsEmpty() const {
return is_being_used_count == 0 && values.empty();
}
bool HostLocInfo::IsLastUse() const {
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
}
void HostLocInfo::ReadLock() {
ASSERT(!is_scratch);
is_being_used_count++;
}
void HostLocInfo::WriteLock() {
ASSERT(is_being_used_count == 0);
is_being_used_count++;
is_scratch = true;
}
void HostLocInfo::AddArgReference() {
current_references++;
ASSERT(accumulated_uses + current_references <= total_uses);
}
void HostLocInfo::ReleaseOne() {
is_being_used_count--;
is_scratch = false;
if (current_references == 0)
return;
accumulated_uses++;
current_references--;
if (current_references == 0)
ReleaseAll();
}
void HostLocInfo::ReleaseAll() {
accumulated_uses += current_references;
current_references = 0;
ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
if (total_uses == accumulated_uses) {
values.clear();
accumulated_uses = 0;
total_uses = 0;
max_bit_width = 0;
}
is_being_used_count = 0;
is_scratch = false;
}
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
return std::find(values.begin(), values.end(), inst) != values.end();
}
size_t HostLocInfo::GetMaxBitWidth() const {
return max_bit_width;
}
void HostLocInfo::AddValue(IR::Inst* inst) {
values.push_back(inst);
total_uses += inst->UseCount();
max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
}
IR::Type Argument::GetType() const {
return value.GetType();
}
bool Argument::IsImmediate() const {
return value.IsImmediate();
}
bool Argument::IsVoid() const {
return GetType() == IR::Type::Void;
}
bool Argument::FitsInImmediateU32() const {
if (!IsImmediate())
return false;
u64 imm = ImmediateToU64(value);
return imm < 0x100000000;
}
bool Argument::FitsInImmediateS32() const {
if (!IsImmediate())
return false;
s64 imm = static_cast<s64>(ImmediateToU64(value));
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
}
bool Argument::GetImmediateU1() const {
return value.GetU1();
}
u8 Argument::GetImmediateU8() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x100);
return u8(imm);
}
u16 Argument::GetImmediateU16() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x10000);
return u16(imm);
}
u32 Argument::GetImmediateU32() const {
u64 imm = ImmediateToU64(value);
ASSERT(imm < 0x100000000);
return u32(imm);
}
u64 Argument::GetImmediateS32() const {
ASSERT(FitsInImmediateS32());
u64 imm = ImmediateToU64(value);
return imm;
}
u64 Argument::GetImmediateU64() const {
return ImmediateToU64(value);
}
IR::Cond Argument::GetImmediateCond() const {
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
return value.GetCond();
}
bool Argument::IsInGpr() const {
if (IsImmediate())
return false;
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
}
bool Argument::IsInFpr() const {
if (IsImmediate())
return false;
return HostLocIsFPR(*reg_alloc.ValueLocation(value.GetInst()));
}
bool Argument::IsInMemory() const {
if (IsImmediate())
return false;
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value& arg = inst->GetArg(i);
ret[i].value = arg;
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already be defined");
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
}
}
return ret;
}
Arm64Gen::ARM64Reg RegAlloc::UseGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, any_gpr));
}
Arm64Gen::ARM64Reg RegAlloc::UseFpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToFpr(UseImpl(arg.value, any_fpr));
}
//OpArg RegAlloc::UseOpArg(Argument& arg) {
// return UseGpr(arg);
//}
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
ASSERT(!arg.allocated);
arg.allocated = true;
UseImpl(arg.value, {host_loc});
}
Arm64Gen::ARM64Reg RegAlloc::UseScratchGpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
}
Arm64Gen::ARM64Reg RegAlloc::UseScratchFpr(Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToFpr(UseScratchImpl(arg.value, any_fpr));
}
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
ASSERT(!arg.allocated);
arg.allocated = true;
UseScratchImpl(arg.value, {host_loc});
}
void RegAlloc::DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg) {
ASSERT(IsVector(reg) || IsGPR(reg));
HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
DefineValueImpl(inst, hostloc);
}
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
ASSERT(!arg.allocated);
arg.allocated = true;
DefineValueImpl(inst, arg.value);
}
void RegAlloc::Release(const Arm64Gen::ARM64Reg& reg) {
ASSERT(IsVector(reg) || IsGPR(reg));
const HostLoc hostloc = static_cast<HostLoc>(DecodeReg(reg) + static_cast<size_t>(IsVector(reg) ? HostLoc::Q0 : HostLoc::X0));
LocInfo(hostloc).ReleaseOne();
}
Arm64Gen::ARM64Reg RegAlloc::ScratchGpr(HostLocList desired_locations) {
return HostLocToReg64(ScratchImpl(desired_locations));
}
Arm64Gen::ARM64Reg RegAlloc::ScratchFpr(HostLocList desired_locations) {
return HostLocToFpr(ScratchImpl(desired_locations));
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location) {
LocInfo(current_location).ReadLock();
return current_location;
}
if (LocInfo(current_location).IsLocked()) {
return UseScratchImpl(use_value, desired_locations);
}
const HostLoc destination_location = SelectARegister(desired_locations);
if (max_bit_width > HostLocBitWidth(destination_location)) {
return UseScratchImpl(use_value, desired_locations);
} else if (CanExchange(destination_location, current_location)) {
Exchange(destination_location, current_location);
} else {
MoveOutOfTheWay(destination_location);
Move(destination_location, current_location);
}
LocInfo(destination_location).ReadLock();
return destination_location;
}
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
}
const IR::Inst* use_inst = use_value.GetInst();
const HostLoc current_location = *ValueLocation(use_inst);
const size_t bit_width = GetBitWidth(use_inst->GetType());
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
if (!LocInfo(current_location).IsLastUse()) {
MoveOutOfTheWay(current_location);
}
LocInfo(current_location).WriteLock();
return current_location;
}
const HostLoc destination_location = SelectARegister(desired_locations);
MoveOutOfTheWay(destination_location);
CopyToScratch(bit_width, destination_location, current_location);
LocInfo(destination_location).WriteLock();
return destination_location;
}
HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
HostLoc location = SelectARegister(desired_locations);
MoveOutOfTheWay(location);
LocInfo(location).WriteLock();
return location;
}
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
std::optional<Argument::copyable_reference> arg1,
std::optional<Argument::copyable_reference> arg2,
std::optional<Argument::copyable_reference> arg3,
std::optional<Argument::copyable_reference> arg4,
std::optional<Argument::copyable_reference> arg5,
std::optional<Argument::copyable_reference> arg6,
std::optional<Argument::copyable_reference> arg7) {
constexpr size_t args_count = 8;
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4, ABI_PARAM5, ABI_PARAM6, ABI_PARAM7, ABI_PARAM8 };
const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7};
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
for (auto hostloc : args_hostloc)
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
return ret;
}();
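// Bind each provided argument to its ABI register, claim the unused argument registers as
// scratch, then scratch every remaining caller-saved register so the callee is free to clobber
// them; the result, if any, is picked up from ABI_RETURN below.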
for (size_t i = 0; i < args_count; i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
}
}
for (size_t i = 0; i < args_count; i++) {
if (!args[i]) {
// TODO: Force spill
ScratchGpr({args_hostloc[i]});
}
}
for (HostLoc caller_saved : other_caller_save) {
ScratchImpl({caller_saved});
}
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}
}
void RegAlloc::EndOfAllocScope() {
for (auto& iter : hostloc_info) {
iter.ReleaseAll();
}
}
void RegAlloc::AssertNoMoreUses() {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
}
HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
std::vector<HostLoc> candidates = desired_locations;
// Find all locations that have not been allocated.
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
return !this->LocInfo(loc).IsLocked();
});
candidates.erase(allocated_locs, candidates.end());
ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
// Selects the best location out of the available locations.
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
std::partition(candidates.begin(), candidates.end(), [this](auto loc){
return this->LocInfo(loc).IsEmpty();
});
return candidates.front();
}
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
for (size_t i = 0; i < hostloc_info.size(); i++)
if (hostloc_info[i].ContainsValue(value))
return static_cast<HostLoc>(i);
return std::nullopt;
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
LocInfo(host_loc).AddValue(def_inst);
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
if (use_inst.IsImmediate()) {
HostLoc location = ScratchImpl(any_gpr);
DefineValueImpl(def_inst, location);
LoadImmediate(use_inst, location);
return;
}
ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
HostLoc location = *ValueLocation(use_inst.GetInst());
DefineValueImpl(def_inst, location);
}
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
Arm64Gen::ARM64Reg reg = HostLocToReg64(host_loc);
u64 imm_value = ImmediateToU64(imm);
code.MOVI2R(reg, imm_value);
return host_loc;
}
if (HostLocIsFPR(host_loc)) {
Arm64Gen::ARM64Reg reg = Arm64Gen::EncodeRegToDouble(HostLocToFpr(host_loc));
u64 imm_value = ImmediateToU64(imm);
if (imm_value == 0)
code.fp_emitter.FMOV(reg, 0);
else {
code.EmitPatchLDR(reg, imm_value);
}
return host_loc;
}
UNREACHABLE();
}
void RegAlloc::Move(HostLoc to, HostLoc from) {
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
ASSERT(bit_width <= HostLocBitWidth(to));
if (LocInfo(from).IsEmpty()) {
return;
}
EmitMove(bit_width, to, from);
LocInfo(to) = std::exchange(LocInfo(from), {});
}
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
EmitMove(bit_width, to, from);
}
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
if (LocInfo(a).IsEmpty()) {
Move(a, b);
return;
}
if (LocInfo(b).IsEmpty()) {
Move(b, a);
return;
}
EmitExchange(a, b);
std::swap(LocInfo(a), LocInfo(b));
}
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
ASSERT(!LocInfo(reg).IsLocked());
if (!LocInfo(reg).IsEmpty()) {
SpillRegister(reg);
}
}
void RegAlloc::SpillRegister(HostLoc loc) {
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
HostLoc new_loc = FindFreeSpill();
Move(new_loc, loc);
}
HostLoc RegAlloc::FindFreeSpill() const {
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
HostLoc loc = static_cast<HostLoc>(i);
if (LocInfo(loc).IsEmpty())
return loc;
}
ASSERT_MSG(false, "All spill locations are full");
}
HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
return hostloc_info[static_cast<size_t>(loc)];
}
const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
ASSERT(loc != HostLoc::SP && loc != HostLoc::X28 && loc != HostLoc::X29 && loc != HostLoc::X30);
return hostloc_info[static_cast<size_t>(loc)];
}
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
if (HostLocIsFPR(to) && HostLocIsFPR(from)) {
// bit_width == 128
//mov(HostLocToFpr(to), HostLocToFpr(from));
ASSERT_FALSE("Unimplemented");
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.MOV(HostLocToReg64(to), HostLocToReg64(from));
} else {
code.MOV(DecodeReg(HostLocToReg64(to)), DecodeReg(HostLocToReg64(from)));
}
} else if (HostLocIsFPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.fp_emitter.FMOV(EncodeRegToDouble(HostLocToFpr(to)), HostLocToReg64(from));
} else {
code.fp_emitter.FMOV(EncodeRegToSingle(HostLocToFpr(to)), DecodeReg(HostLocToReg64(from)));
}
} else if (HostLocIsGPR(to) && HostLocIsFPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.fp_emitter.FMOV(HostLocToReg64(to), EncodeRegToDouble(HostLocToFpr(from)));
} else {
code.fp_emitter.FMOV(DecodeReg(HostLocToReg64(to)), EncodeRegToSingle(HostLocToFpr(from)));
}
} else if (HostLocIsFPR(to) && HostLocIsSpill(from)) {
s32 spill_addr = spill_to_addr(from);
// ASSERT(spill_addr.getBit() >= bit_width);
code.fp_emitter.LDR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(to), Arm64Gen::X28, spill_addr);
} else if (HostLocIsSpill(to) && HostLocIsFPR(from)) {
s32 spill_addr = spill_to_addr(to);
// ASSERT(spill_addr.getBit() >= bit_width);
code.fp_emitter.STR(bit_width, Arm64Gen::INDEX_UNSIGNED, HostLocToFpr(from), Arm64Gen::X28, spill_addr);
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.LDR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(to), Arm64Gen::X28, spill_to_addr(from));
} else {
code.LDR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(to)), Arm64Gen::X28, spill_to_addr(from));
}
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code.STR(Arm64Gen::INDEX_UNSIGNED, HostLocToReg64(from), Arm64Gen::X28, spill_to_addr(to));
} else {
code.STR(Arm64Gen::INDEX_UNSIGNED, DecodeReg(HostLocToReg64(from)), Arm64Gen::X28, spill_to_addr(to));
}
} else {
ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
}
}
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
// Is this the best way to do it?
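// Reviewer note: the three EORs below are the classic XOR swap - after a ^= b, b ^= a, a ^= b the
// two registers have exchanged contents without needing a scratch register; the alternative would
// be to route the move through ScratchGpr(), at the cost of tying up one more register.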
code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
code.EOR(HostLocToReg64(b), HostLocToReg64(a), HostLocToReg64(b));
code.EOR(HostLocToReg64(a), HostLocToReg64(a), HostLocToReg64(b));
} else if (HostLocIsFPR(a) && HostLocIsFPR(b)) {
ASSERT_MSG(false, "Check your code: Exchanging XMM registers is unnecessary");
} else {
ASSERT_MSG(false, "Invalid RegAlloc::EmitExchange");
}
}
} // namespace Dynarmic::BackendA64

src/backend/A64/reg_alloc.h Normal file
@ -0,0 +1,167 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include <functional>
#include <utility>
#include <vector>
#include <optional>
#include "backend/A64/block_of_code.h"
#include "backend/A64/hostloc.h"
//#include "backend/A64/oparg.h"
#include "common/common_types.h"
#include "frontend/ir/cond.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/value.h"
namespace Dynarmic::BackendA64 {
class RegAlloc;
struct HostLocInfo {
public:
bool IsLocked() const;
bool IsEmpty() const;
bool IsLastUse() const;
void ReadLock();
void WriteLock();
void AddArgReference();
void ReleaseOne();
void ReleaseAll();
bool ContainsValue(const IR::Inst* inst) const;
size_t GetMaxBitWidth() const;
void AddValue(IR::Inst* inst);
private:
// Current instruction state
size_t is_being_used_count = 0;
bool is_scratch = false;
// Block state
size_t current_references = 0;
size_t accumulated_uses = 0;
size_t total_uses = 0;
// Value state
std::vector<IR::Inst*> values;
size_t max_bit_width = 0;
};
struct Argument {
public:
using copyable_reference = std::reference_wrapper<Argument>;
IR::Type GetType() const;
bool IsImmediate() const;
bool IsVoid() const;
bool FitsInImmediateU32() const;
bool FitsInImmediateS32() const;
bool GetImmediateU1() const;
u8 GetImmediateU8() const;
u16 GetImmediateU16() const;
u32 GetImmediateU32() const;
u64 GetImmediateS32() const;
u64 GetImmediateU64() const;
IR::Cond GetImmediateCond() const;
/// Is this value currently in a GPR?
bool IsInGpr() const;
/// Is this value currently in a FPR?
bool IsInFpr() const;
/// Is this value currently in memory?
bool IsInMemory() const;
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
bool allocated = false;
RegAlloc& reg_alloc;
IR::Value value;
};
class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<u64(HostLoc)> spill_to_addr)
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
Arm64Gen::ARM64Reg UseGpr(Argument& arg);
Arm64Gen::ARM64Reg UseFpr(Argument& arg);
//OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
Arm64Gen::ARM64Reg UseScratchGpr(Argument& arg);
Arm64Gen::ARM64Reg UseScratchFpr(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
void DefineValue(IR::Inst* inst, const Arm64Gen::ARM64Reg& reg);
void DefineValue(IR::Inst* inst, Argument& arg);
void Release(const Arm64Gen::ARM64Reg& reg);
Arm64Gen::ARM64Reg ScratchGpr(HostLocList desired_locations = any_gpr);
Arm64Gen::ARM64Reg ScratchFpr(HostLocList desired_locations = any_fpr);
void HostCall(IR::Inst* result_def = nullptr, std::optional<Argument::copyable_reference> arg0 = {},
std::optional<Argument::copyable_reference> arg1 = {},
std::optional<Argument::copyable_reference> arg2 = {},
std::optional<Argument::copyable_reference> arg3 = {},
std::optional<Argument::copyable_reference> arg4 = {},
std::optional<Argument::copyable_reference> arg5 = {},
std::optional<Argument::copyable_reference> arg6 = {},
std::optional<Argument::copyable_reference> arg7 = {});
// TODO: Values in host flags
void EndOfAllocScope();
void AssertNoMoreUses();
private:
friend struct Argument;
HostLoc SelectARegister(HostLocList desired_locations) const;
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
HostLoc ScratchImpl(HostLocList desired_locations);
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
HostLoc LoadImmediate(IR::Value imm, HostLoc reg);
void Move(HostLoc to, HostLoc from);
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
void Exchange(HostLoc a, HostLoc b);
void MoveOutOfTheWay(HostLoc reg);
void SpillRegister(HostLoc loc);
HostLoc FindFreeSpill() const;
std::vector<HostLocInfo> hostloc_info;
HostLocInfo& LocInfo(HostLoc loc);
const HostLocInfo& LocInfo(HostLoc loc) const;
BlockOfCode& code;
std::function<u64(HostLoc)> spill_to_addr;
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b);
};
} // namespace Dynarmic::BackendA64
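Reviewer note: a minimal sketch (illustrative only, not part of this patch) of how an emitter function is expected to drive the interface above - fetch the argument info for an IR instruction, lock its operands, emit host code, then bind the result. The emitter's ADD(Rd, Rn, Rm) overload is assumed here.

void EmitExampleAdd32(RegAlloc& reg_alloc, BlockOfCode& code, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);                         // one Argument per IR operand
    const Arm64Gen::ARM64Reg result = reg_alloc.UseScratchGpr(args[0]);  // writable copy of operand 0
    const Arm64Gen::ARM64Reg operand = reg_alloc.UseGpr(args[1]);        // read-only use of operand 1
    code.ADD(DecodeReg(result), DecodeReg(result), DecodeReg(operand));  // 32-bit add on the W views
    reg_alloc.DefineValue(inst, result);                                 // the IR result now lives in result
}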


@ -479,7 +479,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
}
}
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
const u32 imm = args[0].GetImmediateU32();
@ -503,6 +503,17 @@ void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
}
}
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code.and_(to_store, 0b11000001'00000001);
code.imul(to_store, to_store, 0b00010000'00100001);
code.shl(to_store, 16);
code.and_(to_store, 0xF0000000);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
}
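// Reviewer note (illustrative, not part of the patch): assuming the incoming NZCV value uses the
// host-flag layout N=bit 15, Z=bit 14, C=bit 8, V=bit 0, the and/imul/shl/and sequence above
// repacks it into ARM's NZCV at bits 31..28, e.g.:
//     constexpr u32 RepackNZCV(u32 x) {
//         x &= 0b11000001'00000001;   // keep N, Z, C, V
//         x *= 0b00010000'00100001;   // gather copies of V, C, Z, N at bits 12..15
//         return (x << 16) & 0xF0000000;
//     }
//     static_assert(RepackNZCV(0x8101) == 0xB0000000, "N, C, V"); // 0x4000 (Z only) -> 0x40000000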
void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {


@ -995,8 +995,9 @@ void A64EmitX64::EmitA64ExclusiveReadMemory128(A64EmitContext& ctx, IR::Inst* in
ASSERT(conf.global_monitor != nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
ctx.reg_alloc.Use(args[0], ABI_PARAM2);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(nullptr);
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));


@ -44,4 +44,9 @@ u8 RecipEstimate(u64 a);
*/
u8 RecipSqrtEstimate(u64 a);
template <typename T>
constexpr bool IsPow2(T imm){
return imm > 0 && (imm & (imm - 1)) == 0;
}
} // namespace Dynarmic::Common
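// Reviewer note (illustrative): quick sanity checks for the new helper:
//     static_assert(Dynarmic::Common::IsPow2(4096) && !Dynarmic::Common::IsPow2(12));
//     static_assert(!Dynarmic::Common::IsPow2(0));  // zero (and negatives) are rejected by imm > 0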


@ -27,7 +27,11 @@ std::vector<ArmMatcher<V>> GetArmDecodeTable() {
std::vector<ArmMatcher<V>> table = {
#define INST(fn, name, bitstring) Decoder::detail::detail<ArmMatcher<V>>::GetMatcher(&V::fn, name, bitstring),
#ifdef ARCHITECTURE_Aarch64
#include "arm_a64.inc"
#else
#include "arm.inc"
#endif
#undef INST
};


@ -0,0 +1,301 @@
// Barrier instructions
//INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") // v7
//INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") // v7
//INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") // v7
// Branch instructions
INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") // v5
INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") // v5
INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") // all
INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") // all
INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") // v4T
INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J
// CRC32 instructions
//INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8
//INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8
// Coprocessor instructions
INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5)
INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5)
INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5)
INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6)
INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5)
INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6)
INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5)
// Data Processing instructions
INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") // all
INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") // all
INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") // all
INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") // all
INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") // all
INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") // all
INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") // all
INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") // all
INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") // all
INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") // all
INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") // all
INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") // all
INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") // all
INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") // all
INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") // all
INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") // all
INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") // all
INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") // all
INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") // all
INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") // all
INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") // all
INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") // all
INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") // all
INST(arm_MOV_rsr, "MOV (rsr)", "cccc0001101S0000ddddssss0rr1mmmm") // all
INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") // all
INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") // all
INST(arm_MVN_rsr, "MVN (rsr)", "cccc0001111S0000ddddssss0rr1mmmm") // all
INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") // all
INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") // all
INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") // all
INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") // all
INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") // all
INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") // all
INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") // all
INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") // all
INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") // all
INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") // all
INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") // all
INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") // all
INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") // all
INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") // all
INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") // all
INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") // all
INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") // all
INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") // all
INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") // all
INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") // all
INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") // all
// Exception Generating instructions
INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") // v5
INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") // all
INST(arm_UDF, "UDF", "111001111111------------1111----") // all
// Extension instructions
INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") // v6
INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") // v6
INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") // v6
INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") // v6
INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") // v6
INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") // v6
INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") // v6
INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") // v6
INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") // v6
INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") // v6
INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") // v6
INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") // v6
// Hint instructions
INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") // v5E for PLD; v7 for PLDW
INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") // v5E for PLD; v7 for PLDW
INST(arm_SEV, "SEV", "----0011001000001111000000000100") // v6K
INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") // v8
INST(arm_WFE, "WFE", "----0011001000001111000000000010") // v6K
INST(arm_WFI, "WFI", "----0011001000001111000000000011") // v6K
INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") // v6K
INST(arm_NOP, "Reserved Hint", "----0011001000001111------------")
INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----")
// Synchronization Primitive instructions
INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") // v6K
INST(arm_LDREX, "LDREX", "cccc00011001nnnndddd111110011111") // v6
INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") // v6K
INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") // v6K
INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") // v6K
INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") // v6
INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") // v6K
INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") // v6K
INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") // v6K
INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") // v2S (v6: Deprecated)
INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") // v2S (v6: Deprecated)
// Load/Store instructions
INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------")
INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----")
INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----")
INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----")
INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----")
INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----")
INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----")
INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----")
INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------")
INST(arm_LDRT, "LDRT (A2)", "----0110-011---------------0----")
INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------")
INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----")
INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----")
INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----")
INST(arm_STRT, "STRT (A1)", "----0100-010--------------------")
INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----")
INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv")
INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv")
INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm")
INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv")
INST(arm_LDRB_imm, "LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv")
INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm")
INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv")
INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") // v5E
INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") // v5E
INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv")
INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv")
INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm")
INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv")
INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv")
INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm")
INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv")
INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv")
INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm")
INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv")
INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm")
INST(arm_STRB_imm, "STRB (imm)", "cccc010pu1w0nnnnttttvvvvvvvvvvvv")
INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm")
INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") // v5E
INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") // v5E
INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv")
INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm")
// Load/Store Multiple instructions
INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_LDM_usr, "LDM (usr reg)", "----100--101--------------------") // all
INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // all
INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") // all
INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") // all
// Miscellaneous instructions
INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") // v6T2
INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") // v6T2
INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5
INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2
INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2
INST(arm_NOP, "NOP", "----0011001000001111000000000000") // v6K
INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2
INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6
INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2
// Unsigned Sum of Absolute Differences instructions
INST(arm_USAD8, "USAD8", "cccc01111000dddd1111mmmm0001nnnn") // v6
INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") // v6
// Packing instructions
INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") // v6K
INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") // v6K
// Reversal instructions
INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") // v6T2
INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") // v6
INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") // v6
INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6
// Saturation instructions
INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6
INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6
INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6
INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6
// Divide instructions
INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a
INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") // v7a
// Multiply (Normal) instructions
INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") // v2
INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") // v6T2
INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") // v2
// Multiply (Long) instructions
INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") // v3M
INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") // v3M
INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") // v6
INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") // v3M
INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") // v3M
// Multiply (Halfword) instructions
INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") // v5xP
INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") // v5xP
INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") // v5xP
// Multiply (Word by Halfword) instructions
INST(arm_SMLAWy, "SMLAWY", "cccc00010010ddddaaaammmm1y00nnnn") // v5xP
INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") // v5xP
// Multiply (Most Significant Word) instructions
INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") // v6
INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") // v6
INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6
// Multiply (Dual) instructions
INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6
INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6
INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6
INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6
INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6
INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6
// Parallel Add/Subtract (Modulo) instructions
INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") // v6
INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6
INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6
INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6
INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6
INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6
INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6
INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6
INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6
INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6
INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6
INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Saturating) instructions
INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6
INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6
INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6
INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6
INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6
INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6
INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6
INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6
INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6
INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6
INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6
INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6
// Parallel Add/Subtract (Halving) instructions
INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // v6
INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6
INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6
INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6
INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6
INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6
INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6
INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6
INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6
INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6
INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6
INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6
// Saturated Add/Subtract instructions
INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP
INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP
INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP
INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP
// Status Register Access instructions
INST(arm_CPS, "CPS", "111100010000---00000000---0-----") // v6
INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") // v6
INST(arm_MRS, "MRS", "cccc000100001111dddd000000000000") // v3
INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3
INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3
INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") // v6
INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") // v6


@ -119,3 +119,14 @@
//INST(asimd_VBIC_imm, "VBIC (immediate)", "1111001a1-000bcd----10x10-11efgh") // ASIMD
//INST(asimd_VMVN_imm, "VMVN (immediate)", "1111001a1-000bcd----110x0-11efgh") // ASIMD
//INST(asimd_VMOV_imm, "VMOV (immediate)", "1111001a1-000bcd----11100-11efgh") // ASIMD
// Advanced SIMD load/store structures
//INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") // v8
INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") // v8
INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") // v8
INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") // v8
//INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") // v8
//INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") // v8
//INST(arm_UDF, "UNALLOCATED", "111101001-10--------1110---1----") // v8
//INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") // v8
//INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") // v8


@ -99,6 +99,10 @@ void IREmitter::SetCpsr(const IR::U32& value) {
}
void IREmitter::SetCpsrNZCV(const IR::U32& value) {
Inst(Opcode::A32SetCpsrNZCVRaw, value);
}
void IREmitter::SetCpsrNZCV(const IR::NZCV& value) {
Inst(Opcode::A32SetCpsrNZCV, value);
}
@ -183,6 +187,20 @@ void IREmitter::SetExclusive(const IR::U32& vaddr, size_t byte_size) {
Inst(Opcode::A32SetExclusive, vaddr, Imm8(u8(byte_size)));
}
IR::UAny IREmitter::ReadMemory(size_t bitsize, const IR::U32& vaddr) {
switch (bitsize) {
case 8:
return ReadMemory8(vaddr);
case 16:
return ReadMemory16(vaddr);
case 32:
return ReadMemory32(vaddr);
case 64:
return ReadMemory64(vaddr);
}
ASSERT_FALSE("Invalid bitsize");
}
IR::U8 IREmitter::ReadMemory8(const IR::U32& vaddr) {
return Inst<IR::U8>(Opcode::A32ReadMemory8, vaddr);
}
@ -202,6 +220,20 @@ IR::U64 IREmitter::ReadMemory64(const IR::U32& vaddr) {
return current_location.EFlag() ? ByteReverseDual(value) : value;
}
void IREmitter::WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny& value) {
switch (bitsize) {
case 8:
return WriteMemory8(vaddr, value);
case 16:
return WriteMemory16(vaddr, value);
case 32:
return WriteMemory32(vaddr, value);
case 64:
return WriteMemory64(vaddr, value);
}
ASSERT_FALSE("Invalid bitsize");
}
void IREmitter::WriteMemory8(const IR::U32& vaddr, const IR::U8& value) {
Inst(Opcode::A32WriteMemory8, vaddr, value);
}


@ -47,6 +47,7 @@ public:
IR::U32 GetCpsr();
void SetCpsr(const IR::U32& value);
void SetCpsrNZCV(const IR::U32& value);
void SetCpsrNZCV(const IR::NZCV& value);
void SetCpsrNZCVQ(const IR::U32& value);
void SetCheckBit(const IR::U1& value);
IR::U1 GetCFlag();
@ -70,10 +71,12 @@ public:
void ClearExclusive();
void SetExclusive(const IR::U32& vaddr, size_t byte_size);
IR::UAny ReadMemory(size_t bitsize, const IR::U32& vaddr);
IR::U8 ReadMemory8(const IR::U32& vaddr);
IR::U16 ReadMemory16(const IR::U32& vaddr);
IR::U32 ReadMemory32(const IR::U32& vaddr);
IR::U64 ReadMemory64(const IR::U32& vaddr);
void WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny& value);
void WriteMemory8(const IR::U32& vaddr, const IR::U8& value);
void WriteMemory16(const IR::U32& vaddr, const IR::U16& value);
void WriteMemory32(const IR::U32& vaddr, const IR::U32& value);


@ -0,0 +1,153 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "frontend/A32/translate/impl/translate_arm.h"
#include "common/bit_util.h"
namespace Dynarmic::A32 {
static ExtReg ToExtRegD(size_t base, bool bit) {
return static_cast<ExtReg>(static_cast<size_t>(ExtReg::D0) + base + (bit ? 16 : 0));
}
bool ArmTranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t size, size_t align, Reg m) {
size_t nelem, regs, inc;
switch (type.ZeroExtend()) {
case 0b0111: // VLD1 A1
nelem = 1;
regs = 1;
inc = 0;
if (Common::Bit<1>(align)) {
return UndefinedInstruction();
}
break;
case 0b1010: // VLD1 A2
nelem = 1;
regs = 2;
inc = 0;
if (align == 0b11) {
return UndefinedInstruction();
}
break;
case 0b0110: // VLD1 A3
nelem = 1;
regs = 3;
inc = 0;
if (Common::Bit<1>(align)) {
return UndefinedInstruction();
}
break;
case 0b0010: // VLD1 A4
nelem = 1;
regs = 4;
inc = 0;
break;
case 0b1000: // VLD2 A1
nelem = 2;
regs = 1;
inc = 1;
if (size == 0b11 || align == 0b11) {
return UndefinedInstruction();
}
break;
case 0b1001: // VLD2 A1
nelem = 2;
regs = 1;
inc = 2;
if (size == 0b11 || align == 0b11) {
return UndefinedInstruction();
}
break;
case 0b0011: // VLD2 A2
nelem = 2;
regs = 2;
inc = 2;
if (size == 0b11) {
return UndefinedInstruction();
}
break;
case 0b0100: // VLD3
nelem = 3;
regs = 1;
inc = 1;
if (size == 0b11 || Common::Bit<1>(align)) {
return UndefinedInstruction();
}
break;
case 0b0101: // VLD3
nelem = 3;
regs = 1;
inc = 2;
if (size == 0b11 || Common::Bit<1>(align)) {
return UndefinedInstruction();
}
break;
case 0b0000: // VLD4
nelem = 4;
regs = 1;
inc = 1;
if (size == 0b11) {
return UndefinedInstruction();
}
break;
case 0b0001: // VLD4
nelem = 4;
regs = 1;
inc = 2;
if (size == 0b11) {
return UndefinedInstruction();
}
break;
default:
ASSERT_FALSE("Decode error");
}
const ExtReg d = ToExtRegD(Vd, D);
const size_t d_last = RegNumber(d) + inc * (nelem - 1);
if (n == Reg::R15 || d_last + regs > 32) {
return UnpredictableInstruction();
}
[[maybe_unused]] const size_t alignment = align == 0 ? 1 : 4 << align;
const size_t ebytes = 1 << size;
const size_t elements = 8 / ebytes;
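    // Worked example (illustrative): VLD1.32 {d0-d1}, [r0] decodes as type=0b1010, size=0b10, giving
    // nelem=1, regs=2, inc=0, ebytes=4, elements=2; the loops below then load four 32-bit elements,
    // packing two into each destination D register, and write-back (when enabled) advances Rn by
    // 8 * nelem * regs = 16 bytes (or by Rm for the register-index form).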
const bool wback = m != Reg::R15;
const bool register_index = m != Reg::R15 && m != Reg::R13;
for (size_t r = 0; r < regs; r++) {
for (size_t i = 0; i < nelem; i++) {
const ExtReg ext_reg = d + i * inc + r;
ir.SetExtendedRegister(ext_reg, ir.Imm64(0));
}
}
IR::U32 address = ir.GetRegister(n);
for (size_t r = 0; r < regs; r++) {
for (size_t e = 0; e < elements; e++) {
for (size_t i = 0; i < nelem; i++) {
const ExtReg ext_reg = d + i * inc + r;
const IR::U64 element = ir.ZeroExtendToLong(ir.ReadMemory(ebytes * 8, address));
const IR::U64 shifted_element = ir.LogicalShiftLeft(element, ir.Imm8(e * ebytes * 8));
ir.SetExtendedRegister(ext_reg, ir.Or(ir.GetExtendedRegister(ext_reg), shifted_element));
address = ir.Add(address, ir.Imm32(ebytes));
}
}
}
if (wback) {
if (register_index) {
ir.SetRegister(n, ir.Add(ir.GetRegister(n), ir.GetRegister(m)));
} else {
ir.SetRegister(n, ir.Add(ir.GetRegister(n), ir.Imm32(8 * nelem * regs)));
}
}
return true;
}
} // namespace Dynarmic::A32


@ -41,7 +41,11 @@ bool ArmTranslatorVisitor::arm_SVC(Cond cond, Imm<24> imm24) {
// UDF<c> #<imm16>
bool ArmTranslatorVisitor::arm_UDF() {
#if ARCHITECTURE_x86_64
return UndefinedInstruction();
#else
return InterpretThisInstruction();
#endif
}
} // namespace Dynarmic::A32


@ -428,6 +428,9 @@ struct ArmTranslatorVisitor final {
bool vfp_VSTM_a2(Cond cond, bool p, bool u, bool D, bool w, Reg n, size_t Vd, Imm<8> imm8);
bool vfp_VLDM_a1(Cond cond, bool p, bool u, bool D, bool w, Reg n, size_t Vd, Imm<8> imm8);
bool vfp_VLDM_a2(Cond cond, bool p, bool u, bool D, bool w, Reg n, size_t Vd, Imm<8> imm8);
// Advanced SIMD load/store structures
bool v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t sz, size_t align, Reg m);
};
} // namespace Dynarmic::A32


@ -9,6 +9,7 @@
#include "common/assert.h"
#include "frontend/A32/decoder/arm.h"
#include "frontend/A32/decoder/asimd.h"
#include "frontend/A32/decoder/vfp.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/A32/translate/impl/translate_arm.h"
@ -41,6 +42,8 @@ IR::Block TranslateArm(LocationDescriptor descriptor, MemoryReadCodeFuncType mem
if (const auto vfp_decoder = DecodeVFP<ArmTranslatorVisitor>(arm_instruction)) {
should_continue = vfp_decoder->get().call(visitor, arm_instruction);
} else if (const auto asimd_decoder = DecodeASIMD<ArmTranslatorVisitor>(arm_instruction)) {
should_continue = asimd_decoder->get().call(visitor, arm_instruction);
} else if (const auto decoder = DecodeArm<ArmTranslatorVisitor>(arm_instruction)) {
should_continue = decoder->get().call(visitor, arm_instruction);
} else {
@ -80,6 +83,8 @@ bool TranslateSingleArmInstruction(IR::Block& block, LocationDescriptor descript
bool should_continue = true;
if (const auto vfp_decoder = DecodeVFP<ArmTranslatorVisitor>(arm_instruction)) {
should_continue = vfp_decoder->get().call(visitor, arm_instruction);
} else if (const auto asimd_decoder = DecodeASIMD<ArmTranslatorVisitor>(arm_instruction)) {
should_continue = asimd_decoder->get().call(visitor, arm_instruction);
} else if (const auto decoder = DecodeArm<ArmTranslatorVisitor>(arm_instruction)) {
should_continue = decoder->get().call(visitor, arm_instruction);
} else {


@ -3,9 +3,9 @@
* SPDX-License-Identifier: 0BSD
*/
#include "frontend/A64/translate/impl/impl.h"
#include "common/bit_util.h"
#include "frontend/ir/terminal.h"
#include "frontend/A64/translate/impl/impl.h"
namespace Dynarmic::A64 {


@ -166,6 +166,7 @@ bool Inst::ReadsFromCPSR() const {
bool Inst::WritesToCPSR() const {
switch (op) {
case Opcode::A32SetCpsr:
case Opcode::A32SetCpsrNZCVRaw:
case Opcode::A32SetCpsrNZCV:
case Opcode::A32SetCpsrNZCVQ:
case Opcode::A32SetNFlag:


@ -14,7 +14,8 @@ A32OPC(SetExtendedRegister32, Void, A32E
A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 )
A32OPC(GetCpsr, U32, )
A32OPC(SetCpsr, Void, U32 )
A32OPC(SetCpsrNZCV, Void, U32 )
A32OPC(SetCpsrNZCVRaw, Void, U32 )
A32OPC(SetCpsrNZCV, Void, NZCV )
A32OPC(SetCpsrNZCVQ, Void, U32 )
A32OPC(GetNFlag, U1, )
A32OPC(SetNFlag, Void, U1 )


@ -0,0 +1,56 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <array>
#include <boost/variant/get.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "dynarmic/A32/config.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "ir_opt/passes.h"
namespace Dynarmic::Optimization {
void A32MergeInterpretBlocksPass(IR::Block& block, A32::UserCallbacks* cb) {
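// An instruction qualifies for merging only if translating it on its own emits no IR and ends in
// an Interpret terminal pointing back at the same location, i.e. the translator punted on it
// entirely (see is_interpret_instruction below).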
const auto is_interpret_instruction = [cb](A32::LocationDescriptor location) {
const u32 instruction = cb->MemoryReadCode(location.PC());
IR::Block new_block{location};
A32::TranslateSingleInstruction(new_block, location, instruction);
if (!new_block.Instructions().empty())
return false;
const IR::Terminal terminal = new_block.GetTerminal();
if (auto term = boost::get<IR::Term::Interpret>(&terminal)) {
return term->next == location;
}
return false;
};
IR::Terminal terminal = block.GetTerminal();
auto term = boost::get<IR::Term::Interpret>(&terminal);
if (!term)
return;
A32::LocationDescriptor location{term->next};
size_t num_instructions = 1;
while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) {
num_instructions++;
}
term->num_instructions = num_instructions;
block.ReplaceTerminal(terminal);
block.CycleCount() += num_instructions - 1;
}
} // namespace Dynarmic::Optimization


@ -22,6 +22,7 @@ namespace Dynarmic::Optimization {
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
void A32GetSetElimination(IR::Block& block);
void A32MergeInterpretBlocksPass(IR::Block& block, A32::UserCallbacks* cb);
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
void A64GetSetElimination(IR::Block& block);
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);


@ -33,6 +33,14 @@
#include <fmt/format.h>
#include <fmt/ostream.h>
static Dynarmic::A32::UserConfig GetUserConfig(ArmTestEnv* testenv) {
Dynarmic::A32::UserConfig user_config;
user_config.enable_fast_dispatch = false;
user_config.callbacks = testenv;
user_config.fastmem_pointer = reinterpret_cast<void*>(0xFFFFFDDE00000000);
return user_config;
}
namespace {
using namespace Dynarmic;
@ -76,7 +84,12 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
} instructions = []{
const std::vector<std::tuple<std::string, const char*>> list {
#define INST(fn, name, bitstring) {#fn, bitstring},
#ifdef ARCHITECTURE_Aarch64
#include "frontend/A32/decoder/arm_a64.inc"
#else
#include "frontend/A32/decoder/arm.inc"
#include "frontend/A32/decoder/asimd.inc"
#endif
#include "frontend/A32/decoder/vfp.inc"
#undef INST
};


@ -199,7 +199,7 @@ void FuzzJitThumb(const size_t instruction_count, const size_t instructions_to_e
}
}
TEST_CASE("Fuzz Thumb instructions set 1", "[JitX64][Thumb]") {
TEST_CASE("Fuzz Thumb instructions set 1", "[JitX64][JitA64][Thumb]") {
const std::array instructions = {
ThumbInstGen("00000xxxxxxxxxxx"), // LSL <Rd>, <Rm>, #<imm5>
ThumbInstGen("00001xxxxxxxxxxx"), // LSR <Rd>, <Rm>, #<imm5>
@ -267,7 +267,7 @@ TEST_CASE("Fuzz Thumb instructions set 1", "[JitX64][Thumb]") {
#endif
}
TEST_CASE("Fuzz Thumb instructions set 2 (affects PC)", "[JitX64][Thumb]") {
TEST_CASE("Fuzz Thumb instructions set 2 (affects PC)", "[JitX64][JitA64][Thumb]") {
const std::array instructions = {
// TODO: We currently can't test BX/BLX as we have
// no way of preventing the unpredictable


@ -425,3 +425,75 @@ TEST_CASE("arm: Test stepping 3", "[arm]") {
REQUIRE(jit.Regs()[15] == 20);
REQUIRE(jit.Cpsr() == 0x000001d0);
}
TEST_CASE("arm: Cleared Q flag", "[arm][A32][JitA64]") {
ArmTestEnv test_env;
A32::Jit jit{GetUserConfig(&test_env)};
// qadd r1, r0, r0
// msr APSR_nzcvq, #0
// qadd r3, r2, r2
// b +#0 (infinite loop)
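// qadd r1, r0, r0 saturates (0x7FFFFFFF + 0x7FFFFFFF -> 0x7FFFFFFF) and sets Q; the MSR then
// clears APSR (including Q); qadd r3, r2, r2 (0x8000 + 0x8000 = 0x10000) does not saturate, so
// Q must still read as clear at the end (CPSR == 0x000001d0).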
test_env.code_mem = {
0xe1001050,
0xe328f000,
0xe1023052,
0xeafffffe,
};
jit.Regs() = {
0x7FFFFFFF, // R0
0x80008000, // R1
0x00008000, // R2
0x7f7f7f7f, // R3
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
jit.SetCpsr(0x000001d0); // User-mode
test_env.ticks_left = 4;
jit.Run();
REQUIRE(jit.Regs()[0] == 0x7FFFFFFF);
REQUIRE(jit.Regs()[1] == 0x7FFFFFFF);
REQUIRE(jit.Regs()[2] == 0x00008000);
REQUIRE(jit.Regs()[3] == 0x00010000);
REQUIRE(jit.Cpsr() == 0x000001d0);
}
TEST_CASE("arm: Cleared Q flag 2", "[arm][A32][JitA64]") {
ArmTestEnv test_env;
A32::Jit jit{GetUserConfig(&test_env)};
// Because of how the A64 backend calculates the GE flags in sadd8
// and similar instructions, the host's Q flag may be left set,
// tainting the results of subsequent instructions.
// sadd8 r1, r0, r0
// qadd r3, r2, r2
// b +#0 (infinite loop)
test_env.code_mem = {
0xe6101f90,
0xe1023052,
0xeafffffe,
};
jit.Regs() = {
0x7F007F00, // R0
0x80008000, // R1
0x00008000, // R2
0x7f7f7f7f, // R3
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
jit.SetCpsr(0x000001d0); // User-mode
test_env.ticks_left = 4;
jit.Run();
REQUIRE((jit.Cpsr() & (1 << 27)) == 0);
}


@ -7,9 +7,9 @@ add_executable(dynarmic_tests
A32/test_arm_instructions.cpp
A32/test_thumb_instructions.cpp
A32/testenv.h
A64/a64.cpp
A64/testenv.h
cpu_info.cpp
# A64/a64.cpp
# A64/testenv.h
# cpu_info.cpp
fp/FPToFixed.cpp
fp/FPValue.cpp
fp/mantissa_util_tests.cpp
@ -18,20 +18,35 @@ add_executable(dynarmic_tests
rand_int.h
)
if (ARCHITECTURE_x86_64)
target_sources(dynarmic_tests PRIVATE
A64/a64.cpp
A64/testenv.h
cpu_info.cpp
)
endif()
if (DYNARMIC_TESTS_USE_UNICORN)
target_sources(dynarmic_tests PRIVATE
A32/fuzz_arm.cpp
A32/fuzz_thumb.cpp
A64/fuzz_with_unicorn.cpp
A64/misaligned_page_table.cpp
A64/verify_unicorn.cpp
#A64/fuzz_with_unicorn.cpp
#A64/verify_unicorn.cpp
fuzz_util.cpp
fuzz_util.h
unicorn_emu/a32_unicorn.cpp
unicorn_emu/a32_unicorn.h
unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h
#unicorn_emu/a64_unicorn.cpp
#unicorn_emu/a64_unicorn.h
)
if (ARCHITECTURE_x86_64)
target_sources(dynarmic_tests PRIVATE
A64/fuzz_with_unicorn.cpp
A64/verify_unicorn.cpp
unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h
)
endif()
target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
endif()
@ -43,10 +58,16 @@ include(CreateDirectoryGroups)
create_target_directory_groups(dynarmic_tests)
create_target_directory_groups(dynarmic_print_info)
target_link_libraries(dynarmic_tests PRIVATE dynarmic boost catch fmt mp xbyak)
target_link_libraries(dynarmic_tests PRIVATE dynarmic boost catch fmt mp)
if (ARCHITECTURE_x86_64)
target_link_libraries(dynarmic_tests PRIVATE xbyak)
endif()
target_include_directories(dynarmic_tests PRIVATE . ../src)
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=0)
target_compile_options(dynarmic_tests PRIVATE -DCATCH_CONFIG_NO_WINDOWS_SEH -DCATCH_CONFIG_NO_POSIX_SIGNALS)
target_link_libraries(dynarmic_print_info PRIVATE dynarmic boost catch fmt mp)
target_include_directories(dynarmic_print_info PRIVATE . ../src)