android: video_core: Add experimental asynchronous GPU option.
This commit is contained in:
parent
5a35c71f84
commit
70bf60ea27
@ -347,13 +347,15 @@ public final class SettingsFragmentPresenter {
|
||||
mView.getActivity().setTitle(R.string.preferences_graphics);
|
||||
|
||||
SettingSection rendererSection = mSettings.getSection(Settings.SECTION_RENDERER);
|
||||
Setting shadersAccurateMul = rendererSection.getSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL);
|
||||
Setting resolutionFactor = rendererSection.getSetting(SettingsFile.KEY_RESOLUTION_FACTOR);
|
||||
Setting filterMode = rendererSection.getSetting(SettingsFile.KEY_FILTER_MODE);
|
||||
Setting useAsynchronousGpuEmulation = rendererSection.getSetting(SettingsFile.KEY_USE_ASYNCHRONOUS_GPU_EMULATION);
|
||||
Setting shadersAccurateMul = rendererSection.getSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL);
|
||||
|
||||
sl.add(new CheckBoxSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL, Settings.SECTION_RENDERER, R.string.shaders_accurate_mul, R.string.shaders_accurate_mul_description, false, shadersAccurateMul));
|
||||
sl.add(new SliderSetting(SettingsFile.KEY_RESOLUTION_FACTOR, Settings.SECTION_RENDERER, R.string.internal_resolution, R.string.internal_resolution_description, 1, 4, "x", 1, resolutionFactor));
|
||||
sl.add(new CheckBoxSetting(SettingsFile.KEY_FILTER_MODE, Settings.SECTION_RENDERER, R.string.linear_filtering, R.string.linear_filtering_description, true, filterMode));
|
||||
sl.add(new CheckBoxSetting(SettingsFile.KEY_USE_ASYNCHRONOUS_GPU_EMULATION, Settings.SECTION_RENDERER, R.string.asynchronous_gpu, R.string.asynchronous_gpu_description, true, useAsynchronousGpuEmulation));
|
||||
sl.add(new CheckBoxSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL, Settings.SECTION_RENDERER, R.string.shaders_accurate_mul, R.string.shaders_accurate_mul_description, false, shadersAccurateMul));
|
||||
}
|
||||
|
||||
private void addAudioSettings(ArrayList<SettingsItem> sl) {
|
||||
|
@ -53,6 +53,7 @@ public final class SettingsFile {
|
||||
public static final String KEY_FACTOR_3D = "factor_3d";
|
||||
public static final String KEY_FILTER_MODE = "filter_mode";
|
||||
public static final String KEY_TEXTURE_FILTER_NAME = "texture_filter_name";
|
||||
public static final String KEY_USE_ASYNCHRONOUS_GPU_EMULATION = "use_asynchronous_gpu_emulation";
|
||||
|
||||
public static final String KEY_LAYOUT_OPTION = "layout_option";
|
||||
public static final String KEY_SWAP_SCREEN = "swap_screen";
|
||||
|
@ -16,6 +16,8 @@ add_library(main SHARED
|
||||
emu_window/emu_window.h
|
||||
game_info.cpp
|
||||
game_info.h
|
||||
game_settings.cpp
|
||||
game_settings.h
|
||||
id_cache.cpp
|
||||
id_cache.h
|
||||
mic.cpp
|
||||
|
@ -114,6 +114,8 @@ void Config::ReadValues() {
|
||||
Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true);
|
||||
Settings::values.shaders_accurate_mul =
|
||||
sdl2_config->GetBoolean("Renderer", "shaders_accurate_mul", false);
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true);
|
||||
Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
|
||||
Settings::values.resolution_factor =
|
||||
static_cast<u16>(sdl2_config->GetInteger("Renderer", "resolution_factor", 1));
|
||||
|
@ -108,6 +108,10 @@ use_hw_shader =
|
||||
# 0: Off (Default. Faster, but causes issues in some games) 1: On (Slower, but correct)
|
||||
shaders_accurate_mul =
|
||||
|
||||
# Enable asynchronous GPU emulation
|
||||
# 0: Off (Slower, but more accurate) 1: On (Default. Faster, but may cause issues in some games)
|
||||
use_asynchronous_gpu_emulation =
|
||||
|
||||
# Whether to use the Just-In-Time (JIT) compiler for shader emulation
|
||||
# 0: Interpreter (slow), 1 (default): JIT (fast)
|
||||
use_shader_jit =
|
||||
|
192
src/android/app/src/main/jni/game_settings.cpp
Normal file
192
src/android/app/src/main/jni/game_settings.cpp
Normal file
@ -0,0 +1,192 @@
|
||||
// Copyright 2019 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/settings.h"
|
||||
|
||||
namespace GameSettings {
|
||||
|
||||
/**
 * Overwrites parts of Settings::values with built-in, per-title tuning.
 *
 * All GPU timing modes are first set to a baseline profile. Titles that have an active entry in
 * the switch below then replace individual values; any other program ID keeps the baseline.
 * Profiles that are currently disabled are preserved as comments so they can be re-enabled.
 *
 * @param program_id Program ID of the title to load overrides for.
 */
void LoadOverrides(u64 program_id) {
    using Mode = Settings::GpuTimingMode;
    auto& values = Settings::values;

    // Baseline profile
    values.gpu_timing_mode_submit_list = Mode::Asynch_2ms;
    values.gpu_timing_mode_swap_buffers = Mode::Asynch_8ms;
    values.gpu_timing_mode_memory_fill = Mode::Asynch_2ms;
    values.gpu_timing_mode_display_transfer = Mode::Synch;
    values.gpu_timing_mode_flush = Mode::Skip;
    values.gpu_timing_mode_flush_and_invalidate = Mode::Asynch;
    values.gpu_timing_mode_invalidate = Mode::Synch;

    switch (program_id) {
    // [disabled] The Legend of Zelda: A Link Between Worlds
    // case 0x00040000000EC200: // JAP
    // case 0x00040000000EC300: // USA
    // case 0x00040000000EC400: // EUR
    //     values.gpu_timing_mode_submit_list = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_2ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch_600us;
    //     values.gpu_timing_mode_flush = Mode::Skip;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // [disabled] The Legend of Zelda: Majora's Mask 3D
    // case 0x00040000000D6E00: // JAP
    // case 0x0004000000125500: // USA
    // case 0x0004000000125600: // EUR
    //     values.gpu_timing_mode_submit_list = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch;
    //     values.gpu_timing_mode_flush = Mode::Skip;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // The Legend of Zelda: Ocarina of Time 3D
    case 0x0004000000033400: // JAP
    case 0x0004000000033500: // USA
    case 0x0004000000033600: // EUR
    case 0x000400000008F800: // KOR
    case 0x000400000008F900: // CHI
        values.shaders_accurate_mul = true;
        values.gpu_timing_mode_submit_list = Mode::Asynch_1ms;
        values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
        values.gpu_timing_mode_memory_fill = Mode::Asynch;
        values.gpu_timing_mode_display_transfer = Mode::Asynch;
        values.gpu_timing_mode_flush = Mode::Skip;
        values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
        break;

    // Super Mario 3D Land
    case 0x0004000000054100: // JAP
    case 0x0004000000054000: // USA
    case 0x0004000000053F00: // EUR
    case 0x0004000000089D00: // KOR
        values.gpu_timing_mode_submit_list = Mode::Asynch_40us;
        // [disabled] remaining values of this profile:
        // values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
        // values.gpu_timing_mode_memory_fill = Mode::Asynch_40us;
        // values.gpu_timing_mode_display_transfer = Mode::Asynch_40us;
        // values.gpu_timing_mode_flush = Mode::Skip;
        // values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
        break;

    // [disabled] Animal Crossing: New Leaf
    // case 0x0004000000086300: // USA
    // case 0x0004000000086400: // EUR
    //     values.gpu_timing_mode_submit_list = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_2ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch_600us;
    //     values.gpu_timing_mode_flush = Mode::Skip;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // [disabled] Pokemon Omega Ruby / Alpha Sapphire
    // case 0x000400000011C400: // USA / Omega Ruby
    // case 0x000400000011C500: // USA / Alpha Sapphire
    //     values.gpu_timing_mode_submit_list = Mode::Asynch;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch;
    //     values.gpu_timing_mode_flush = Mode::Synch;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // [disabled] Pokemon X / Y
    // case 0x0004000000055D00: // USA / Pokemon X
    // case 0x0004000000055E00: // USA / Pokemon Y
    // case 0x0004000E00055D00: // USA / Pokemon X Update 1.x
    //     values.gpu_timing_mode_submit_list = Mode::Asynch;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch;
    //     values.gpu_timing_mode_flush = Mode::Synch;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // Pokemon Ultra Sun / Ultra Moon: matched explicitly, but every value of the profile is
    // currently disabled, so these titles run with the baseline.
    case 0x00040000001B5000: // USA / Pokemon Ultra Sun
    case 0x00040000001B5100: // USA / Pokemon Ultra Moon
        // values.force_separable_shader_fix = true;
        // values.gpu_timing_mode_submit_list = Mode::Asynch;
        // values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
        // values.gpu_timing_mode_memory_fill = Mode::Asynch;
        // values.gpu_timing_mode_display_transfer = Mode::Asynch;
        // values.gpu_timing_mode_flush = Mode::Skip;
        // values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
        break;

    // [disabled] Kirby: Planet Robobot
    // case 0x0004000000183600: // USA
    //     values.gpu_timing_mode_submit_list = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_8ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_display_transfer = Mode::Synch;
    //     values.gpu_timing_mode_flush = Mode::Skip;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // [disabled] Mario Kart 7
    // case 0x0004000000030600: // JAP
    // case 0x0004000000030800: // USA
    // case 0x0004000000030700: // EUR
    // case 0x000400000008B400: // CHI
    //     values.gpu_timing_mode_submit_list = Mode::Asynch_1ms;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_2ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch;
    //     values.gpu_timing_mode_flush = Mode::Skip;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // [disabled] Super Smash Bros.
    // case 0x00040000000EDF00: // USA
    // case 0x00040000000EE000: // EUR
    //     values.gpu_timing_mode_submit_list = Mode::Asynch_2ms;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch_20us;
    //     values.gpu_timing_mode_flush = Mode::Skip;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    // [disabled] New Super Mario Bros. 2
    // case 0x000400000007AD00: // JAP
    // case 0x000400000007AE00: // USA
    // case 0x000400000007AF00: // EUR
    // case 0x00040000000B8A00: // CHI
    // case 0x0004000000137E00: // All
    //     values.gpu_timing_mode_submit_list = Mode::Asynch_2ms;
    //     values.gpu_timing_mode_swap_buffers = Mode::Asynch_4ms;
    //     values.gpu_timing_mode_memory_fill = Mode::Asynch;
    //     values.gpu_timing_mode_display_transfer = Mode::Asynch_20us;
    //     values.gpu_timing_mode_flush = Mode::Skip;
    //     values.gpu_timing_mode_flush_and_invalidate = Mode::Skip;
    //     break;

    default:
        // No dedicated profile: keep the baseline
        break;
    }
}
|
||||
|
||||
} // namespace GameSettings
|
11
src/android/app/src/main/jni/game_settings.h
Normal file
11
src/android/app/src/main/jni/game_settings.h
Normal file
@ -0,0 +1,11 @@
|
||||
// Copyright 2020 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "common/common_types.h"

namespace GameSettings {

/**
 * Applies the built-in per-title overrides to the global settings.
 * @param program_id Program ID of the title to load overrides for.
 */
void LoadOverrides(u64 program_id);

} // namespace GameSettings
|
@ -30,6 +30,7 @@
|
||||
#include "jni/config.h"
|
||||
#include "jni/emu_window/emu_window.h"
|
||||
#include "jni/game_info.h"
|
||||
#include "jni/game_settings.h"
|
||||
#include "jni/id_cache.h"
|
||||
#include "jni/mic.h"
|
||||
#include "jni/native.h"
|
||||
@ -150,6 +151,12 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) {
|
||||
return load_result;
|
||||
}
|
||||
|
||||
// Replace with game-specific settings
|
||||
u64 program_id{};
|
||||
system.GetAppLoader().ReadProgramId(program_id);
|
||||
GameSettings::LoadOverrides(program_id);
|
||||
Settings::Apply();
|
||||
|
||||
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
|
||||
telemetry_session.AddField(Telemetry::FieldType::App, "Frontend", "SDL");
|
||||
|
||||
|
@ -225,6 +225,7 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
|
||||
GDBStub::SetCpuStepFlag(false);
|
||||
}
|
||||
|
||||
Service::GSP::Update();
|
||||
HW::Update();
|
||||
Reschedule();
|
||||
|
||||
@ -417,7 +418,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo
|
||||
video_dumper = std::make_unique<VideoDumper::NullBackend>();
|
||||
#endif
|
||||
|
||||
VideoCore::ResultStatus result = VideoCore::Init(emu_window, *memory);
|
||||
VideoCore::ResultStatus result = VideoCore::Init(*this, emu_window, *memory);
|
||||
if (result != VideoCore::ResultStatus::Success) {
|
||||
switch (result) {
|
||||
case VideoCore::ResultStatus::ErrorGenericDrivers:
|
||||
@ -436,7 +437,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo
|
||||
return ResultStatus::Success;
|
||||
}
|
||||
|
||||
RendererBase& System::Renderer() {
|
||||
VideoCore::RendererBase& System::Renderer() {
|
||||
return *VideoCore::g_renderer;
|
||||
}
|
||||
|
||||
|
@ -57,7 +57,9 @@ namespace VideoDumper {
|
||||
class Backend;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
|
||||
@ -205,7 +207,7 @@ public:
|
||||
return *dsp_core;
|
||||
}
|
||||
|
||||
RendererBase& Renderer();
|
||||
VideoCore::RendererBase& Renderer();
|
||||
|
||||
/**
|
||||
* Gets a reference to the service manager.
|
||||
|
@ -15,7 +15,7 @@ static std::weak_ptr<GSP_GPU> gsp_gpu;
|
||||
void SignalInterrupt(InterruptId interrupt_id) {
|
||||
auto gpu = gsp_gpu.lock();
|
||||
ASSERT(gpu != nullptr);
|
||||
return gpu->SignalInterrupt(interrupt_id);
|
||||
return gpu->SignalInterruptThreadSafe(interrupt_id);
|
||||
}
|
||||
|
||||
void InstallInterfaces(Core::System& system) {
|
||||
@ -31,4 +31,10 @@ void SetGlobalModule(Core::System& system) {
|
||||
gsp_gpu = system.ServiceManager().GetService<GSP_GPU>("gsp::Gpu");
|
||||
}
|
||||
|
||||
void Update() {
|
||||
auto gpu = gsp_gpu.lock();
|
||||
ASSERT(gpu != nullptr);
|
||||
return gpu->Update();
|
||||
}
|
||||
|
||||
} // namespace Service::GSP
|
||||
|
@ -25,4 +25,7 @@ void SignalInterrupt(InterruptId interrupt_id);
|
||||
void InstallInterfaces(Core::System& system);
|
||||
|
||||
void SetGlobalModule(Core::System& system);
|
||||
|
||||
void Update();
|
||||
|
||||
} // namespace Service::GSP
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <boost/serialization/shared_ptr.hpp>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/threadsafe_queue.h"
|
||||
#include "core/hle/kernel/event.h"
|
||||
#include "core/hle/kernel/hle_ipc.h"
|
||||
#include "core/hle/result.h"
|
||||
@ -238,6 +239,18 @@ public:
|
||||
*/
|
||||
FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index);
|
||||
|
||||
void Update() {
|
||||
while (!interrupt_queue.Empty()) {
|
||||
InterruptId next_interrupt{};
|
||||
interrupt_queue.Pop(next_interrupt);
|
||||
SignalInterrupt(next_interrupt);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Queues an interrupt instead of signalling it immediately; the queued entries are signalled
 * by the next call to Update().
 * @param interrupt_id ID of the interrupt to enqueue.
 */
void SignalInterruptThreadSafe(InterruptId interrupt_id) {
    this->interrupt_queue.Push(interrupt_id);
}
|
||||
|
||||
private:
|
||||
/**
|
||||
* Signals that the specified interrupt type has occurred to userland code for the specified GSP
|
||||
@ -456,6 +469,8 @@ private:
|
||||
}
|
||||
|
||||
friend class boost::serialization::access;
|
||||
|
||||
Common::MPSCQueue<InterruptId> interrupt_queue;
|
||||
};
|
||||
|
||||
ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info);
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include "common/color.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
@ -47,343 +46,6 @@ inline void Read(T& var, const u32 raw_addr) {
|
||||
var = g_regs[addr / 4];
|
||||
}
|
||||
|
||||
/**
 * Decodes one framebuffer pixel into an RGBA8 colour vector.
 * @param input_format Pixel format of the data at src_pixel.
 * @param src_pixel Pointer to the first byte of the pixel to decode.
 * @return The decoded colour, or {0, 0, 0, 0} (after logging an error) for an unknown format.
 */
static Common::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) {
    if (input_format == Regs::PixelFormat::RGBA8) {
        return Color::DecodeRGBA8(src_pixel);
    }
    if (input_format == Regs::PixelFormat::RGB8) {
        return Color::DecodeRGB8(src_pixel);
    }
    if (input_format == Regs::PixelFormat::RGB565) {
        return Color::DecodeRGB565(src_pixel);
    }
    if (input_format == Regs::PixelFormat::RGB5A1) {
        return Color::DecodeRGB5A1(src_pixel);
    }
    if (input_format == Regs::PixelFormat::RGBA4) {
        return Color::DecodeRGBA4(src_pixel);
    }

    LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}", static_cast<u32>(input_format));
    return {0, 0, 0, 0};
}
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100));
|
||||
|
||||
/**
 * Executes a GPU memory fill described by the given register block.
 *
 * The request is validated first (both addresses must be valid physical addresses and the range
 * must be non-empty). The rasterizer then gets the chance to accelerate the fill; only if it
 * declines is the range invalidated in the rasterizer cache and filled in software with the
 * configured 24-bit, 32-bit or 16-bit pattern.
 *
 * The software fill never writes at or beyond the end address: when the range length is not a
 * multiple of the 24-bit or 16-bit element size, the final element is clipped to the bytes that
 * still lie inside the range.
 *
 * @param config Memory fill registers describing the range and the fill value.
 */
static void MemoryFill(const Regs::MemoryFillConfig& config) {
    const PAddr start_addr = config.GetStartAddress();
    const PAddr end_addr = config.GetEndAddress();

    // TODO: do hwtest with these cases
    if (!g_memory->IsValidPhysicalAddress(start_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", start_addr);
        return;
    }

    if (!g_memory->IsValidPhysicalAddress(end_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", end_addr);
        return;
    }

    if (end_addr <= start_addr) {
        LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", start_addr,
                     end_addr);
        return;
    }

    u8* start = g_memory->GetPhysicalPointer(start_addr);
    u8* end = g_memory->GetPhysicalPointer(end_addr);

    // Hardware-accelerated path; nothing left to do if the rasterizer handled the fill
    if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
        return;

    Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
                                       config.GetEndAddress() - config.GetStartAddress());

    if (config.fill_24bit) {
        // fill with 24-bit values, one byte at a time so that a trailing partial pixel cannot
        // spill past `end`
        u8 rgb[3];
        rgb[0] = config.value_24bit_r;
        rgb[1] = config.value_24bit_g;
        rgb[2] = config.value_24bit_b;

        std::size_t channel = 0;
        for (u8* ptr = start; ptr < end; ++ptr) {
            *ptr = rgb[channel];
            channel = (channel == 2) ? 0 : channel + 1;
        }
    } else if (config.fill_32bit) {
        // fill with 32-bit values; a trailing partial word is left untouched
        if (end > start) {
            u32 value = config.value_32bit;
            std::size_t len = (end - start) / sizeof(u32);
            for (std::size_t i = 0; i < len; ++i)
                memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
        }
    } else {
        // fill with 16-bit values
        const u16 value_16bit = config.value_16bit.Value();
        u8* ptr = start;
        for (; ptr < end && static_cast<std::size_t>(end - ptr) >= sizeof(u16);
             ptr += sizeof(u16))
            memcpy(ptr, &value_16bit, sizeof(u16));

        // Odd-sized range: only the first byte of the last element is still inside the range
        if (ptr < end)
            memcpy(ptr, &value_16bit, 1);
    }
}
|
||||
|
||||
/**
 * Executes a GPU display transfer described by the given register block.
 *
 * After validating the addresses and dimensions, the rasterizer gets the chance to accelerate
 * the transfer. Otherwise the transfer is done in software: every output pixel is located in the
 * (linear or tiled) source image, optionally averaged with its neighbours when downscaling,
 * and re-encoded into the (linear or tiled) destination image, optionally flipped vertically.
 *
 * @param config Display transfer registers describing source, destination, formats and flags.
 */
static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
    const PAddr input_paddr = config.GetPhysicalInputAddress();
    const PAddr output_paddr = config.GetPhysicalOutputAddress();

    // TODO: do hwtest with these cases
    if (!g_memory->IsValidPhysicalAddress(input_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", input_paddr);
        return;
    }
    if (!g_memory->IsValidPhysicalAddress(output_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", output_paddr);
        return;
    }
    if (config.input_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero input width");
        return;
    }
    if (config.input_height == 0) {
        LOG_CRITICAL(HW_GPU, "zero input height");
        return;
    }
    if (config.output_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero output width");
        return;
    }
    if (config.output_height == 0) {
        LOG_CRITICAL(HW_GPU, "zero output height");
        return;
    }

    // Hardware-accelerated path; nothing left to do if the rasterizer handled the transfer
    if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config))
        return;

    u8* const src_base = g_memory->GetPhysicalPointer(input_paddr);
    u8* const dst_base = g_memory->GetPhysicalPointer(output_paddr);

    if (config.scaling > config.ScaleXY) {
        LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}",
                     config.scaling.Value());
        UNIMPLEMENTED();
        return;
    }

    if (config.input_linear && config.scaling != config.NoScale) {
        LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
        UNIMPLEMENTED();
        return;
    }

    // Downscaling halves the output along X (ScaleX, ScaleXY) and additionally along Y (ScaleXY)
    const int shift_x = config.scaling != config.NoScale ? 1 : 0;
    const int shift_y = config.scaling == config.ScaleXY ? 1 : 0;

    const u32 in_width = config.input_width;
    const u32 out_width = config.output_width >> shift_x;
    const u32 out_height = config.output_height >> shift_y;

    const u32 src_bpp = GPU::Regs::BytesPerPixel(config.input_format);
    const u32 dst_bpp = GPU::Regs::BytesPerPixel(config.output_format);

    const u32 src_bytes = config.input_width * config.input_height * src_bpp;
    const u32 dst_bytes = out_width * out_height * dst_bpp;

    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), src_bytes);
    Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), dst_bytes);

    // Byte offset of pixel (px, py) in a linear image with the given width
    const auto linear_offset = [](u32 px, u32 py, u32 width, u32 bpp) -> u32 {
        return (px + py * width) * bpp;
    };
    // Byte offset of pixel (px, py) in a tiled image: Morton offset inside the row of 8x8 tiles
    // plus the start of that tile row
    const auto tiled_offset = [](u32 px, u32 py, u32 width, u32 bpp) -> u32 {
        const u32 coarse_y = py & ~7;
        const u32 stride = width * bpp;
        return VideoCore::GetMortonOffset(px, py, bpp) + coarse_y * stride;
    };

    // The source is tiled unless input_linear is set. The destination layout is the opposite of
    // the source layout when swizzling and the same layout when dont_swizzle is set.
    const bool src_is_linear = config.input_linear != 0;
    const bool swizzle = config.dont_swizzle == 0;
    const bool dst_is_tiled = (src_is_linear == swizzle);

    for (u32 y = 0; y < out_height; ++y) {
        // Position in the input image, derived from the unflipped output row and the scale
        const u32 src_y = y << shift_y;

        // The flip is applied after computing the input row so that it accounts for the
        // scaling options
        const u32 dst_y = config.flip_vertically ? out_height - y - 1 : y;

        for (u32 x = 0; x < out_width; ++x) {
            const u32 src_x = x << shift_x;

            const u32 src_offset = src_is_linear
                                       ? linear_offset(src_x, src_y, in_width, src_bpp)
                                       : tiled_offset(src_x, src_y, in_width, src_bpp);
            const u32 dst_offset = dst_is_tiled ? tiled_offset(x, dst_y, out_width, dst_bpp)
                                                : linear_offset(x, dst_y, out_width, dst_bpp);

            const u8* src_pixel = src_base + src_offset;
            Common::Vec4<u8> color = DecodePixel(config.input_format, src_pixel);

            if (config.scaling == config.ScaleX) {
                // Average with the next pixel in memory
                Common::Vec4<u8> neighbour = DecodePixel(config.input_format, src_pixel + src_bpp);
                color = ((color + neighbour) / 2).Cast<u8>();
            } else if (config.scaling == config.ScaleXY) {
                // Average with the next three pixels in memory
                Common::Vec4<u8> neighbour1 =
                    DecodePixel(config.input_format, src_pixel + 1 * src_bpp);
                Common::Vec4<u8> neighbour2 =
                    DecodePixel(config.input_format, src_pixel + 2 * src_bpp);
                Common::Vec4<u8> neighbour3 =
                    DecodePixel(config.input_format, src_pixel + 3 * src_bpp);
                color = (((color + neighbour1) + (neighbour2 + neighbour3)) / 4).Cast<u8>();
            }

            u8* dst_pixel = dst_base + dst_offset;
            switch (config.output_format) {
            case Regs::PixelFormat::RGBA8:
                Color::EncodeRGBA8(color, dst_pixel);
                break;
            case Regs::PixelFormat::RGB8:
                Color::EncodeRGB8(color, dst_pixel);
                break;
            case Regs::PixelFormat::RGB565:
                Color::EncodeRGB565(color, dst_pixel);
                break;
            case Regs::PixelFormat::RGB5A1:
                Color::EncodeRGB5A1(color, dst_pixel);
                break;
            case Regs::PixelFormat::RGBA4:
                Color::EncodeRGBA4(color, dst_pixel);
                break;
            default:
                LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}",
                          static_cast<u32>(config.output_format.Value()));
                break;
            }
        }
    }
}
|
||||
|
||||
/**
 * Executes a GPU texture copy described by the given register block.
 *
 * Copies `texture_copy.size` bytes (aligned down to 16) from the input to the output address.
 * Both sides are organised as lines of `width` bytes followed by `gap` bytes that are skipped;
 * input and output may use different line/gap sizes. The rasterizer gets the chance to
 * accelerate the copy before the software path runs.
 *
 * @param config Display transfer registers; the texture_copy sub-block holds size, widths, gaps.
 */
static void TextureCopy(const Regs::DisplayTransferConfig& config) {
    const PAddr input_paddr = config.GetPhysicalInputAddress();
    const PAddr output_paddr = config.GetPhysicalOutputAddress();

    // TODO: do hwtest with invalid addresses
    if (!g_memory->IsValidPhysicalAddress(input_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", input_paddr);
        return;
    }
    if (!g_memory->IsValidPhysicalAddress(output_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", output_paddr);
        return;
    }

    // Hardware-accelerated path; nothing left to do if the rasterizer handled the copy
    if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
        return;

    u8* src = g_memory->GetPhysicalPointer(input_paddr);
    u8* dst = g_memory->GetPhysicalPointer(output_paddr);

    u32 bytes_left = Common::AlignDown(config.texture_copy.size, 16);
    if (bytes_left == 0) {
        LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this.");
        return;
    }

    const u32 src_gap = config.texture_copy.input_gap * 16;
    const u32 dst_gap = config.texture_copy.output_gap * 16;

    // Zero gap means contiguous input/output even if width = 0. To avoid an infinite loop below,
    // the line width is set to the total size whenever the gap is 0.
    u32 src_line;
    if (src_gap == 0) {
        src_line = bytes_left;
    } else {
        src_line = config.texture_copy.input_width * 16;
    }
    u32 dst_line;
    if (dst_gap == 0) {
        dst_line = bytes_left;
    } else {
        dst_line = config.texture_copy.output_width * 16;
    }

    if (src_line == 0) {
        LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this.");
        return;
    }
    if (dst_line == 0) {
        LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this.");
        return;
    }

    const std::size_t src_span = config.texture_copy.size / src_line * (src_line + src_gap);
    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(src_span));

    const std::size_t dst_span = config.texture_copy.size / dst_line * (dst_line + dst_gap);
    // Only need to flush output if it has a gap
    if (dst_gap != 0) {
        Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
                                                   static_cast<u32>(dst_span));
    } else {
        Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(),
                                           static_cast<u32>(dst_span));
    }

    // Bytes still to go in the current input / output line
    u32 src_line_left = src_line;
    u32 dst_line_left = dst_line;
    while (bytes_left > 0) {
        // Copy up to whichever ends first: the input line, the output line or the transfer
        const u32 chunk = std::min(std::min(src_line_left, dst_line_left), bytes_left);

        std::memcpy(dst, src, chunk);
        src += chunk;
        dst += chunk;

        src_line_left -= chunk;
        dst_line_left -= chunk;
        bytes_left -= chunk;

        // At the end of a line, skip its gap and start the next line
        if (src_line_left == 0) {
            src_line_left = src_line;
            src += src_gap;
        }
        if (dst_line_left == 0) {
            dst_line_left = dst_line;
            dst += dst_gap;
        }
    }
}
|
||||
|
||||
template <typename T>
|
||||
inline void Write(u32 addr, const T data) {
|
||||
addr -= HW::VADDR_GPU;
|
||||
@ -403,63 +65,23 @@ inline void Write(u32 addr, const T data) {
|
||||
case GPU_REG_INDEX(memory_fill_config[0].trigger):
|
||||
case GPU_REG_INDEX(memory_fill_config[1].trigger): {
|
||||
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
|
||||
auto& config = g_regs.memory_fill_config[is_second_filler];
|
||||
const auto& config = g_regs.memory_fill_config[is_second_filler];
|
||||
|
||||
if (config.trigger) {
|
||||
MemoryFill(config);
|
||||
LOG_TRACE(HW_GPU, "MemoryFill from {:#010X} to {:#010X}", config.GetStartAddress(),
|
||||
config.GetEndAddress());
|
||||
|
||||
// It seems that it won't signal interrupt if "address_start" is zero.
|
||||
// TODO: hwtest this
|
||||
if (config.GetStartAddress() != 0) {
|
||||
if (!is_second_filler) {
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC0);
|
||||
} else {
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC1);
|
||||
}
|
||||
}
|
||||
|
||||
// Reset "trigger" flag and set the "finish" flag
|
||||
// NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
|
||||
config.trigger.Assign(0);
|
||||
config.finished.Assign(1);
|
||||
LOG_TRACE(HW_GPU, "MemoryFill started from {:#010X} to {:#010X}",
|
||||
config.GetStartAddress(), config.GetEndAddress());
|
||||
VideoCore::MemoryFill(&config, is_second_filler);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case GPU_REG_INDEX(display_transfer_config.trigger): {
|
||||
MICROPROFILE_SCOPE(GPU_DisplayTransfer);
|
||||
|
||||
const auto& config = g_regs.display_transfer_config;
|
||||
if (config.trigger & 1) {
|
||||
if (g_regs.display_transfer_config.trigger & 1) {
|
||||
|
||||
if (Pica::g_debug_context)
|
||||
Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
|
||||
nullptr);
|
||||
|
||||
if (config.is_texture_copy) {
|
||||
TextureCopy(config);
|
||||
LOG_TRACE(HW_GPU,
|
||||
"TextureCopy: {:#X} bytes from {:#010X}({}+{})-> "
|
||||
"{:#010X}({}+{}), flags {:#010X}",
|
||||
config.texture_copy.size, config.GetPhysicalInputAddress(),
|
||||
config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16,
|
||||
config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16,
|
||||
config.texture_copy.output_gap * 16, config.flags);
|
||||
} else {
|
||||
DisplayTransfer(config);
|
||||
LOG_TRACE(HW_GPU,
|
||||
"DisplayTransfer: {:#010X}({}x{})-> "
|
||||
"{:#010X}({}x{}), dst format {:x}, flags {:#010X}",
|
||||
config.GetPhysicalInputAddress(), config.input_width.Value(),
|
||||
config.input_height.Value(), config.GetPhysicalOutputAddress(),
|
||||
config.output_width.Value(), config.output_height.Value(),
|
||||
static_cast<u32>(config.output_format.Value()), config.flags);
|
||||
}
|
||||
|
||||
g_regs.display_transfer_config.trigger = 0;
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PPF);
|
||||
VideoCore::DisplayTransfer(&g_regs.display_transfer_config);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -468,15 +90,10 @@ inline void Write(u32 addr, const T data) {
|
||||
case GPU_REG_INDEX(command_processor_config.trigger): {
|
||||
const auto& config = g_regs.command_processor_config;
|
||||
if (config.trigger & 1) {
|
||||
MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
|
||||
|
||||
Pica::CommandProcessor::ProcessCommandList(config.GetPhysicalAddress(), config.size);
|
||||
|
||||
g_regs.command_processor_config.trigger = 0;
|
||||
VideoCore::ProcessCommandList(config.GetPhysicalAddress(), config.size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -504,15 +121,7 @@ template void Write<u8>(u32 addr, const u8 data);
|
||||
|
||||
/// Update hardware
|
||||
static void VBlankCallback(u64 userdata, s64 cycles_late) {
|
||||
VideoCore::g_renderer->SwapBuffers();
|
||||
|
||||
// Signal to GSP that GPU interrupt has occurred
|
||||
// TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub
|
||||
// screen, or if both use the same interrupts and these two instead determine the
|
||||
// beginning and end of the VBlank period. If needed, split the interrupt firing into
|
||||
// two different intervals.
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0);
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1);
|
||||
VideoCore::SwapBuffers();
|
||||
|
||||
// Reschedule recurrent event
|
||||
Core::System::GetInstance().CoreTiming().ScheduleEvent(frame_ticks - cycles_late, vblank_event);
|
||||
|
@ -579,7 +579,7 @@ void RasterizerFlushRegion(PAddr start, u32 size) {
|
||||
return;
|
||||
}
|
||||
|
||||
VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
|
||||
VideoCore::FlushRegion(start, size);
|
||||
}
|
||||
|
||||
void RasterizerInvalidateRegion(PAddr start, u32 size) {
|
||||
@ -587,7 +587,7 @@ void RasterizerInvalidateRegion(PAddr start, u32 size) {
|
||||
return;
|
||||
}
|
||||
|
||||
VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size);
|
||||
VideoCore::InvalidateRegion(start, size);
|
||||
}
|
||||
|
||||
void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
|
||||
@ -597,7 +597,7 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
|
||||
return;
|
||||
}
|
||||
|
||||
VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
|
||||
VideoCore::FlushAndInvalidateRegion(start, size);
|
||||
}
|
||||
|
||||
void RasterizerClearAll(bool flush) {
|
||||
@ -630,16 +630,15 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) {
|
||||
PAddr physical_start = paddr_region_start + (overlap_start - region_start);
|
||||
u32 overlap_size = overlap_end - overlap_start;
|
||||
|
||||
auto* rasterizer = VideoCore::g_renderer->Rasterizer();
|
||||
switch (mode) {
|
||||
case FlushMode::Flush:
|
||||
rasterizer->FlushRegion(physical_start, overlap_size);
|
||||
VideoCore::FlushRegion(physical_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::Invalidate:
|
||||
rasterizer->InvalidateRegion(physical_start, overlap_size);
|
||||
VideoCore::InvalidateRegion(physical_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::FlushAndInvalidate:
|
||||
rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size);
|
||||
VideoCore::FlushAndInvalidateRegion(physical_start, overlap_size);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
@ -96,6 +96,7 @@ void LogSettings() {
|
||||
log_setting("Renderer_TextureFilterName", values.texture_filter_name);
|
||||
log_setting("Stereoscopy_Render3d", static_cast<int>(values.render_3d));
|
||||
log_setting("Stereoscopy_Factor3d", values.factor_3d);
|
||||
log_setting("Renderer_UseAsyncGPU", Settings::values.use_asynchronous_gpu_emulation);
|
||||
log_setting("Layout_LayoutOption", static_cast<int>(values.layout_option));
|
||||
log_setting("Layout_SwapScreen", values.swap_screen);
|
||||
log_setting("Layout_UprightScreen", values.upright_screen);
|
||||
|
@ -42,6 +42,27 @@ enum class MicInputType {
|
||||
|
||||
enum class StereoRenderOption { Off, SideBySide, Anaglyph, Interlaced, ReverseInterlaced };
|
||||
|
||||
/// Timing mode that can be chosen independently for each GPU operation through the
/// `gpu_timing_mode_*` fields of `Settings::Values`.
///
/// Enumerators rely on their implicit values (Skip == 0 ... Asynch_8ms == 17), so new
/// entries must only ever be appended.
/// NOTE(review): the exact meaning of each mode is defined by the code consuming this enum,
/// which is not part of this header - the suffix presumably denotes a delay; confirm there.
enum class GpuTimingMode {
    Skip,
    Synch,
    Asynch,

    // Asynchronous variants tagged with a microsecond value
    Asynch_10us,
    Asynch_20us,
    Asynch_40us,
    Asynch_60us,
    Asynch_80us,
    Asynch_100us,
    Asynch_200us,
    Asynch_400us,
    Asynch_600us,
    Asynch_800us,

    // Asynchronous variants tagged with a millisecond value
    Asynch_1ms,
    Asynch_2ms,
    Asynch_4ms,
    Asynch_6ms,
    Asynch_8ms,
};
|
||||
|
||||
namespace NativeButton {
|
||||
enum Values {
|
||||
A,
|
||||
@ -197,6 +218,16 @@ struct Values {
|
||||
|
||||
bool use_vsync_new;
|
||||
|
||||
// Asynchronous GPU parameters
|
||||
bool use_asynchronous_gpu_emulation{};
|
||||
GpuTimingMode gpu_timing_mode_submit_list{GpuTimingMode::Synch};
|
||||
GpuTimingMode gpu_timing_mode_swap_buffers{GpuTimingMode::Synch};
|
||||
GpuTimingMode gpu_timing_mode_memory_fill{GpuTimingMode::Synch};
|
||||
GpuTimingMode gpu_timing_mode_display_transfer{GpuTimingMode::Synch};
|
||||
GpuTimingMode gpu_timing_mode_flush{GpuTimingMode::Synch};
|
||||
GpuTimingMode gpu_timing_mode_flush_and_invalidate{GpuTimingMode::Synch};
|
||||
GpuTimingMode gpu_timing_mode_invalidate{GpuTimingMode::Synch};
|
||||
|
||||
// Audio
|
||||
bool enable_dsp_lle;
|
||||
bool enable_dsp_lle_multithread;
|
||||
|
@ -6,6 +6,10 @@ add_library(video_core STATIC
|
||||
geometry_pipeline.cpp
|
||||
geometry_pipeline.h
|
||||
gpu_debugger.h
|
||||
gpu.cpp
|
||||
gpu.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
pica.cpp
|
||||
pica.h
|
||||
pica_state.h
|
||||
|
@ -7,16 +7,20 @@
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/color.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "core/hle/lock.h"
|
||||
#include "core/hle/service/gsp/gsp.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/tracer/recorder.h"
|
||||
#include "video_core/command_processor.h"
|
||||
#include "video_core/debug_utils/debug_utils.h"
|
||||
#include "video_core/pica.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/pica_types.h"
|
||||
#include "video_core/primitive_assembly.h"
|
||||
@ -26,9 +30,16 @@
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/utils.h"
|
||||
#include "video_core/vertex_loader.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
|
||||
MICROPROFILE_DEFINE(GPU_MemoryFill, "GPU", "MemoryFill", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(GPU_TextureCopy, "GPU", "Texture Copy", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100));
|
||||
|
||||
namespace Pica::CommandProcessor {
|
||||
|
||||
// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
|
||||
@ -37,8 +48,6 @@ constexpr std::array<u32, 16> expand_bits_to_bytes{
|
||||
0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
|
||||
};
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
|
||||
|
||||
static const char* GetShaderSetupTypeName(Shader::ShaderSetup& setup) {
|
||||
if (&setup == &g_state.vs) {
|
||||
return "vertex shader";
|
||||
@ -263,10 +272,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
case PICA_REG_INDEX(pipeline.command_buffer.trigger[1]): {
|
||||
unsigned index =
|
||||
static_cast<unsigned>(id - PICA_REG_INDEX(pipeline.command_buffer.trigger[0]));
|
||||
u32* head_ptr = (u32*)VideoCore::g_memory->GetPhysicalPointer(
|
||||
|
||||
u32* start = (u32*)VideoCore::g_memory->GetPhysicalPointer(
|
||||
regs.pipeline.command_buffer.GetPhysicalAddress(index));
|
||||
g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
|
||||
g_state.cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32);
|
||||
auto& cmd_list = g_state.cmd_list;
|
||||
cmd_list.head_ptr = cmd_list.current_ptr = start;
|
||||
cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -675,4 +686,408 @@ void ProcessCommandList(PAddr list, u32 size) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decodes one source pixel into an RGBA colour vector.
 * @param input_format Pixel format the bytes at `src_pixel` are encoded in.
 * @param src_pixel Pointer to the first byte of the encoded pixel.
 * @return The decoded colour; {0, 0, 0, 0} (after logging an error) for an unknown format.
 */
static Common::Vec4<u8> DecodePixel(GPU::Regs::PixelFormat input_format, const u8* src_pixel) {
    using Format = GPU::Regs::PixelFormat;

    switch (input_format) {
    case Format::RGBA8:
        return Color::DecodeRGBA8(src_pixel);
    case Format::RGB8:
        return Color::DecodeRGB8(src_pixel);
    case Format::RGB565:
        return Color::DecodeRGB565(src_pixel);
    case Format::RGB5A1:
        return Color::DecodeRGB5A1(src_pixel);
    case Format::RGBA4:
        return Color::DecodeRGBA4(src_pixel);
    default:
        break;
    }

    // Only reached when none of the known formats matched.
    LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}", static_cast<u32>(input_format));
    return {0, 0, 0, 0};
}
|
||||
|
||||
/**
 * Executes a memory fill described by `config`.
 *
 * The start and end addresses are validated first; an invalid or empty range is logged and
 * ignored. The rasterizer gets the first chance to perform the fill (AccelerateFill);
 * otherwise the target region is invalidated in the rasterizer cache and filled on the CPU
 * with the 24-bit, 32-bit or 16-bit pattern selected by the config.
 *
 * NOTE(review): the 24-bit and 16-bit loops always write whole units while only testing
 * `cursor < fill_end`, so a range whose length is not a multiple of the unit size is
 * written slightly past `fill_end`. Behaviour kept as-is; confirm against hardware.
 *
 * @param config Memory fill register block to execute.
 */
void ProcessMemoryFill(const GPU::Regs::MemoryFillConfig& config) {
    MICROPROFILE_SCOPE(GPU_MemoryFill);

    const PAddr first_paddr = config.GetStartAddress();
    const PAddr last_paddr = config.GetEndAddress();

    // TODO: do hwtest with these cases
    if (!VideoCore::g_memory->IsValidPhysicalAddress(first_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", first_paddr);
        return;
    }
    if (!VideoCore::g_memory->IsValidPhysicalAddress(last_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", last_paddr);
        return;
    }
    if (last_paddr <= first_paddr) {
        LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", first_paddr,
                     last_paddr);
        return;
    }

    u8* const fill_begin = VideoCore::g_memory->GetPhysicalPointer(first_paddr);
    u8* const fill_end = VideoCore::g_memory->GetPhysicalPointer(last_paddr);

    // Hardware-accelerated path: nothing left to do when the rasterizer handled the fill.
    if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
        return;
    }

    Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
                                       config.GetEndAddress() - config.GetStartAddress());

    if (config.fill_24bit) {
        // Fill with 24-bit values, three bytes per step.
        u8* cursor = fill_begin;
        while (cursor < fill_end) {
            cursor[0] = config.value_24bit_r;
            cursor[1] = config.value_24bit_g;
            cursor[2] = config.value_24bit_b;
            cursor += 3;
        }
        return;
    }

    if (config.fill_32bit) {
        // Fill with 32-bit values; only whole words inside the range are written.
        if (fill_end > fill_begin) {
            const u32 pattern = config.value_32bit;
            const std::size_t word_count = (fill_end - fill_begin) / sizeof(u32);
            u8* cursor = fill_begin;
            for (std::size_t left = word_count; left != 0; --left) {
                std::memcpy(cursor, &pattern, sizeof(u32));
                cursor += sizeof(u32);
            }
        }
        return;
    }

    // Fill with 16-bit values.
    const u16 pattern = config.value_16bit.Value();
    u8* cursor = fill_begin;
    while (cursor < fill_end) {
        std::memcpy(cursor, &pattern, sizeof(u16));
        cursor += sizeof(u16);
    }
}
|
||||
|
||||
/**
 * Performs a display transfer (format conversion with optional downscale, vertical flip and
 * tiled/linear conversion) described by `config`.
 *
 * Invalid addresses and zero dimensions are logged and ignored. The rasterizer gets the
 * first chance to perform the transfer (AccelerateDisplayTransfer); otherwise the input
 * region is flushed, the output region invalidated, and every output pixel is decoded,
 * averaged according to the scaling mode and re-encoded on the CPU.
 *
 * @param config Display transfer register block to execute.
 */
static void DisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
    MICROPROFILE_SCOPE(GPU_DisplayTransfer);

    const PAddr input_paddr = config.GetPhysicalInputAddress();
    const PAddr output_paddr = config.GetPhysicalOutputAddress();

    // TODO: do hwtest with these cases
    if (!VideoCore::g_memory->IsValidPhysicalAddress(input_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", input_paddr);
        return;
    }
    if (!VideoCore::g_memory->IsValidPhysicalAddress(output_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", output_paddr);
        return;
    }
    if (config.input_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero input width");
        return;
    }
    if (config.input_height == 0) {
        LOG_CRITICAL(HW_GPU, "zero input height");
        return;
    }
    if (config.output_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero output width");
        return;
    }
    if (config.output_height == 0) {
        LOG_CRITICAL(HW_GPU, "zero output height");
        return;
    }

    // Hardware-accelerated path: nothing left to do when the rasterizer handled the transfer.
    if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
        return;
    }

    u8* const src_base = VideoCore::g_memory->GetPhysicalPointer(input_paddr);
    u8* const dst_base = VideoCore::g_memory->GetPhysicalPointer(output_paddr);

    if (config.scaling > config.ScaleXY) {
        LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}",
                     config.scaling.Value());
        UNIMPLEMENTED();
        return;
    }

    if (config.input_linear && config.scaling != config.NoScale) {
        LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
        UNIMPLEMENTED();
        return;
    }

    // Downscaling halves the output: horizontally for any scaling mode, vertically only
    // for ScaleXY.
    const int x_shift = config.scaling != config.NoScale ? 1 : 0;
    const int y_shift = config.scaling == config.ScaleXY ? 1 : 0;

    const u32 out_w = config.output_width >> x_shift;
    const u32 out_h = config.output_height >> y_shift;

    const u32 input_size =
        config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
    const u32 output_size = out_w * out_h * GPU::Regs::BytesPerPixel(config.output_format);

    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
    Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);

    // Values that stay constant for the whole transfer.
    const u32 src_bpp = GPU::Regs::BytesPerPixel(config.input_format);
    const u32 dst_bpp = GPU::Regs::BytesPerPixel(config.output_format);
    const u32 in_w = config.input_width;

    // The input layout is given directly; without "dont_swizzle" the output uses the
    // opposite layout, with it the output uses the same layout as the input.
    const bool src_is_linear = config.input_linear != 0;
    const bool dst_is_linear = src_is_linear == (config.dont_swizzle != 0);

    // Byte offset of pixel (px, py) in a row-major image of the given width.
    const auto linear_offset = [](u32 px, u32 py, u32 width, u32 bpp) -> u32 {
        return (px + py * width) * bpp;
    };
    // Byte offset of pixel (px, py) in a tiled image: Morton offset plus the offset of the
    // 8-row band that contains the pixel.
    const auto tiled_offset = [](u32 px, u32 py, u32 width, u32 bpp) -> u32 {
        const u32 coarse_y = py & ~7;
        const u32 stride = width * bpp;
        return VideoCore::GetMortonOffset(px, py, bpp) + coarse_y * stride;
    };

    for (u32 y = 0; y < out_h; ++y) {
        // Position in the input image derived from the output position and the scale.
        const u32 input_y = y << y_shift;

        // The flip is applied to the output row only, after the input position has been
        // derived, so that it accounts for the scaling options.
        const u32 output_y = config.flip_vertically ? out_h - y - 1 : y;

        for (u32 x = 0; x < out_w; ++x) {
            const u32 input_x = x << x_shift;

            const u32 src_offset = src_is_linear
                                       ? linear_offset(input_x, input_y, in_w, src_bpp)
                                       : tiled_offset(input_x, input_y, in_w, src_bpp);
            const u32 dst_offset = dst_is_linear ? linear_offset(x, output_y, out_w, dst_bpp)
                                                 : tiled_offset(x, output_y, out_w, dst_bpp);

            const u8* src_pixel = src_base + src_offset;
            Common::Vec4<u8> src_color = DecodePixel(config.input_format, src_pixel);
            if (config.scaling == config.ScaleX) {
                // Average two consecutive source pixels.
                Common::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bpp);
                src_color = ((src_color + pixel) / 2).Cast<u8>();
            } else if (config.scaling == config.ScaleXY) {
                // Average four consecutive source pixels.
                Common::Vec4<u8> pixel1 =
                    DecodePixel(config.input_format, src_pixel + 1 * src_bpp);
                Common::Vec4<u8> pixel2 =
                    DecodePixel(config.input_format, src_pixel + 2 * src_bpp);
                Common::Vec4<u8> pixel3 =
                    DecodePixel(config.input_format, src_pixel + 3 * src_bpp);
                src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
            }

            u8* dst_pixel = dst_base + dst_offset;
            switch (config.output_format) {
            case GPU::Regs::PixelFormat::RGBA8:
                Color::EncodeRGBA8(src_color, dst_pixel);
                break;
            case GPU::Regs::PixelFormat::RGB8:
                Color::EncodeRGB8(src_color, dst_pixel);
                break;
            case GPU::Regs::PixelFormat::RGB565:
                Color::EncodeRGB565(src_color, dst_pixel);
                break;
            case GPU::Regs::PixelFormat::RGB5A1:
                Color::EncodeRGB5A1(src_color, dst_pixel);
                break;
            case GPU::Regs::PixelFormat::RGBA4:
                Color::EncodeRGBA4(src_color, dst_pixel);
                break;
            default:
                LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}",
                          static_cast<u32>(config.output_format.Value()));
                break;
            }
        }
    }
}
|
||||
|
||||
/**
 * Performs a raw "texture copy" described by `config`: bytes are copied from input to
 * output in rows of `width` bytes, skipping `gap` bytes after each row on the respective
 * side. Widths and gaps are stored in units of 16 bytes and the total size is aligned down
 * to 16 bytes.
 *
 * Invalid addresses, a zero size and zero widths are logged and ignored. The rasterizer
 * gets the first chance to perform the copy (AccelerateTextureCopy); otherwise the input
 * region is flushed, the output region invalidated (and flushed too when it has gaps that
 * must keep their contents), and the copy is done on the CPU.
 *
 * @param config Display transfer register block, interpreted in texture copy mode.
 */
static void TextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
    MICROPROFILE_SCOPE(GPU_TextureCopy);

    const PAddr input_paddr = config.GetPhysicalInputAddress();
    const PAddr output_paddr = config.GetPhysicalOutputAddress();

    // TODO: do hwtest with invalid addresses
    if (!VideoCore::g_memory->IsValidPhysicalAddress(input_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", input_paddr);
        return;
    }
    if (!VideoCore::g_memory->IsValidPhysicalAddress(output_paddr)) {
        LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", output_paddr);
        return;
    }

    // Hardware-accelerated path: nothing left to do when the rasterizer handled the copy.
    if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config)) {
        return;
    }

    u8* src = VideoCore::g_memory->GetPhysicalPointer(input_paddr);
    u8* dst = VideoCore::g_memory->GetPhysicalPointer(output_paddr);

    const u32 total_size = Common::AlignDown(config.texture_copy.size, 16);
    if (total_size == 0) {
        LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this.");
        return;
    }

    const u32 input_gap = config.texture_copy.input_gap * 16;
    const u32 output_gap = config.texture_copy.output_gap * 16;

    // Zero gap means contiguous input/output even if width = 0. To avoid an infinite loop
    // below, the width is set to the total size if gap = 0.
    const u32 input_width = input_gap == 0 ? total_size : config.texture_copy.input_width * 16;
    const u32 output_width =
        output_gap == 0 ? total_size : config.texture_copy.output_width * 16;

    if (input_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this.");
        return;
    }
    if (output_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this.");
        return;
    }

    // Extent of the input region including its gaps.
    const std::size_t input_span =
        config.texture_copy.size / input_width * (input_width + input_gap);
    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
                                  static_cast<u32>(input_span));

    // Extent of the output region including its gaps.
    const std::size_t output_span =
        config.texture_copy.size / output_width * (output_width + output_gap);

    // Only need to flush output if it has a gap
    auto* const rasterizer = VideoCore::g_renderer->Rasterizer();
    if (output_gap == 0) {
        rasterizer->InvalidateRegion(config.GetPhysicalOutputAddress(),
                                     static_cast<u32>(output_span));
    } else {
        rasterizer->FlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
                                             static_cast<u32>(output_span));
    }

    // Copy in chunks bounded by whichever runs out first: the current input row, the
    // current output row, or the remaining total size.
    u32 input_row_left = input_width;
    u32 output_row_left = output_width;
    for (u32 bytes_left = total_size; bytes_left > 0;) {
        const u32 chunk = std::min({input_row_left, output_row_left, bytes_left});

        std::memcpy(dst, src, chunk);
        src += chunk;
        dst += chunk;

        input_row_left -= chunk;
        output_row_left -= chunk;
        bytes_left -= chunk;

        // A finished row starts a new one after skipping that side's gap.
        if (input_row_left == 0) {
            input_row_left = input_width;
            src += input_gap;
        }
        if (output_row_left == 0) {
            output_row_left = output_width;
            dst += output_gap;
        }
    }
}
|
||||
|
||||
/**
 * Executes the transfer described by `config`, dispatching to TextureCopy when the
 * `is_texture_copy` flag is set and to DisplayTransfer otherwise, then traces the
 * parameters of the operation that was run.
 *
 * @param config Display transfer register block to execute.
 */
void ProcessDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
    if (!config.is_texture_copy) {
        DisplayTransfer(config);
        LOG_TRACE(HW_GPU,
                  "DisplayTransfer: {:#010X}({}x{})-> "
                  "{:#010X}({}x{}), dst format {:x}, flags {:#010X}",
                  config.GetPhysicalInputAddress(), config.input_width.Value(),
                  config.input_height.Value(), config.GetPhysicalOutputAddress(),
                  config.output_width.Value(), config.output_height.Value(),
                  static_cast<u32>(config.output_format.Value()), config.flags);
        return;
    }

    TextureCopy(config);
    LOG_TRACE(HW_GPU,
              "TextureCopy: {:#X} bytes from {:#010X}({}+{})-> "
              "{:#010X}({}+{}), flags {:#010X}",
              config.texture_copy.size, config.GetPhysicalInputAddress(),
              config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16,
              config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16,
              config.texture_copy.output_gap * 16, config.flags);
}
|
||||
|
||||
void AfterCommandList() {
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::P3D);
|
||||
GPU::g_regs.command_processor_config.trigger = 0;
|
||||
}
|
||||
|
||||
void AfterDisplayTransfer() {
|
||||
GPU::g_regs.display_transfer_config.trigger = 0;
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PPF);
|
||||
}
|
||||
|
||||
void AfterMemoryFill(bool is_second_filler) {
|
||||
const auto& config = GPU::g_regs.memory_fill_config[is_second_filler];
|
||||
// Reset "trigger" flag and set the "finish" flag
|
||||
// NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
|
||||
GPU::g_regs.memory_fill_config[is_second_filler ? 1 : 0].trigger.Assign(0);
|
||||
GPU::g_regs.memory_fill_config[is_second_filler ? 1 : 0].finished.Assign(1);
|
||||
// It seems that it won't signal interrupt if "address_start" is zero.
|
||||
// TODO: hwtest this
|
||||
if (config.GetStartAddress() != 0) {
|
||||
if (!is_second_filler) {
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC0);
|
||||
} else {
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AfterSwapBuffers() {
|
||||
// Signal to GSP that GPU interrupt has occurred
|
||||
// TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub
|
||||
// screen, or if both use the same interrupts and these two instead determine the
|
||||
// beginning and end of the VBlank period. If needed, split the interrupt firing into
|
||||
// two different intervals.
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0);
|
||||
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1);
|
||||
}
|
||||
|
||||
} // namespace Pica::CommandProcessor
|
||||
|
@ -5,8 +5,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/hw/gpu.h"
|
||||
|
||||
namespace Pica::CommandProcessor {
|
||||
|
||||
@ -34,4 +36,17 @@ static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect
|
||||
|
||||
void ProcessCommandList(PAddr list, u32 size);
|
||||
|
||||
void AfterCommandList();
|
||||
|
||||
void ProcessDisplayTransfer(const GPU::Regs::DisplayTransferConfig&);
|
||||
|
||||
void AfterDisplayTransfer();
|
||||
|
||||
void ProcessMemoryFill(const GPU::Regs::MemoryFillConfig&);
|
||||
|
||||
void AfterMemoryFill(bool);
|
||||
|
||||
// TODO move somewhere else
|
||||
void AfterSwapBuffers();
|
||||
|
||||
} // namespace Pica::CommandProcessor
|
||||
|
86
src/video_core/gpu.cpp
Normal file
86
src/video_core/gpu.cpp
Normal file
@ -0,0 +1,86 @@
|
||||
// Copyright 2019 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/core.h"
|
||||
#include "video_core/command_processor.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCore {
|
||||
GPUBackend::GPUBackend(VideoCore::RendererBase& renderer) : renderer{renderer} {}
|
||||
|
||||
GPUBackend::~GPUBackend() = default;
|
||||
|
||||
GPUSerial::GPUSerial(Core::System& system, VideoCore::RendererBase& renderer)
|
||||
: GPUBackend(renderer), system{system} {}
|
||||
|
||||
GPUSerial::~GPUSerial() {}
|
||||
|
||||
void GPUSerial::ProcessCommandList(PAddr list, u32 size) {
|
||||
Pica::CommandProcessor::ProcessCommandList(list, size);
|
||||
Pica::CommandProcessor::AfterCommandList();
|
||||
}
|
||||
|
||||
void GPUSerial::SwapBuffers() {
|
||||
renderer.SwapBuffers();
|
||||
Pica::CommandProcessor::AfterSwapBuffers();
|
||||
}
|
||||
|
||||
void GPUSerial::DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
|
||||
Pica::CommandProcessor::ProcessDisplayTransfer(*config);
|
||||
Pica::CommandProcessor::AfterDisplayTransfer();
|
||||
}
|
||||
|
||||
void GPUSerial::MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
|
||||
Pica::CommandProcessor::ProcessMemoryFill(*config);
|
||||
Pica::CommandProcessor::AfterMemoryFill(is_second_filler);
|
||||
}
|
||||
|
||||
void GPUSerial::FlushRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer()->FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSerial::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer()->FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSerial::InvalidateRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer()->InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
GPUParallel::GPUParallel(Core::System& system, VideoCore::RendererBase& renderer)
|
||||
: GPUBackend(renderer), gpu_thread(system, renderer) {}
|
||||
|
||||
GPUParallel::~GPUParallel() = default;
|
||||
|
||||
void GPUParallel::ProcessCommandList(PAddr list, u32 size) {
|
||||
gpu_thread.SubmitList(list, size);
|
||||
}
|
||||
|
||||
void GPUParallel::SwapBuffers() {
|
||||
gpu_thread.SwapBuffers();
|
||||
}
|
||||
|
||||
void GPUParallel::DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
|
||||
gpu_thread.DisplayTransfer(config);
|
||||
}
|
||||
|
||||
void GPUParallel::MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
|
||||
gpu_thread.MemoryFill(config, is_second_filler);
|
||||
}
|
||||
|
||||
void GPUParallel::FlushRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUParallel::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUParallel::InvalidateRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
73
src/video_core/gpu.h
Normal file
73
src/video_core/gpu.h
Normal file
@ -0,0 +1,73 @@
|
||||
// Copyright 2019 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class RendererBase;
|
||||
|
||||
class GPUBackend {
|
||||
public:
|
||||
explicit GPUBackend(VideoCore::RendererBase& renderer);
|
||||
|
||||
virtual ~GPUBackend();
|
||||
|
||||
virtual void ProcessCommandList(PAddr list, u32 size) = 0;
|
||||
virtual void SwapBuffers() = 0;
|
||||
virtual void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) = 0;
|
||||
virtual void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) = 0;
|
||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
|
||||
protected:
|
||||
VideoCore::RendererBase& renderer;
|
||||
};
|
||||
|
||||
class GPUSerial : public GPUBackend {
|
||||
public:
|
||||
explicit GPUSerial(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
|
||||
~GPUSerial();
|
||||
|
||||
void ProcessCommandList(PAddr list, u32 size) override;
|
||||
void SwapBuffers() override;
|
||||
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) override;
|
||||
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
};
|
||||
|
||||
class GPUParallel : public GPUBackend {
|
||||
public:
|
||||
explicit GPUParallel(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
|
||||
~GPUParallel();
|
||||
|
||||
void ProcessCommandList(PAddr list, u32 size) override;
|
||||
void SwapBuffers() override;
|
||||
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) override;
|
||||
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
|
||||
private:
|
||||
GPUThread::ThreadManager gpu_thread;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
226
src/video_core/gpu_thread.cpp
Normal file
226
src/video_core/gpu_thread.cpp
Normal file
@ -0,0 +1,226 @@
|
||||
// Copyright 2019 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/thread.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/dumping/backend.h"
|
||||
#include "core/frontend/scope_acquire_context.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/command_processor.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCore::GPUThread {
|
||||
|
||||
/// Runs the GPU thread
|
||||
static void RunThread(VideoCore::RendererBase& renderer, SynchState& state, Core::System& system) {
|
||||
|
||||
MicroProfileOnThreadCreate("GpuThread");
|
||||
Common::SetCurrentThreadName("GpuThread");
|
||||
|
||||
// Wait for first GPU command before acquiring the window context
|
||||
state.WaitForCommands();
|
||||
|
||||
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
||||
if (!state.is_running) {
|
||||
return;
|
||||
}
|
||||
|
||||
Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()};
|
||||
|
||||
CommandDataContainer next;
|
||||
while (state.is_running) {
|
||||
state.WaitForCommands();
|
||||
|
||||
CommandDataContainer next;
|
||||
while (state.queue.Pop(next)) {
|
||||
auto command = &next.data;
|
||||
auto fence = next.fence;
|
||||
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
|
||||
Pica::CommandProcessor::ProcessCommandList(submit_list->list, submit_list->size);
|
||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
|
||||
renderer.SwapBuffers();
|
||||
Pica::CommandProcessor::AfterSwapBuffers();
|
||||
} else if (const auto data = std::get_if<MemoryFillCommand>(command)) {
|
||||
Pica::CommandProcessor::ProcessMemoryFill(*(data->config));
|
||||
const bool is_second_filler = fence & (1llu << 63);
|
||||
Pica::CommandProcessor::AfterMemoryFill(is_second_filler);
|
||||
} else if (const auto data = std::get_if<DisplayTransferCommand>(command)) {
|
||||
Pica::CommandProcessor::ProcessDisplayTransfer(*(data->config));
|
||||
Pica::CommandProcessor::AfterDisplayTransfer();
|
||||
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
|
||||
renderer.Rasterizer()->FlushRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer()->FlushAndInvalidateRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer()->InvalidateRegion(data->addr, data->size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
state.signaled_fence = next.fence;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers the "GPUSynchronizeEvent" timing event and starts the GPU thread.
ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer)
    : system(system), renderer(renderer) {
    // When the scheduled event fires, block until the fence passed as user data is signaled.
    const auto wait_for_fence = [this](u64 fence, s64 /*unused*/) {
        state.WaitForSynchronization(fence);
    };
    synchronize_event = system.CoreTiming().RegisterEvent("GPUSynchronizeEvent", wait_for_fence);

    thread = std::make_unique<std::thread>(RunThread, std::ref(renderer), std::ref(state),
                                           std::ref(system));
    // Remember the id so IsGpuThread() can recognise calls made from the GPU thread.
    thread_id = thread->get_id();
}
|
||||
|
||||
/// Clears the running flag and waits for the GPU thread to finish.
ThreadManager::~ThreadManager() {
    // The GPU thread's loops check this flag and exit once it is false.
    state.is_running.store(false);
    thread->join();
}
|
||||
|
||||
/// Applies the timing policy `mode` to the command identified by `fence`.
///
/// - Asynch / Skip: returns without waiting.
/// - Asynch_<N>: schedules the synchronize event N microseconds (or milliseconds) from now.
/// - Any mode not listed in the switch: waits for the fence immediately.
void ThreadManager::Synchronize(u64 fence, Settings::GpuTimingMode mode) {
    using Mode = Settings::GpuTimingMode;

    // Delay before waiting on the fence, in microseconds. Zero means "wait right away".
    int delay_us = 0;

    switch (mode) {
    case Mode::Asynch:
    case Mode::Skip:
        return;
    case Mode::Asynch_10us:
        delay_us = 10;
        break;
    case Mode::Asynch_20us:
        delay_us = 20;
        break;
    case Mode::Asynch_40us:
        delay_us = 40;
        break;
    case Mode::Asynch_60us:
        delay_us = 60;
        break;
    case Mode::Asynch_80us:
        delay_us = 80;
        break;
    case Mode::Asynch_100us:
        delay_us = 100;
        break;
    case Mode::Asynch_200us:
        delay_us = 200;
        break;
    case Mode::Asynch_400us:
        delay_us = 400;
        break;
    case Mode::Asynch_600us:
        delay_us = 600;
        break;
    case Mode::Asynch_800us:
        delay_us = 800;
        break;
    case Mode::Asynch_1ms:
        delay_us = 1000;
        break;
    case Mode::Asynch_2ms:
        delay_us = 2000;
        break;
    case Mode::Asynch_4ms:
        delay_us = 4000;
        break;
    case Mode::Asynch_6ms:
        delay_us = 6000;
        break;
    case Mode::Asynch_8ms:
        delay_us = 8000;
        break;
    }

    if (delay_us == 0) {
        state.WaitForSynchronization(fence);
        return;
    }

    if (delay_us > 0) {
        system.CoreTiming().ScheduleEvent(usToCycles(delay_us), synchronize_event, fence);
    }
}
|
||||
|
||||
/// Queues a command list for the GPU thread and applies the submit-list timing mode.
/// Lists with a size of zero are ignored.
void ThreadManager::SubmitList(PAddr list, u32 size) {
    if (size != 0) {
        const u64 fence = PushCommand(SubmitListCommand{list, size});
        Synchronize(fence, Settings::values.gpu_timing_mode_submit_list);
    }
}
|
||||
|
||||
void ThreadManager::SwapBuffers() {
|
||||
Synchronize(PushCommand(SwapBuffersCommand{}), Settings::values.gpu_timing_mode_swap_buffers);
|
||||
}
|
||||
|
||||
/// Queues a display transfer for the GPU thread and applies the display-transfer timing mode.
/// Only the pointer is stored in the queued command, not a copy of the configuration.
void ThreadManager::DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
    const u64 fence = PushCommand(DisplayTransferCommand{config});
    Synchronize(fence, Settings::values.gpu_timing_mode_display_transfer);
}
|
||||
|
||||
/// Queues a memory fill for the GPU thread and applies the memory-fill timing mode.
/// Only the pointer is stored in the queued command, not a copy of the configuration.
void ThreadManager::MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
    const u64 fence = PushCommand(MemoryFillCommand{config, is_second_filler});
    Synchronize(fence, Settings::values.gpu_timing_mode_memory_fill);
}
|
||||
|
||||
/// Flushes a region according to the flush timing mode.
///
/// Does nothing when the mode is Skip. When called from the GPU thread the rasterizer is invoked
/// directly; otherwise the request is queued and synchronized per the timing mode.
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
    const auto mode = Settings::values.gpu_timing_mode_flush;
    if (mode == Settings::GpuTimingMode::Skip) {
        return;
    }

    if (IsGpuThread()) {
        renderer.Rasterizer()->FlushRegion(addr, size);
        return;
    }

    Synchronize(PushCommand(FlushRegionCommand{addr, size}), mode);
}
|
||||
|
||||
/// Flushes and invalidates a region according to the flush-and-invalidate timing mode.
///
/// Does nothing when the mode is Skip. When called from the GPU thread the rasterizer is invoked
/// directly; otherwise a FlushAndInvalidateRegionCommand is queued and synchronized per the
/// timing mode.
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    if (Settings::values.gpu_timing_mode_flush_and_invalidate == Settings::GpuTimingMode::Skip) {
        return;
    }

    if (!IsGpuThread()) {
        // Queue the flush-and-invalidate command itself; queuing a plain invalidate would drop
        // the flush half of the request.
        Synchronize(PushCommand(FlushAndInvalidateRegionCommand{addr, size}),
                    Settings::values.gpu_timing_mode_flush_and_invalidate);
    } else {
        renderer.Rasterizer()->FlushAndInvalidateRegion(addr, size);
    }
}
|
||||
|
||||
/// Invalidates a region according to the invalidate timing mode.
///
/// Does nothing when the mode is Skip. When called from the GPU thread the rasterizer is invoked
/// directly; otherwise the request is queued and synchronized per the timing mode.
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
    const auto mode = Settings::values.gpu_timing_mode_invalidate;
    if (mode == Settings::GpuTimingMode::Skip) {
        return;
    }

    if (IsGpuThread()) {
        renderer.Rasterizer()->InvalidateRegion(addr, size);
        return;
    }

    Synchronize(PushCommand(InvalidateRegionCommand{addr, size}), mode);
}
|
||||
|
||||
/// Assigns the next fence value to `command_data`, enqueues it, and returns that fence.
u64 ThreadManager::PushCommand(CommandData&& command_data) {
    state.last_fence += 1;
    const u64 fence = state.last_fence;

    CommandDataContainer entry{std::move(command_data), fence};
    state.queue.Push(std::move(entry));
    return fence;
}
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
|
||||
void SynchState::WaitForSynchronization(u64 fence) {
|
||||
if (signaled_fence >= fence) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait for the GPU to be idle (all commands to be executed)
|
||||
MICROPROFILE_SCOPE(GPU_wait);
|
||||
while (signaled_fence < fence && is_running) {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCore::GPUThread
|
218
src/video_core/gpu_thread.h
Normal file
218
src/video_core/gpu_thread.h
Normal file
@ -0,0 +1,218 @@
|
||||
// Copyright 2019 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
#include <variant>
|
||||
#include "common/threadsafe_queue.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/command_processor.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
}
|
||||
|
||||
namespace VideoCore::GPUThread {
|
||||
|
||||
/// Command to signal to the GPU thread that a command list is ready for processing
|
||||
struct SubmitListCommand {
|
||||
// In order for the variant to be default constructable, the first element needs a default
|
||||
// constructor
|
||||
constexpr SubmitListCommand() : list(0), size(0) {}
|
||||
explicit constexpr SubmitListCommand(PAddr list, u32 size) : list(list), size(size) {}
|
||||
PAddr list;
|
||||
u32 size;
|
||||
};
|
||||
|
||||
static_assert(std::is_copy_assignable<SubmitListCommand>::value,
|
||||
"SubmitListCommand is not copy assignable");
|
||||
static_assert(std::is_copy_constructible<SubmitListCommand>::value,
|
||||
"SubmitListCommand is not copy constructable");
|
||||
|
||||
/// Command to signal to the GPU thread that a swap buffers is pending
|
||||
struct SwapBuffersCommand final {
|
||||
explicit constexpr SwapBuffersCommand() {}
|
||||
};
|
||||
|
||||
static_assert(std::is_copy_assignable<SwapBuffersCommand>::value,
|
||||
"SwapBuffersCommand is not copy assignable");
|
||||
static_assert(std::is_copy_constructible<SwapBuffersCommand>::value,
|
||||
"SwapBuffersCommand is not copy constructable");
|
||||
|
||||
struct MemoryFillCommand final {
|
||||
explicit constexpr MemoryFillCommand(const GPU::Regs::MemoryFillConfig* config,
|
||||
bool is_second_filler)
|
||||
: config{config}, is_second_filler(is_second_filler) {}
|
||||
|
||||
const GPU::Regs::MemoryFillConfig* config;
|
||||
bool is_second_filler;
|
||||
};
|
||||
|
||||
static_assert(std::is_copy_assignable<MemoryFillCommand>::value,
|
||||
"MemoryFillCommand is not copy assignable");
|
||||
static_assert(std::is_copy_constructible<MemoryFillCommand>::value,
|
||||
"MemoryFillCommand is not copy constructable");
|
||||
|
||||
struct DisplayTransferCommand final {
|
||||
explicit constexpr DisplayTransferCommand(const GPU::Regs::DisplayTransferConfig* config)
|
||||
: config{config} {}
|
||||
|
||||
const GPU::Regs::DisplayTransferConfig* config;
|
||||
};
|
||||
static_assert(std::is_copy_assignable<DisplayTransferCommand>::value,
|
||||
"DisplayTransferCommand is not copy assignable");
|
||||
static_assert(std::is_copy_constructible<DisplayTransferCommand>::value,
|
||||
"DisplayTransferCommand is not copy constructable");
|
||||
|
||||
/// Command to signal to the GPU thread to flush a region
|
||||
struct FlushRegionCommand final {
|
||||
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
VAddr addr;
|
||||
u64 size;
|
||||
};
|
||||
static_assert(std::is_copy_assignable<FlushRegionCommand>::value,
|
||||
"FlushRegionCommand is not copy assignable");
|
||||
static_assert(std::is_copy_constructible<FlushRegionCommand>::value,
|
||||
"FlushRegionCommand is not copy constructable");
|
||||
|
||||
/// Command to signal to the GPU thread to flush and invalidate a region
|
||||
struct FlushAndInvalidateRegionCommand final {
|
||||
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
|
||||
: addr{addr}, size{size} {}
|
||||
|
||||
VAddr addr;
|
||||
u64 size;
|
||||
};
|
||||
static_assert(std::is_copy_assignable<FlushAndInvalidateRegionCommand>::value,
|
||||
"FlushAndInvalidateRegionCommand is not copy assignable");
|
||||
static_assert(std::is_copy_constructible<FlushAndInvalidateRegionCommand>::value,
|
||||
"FlushAndInvalidateRegionCommand is not copy constructable");
|
||||
|
||||
/// Command to signal to the GPU thread to flush a region
|
||||
struct InvalidateRegionCommand final {
|
||||
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
VAddr addr;
|
||||
u64 size;
|
||||
};
|
||||
static_assert(std::is_copy_assignable<InvalidateRegionCommand>::value,
|
||||
"InvalidateRegionCommand is not copy assignable");
|
||||
static_assert(std::is_copy_constructible<InvalidateRegionCommand>::value,
|
||||
"InvalidateRegionCommand is not copy constructable");
|
||||
|
||||
using CommandData =
|
||||
std::variant<SubmitListCommand, SwapBuffersCommand, MemoryFillCommand, DisplayTransferCommand,
|
||||
FlushRegionCommand, FlushAndInvalidateRegionCommand, InvalidateRegionCommand>;
|
||||
|
||||
struct CommandDataContainer {
|
||||
CommandDataContainer() = default;
|
||||
|
||||
CommandDataContainer(CommandData&& data, u64 next_fence)
|
||||
: data{std::move(data)}, fence{next_fence} {}
|
||||
|
||||
CommandData data;
|
||||
u64 fence{};
|
||||
};
|
||||
|
||||
/// Struct used to synchronize the GPU thread
|
||||
struct SynchState final {
|
||||
std::atomic_bool is_running{true};
|
||||
std::atomic_int queued_frame_count{};
|
||||
std::mutex synchronization_mutex;
|
||||
std::mutex commands_mutex;
|
||||
std::condition_variable commands_condition;
|
||||
std::condition_variable synchronization_condition;
|
||||
|
||||
/// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
|
||||
/// synchronized. This is entirely empirical.
|
||||
bool IsSynchronized() const {
|
||||
constexpr std::size_t max_queue_gap{100};
|
||||
return queue.Size() <= max_queue_gap;
|
||||
}
|
||||
|
||||
void TrySynchronize() {
|
||||
if (IsSynchronized()) {
|
||||
std::lock_guard lock{synchronization_mutex};
|
||||
synchronization_condition.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
void WaitForSynchronization(u64 fence);
|
||||
|
||||
void SignalCommands() {
|
||||
if (queue.Empty()) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(commands_mutex);
|
||||
commands_condition.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
void WaitForCommands() {
|
||||
while (queue.Empty() && is_running)
|
||||
;
|
||||
// std::unique_lock lock{commands_mutex};
|
||||
// commands_condition.wait(lock, [this] { return !queue.Empty(); });
|
||||
}
|
||||
|
||||
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
|
||||
CommandQueue queue;
|
||||
u64 last_fence{};
|
||||
std::atomic<u64> signaled_fence{};
|
||||
};
|
||||
|
||||
/// Class used to manage the GPU thread
|
||||
class ThreadManager final {
|
||||
public:
|
||||
explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
~ThreadManager();
|
||||
|
||||
void SubmitList(PAddr list, u32 size);
|
||||
|
||||
void SwapBuffers();
|
||||
|
||||
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig*);
|
||||
|
||||
void MemoryFill(const GPU::Regs::MemoryFillConfig*, bool is_second_filler);
|
||||
|
||||
void FlushRegion(VAddr addr, u64 size);
|
||||
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
void InvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
private:
|
||||
void Synchronize(u64 fence, Settings::GpuTimingMode mode);
|
||||
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
u64 PushCommand(CommandData&& command_data);
|
||||
|
||||
/// Returns true if this is called by the GPU thread
|
||||
bool IsGpuThread() const {
|
||||
return std::this_thread::get_id() == thread_id;
|
||||
}
|
||||
|
||||
private:
|
||||
SynchState state;
|
||||
std::unique_ptr<std::thread> thread;
|
||||
std::thread::id thread_id{};
|
||||
Core::System& system;
|
||||
VideoCore::RendererBase& renderer;
|
||||
Core::TimingEventType* synchronize_event{};
|
||||
};
|
||||
|
||||
} // namespace VideoCore::GPUThread
|
@ -9,6 +9,8 @@
|
||||
#include "video_core/swrasterizer/swrasterizer.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
RendererBase::RendererBase(Frontend::EmuWindow& window) : render_window{window} {}
|
||||
RendererBase::~RendererBase() = default;
|
||||
void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) {
|
||||
@ -32,3 +34,5 @@ void RendererBase::RefreshRasterizerSetting() {
|
||||
void RendererBase::Sync() {
|
||||
rasterizer->SyncEntireState();
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -13,6 +13,8 @@ namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class RendererBase : NonCopyable {
|
||||
public:
|
||||
explicit RendererBase(Frontend::EmuWindow& window);
|
||||
@ -75,3 +77,5 @@ protected:
|
||||
private:
|
||||
bool opengl_rasterizer_active = false;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -289,7 +289,9 @@ RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_i
|
||||
vertex_min = 0xFFFF;
|
||||
vertex_max = 0;
|
||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||
#ifndef ANDROID
|
||||
res_cache.FlushRegion(address, size, nullptr);
|
||||
#endif
|
||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||
vertex_min = std::min(vertex_min, vertex);
|
||||
@ -361,8 +363,9 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||
|
||||
u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
|
||||
u32 data_size = loader.byte_count * vertex_num;
|
||||
|
||||
#ifndef ANDROID
|
||||
res_cache.FlushRegion(data_addr, data_size, nullptr);
|
||||
#endif
|
||||
std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
|
||||
|
||||
array_ptr += data_size;
|
||||
|
@ -1239,6 +1239,10 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
|
||||
|
||||
RefreshRasterizerSetting();
|
||||
|
||||
if (Settings::values.use_asynchronous_gpu_emulation) {
|
||||
render_window.DoneCurrent();
|
||||
}
|
||||
|
||||
return VideoCore::ResultStatus::Success;
|
||||
}
|
||||
|
||||
|
@ -57,7 +57,7 @@ struct PresentationTexture {
|
||||
OGLTexture texture;
|
||||
};
|
||||
|
||||
class RendererOpenGL : public RendererBase {
|
||||
class RendererOpenGL : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererOpenGL(Frontend::EmuWindow& window);
|
||||
~RendererOpenGL() override;
|
||||
|
@ -5,7 +5,10 @@
|
||||
#include <memory>
|
||||
#include "common/archives.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/pica.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
@ -19,6 +22,7 @@
|
||||
namespace VideoCore {
|
||||
|
||||
std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
|
||||
std::unique_ptr<GPUBackend> g_gpu;
|
||||
|
||||
std::atomic<bool> g_hw_renderer_enabled;
|
||||
std::atomic<bool> g_shader_jit_enabled;
|
||||
@ -39,13 +43,20 @@ Layout::FramebufferLayout g_screenshot_framebuffer_layout;
|
||||
Memory::MemorySystem* g_memory;
|
||||
|
||||
/// Initialize the video core
|
||||
ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory) {
|
||||
ResultStatus Init(Core::System& system, Frontend::EmuWindow& emu_window,
|
||||
Memory::MemorySystem& memory) {
|
||||
g_memory = &memory;
|
||||
Pica::Init();
|
||||
|
||||
OpenGL::GLES = Settings::values.use_gles;
|
||||
|
||||
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window);
|
||||
|
||||
if (Settings::values.use_asynchronous_gpu_emulation) {
|
||||
g_gpu = std::make_unique<VideoCore::GPUParallel>(system, *g_renderer);
|
||||
} else {
|
||||
g_gpu = std::make_unique<VideoCore::GPUSerial>(system, *g_renderer);
|
||||
}
|
||||
ResultStatus result = g_renderer->Init();
|
||||
|
||||
if (result != ResultStatus::Success) {
|
||||
@ -62,6 +73,7 @@ void Shutdown() {
|
||||
Pica::Shutdown();
|
||||
|
||||
g_renderer->ShutDown();
|
||||
g_gpu.reset();
|
||||
g_renderer.reset();
|
||||
|
||||
LOG_DEBUG(Render, "shutdown OK");
|
||||
@ -95,6 +107,34 @@ void serialize(Archive& ar, const unsigned int) {
|
||||
ar& Pica::g_state;
|
||||
}
|
||||
|
||||
/// Forwards the command list to the active GPU backend (g_gpu).
void ProcessCommandList(PAddr list, u32 size) {
    GPUBackend& backend = *g_gpu;
    backend.ProcessCommandList(list, size);
}
|
||||
|
||||
void SwapBuffers() {
|
||||
g_gpu->SwapBuffers();
|
||||
}
|
||||
|
||||
/// Forwards the display transfer to the active GPU backend (g_gpu).
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
    GPUBackend& backend = *g_gpu;
    backend.DisplayTransfer(config);
}
|
||||
|
||||
/// Forwards the memory fill to the active GPU backend (g_gpu).
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
    GPUBackend& backend = *g_gpu;
    backend.MemoryFill(config, is_second_filler);
}
|
||||
|
||||
/// Forwards the flush request to the active GPU backend (g_gpu).
void FlushRegion(VAddr addr, u64 size) {
    GPUBackend& backend = *g_gpu;
    backend.FlushRegion(addr, size);
}
|
||||
|
||||
/// Forwards the flush-and-invalidate request to the active GPU backend (g_gpu).
void FlushAndInvalidateRegion(VAddr addr, u64 size) {
    GPUBackend& backend = *g_gpu;
    backend.FlushAndInvalidateRegion(addr, size);
}
|
||||
|
||||
/// Forwards the invalidate request to the active GPU backend (g_gpu).
void InvalidateRegion(VAddr addr, u64 size) {
    GPUBackend& backend = *g_gpu;
    backend.InvalidateRegion(addr, size);
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
SERIALIZE_IMPL(VideoCore)
|
||||
|
@ -8,13 +8,12 @@
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/command_processor.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
class RendererBase;
|
||||
|
||||
namespace Memory {
|
||||
class MemorySystem;
|
||||
}
|
||||
@ -24,7 +23,11 @@ class MemorySystem;
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class GPUBackend;
|
||||
class RendererBase;
|
||||
|
||||
extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
|
||||
extern std::unique_ptr<VideoCore::GPUBackend> g_gpu;
|
||||
|
||||
// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from
|
||||
// qt ui)
|
||||
@ -53,7 +56,28 @@ enum class ResultStatus {
|
||||
};
|
||||
|
||||
/// Initialize the video core
|
||||
ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory);
|
||||
ResultStatus Init(Core::System& system, Frontend::EmuWindow& emu_window,
|
||||
Memory::MemorySystem& memory);
|
||||
|
||||
void ProcessCommandList(PAddr list, u32 size);
|
||||
|
||||
/// Notify rasterizer that it should swap the current framebuffer
|
||||
void SwapBuffers();
|
||||
|
||||
/// Perform a DisplayTransfer (accelerated by the rasterizer if available)
|
||||
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config);
|
||||
|
||||
/// Perform a MemoryFill (accelerated by the rasterizer if available)
|
||||
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to guest memory
|
||||
void FlushRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
void InvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Shutdown the video core
|
||||
void Shutdown();
|
||||
|
Loading…
x
Reference in New Issue
Block a user