android: video_core: Add experimental asynchronous GPU option.

bunnei 2020-05-15 22:16:01 -04:00 committed by xperia64
parent c39b4cc219
commit 2c3b4b98a0
31 changed files with 1427 additions and 423 deletions
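The new option is a single boolean renderer setting. As a minimal sketch (the helper name below is illustrative, not part of the commit), a frontend can enable it programmatically; the Config/ini plumbing added below does the same thing through the use_asynchronous_gpu_emulation key:

#include "core/settings.h"

// Illustrative helper: turn on the experimental asynchronous GPU before booting a title.
// Per-game tuning of the GpuTimingMode values still comes from GameSettings::LoadOverrides(),
// which is also added in this commit.
static void EnableAsyncGpu() {
    Settings::values.use_asynchronous_gpu_emulation = true;
    Settings::Apply();
}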

View File

@ -347,13 +347,15 @@ public final class SettingsFragmentPresenter {
mView.getActivity().setTitle(R.string.preferences_graphics);
SettingSection rendererSection = mSettings.getSection(Settings.SECTION_RENDERER);
Setting shadersAccurateMul = rendererSection.getSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL);
Setting resolutionFactor = rendererSection.getSetting(SettingsFile.KEY_RESOLUTION_FACTOR);
Setting filterMode = rendererSection.getSetting(SettingsFile.KEY_FILTER_MODE);
Setting useAsynchronousGpuEmulation = rendererSection.getSetting(SettingsFile.KEY_USE_ASYNCHRONOUS_GPU_EMULATION);
Setting shadersAccurateMul = rendererSection.getSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL);
sl.add(new CheckBoxSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL, Settings.SECTION_RENDERER, R.string.shaders_accurate_mul, R.string.shaders_accurate_mul_description, false, shadersAccurateMul));
sl.add(new SliderSetting(SettingsFile.KEY_RESOLUTION_FACTOR, Settings.SECTION_RENDERER, R.string.internal_resolution, R.string.internal_resolution_description, 1, 4, "x", 1, resolutionFactor));
sl.add(new CheckBoxSetting(SettingsFile.KEY_FILTER_MODE, Settings.SECTION_RENDERER, R.string.linear_filtering, R.string.linear_filtering_description, true, filterMode));
sl.add(new CheckBoxSetting(SettingsFile.KEY_USE_ASYNCHRONOUS_GPU_EMULATION, Settings.SECTION_RENDERER, R.string.asynchronous_gpu, R.string.asynchronous_gpu_description, true, useAsynchronousGpuEmulation));
sl.add(new CheckBoxSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL, Settings.SECTION_RENDERER, R.string.shaders_accurate_mul, R.string.shaders_accurate_mul_description, false, shadersAccurateMul));
}
private void addAudioSettings(ArrayList<SettingsItem> sl) {

View File

@ -53,6 +53,7 @@ public final class SettingsFile {
public static final String KEY_FACTOR_3D = "factor_3d";
public static final String KEY_FILTER_MODE = "filter_mode";
public static final String KEY_TEXTURE_FILTER_NAME = "texture_filter_name";
public static final String KEY_USE_ASYNCHRONOUS_GPU_EMULATION = "use_asynchronous_gpu_emulation";
public static final String KEY_LAYOUT_OPTION = "layout_option";
public static final String KEY_SWAP_SCREEN = "swap_screen";

View File

@ -16,6 +16,8 @@ add_library(main SHARED
emu_window/emu_window.h
game_info.cpp
game_info.h
game_settings.cpp
game_settings.h
id_cache.cpp
id_cache.h
mic.cpp

View File

@ -114,6 +114,8 @@ void Config::ReadValues() {
Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true);
Settings::values.shaders_accurate_mul =
sdl2_config->GetBoolean("Renderer", "shaders_accurate_mul", false);
Settings::values.use_asynchronous_gpu_emulation =
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true);
Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
Settings::values.resolution_factor =
static_cast<u16>(sdl2_config->GetInteger("Renderer", "resolution_factor", 1));

View File

@ -108,6 +108,10 @@ use_hw_shader =
# 0: Off (Default. Faster, but causes issues in some games) 1: On (Slower, but correct)
shaders_accurate_mul =
# Enable asynchronous GPU emulation
# 0: Off (Slower, but more accurate) 1: On (Default. Faster, but may cause issues in some games)
use_asynchronous_gpu_emulation =
# Whether to use the Just-In-Time (JIT) compiler for shader emulation
# 0: Interpreter (slow), 1 (default): JIT (fast)
use_shader_jit =

View File

@ -0,0 +1,192 @@
// Copyright 2019 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/settings.h"
namespace GameSettings {
void LoadOverrides(u64 program_id) {
Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_2ms;
Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_8ms;
Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch_2ms;
Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Synch;
Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Skip;
Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Asynch;
Settings::values.gpu_timing_mode_invalidate = Settings::GpuTimingMode::Synch;
switch (program_id) {
//// JAP / The Legend of Zelda: A Link Between Worlds
// case 0x00040000000EC200:
//// USA / The Legend of Zelda: A Link Between Worlds
// case 0x00040000000EC300:
//// EUR / The Legend of Zelda: A Link Between Worlds
// case 0x00040000000EC400:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_2ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_display_transfer =
// Settings::GpuTimingMode::Asynch_600us; Settings::values.gpu_timing_mode_flush =
// Settings::GpuTimingMode::Skip; Settings::values.gpu_timing_mode_flush_and_invalidate =
// Settings::GpuTimingMode::Skip; break;
//// JAP / The Legend of Zelda: Majora's Mask 3D
// case 0x00040000000D6E00:
//// USA / The Legend of Zelda: Majora's Mask 3D
// case 0x0004000000125500:
//// EUR / The Legend of Zelda: Majora's Mask 3D
// case 0x0004000000125600:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Skip;
// Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Skip;
// break;
// JAP / The Legend of Zelda: Ocarina of Time 3D
case 0x0004000000033400:
// USA / The Legend of Zelda: Ocarina of Time 3D
case 0x0004000000033500:
// EUR / The Legend of Zelda: Ocarina of Time 3D
case 0x0004000000033600:
// KOR / The Legend of Zelda: Ocarina of Time 3D
case 0x000400000008F800:
// CHI / The Legend of Zelda: Ocarina of Time 3D
case 0x000400000008F900:
Settings::values.shaders_accurate_mul = true;
Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_1ms;
Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Asynch;
Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Skip;
Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Skip;
break;
// JAP / Super Mario 3D Land
case 0x0004000000054100:
// USA / Super Mario 3D Land
case 0x0004000000054000:
// EUR / Super Mario 3D Land
case 0x0004000000053F00:
// KOR / Super Mario 3D Land
case 0x0004000000089D00:
Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_40us;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch_40us;
// Settings::values.gpu_timing_mode_display_transfer =
// Settings::GpuTimingMode::Asynch_40us; Settings::values.gpu_timing_mode_flush =
// Settings::GpuTimingMode::Skip; Settings::values.gpu_timing_mode_flush_and_invalidate =
// Settings::GpuTimingMode::Skip;
break;
//// USA / Animal Crossing: New Leaf
// case 0x0004000000086300:
//// EUR / Animal Crossing: New Leaf
// case 0x0004000000086400:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_2ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_display_transfer =
// Settings::GpuTimingMode::Asynch_600us; Settings::values.gpu_timing_mode_flush =
// Settings::GpuTimingMode::Skip; Settings::values.gpu_timing_mode_flush_and_invalidate =
// Settings::GpuTimingMode::Skip; break;
//// USA / Pokemon Omega Ruby
// case 0x000400000011C400:
//// USA / Pokemon Alpha Sapphire
// case 0x000400000011C500:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Synch;
// Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Skip;
// break;
//// USA / Pokemon X
// case 0x0004000000055D00:
//// USA / Pokemon Y
// case 0x0004000000055E00:
//// USA / Pokemon X Update 1.x
// case 0x0004000E00055D00:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Synch;
// Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Skip;
// break;
// USA / Pokemon Ultra Sun
case 0x00040000001B5000:
// USA / Pokemon Ultra Moon
case 0x00040000001B5100:
// Settings::values.force_separable_shader_fix = true;
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Skip;
// Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Skip;
break;
//// USA / Kirby: Planet Robobot
// case 0x0004000000183600:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_8ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Synch;
// Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Skip;
// Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Skip;
// break;
//// JAP / Mario Kart 7
// case 0x0004000000030600:
//// USA / Mario Kart 7
// case 0x0004000000030800:
//// EUR / Mario Kart 7
// case 0x0004000000030700:
//// CHI / Mario Kart 7
// case 0x000400000008B400:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_1ms;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_2ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_display_transfer = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_flush = Settings::GpuTimingMode::Skip;
// Settings::values.gpu_timing_mode_flush_and_invalidate = Settings::GpuTimingMode::Skip;
// break;
//// USA / Super Smash Bros.
// case 0x00040000000EDF00:
//// EUR / Super Smash Bros.
// case 0x00040000000EE000:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_2ms;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_display_transfer =
// Settings::GpuTimingMode::Asynch_20us; Settings::values.gpu_timing_mode_flush =
// Settings::GpuTimingMode::Skip; Settings::values.gpu_timing_mode_flush_and_invalidate =
// Settings::GpuTimingMode::Skip; break;
//// JAP / New Super Mario Bros. 2
// case 0x000400000007AD00:
//// USA / New Super Mario Bros. 2
// case 0x000400000007AE00:
//// EUR / New Super Mario Bros. 2
// case 0x000400000007AF00:
//// CHI / New Super Mario Bros. 2
// case 0x00040000000B8A00:
//// All / New Super Mario Bros. 2
// case 0x0004000000137E00:
// Settings::values.gpu_timing_mode_submit_list = Settings::GpuTimingMode::Asynch_2ms;
// Settings::values.gpu_timing_mode_swap_buffers = Settings::GpuTimingMode::Asynch_4ms;
// Settings::values.gpu_timing_mode_memory_fill = Settings::GpuTimingMode::Asynch;
// Settings::values.gpu_timing_mode_display_transfer =
// Settings::GpuTimingMode::Asynch_20us; Settings::values.gpu_timing_mode_flush =
// Settings::GpuTimingMode::Skip; Settings::values.gpu_timing_mode_flush_and_invalidate =
// Settings::GpuTimingMode::Skip; break;
}
}
} // namespace GameSettings

View File

@ -0,0 +1,11 @@
// Copyright 2020 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
namespace GameSettings {
void LoadOverrides(u64 program_id);
} // namespace GameSettings

View File

@ -30,6 +30,7 @@
#include "jni/config.h"
#include "jni/emu_window/emu_window.h"
#include "jni/game_info.h"
#include "jni/game_settings.h"
#include "jni/id_cache.h"
#include "jni/mic.h"
#include "jni/native.h"
@ -150,6 +151,12 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) {
return load_result;
}
// Replace with game-specific settings
u64 program_id{};
system.GetAppLoader().ReadProgramId(program_id);
GameSettings::LoadOverrides(program_id);
Settings::Apply();
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
telemetry_session.AddField(Telemetry::FieldType::App, "Frontend", "SDL");

View File

@ -225,6 +225,7 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
GDBStub::SetCpuStepFlag(false);
}
Service::GSP::Update();
HW::Update();
Reschedule();
@ -417,7 +418,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo
video_dumper = std::make_unique<VideoDumper::NullBackend>();
#endif
VideoCore::ResultStatus result = VideoCore::Init(emu_window, *memory);
VideoCore::ResultStatus result = VideoCore::Init(*this, emu_window, *memory);
if (result != VideoCore::ResultStatus::Success) {
switch (result) {
case VideoCore::ResultStatus::ErrorGenericDrivers:
@ -436,7 +437,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo
return ResultStatus::Success;
}
RendererBase& System::Renderer() {
VideoCore::RendererBase& System::Renderer() {
return *VideoCore::g_renderer;
}

View File

@ -57,7 +57,9 @@ namespace VideoDumper {
class Backend;
}
namespace VideoCore {
class RendererBase;
}
namespace Core {
@ -205,7 +207,7 @@ public:
return *dsp_core;
}
[[nodiscard]] RendererBase& Renderer();
[[nodiscard]] VideoCore::RendererBase& Renderer();
/**
* Gets a reference to the service manager.

View File

@ -15,7 +15,7 @@ static std::weak_ptr<GSP_GPU> gsp_gpu;
void SignalInterrupt(InterruptId interrupt_id) {
auto gpu = gsp_gpu.lock();
ASSERT(gpu != nullptr);
return gpu->SignalInterrupt(interrupt_id);
return gpu->SignalInterruptThreadSafe(interrupt_id);
}
void InstallInterfaces(Core::System& system) {
@ -31,4 +31,10 @@ void SetGlobalModule(Core::System& system) {
gsp_gpu = system.ServiceManager().GetService<GSP_GPU>("gsp::Gpu");
}
void Update() {
auto gpu = gsp_gpu.lock();
ASSERT(gpu != nullptr);
return gpu->Update();
}
} // namespace Service::GSP

View File

@ -25,4 +25,7 @@ void SignalInterrupt(InterruptId interrupt_id);
void InstallInterfaces(Core::System& system);
void SetGlobalModule(Core::System& system);
void Update();
} // namespace Service::GSP

View File

@ -11,6 +11,7 @@
#include <boost/serialization/shared_ptr.hpp>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/result.h"
@ -238,6 +239,18 @@ public:
*/
FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index);
void Update() {
while (!interrupt_queue.Empty()) {
InterruptId next_interrupt{};
interrupt_queue.Pop(next_interrupt);
SignalInterrupt(next_interrupt);
}
}
void SignalInterruptThreadSafe(InterruptId interrupt_id) {
interrupt_queue.Push(interrupt_id);
}
private:
/**
* Signals that the specified interrupt type has occurred to userland code for the specified GSP
@ -456,6 +469,8 @@ private:
}
friend class boost::serialization::access;
Common::MPSCQueue<InterruptId> interrupt_queue;
};
ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info);
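For context, a sketch of how the two sides of this queue are meant to interact (the wrapper function is hypothetical; the real call sites are the command_processor.cpp After* helpers and the System::RunLoop hunk in core.cpp below):

#include "core/hle/service/gsp/gsp.h"

void ExampleFrame() {
    // Producer side, potentially on the GPU thread: Service::GSP::SignalInterrupt (changed
    // above to call SignalInterruptThreadSafe) now only pushes onto the MPSCQueue.
    Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PPF);

    // Consumer side, emulator thread: called once per System::RunLoop iteration (see the
    // core.cpp hunk below); drains the queue and delivers each interrupt through the
    // private GSP_GPU::SignalInterrupt() path.
    Service::GSP::Update();
}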

View File

@ -9,7 +9,6 @@
#include "common/color.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/core.h"
#include "core/core_timing.h"
@ -47,343 +46,6 @@ inline void Read(T& var, const u32 raw_addr) {
var = g_regs[addr / 4];
}
static Common::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) {
switch (input_format) {
case Regs::PixelFormat::RGBA8:
return Color::DecodeRGBA8(src_pixel);
case Regs::PixelFormat::RGB8:
return Color::DecodeRGB8(src_pixel);
case Regs::PixelFormat::RGB565:
return Color::DecodeRGB565(src_pixel);
case Regs::PixelFormat::RGB5A1:
return Color::DecodeRGB5A1(src_pixel);
case Regs::PixelFormat::RGBA4:
return Color::DecodeRGBA4(src_pixel);
default:
LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}", input_format);
return {0, 0, 0, 0};
}
}
MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100));
static void MemoryFill(const Regs::MemoryFillConfig& config) {
const PAddr start_addr = config.GetStartAddress();
const PAddr end_addr = config.GetEndAddress();
// TODO: do hwtest with these cases
if (!g_memory->IsValidPhysicalAddress(start_addr)) {
LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", start_addr);
return;
}
if (!g_memory->IsValidPhysicalAddress(end_addr)) {
LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", end_addr);
return;
}
if (end_addr <= start_addr) {
LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", start_addr,
end_addr);
return;
}
u8* start = g_memory->GetPhysicalPointer(start_addr);
u8* end = g_memory->GetPhysicalPointer(end_addr);
if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
return;
Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetStartAddress());
if (config.fill_24bit) {
// fill with 24-bit values
for (u8* ptr = start; ptr < end; ptr += 3) {
ptr[0] = config.value_24bit_r;
ptr[1] = config.value_24bit_g;
ptr[2] = config.value_24bit_b;
}
} else if (config.fill_32bit) {
// fill with 32-bit values
if (end > start) {
u32 value = config.value_32bit;
std::size_t len = (end - start) / sizeof(u32);
for (std::size_t i = 0; i < len; ++i)
memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
}
} else {
// fill with 16-bit values
u16 value_16bit = config.value_16bit.Value();
for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
memcpy(ptr, &value_16bit, sizeof(u16));
}
}
static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
const PAddr src_addr = config.GetPhysicalInputAddress();
const PAddr dst_addr = config.GetPhysicalOutputAddress();
// TODO: do hwtest with these cases
if (!g_memory->IsValidPhysicalAddress(src_addr)) {
LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr);
return;
}
if (!g_memory->IsValidPhysicalAddress(dst_addr)) {
LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr);
return;
}
if (config.input_width == 0) {
LOG_CRITICAL(HW_GPU, "zero input width");
return;
}
if (config.input_height == 0) {
LOG_CRITICAL(HW_GPU, "zero input height");
return;
}
if (config.output_width == 0) {
LOG_CRITICAL(HW_GPU, "zero output width");
return;
}
if (config.output_height == 0) {
LOG_CRITICAL(HW_GPU, "zero output height");
return;
}
if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config))
return;
u8* src_pointer = g_memory->GetPhysicalPointer(src_addr);
u8* dst_pointer = g_memory->GetPhysicalPointer(dst_addr);
if (config.scaling > config.ScaleXY) {
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}",
config.scaling.Value());
UNIMPLEMENTED();
return;
}
if (config.input_linear && config.scaling != config.NoScale) {
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
UNIMPLEMENTED();
return;
}
int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
u32 output_width = config.output_width >> horizontal_scale;
u32 output_height = config.output_height >> vertical_scale;
u32 input_size =
config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
Common::Vec4<u8> src_color;
// Calculate the [x,y] position of the input image
// based on the current output position and the scale
u32 input_x = x << horizontal_scale;
u32 input_y = y << vertical_scale;
u32 output_y;
if (config.flip_vertically) {
// Flip the y value of the output data,
// we do this after calculating the [x,y] position of the input image
// to account for the scaling options.
output_y = output_height - y - 1;
} else {
output_y = y;
}
u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
u32 src_offset;
u32 dst_offset;
if (config.input_linear) {
if (!config.dont_swizzle) {
// Interpret the input as linear and the output as tiled
u32 coarse_y = output_y & ~7;
u32 stride = output_width * dst_bytes_per_pixel;
src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) +
coarse_y * stride;
} else {
// Both input and output are linear
src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel;
}
} else {
if (!config.dont_swizzle) {
// Interpret the input as tiled and the output as linear
u32 coarse_y = input_y & ~7;
u32 stride = config.input_width * src_bytes_per_pixel;
src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
coarse_y * stride;
dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel;
} else {
// Both input and output are tiled
u32 out_coarse_y = output_y & ~7;
u32 out_stride = output_width * dst_bytes_per_pixel;
u32 in_coarse_y = input_y & ~7;
u32 in_stride = config.input_width * src_bytes_per_pixel;
src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
in_coarse_y * in_stride;
dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) +
out_coarse_y * out_stride;
}
}
const u8* src_pixel = src_pointer + src_offset;
src_color = DecodePixel(config.input_format, src_pixel);
if (config.scaling == config.ScaleX) {
Common::Vec4<u8> pixel =
DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
src_color = ((src_color + pixel) / 2).Cast<u8>();
} else if (config.scaling == config.ScaleXY) {
Common::Vec4<u8> pixel1 =
DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
Common::Vec4<u8> pixel2 =
DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
Common::Vec4<u8> pixel3 =
DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
}
u8* dst_pixel = dst_pointer + dst_offset;
switch (config.output_format) {
case Regs::PixelFormat::RGBA8:
Color::EncodeRGBA8(src_color, dst_pixel);
break;
case Regs::PixelFormat::RGB8:
Color::EncodeRGB8(src_color, dst_pixel);
break;
case Regs::PixelFormat::RGB565:
Color::EncodeRGB565(src_color, dst_pixel);
break;
case Regs::PixelFormat::RGB5A1:
Color::EncodeRGB5A1(src_color, dst_pixel);
break;
case Regs::PixelFormat::RGBA4:
Color::EncodeRGBA4(src_color, dst_pixel);
break;
default:
LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}",
static_cast<u32>(config.output_format.Value()));
break;
}
}
}
}
static void TextureCopy(const Regs::DisplayTransferConfig& config) {
const PAddr src_addr = config.GetPhysicalInputAddress();
const PAddr dst_addr = config.GetPhysicalOutputAddress();
// TODO: do hwtest with invalid addresses
if (!g_memory->IsValidPhysicalAddress(src_addr)) {
LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr);
return;
}
if (!g_memory->IsValidPhysicalAddress(dst_addr)) {
LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr);
return;
}
if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
return;
u8* src_pointer = g_memory->GetPhysicalPointer(src_addr);
u8* dst_pointer = g_memory->GetPhysicalPointer(dst_addr);
u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16);
if (remaining_size == 0) {
LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this.");
return;
}
u32 input_gap = config.texture_copy.input_gap * 16;
u32 output_gap = config.texture_copy.output_gap * 16;
// Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width
// is assigned with the total size if gap = 0.
u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16;
u32 output_width = output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16;
if (input_width == 0) {
LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this.");
return;
}
if (output_width == 0) {
LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this.");
return;
}
std::size_t contiguous_input_size =
config.texture_copy.size / input_width * (input_width + input_gap);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
static_cast<u32>(contiguous_input_size));
std::size_t contiguous_output_size =
config.texture_copy.size / output_width * (output_width + output_gap);
// Only need to flush output if it has a gap
const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion
: Memory::RasterizerInvalidateRegion;
FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
u32 remaining_input = input_width;
u32 remaining_output = output_width;
while (remaining_size > 0) {
u32 copy_size = std::min({remaining_input, remaining_output, remaining_size});
std::memcpy(dst_pointer, src_pointer, copy_size);
src_pointer += copy_size;
dst_pointer += copy_size;
remaining_input -= copy_size;
remaining_output -= copy_size;
remaining_size -= copy_size;
if (remaining_input == 0) {
remaining_input = input_width;
src_pointer += input_gap;
}
if (remaining_output == 0) {
remaining_output = output_width;
dst_pointer += output_gap;
}
}
}
template <typename T>
inline void Write(u32 addr, const T data) {
addr -= HW::VADDR_GPU;
@ -403,63 +65,23 @@ inline void Write(u32 addr, const T data) {
case GPU_REG_INDEX(memory_fill_config[0].trigger):
case GPU_REG_INDEX(memory_fill_config[1].trigger): {
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
auto& config = g_regs.memory_fill_config[is_second_filler];
const auto& config = g_regs.memory_fill_config[is_second_filler];
if (config.trigger) {
MemoryFill(config);
LOG_TRACE(HW_GPU, "MemoryFill from {:#010X} to {:#010X}", config.GetStartAddress(),
config.GetEndAddress());
// It seems that it won't signal interrupt if "address_start" is zero.
// TODO: hwtest this
if (config.GetStartAddress() != 0) {
if (!is_second_filler) {
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC0);
} else {
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC1);
}
}
// Reset "trigger" flag and set the "finish" flag
// NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
config.trigger.Assign(0);
config.finished.Assign(1);
LOG_TRACE(HW_GPU, "MemoryFill started from {:#010X} to {:#010X}",
config.GetStartAddress(), config.GetEndAddress());
VideoCore::MemoryFill(&config, is_second_filler);
}
break;
}
case GPU_REG_INDEX(display_transfer_config.trigger): {
MICROPROFILE_SCOPE(GPU_DisplayTransfer);
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
if (g_regs.display_transfer_config.trigger & 1) {
if (Pica::g_debug_context)
Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
nullptr);
if (config.is_texture_copy) {
TextureCopy(config);
LOG_TRACE(HW_GPU,
"TextureCopy: {:#X} bytes from {:#010X}({}+{})-> "
"{:#010X}({}+{}), flags {:#010X}",
config.texture_copy.size, config.GetPhysicalInputAddress(),
config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16,
config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16,
config.texture_copy.output_gap * 16, config.flags);
} else {
DisplayTransfer(config);
LOG_TRACE(HW_GPU,
"DisplayTransfer: {:#010X}({}x{})-> "
"{:#010X}({}x{}), dst format {:x}, flags {:#010X}",
config.GetPhysicalInputAddress(), config.input_width.Value(),
config.input_height.Value(), config.GetPhysicalOutputAddress(),
config.output_width.Value(), config.output_height.Value(),
static_cast<u32>(config.output_format.Value()), config.flags);
}
g_regs.display_transfer_config.trigger = 0;
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PPF);
VideoCore::DisplayTransfer(&g_regs.display_transfer_config);
}
break;
}
@ -468,15 +90,10 @@ inline void Write(u32 addr, const T data) {
case GPU_REG_INDEX(command_processor_config.trigger): {
const auto& config = g_regs.command_processor_config;
if (config.trigger & 1) {
MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
Pica::CommandProcessor::ProcessCommandList(config.GetPhysicalAddress(), config.size);
g_regs.command_processor_config.trigger = 0;
VideoCore::ProcessCommandList(config.GetPhysicalAddress(), config.size);
}
break;
}
default:
break;
}
@ -504,15 +121,7 @@ template void Write<u8>(u32 addr, const u8 data);
/// Update hardware
static void VBlankCallback(u64 userdata, s64 cycles_late) {
VideoCore::g_renderer->SwapBuffers();
// Signal to GSP that GPU interrupt has occurred
// TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub
// screen, or if both use the same interrupts and these two instead determine the
// beginning and end of the VBlank period. If needed, split the interrupt firing into
// two different intervals.
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0);
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1);
VideoCore::SwapBuffers();
// Reschedule recurrent event
Core::System::GetInstance().CoreTiming().ScheduleEvent(frame_ticks - cycles_late, vblank_event);

View File

@ -579,7 +579,7 @@ void RasterizerFlushRegion(PAddr start, u32 size) {
return;
}
VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
VideoCore::FlushRegion(start, size);
}
void RasterizerInvalidateRegion(PAddr start, u32 size) {
@ -587,7 +587,7 @@ void RasterizerInvalidateRegion(PAddr start, u32 size) {
return;
}
VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size);
VideoCore::InvalidateRegion(start, size);
}
void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
@ -597,7 +597,7 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
return;
}
VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
VideoCore::FlushAndInvalidateRegion(start, size);
}
void RasterizerClearAll(bool flush) {
@ -630,16 +630,15 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) {
PAddr physical_start = paddr_region_start + (overlap_start - region_start);
u32 overlap_size = overlap_end - overlap_start;
auto* rasterizer = VideoCore::g_renderer->Rasterizer();
switch (mode) {
case FlushMode::Flush:
rasterizer->FlushRegion(physical_start, overlap_size);
VideoCore::FlushRegion(physical_start, overlap_size);
break;
case FlushMode::Invalidate:
rasterizer->InvalidateRegion(physical_start, overlap_size);
VideoCore::InvalidateRegion(physical_start, overlap_size);
break;
case FlushMode::FlushAndInvalidate:
rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size);
VideoCore::FlushAndInvalidateRegion(physical_start, overlap_size);
break;
}
};

View File

@ -94,6 +94,7 @@ void LogSettings() {
log_setting("Renderer_PostProcessingShader", values.pp_shader_name);
log_setting("Renderer_FilterMode", values.filter_mode);
log_setting("Renderer_TextureFilterName", values.texture_filter_name);
log_setting("Renderer_UseAsyncGPU", Settings::values.use_asynchronous_gpu_emulation);
log_setting("Stereoscopy_Render3d", values.render_3d);
log_setting("Stereoscopy_Factor3d", values.factor_3d);
log_setting("Layout_LayoutOption", values.layout_option);

View File

@ -42,6 +42,27 @@ enum class MicInputType {
enum class StereoRenderOption { Off, SideBySide, Anaglyph, Interlaced, ReverseInterlaced };
enum class GpuTimingMode {
Skip,
Synch,
Asynch,
Asynch_10us,
Asynch_20us,
Asynch_40us,
Asynch_60us,
Asynch_80us,
Asynch_100us,
Asynch_200us,
Asynch_400us,
Asynch_600us,
Asynch_800us,
Asynch_1ms,
Asynch_2ms,
Asynch_4ms,
Asynch_6ms,
Asynch_8ms,
};
namespace NativeButton {
enum Values {
A,
@ -197,6 +218,16 @@ struct Values {
bool use_vsync_new;
// Asynchronous GPU parameters
bool use_asynchronous_gpu_emulation{};
GpuTimingMode gpu_timing_mode_submit_list{GpuTimingMode::Synch};
GpuTimingMode gpu_timing_mode_swap_buffers{GpuTimingMode::Synch};
GpuTimingMode gpu_timing_mode_memory_fill{GpuTimingMode::Synch};
GpuTimingMode gpu_timing_mode_display_transfer{GpuTimingMode::Synch};
GpuTimingMode gpu_timing_mode_flush{GpuTimingMode::Synch};
GpuTimingMode gpu_timing_mode_flush_and_invalidate{GpuTimingMode::Synch};
GpuTimingMode gpu_timing_mode_invalidate{GpuTimingMode::Synch};
// Audio
bool enable_dsp_lle;
bool enable_dsp_lle_multithread;

View File

@ -6,6 +6,10 @@ add_library(video_core STATIC
geometry_pipeline.cpp
geometry_pipeline.h
gpu_debugger.h
gpu.cpp
gpu.h
gpu_thread.cpp
gpu_thread.h
pica.cpp
pica.h
pica_state.h

View File

@ -7,16 +7,20 @@
#include <cstring>
#include <memory>
#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/color.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/hle/lock.h"
#include "core/hle/service/gsp/gsp.h"
#include "core/hw/gpu.h"
#include "core/memory.h"
#include "core/tracer/recorder.h"
#include "video_core/command_processor.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
#include "video_core/primitive_assembly.h"
@ -26,9 +30,16 @@
#include "video_core/regs_texturing.h"
#include "video_core/renderer_base.h"
#include "video_core/shader/shader.h"
#include "video_core/utils.h"
#include "video_core/vertex_loader.h"
#include "video_core/video_core.h"
MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
MICROPROFILE_DEFINE(GPU_MemoryFill, "GPU", "MemoryFill", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(GPU_TextureCopy, "GPU", "Texture Copy", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100));
namespace Pica::CommandProcessor {
// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
@ -37,8 +48,6 @@ constexpr std::array<u32, 16> expand_bits_to_bytes{
0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
};
MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
static const char* GetShaderSetupTypeName(Shader::ShaderSetup& setup) {
if (&setup == &g_state.vs) {
return "vertex shader";
@ -263,10 +272,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(pipeline.command_buffer.trigger[1]): {
unsigned index =
static_cast<unsigned>(id - PICA_REG_INDEX(pipeline.command_buffer.trigger[0]));
u32* head_ptr = (u32*)VideoCore::g_memory->GetPhysicalPointer(
u32* start = (u32*)VideoCore::g_memory->GetPhysicalPointer(
regs.pipeline.command_buffer.GetPhysicalAddress(index));
g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
g_state.cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32);
auto& cmd_list = g_state.cmd_list;
cmd_list.head_ptr = cmd_list.current_ptr = start;
cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32);
break;
}
@ -675,4 +686,408 @@ void ProcessCommandList(PAddr list, u32 size) {
}
}
static Common::Vec4<u8> DecodePixel(GPU::Regs::PixelFormat input_format, const u8* src_pixel) {
switch (input_format) {
case GPU::Regs::PixelFormat::RGBA8:
return Color::DecodeRGBA8(src_pixel);
case GPU::Regs::PixelFormat::RGB8:
return Color::DecodeRGB8(src_pixel);
case GPU::Regs::PixelFormat::RGB565:
return Color::DecodeRGB565(src_pixel);
case GPU::Regs::PixelFormat::RGB5A1:
return Color::DecodeRGB5A1(src_pixel);
case GPU::Regs::PixelFormat::RGBA4:
return Color::DecodeRGBA4(src_pixel);
default:
LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}", static_cast<u32>(input_format));
return {0, 0, 0, 0};
}
}
void ProcessMemoryFill(const GPU::Regs::MemoryFillConfig& config) {
MICROPROFILE_SCOPE(GPU_MemoryFill);
const PAddr start_addr = config.GetStartAddress();
const PAddr end_addr = config.GetEndAddress();
// TODO: do hwtest with these cases
if (!VideoCore::g_memory->IsValidPhysicalAddress(start_addr)) {
LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", start_addr);
return;
}
if (!VideoCore::g_memory->IsValidPhysicalAddress(end_addr)) {
LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", end_addr);
return;
}
if (end_addr <= start_addr) {
LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", start_addr,
end_addr);
return;
}
u8* start = VideoCore::g_memory->GetPhysicalPointer(start_addr);
u8* end = VideoCore::g_memory->GetPhysicalPointer(end_addr);
if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
return;
Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetStartAddress());
if (config.fill_24bit) {
// fill with 24-bit values
for (u8* ptr = start; ptr < end; ptr += 3) {
ptr[0] = config.value_24bit_r;
ptr[1] = config.value_24bit_g;
ptr[2] = config.value_24bit_b;
}
} else if (config.fill_32bit) {
// fill with 32-bit values
if (end > start) {
u32 value = config.value_32bit;
std::size_t len = (end - start) / sizeof(u32);
for (std::size_t i = 0; i < len; ++i)
memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
}
} else {
// fill with 16-bit values
u16 value_16bit = config.value_16bit.Value();
for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
memcpy(ptr, &value_16bit, sizeof(u16));
}
}
static void DisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
MICROPROFILE_SCOPE(GPU_DisplayTransfer);
const PAddr src_addr = config.GetPhysicalInputAddress();
const PAddr dst_addr = config.GetPhysicalOutputAddress();
// TODO: do hwtest with these cases
if (!VideoCore::g_memory->IsValidPhysicalAddress(src_addr)) {
LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr);
return;
}
if (!VideoCore::g_memory->IsValidPhysicalAddress(dst_addr)) {
LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr);
return;
}
if (config.input_width == 0) {
LOG_CRITICAL(HW_GPU, "zero input width");
return;
}
if (config.input_height == 0) {
LOG_CRITICAL(HW_GPU, "zero input height");
return;
}
if (config.output_width == 0) {
LOG_CRITICAL(HW_GPU, "zero output width");
return;
}
if (config.output_height == 0) {
LOG_CRITICAL(HW_GPU, "zero output height");
return;
}
if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config))
return;
u8* src_pointer = VideoCore::g_memory->GetPhysicalPointer(src_addr);
u8* dst_pointer = VideoCore::g_memory->GetPhysicalPointer(dst_addr);
if (config.scaling > config.ScaleXY) {
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}",
config.scaling.Value());
UNIMPLEMENTED();
return;
}
if (config.input_linear && config.scaling != config.NoScale) {
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
UNIMPLEMENTED();
return;
}
int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
u32 output_width = config.output_width >> horizontal_scale;
u32 output_height = config.output_height >> vertical_scale;
u32 input_size =
config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
Common::Vec4<u8> src_color;
// Calculate the [x,y] position of the input image
// based on the current output position and the scale
u32 input_x = x << horizontal_scale;
u32 input_y = y << vertical_scale;
u32 output_y;
if (config.flip_vertically) {
// Flip the y value of the output data,
// we do this after calculating the [x,y] position of the input image
// to account for the scaling options.
output_y = output_height - y - 1;
} else {
output_y = y;
}
u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
u32 src_offset;
u32 dst_offset;
if (config.input_linear) {
if (!config.dont_swizzle) {
// Interpret the input as linear and the output as tiled
u32 coarse_y = output_y & ~7;
u32 stride = output_width * dst_bytes_per_pixel;
src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) +
coarse_y * stride;
} else {
// Both input and output are linear
src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel;
}
} else {
if (!config.dont_swizzle) {
// Interpret the input as tiled and the output as linear
u32 coarse_y = input_y & ~7;
u32 stride = config.input_width * src_bytes_per_pixel;
src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
coarse_y * stride;
dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel;
} else {
// Both input and output are tiled
u32 out_coarse_y = output_y & ~7;
u32 out_stride = output_width * dst_bytes_per_pixel;
u32 in_coarse_y = input_y & ~7;
u32 in_stride = config.input_width * src_bytes_per_pixel;
src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
in_coarse_y * in_stride;
dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) +
out_coarse_y * out_stride;
}
}
const u8* src_pixel = src_pointer + src_offset;
src_color = DecodePixel(config.input_format, src_pixel);
if (config.scaling == config.ScaleX) {
Common::Vec4<u8> pixel =
DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
src_color = ((src_color + pixel) / 2).Cast<u8>();
} else if (config.scaling == config.ScaleXY) {
Common::Vec4<u8> pixel1 =
DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
Common::Vec4<u8> pixel2 =
DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
Common::Vec4<u8> pixel3 =
DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
}
u8* dst_pixel = dst_pointer + dst_offset;
switch (config.output_format) {
case GPU::Regs::PixelFormat::RGBA8:
Color::EncodeRGBA8(src_color, dst_pixel);
break;
case GPU::Regs::PixelFormat::RGB8:
Color::EncodeRGB8(src_color, dst_pixel);
break;
case GPU::Regs::PixelFormat::RGB565:
Color::EncodeRGB565(src_color, dst_pixel);
break;
case GPU::Regs::PixelFormat::RGB5A1:
Color::EncodeRGB5A1(src_color, dst_pixel);
break;
case GPU::Regs::PixelFormat::RGBA4:
Color::EncodeRGBA4(src_color, dst_pixel);
break;
default:
LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}",
static_cast<u32>(config.output_format.Value()));
break;
}
}
}
}
static void TextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
MICROPROFILE_SCOPE(GPU_TextureCopy);
const PAddr src_addr = config.GetPhysicalInputAddress();
const PAddr dst_addr = config.GetPhysicalOutputAddress();
// TODO: do hwtest with invalid addresses
if (!VideoCore::g_memory->IsValidPhysicalAddress(src_addr)) {
LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr);
return;
}
if (!VideoCore::g_memory->IsValidPhysicalAddress(dst_addr)) {
LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr);
return;
}
if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
return;
u8* src_pointer = VideoCore::g_memory->GetPhysicalPointer(src_addr);
u8* dst_pointer = VideoCore::g_memory->GetPhysicalPointer(dst_addr);
u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16);
if (remaining_size == 0) {
LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this.");
return;
}
u32 input_gap = config.texture_copy.input_gap * 16;
u32 output_gap = config.texture_copy.output_gap * 16;
// Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width
// is assigned with the total size if gap = 0.
u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16;
u32 output_width = output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16;
if (input_width == 0) {
LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this.");
return;
}
if (output_width == 0) {
LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this.");
return;
}
std::size_t contiguous_input_size =
config.texture_copy.size / input_width * (input_width + input_gap);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
static_cast<u32>(contiguous_input_size));
std::size_t contiguous_output_size =
config.texture_copy.size / output_width * (output_width + output_gap);
// Only need to flush output if it has a gap
// const auto FlushInvalidate_fn = (output_gap != 0) ?
// &VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion
// :
// &VideoCore::g_renderer->Rasterizer()->InvalidateRegion;
if (output_gap != 0) {
VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(
config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
} else {
VideoCore::g_renderer->Rasterizer()->InvalidateRegion(
config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
}
u32 remaining_input = input_width;
u32 remaining_output = output_width;
while (remaining_size > 0) {
u32 copy_size = std::min({remaining_input, remaining_output, remaining_size});
std::memcpy(dst_pointer, src_pointer, copy_size);
src_pointer += copy_size;
dst_pointer += copy_size;
remaining_input -= copy_size;
remaining_output -= copy_size;
remaining_size -= copy_size;
if (remaining_input == 0) {
remaining_input = input_width;
src_pointer += input_gap;
}
if (remaining_output == 0) {
remaining_output = output_width;
dst_pointer += output_gap;
}
}
}
void ProcessDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
if (config.is_texture_copy) {
TextureCopy(config);
LOG_TRACE(HW_GPU,
"TextureCopy: {:#X} bytes from {:#010X}({}+{})-> "
"{:#010X}({}+{}), flags {:#010X}",
config.texture_copy.size, config.GetPhysicalInputAddress(),
config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16,
config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16,
config.texture_copy.output_gap * 16, config.flags);
} else {
DisplayTransfer(config);
LOG_TRACE(HW_GPU,
"DisplayTransfer: {:#010X}({}x{})-> "
"{:#010X}({}x{}), dst format {:x}, flags {:#010X}",
config.GetPhysicalInputAddress(), config.input_width.Value(),
config.input_height.Value(), config.GetPhysicalOutputAddress(),
config.output_width.Value(), config.output_height.Value(),
static_cast<u32>(config.output_format.Value()), config.flags);
}
}
void AfterCommandList() {
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::P3D);
GPU::g_regs.command_processor_config.trigger = 0;
}
void AfterDisplayTransfer() {
GPU::g_regs.display_transfer_config.trigger = 0;
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PPF);
}
void AfterMemoryFill(bool is_second_filler) {
const auto& config = GPU::g_regs.memory_fill_config[is_second_filler];
// Reset "trigger" flag and set the "finish" flag
// NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
GPU::g_regs.memory_fill_config[is_second_filler ? 1 : 0].trigger.Assign(0);
GPU::g_regs.memory_fill_config[is_second_filler ? 1 : 0].finished.Assign(1);
// It seems that it won't signal interrupt if "address_start" is zero.
// TODO: hwtest this
if (config.GetStartAddress() != 0) {
if (!is_second_filler) {
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC0);
} else {
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC1);
}
}
}
void AfterSwapBuffers() {
// Signal to GSP that GPU interrupt has occurred
// TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub
// screen, or if both use the same interrupts and these two instead determine the
// beginning and end of the VBlank period. If needed, split the interrupt firing into
// two different intervals.
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0);
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1);
}
} // namespace Pica::CommandProcessor

View File

@ -5,8 +5,10 @@
#pragma once
#include <type_traits>
#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/hw/gpu.h"
namespace Pica::CommandProcessor {
@ -34,4 +36,17 @@ static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect
void ProcessCommandList(PAddr list, u32 size);
void AfterCommandList();
void ProcessDisplayTransfer(const GPU::Regs::DisplayTransferConfig&);
void AfterDisplayTransfer();
void ProcessMemoryFill(const GPU::Regs::MemoryFillConfig&);
void AfterMemoryFill(bool);
// TODO move somewhere else
void AfterSwapBuffers();
} // namespace Pica::CommandProcessor

src/video_core/gpu.cpp (new file)
View File

@ -0,0 +1,86 @@
// Copyright 2019 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "video_core/command_processor.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCore {
GPUBackend::GPUBackend(VideoCore::RendererBase& renderer) : renderer{renderer} {}
GPUBackend::~GPUBackend() = default;
GPUSerial::GPUSerial(Core::System& system, VideoCore::RendererBase& renderer)
: GPUBackend(renderer), system{system} {}
GPUSerial::~GPUSerial() {}
void GPUSerial::ProcessCommandList(PAddr list, u32 size) {
Pica::CommandProcessor::ProcessCommandList(list, size);
Pica::CommandProcessor::AfterCommandList();
}
void GPUSerial::SwapBuffers() {
renderer.SwapBuffers();
Pica::CommandProcessor::AfterSwapBuffers();
}
void GPUSerial::DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
Pica::CommandProcessor::ProcessDisplayTransfer(*config);
Pica::CommandProcessor::AfterDisplayTransfer();
}
void GPUSerial::MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
Pica::CommandProcessor::ProcessMemoryFill(*config);
Pica::CommandProcessor::AfterMemoryFill(is_second_filler);
}
void GPUSerial::FlushRegion(VAddr addr, u64 size) {
renderer.Rasterizer()->FlushRegion(addr, size);
}
void GPUSerial::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer.Rasterizer()->FlushAndInvalidateRegion(addr, size);
}
void GPUSerial::InvalidateRegion(VAddr addr, u64 size) {
renderer.Rasterizer()->InvalidateRegion(addr, size);
}
GPUParallel::GPUParallel(Core::System& system, VideoCore::RendererBase& renderer)
: GPUBackend(renderer), gpu_thread(system, renderer) {}
GPUParallel::~GPUParallel() = default;
void GPUParallel::ProcessCommandList(PAddr list, u32 size) {
gpu_thread.SubmitList(list, size);
}
void GPUParallel::SwapBuffers() {
gpu_thread.SwapBuffers();
}
void GPUParallel::DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
gpu_thread.DisplayTransfer(config);
}
void GPUParallel::MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
gpu_thread.MemoryFill(config, is_second_filler);
}
void GPUParallel::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPUParallel::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
void GPUParallel::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
} // namespace VideoCore

src/video_core/gpu.h (new file)
View File

@ -0,0 +1,73 @@
// Copyright 2019 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "core/hw/gpu.h"
#include "video_core/gpu_thread.h"
namespace Core {
class System;
}
namespace VideoCore {
class RendererBase;
class GPUBackend {
public:
explicit GPUBackend(VideoCore::RendererBase& renderer);
virtual ~GPUBackend();
virtual void ProcessCommandList(PAddr list, u32 size) = 0;
virtual void SwapBuffers() = 0;
virtual void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) = 0;
virtual void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) = 0;
virtual void FlushRegion(VAddr addr, u64 size) = 0;
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
protected:
VideoCore::RendererBase& renderer;
};
class GPUSerial : public GPUBackend {
public:
explicit GPUSerial(Core::System& system, VideoCore::RendererBase& renderer);
~GPUSerial();
void ProcessCommandList(PAddr list, u32 size) override;
void SwapBuffers() override;
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) override;
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) override;
void FlushRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
private:
Core::System& system;
};
class GPUParallel : public GPUBackend {
public:
explicit GPUParallel(Core::System& system, VideoCore::RendererBase& renderer);
~GPUParallel();
void ProcessCommandList(PAddr list, u32 size) override;
void SwapBuffers() override;
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) override;
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) override;
void FlushRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
private:
GPUThread::ThreadManager gpu_thread;
};
} // namespace VideoCore
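How a backend gets chosen is part of VideoCore initialization, which falls outside this excerpt; a minimal sketch of that choice keyed on the new setting (the helper name is illustrative):

#include <memory>
#include "core/settings.h"
#include "video_core/gpu.h"

// Illustrative helper: pick the GPU backend according to use_asynchronous_gpu_emulation.
std::unique_ptr<VideoCore::GPUBackend> MakeGpuBackend(Core::System& system,
                                                      VideoCore::RendererBase& renderer) {
    if (Settings::values.use_asynchronous_gpu_emulation) {
        return std::make_unique<VideoCore::GPUParallel>(system, renderer); // dedicated GPU thread
    }
    return std::make_unique<VideoCore::GPUSerial>(system, renderer); // stays on the emulator thread
}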

View File

@ -0,0 +1,226 @@
// Copyright 2019 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/thread.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/dumping/backend.h"
#include "core/frontend/scope_acquire_context.h"
#include "core/settings.h"
#include "video_core/command_processor.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCore::GPUThread {
/// Runs the GPU thread
static void RunThread(VideoCore::RendererBase& renderer, SynchState& state, Core::System& system) {
MicroProfileOnThreadCreate("GpuThread");
Common::SetCurrentThreadName("GpuThread");
// Wait for first GPU command before acquiring the window context
state.WaitForCommands();
// If emulation was stopped during disk shader loading, abort before trying to acquire context
if (!state.is_running) {
return;
}
Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()};
CommandDataContainer next;
while (state.is_running) {
state.WaitForCommands();
while (state.queue.Pop(next)) {
auto command = &next.data;
auto fence = next.fence;
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
Pica::CommandProcessor::ProcessCommandList(submit_list->list, submit_list->size);
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
renderer.SwapBuffers();
Pica::CommandProcessor::AfterSwapBuffers();
} else if (const auto data = std::get_if<MemoryFillCommand>(command)) {
Pica::CommandProcessor::ProcessMemoryFill(*(data->config));
const bool is_second_filler = fence & (1llu << 63);
Pica::CommandProcessor::AfterMemoryFill(is_second_filler);
} else if (const auto data = std::get_if<DisplayTransferCommand>(command)) {
Pica::CommandProcessor::ProcessDisplayTransfer(*(data->config));
Pica::CommandProcessor::AfterDisplayTransfer();
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
renderer.Rasterizer()->FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
renderer.Rasterizer()->FlushAndInvalidateRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
renderer.Rasterizer()->InvalidateRegion(data->addr, data->size);
} else {
UNREACHABLE();
}
state.signaled_fence = next.fence;
}
}
}
ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer)
: system{system}, renderer{renderer} {
synchronize_event = system.CoreTiming().RegisterEvent(
"GPUSynchronizeEvent", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
thread = std::make_unique<std::thread>(RunThread, std::ref(renderer), std::ref(state),
std::ref(system));
thread_id = thread->get_id();
}
ThreadManager::~ThreadManager() {
// Notify GPU thread that a shutdown is pending
state.is_running.exchange(false);
thread->join();
}
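// Waits for the GPU thread to reach the given fence: immediately, after a scheduled timeout,
// or not at all, depending on the configured timing mode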
void ThreadManager::Synchronize(u64 fence, Settings::GpuTimingMode mode) {
int timeout_us{};
switch (mode) {
case Settings::GpuTimingMode::Asynch:
case Settings::GpuTimingMode::Skip:
return;
case Settings::GpuTimingMode::Asynch_10us:
timeout_us = 10;
break;
case Settings::GpuTimingMode::Asynch_20us:
timeout_us = 20;
break;
case Settings::GpuTimingMode::Asynch_40us:
timeout_us = 40;
break;
case Settings::GpuTimingMode::Asynch_60us:
timeout_us = 60;
break;
case Settings::GpuTimingMode::Asynch_80us:
timeout_us = 80;
break;
case Settings::GpuTimingMode::Asynch_100us:
timeout_us = 100;
break;
case Settings::GpuTimingMode::Asynch_200us:
timeout_us = 200;
break;
case Settings::GpuTimingMode::Asynch_400us:
timeout_us = 400;
break;
case Settings::GpuTimingMode::Asynch_600us:
timeout_us = 600;
break;
case Settings::GpuTimingMode::Asynch_800us:
timeout_us = 800;
break;
case Settings::GpuTimingMode::Asynch_1ms:
timeout_us = 1000;
break;
case Settings::GpuTimingMode::Asynch_2ms:
timeout_us = 2000;
break;
case Settings::GpuTimingMode::Asynch_4ms:
timeout_us = 4000;
break;
case Settings::GpuTimingMode::Asynch_6ms:
timeout_us = 6000;
break;
case Settings::GpuTimingMode::Asynch_8ms:
timeout_us = 8000;
break;
}
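// A positive timeout schedules a deferred synchronization event; a timeout of zero
// (fully synchronous modes) blocks until the fence is signaled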
if (timeout_us > 0) {
system.CoreTiming().ScheduleEvent(usToCycles(timeout_us), synchronize_event, fence);
} else if (timeout_us == 0) {
state.WaitForSynchronization(fence);
}
}
void ThreadManager::SubmitList(PAddr list, u32 size) {
if (size == 0) {
return;
}
Synchronize(PushCommand(SubmitListCommand{list, size}),
Settings::values.gpu_timing_mode_submit_list);
}
void ThreadManager::SwapBuffers() {
Synchronize(PushCommand(SwapBuffersCommand{}), Settings::values.gpu_timing_mode_swap_buffers);
}
void ThreadManager::DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
Synchronize(PushCommand(DisplayTransferCommand{config}),
Settings::values.gpu_timing_mode_display_transfer);
}
void ThreadManager::MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
Synchronize(PushCommand(MemoryFillCommand{config, is_second_filler}),
Settings::values.gpu_timing_mode_memory_fill);
}
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
if (Settings::values.gpu_timing_mode_flush == Settings::GpuTimingMode::Skip) {
return;
}
if (!IsGpuThread()) {
Synchronize(PushCommand(FlushRegionCommand{addr, size}),
Settings::values.gpu_timing_mode_flush);
} else {
renderer.Rasterizer()->FlushRegion(addr, size);
}
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::values.gpu_timing_mode_flush_and_invalidate == Settings::GpuTimingMode::Skip) {
return;
}
if (!IsGpuThread()) {
Synchronize(PushCommand(FlushAndInvalidateRegionCommand{addr, size}),
Settings::values.gpu_timing_mode_flush_and_invalidate);
} else {
renderer.Rasterizer()->FlushAndInvalidateRegion(addr, size);
}
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
if (Settings::values.gpu_timing_mode_invalidate == Settings::GpuTimingMode::Skip) {
return;
}
if (!IsGpuThread()) {
Synchronize(PushCommand(InvalidateRegionCommand{addr, size}),
Settings::values.gpu_timing_mode_invalidate);
} else {
renderer.Rasterizer()->InvalidateRegion(addr, size);
}
}
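// Enqueues the command for the GPU thread and returns the fence value that will be signaled
// once it has been executed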
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
return fence;
}
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
void SynchState::WaitForSynchronization(u64 fence) {
if (signaled_fence >= fence) {
return;
}
// Wait for the GPU to be idle (all commands to be executed)
MICROPROFILE_SCOPE(GPU_wait);
while (signaled_fence < fence && is_running) {
}
}
} // namespace VideoCore::GPUThread

218
src/video_core/gpu_thread.h Normal file
View File

@ -0,0 +1,218 @@
// Copyright 2019 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <atomic>
#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <variant>
#include "common/threadsafe_queue.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/command_processor.h"
namespace VideoCore {
class RendererBase;
}
namespace VideoCore::GPUThread {
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand {
// In order for the variant to be default constructible, the first element needs a default
// constructor
constexpr SubmitListCommand() : list(0), size(0) {}
explicit constexpr SubmitListCommand(PAddr list, u32 size) : list(list), size(size) {}
PAddr list;
u32 size;
};
static_assert(std::is_copy_assignable<SubmitListCommand>::value,
"SubmitListCommand is not copy assignable");
static_assert(std::is_copy_constructible<SubmitListCommand>::value,
"SubmitListCommand is not copy constructable");
/// Command to signal to the GPU thread that a swap buffers is pending
struct SwapBuffersCommand final {
explicit constexpr SwapBuffersCommand() {}
};
static_assert(std::is_copy_assignable<SwapBuffersCommand>::value,
"SwapBuffersCommand is not copy assignable");
static_assert(std::is_copy_constructible<SwapBuffersCommand>::value,
"SwapBuffersCommand is not copy constructable");
struct MemoryFillCommand final {
explicit constexpr MemoryFillCommand(const GPU::Regs::MemoryFillConfig* config,
bool is_second_filler)
: config{config}, is_second_filler(is_second_filler) {}
const GPU::Regs::MemoryFillConfig* config;
bool is_second_filler;
};
static_assert(std::is_copy_assignable<MemoryFillCommand>::value,
"MemoryFillCommand is not copy assignable");
static_assert(std::is_copy_constructible<MemoryFillCommand>::value,
"MemoryFillCommand is not copy constructable");
struct DisplayTransferCommand final {
explicit constexpr DisplayTransferCommand(const GPU::Regs::DisplayTransferConfig* config)
: config{config} {}
const GPU::Regs::DisplayTransferConfig* config;
};
static_assert(std::is_copy_assignable<DisplayTransferCommand>::value,
"DisplayTransferCommand is not copy assignable");
static_assert(std::is_copy_constructible<DisplayTransferCommand>::value,
"DisplayTransferCommand is not copy constructable");
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
VAddr addr;
u64 size;
};
static_assert(std::is_copy_assignable<FlushRegionCommand>::value,
"FlushRegionCommand is not copy assignable");
static_assert(std::is_copy_constructible<FlushRegionCommand>::value,
"FlushRegionCommand is not copy constructable");
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
: addr{addr}, size{size} {}
VAddr addr;
u64 size;
};
static_assert(std::is_copy_assignable<FlushAndInvalidateRegionCommand>::value,
"FlushAndInvalidateRegionCommand is not copy assignable");
static_assert(std::is_copy_constructible<FlushAndInvalidateRegionCommand>::value,
"FlushAndInvalidateRegionCommand is not copy constructable");
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
VAddr addr;
u64 size;
};
static_assert(std::is_copy_assignable<InvalidateRegionCommand>::value,
"InvalidateRegionCommand is not copy assignable");
static_assert(std::is_copy_constructible<InvalidateRegionCommand>::value,
"InvalidateRegionCommand is not copy constructable");
using CommandData =
std::variant<SubmitListCommand, SwapBuffersCommand, MemoryFillCommand, DisplayTransferCommand,
FlushRegionCommand, FlushAndInvalidateRegionCommand, InvalidateRegionCommand>;
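/// A command paired with the fence value that is signaled once the GPU thread has executed it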
struct CommandDataContainer {
CommandDataContainer() = default;
CommandDataContainer(CommandData&& data, u64 next_fence)
: data{std::move(data)}, fence{next_fence} {}
CommandData data;
u64 fence{};
};
/// Struct used to synchronize the GPU thread
struct SynchState final {
std::atomic_bool is_running{true};
std::atomic_int queued_frame_count{};
std::mutex synchronization_mutex;
std::mutex commands_mutex;
std::condition_variable commands_condition;
std::condition_variable synchronization_condition;
/// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
/// synchronized. This is entirely empirical.
bool IsSynchronized() const {
constexpr std::size_t max_queue_gap{100};
return queue.Size() <= max_queue_gap;
}
void TrySynchronize() {
if (IsSynchronized()) {
std::lock_guard lock{synchronization_mutex};
synchronization_condition.notify_one();
}
}
void WaitForSynchronization(u64 fence);
void SignalCommands() {
if (queue.Empty()) {
return;
}
{
std::lock_guard<std::mutex> lock(commands_mutex);
commands_condition.notify_one();
}
}
void WaitForCommands() {
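// Busy-wait until a command is queued; the condition-variable path below is currently disabled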
while (queue.Empty() && is_running)
;
// std::unique_lock lock{commands_mutex};
// commands_condition.wait(lock, [this] { return !queue.Empty(); });
}
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
u64 last_fence{};
std::atomic<u64> signaled_fence{};
};
/// Class used to manage the GPU thread
class ThreadManager final {
public:
explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer);
~ThreadManager();
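/// Push GPU command entries to be processed by the GPU thread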
void SubmitList(PAddr list, u32 size);
void SwapBuffers();
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig*);
void MemoryFill(const GPU::Regs::MemoryFillConfig*, bool is_second_filler);
void FlushRegion(VAddr addr, u64 size);
void FlushAndInvalidateRegion(VAddr addr, u64 size);
void InvalidateRegion(VAddr addr, u64 size);
private:
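/// Waits for the GPU thread to catch up to the given fence, according to the timing mode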
void Synchronize(u64 fence, Settings::GpuTimingMode mode);
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
/// Returns true if this is called by the GPU thread
bool IsGpuThread() const {
return std::this_thread::get_id() == thread_id;
}
private:
SynchState state;
std::unique_ptr<std::thread> thread;
std::thread::id thread_id{};
Core::System& system;
VideoCore::RendererBase& renderer;
Core::TimingEventType* synchronize_event{};
};
} // namespace VideoCore::GPUThread

View File

@ -9,6 +9,8 @@
#include "video_core/swrasterizer/swrasterizer.h"
#include "video_core/video_core.h"
namespace VideoCore {
RendererBase::RendererBase(Frontend::EmuWindow& window) : render_window{window} {}
RendererBase::~RendererBase() = default;
void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) {
@ -32,3 +34,5 @@ void RendererBase::RefreshRasterizerSetting() {
void RendererBase::Sync() {
rasterizer->SyncEntireState();
}
} // namespace VideoCore

View File

@ -13,6 +13,8 @@ namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
class RendererBase : NonCopyable {
public:
explicit RendererBase(Frontend::EmuWindow& window);
@ -75,3 +77,5 @@ protected:
private:
bool opengl_rasterizer_active = false;
};
} // namespace VideoCore

View File

@ -289,7 +289,9 @@ RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_i
vertex_min = 0xFFFF;
vertex_max = 0;
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
#ifndef ANDROID
res_cache.FlushRegion(address, size, nullptr);
#endif
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
vertex_min = std::min(vertex_min, vertex);
@ -361,8 +363,9 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
u32 data_size = loader.byte_count * vertex_num;
#ifndef ANDROID
res_cache.FlushRegion(data_addr, data_size, nullptr);
#endif
std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
array_ptr += data_size;

View File

@ -1244,6 +1244,10 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
RefreshRasterizerSetting();
if (Settings::values.use_asynchronous_gpu_emulation) {
render_window.DoneCurrent();
}
return VideoCore::ResultStatus::Success;
}

View File

@ -57,7 +57,7 @@ struct PresentationTexture {
OGLTexture texture;
};
class RendererOpenGL : public RendererBase {
class RendererOpenGL : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Frontend::EmuWindow& window);
~RendererOpenGL() override;

View File

@ -5,7 +5,10 @@
#include <memory>
#include "common/archives.h"
#include "common/logging/log.h"
#include "common/vector_math.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_base.h"
@ -19,6 +22,7 @@
namespace VideoCore {
std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
std::unique_ptr<GPUBackend> g_gpu;
std::atomic<bool> g_hw_renderer_enabled;
std::atomic<bool> g_shader_jit_enabled;
@ -39,13 +43,20 @@ Layout::FramebufferLayout g_screenshot_framebuffer_layout;
Memory::MemorySystem* g_memory;
/// Initialize the video core
ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory) {
ResultStatus Init(Core::System& system, Frontend::EmuWindow& emu_window,
Memory::MemorySystem& memory) {
g_memory = &memory;
Pica::Init();
OpenGL::GLES = Settings::values.use_gles;
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window);
if (Settings::values.use_asynchronous_gpu_emulation) {
g_gpu = std::make_unique<VideoCore::GPUParallel>(system, *g_renderer);
} else {
g_gpu = std::make_unique<VideoCore::GPUSerial>(system, *g_renderer);
}
ResultStatus result = g_renderer->Init();
if (result != ResultStatus::Success) {
@ -62,6 +73,7 @@ void Shutdown() {
Pica::Shutdown();
g_renderer->ShutDown();
g_gpu.reset();
g_renderer.reset();
LOG_DEBUG(Render, "shutdown OK");
@ -95,6 +107,34 @@ void serialize(Archive& ar, const unsigned int) {
ar& Pica::g_state;
}
void ProcessCommandList(PAddr list, u32 size) {
g_gpu->ProcessCommandList(list, size);
}
void SwapBuffers() {
g_gpu->SwapBuffers();
}
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config) {
g_gpu->DisplayTransfer(config);
}
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler) {
g_gpu->MemoryFill(config, is_second_filler);
}
void FlushRegion(VAddr addr, u64 size) {
g_gpu->FlushRegion(addr, size);
}
void FlushAndInvalidateRegion(VAddr addr, u64 size) {
g_gpu->FlushAndInvalidateRegion(addr, size);
}
void InvalidateRegion(VAddr addr, u64 size) {
g_gpu->InvalidateRegion(addr, size);
}
} // namespace VideoCore
SERIALIZE_IMPL(VideoCore)

View File

@ -8,13 +8,12 @@
#include <iostream>
#include <memory>
#include "core/frontend/emu_window.h"
#include "video_core/command_processor.h"
namespace Frontend {
class EmuWindow;
}
class RendererBase;
namespace Memory {
class MemorySystem;
}
@ -24,7 +23,11 @@ class MemorySystem;
namespace VideoCore {
class GPUBackend;
class RendererBase;
extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
extern std::unique_ptr<VideoCore::GPUBackend> g_gpu;
// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from
// qt ui)
@ -53,7 +56,28 @@ enum class ResultStatus {
};
/// Initialize the video core
ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory);
ResultStatus Init(Core::System& system, Frontend::EmuWindow& emu_window,
Memory::MemorySystem& memory);
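/// Submits a GPU command list at the given physical address for processing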
void ProcessCommandList(PAddr list, u32 size);
/// Notify rasterizer that it should swap the current framebuffer
void SwapBuffers();
/// Perform a DisplayTransfer (accelerated by the rasterizer if available)
void DisplayTransfer(const GPU::Regs::DisplayTransferConfig* config);
/// Perform a MemoryFill (accelerated by the rasterizer if available)
void MemoryFill(const GPU::Regs::MemoryFillConfig* config, bool is_second_filler);
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(VAddr addr, u64 size);
/// Shutdown the video core
void Shutdown();