diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index aa78ff8b..9ee53ee4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -10,12 +10,12 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel # See the following, which includes links to supported macOS versions, including supported Xcode versions -# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources +# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources jobs: build: strategy: matrix: - xcode: [ "14.3.1" ] + xcode: [ "15.0" ] platform: [ "all", "macos", "ios" ] os: [ "macos-13" ] upload_artifacts: [ true ] @@ -39,6 +39,11 @@ jobs: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v3 + # Python 3.12 removed distutils, which is used by glslang::update_glslang_sources.py called from fetchDependencies + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Select Xcode version run: sudo xcode-select -switch "${XCODE_DEV_PATH}" diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index 79d89216..2c40602a 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -24,6 +24,9 @@ #include +#pragma mark - +#pragma mark Operating System versions + typedef float MVKOSVersion; /*** Constant indicating unsupported functionality in an OS. */ @@ -39,44 +42,58 @@ static const MVKOSVersion kMVKOSVersionUnsupported = std::numeric_limits= minVer; } +static inline bool mvkOSVersionIsAtLeast(MVKOSVersion minVer) { return mvkOSVersion() >= minVer; } /** * Returns whether the operating system version is at least the appropriate min version. 
- * The constant kMVKOSVersionUnsupported can be used for either value to cause the test - * to always fail on that OS, which is useful for indidicating functionalty guarded by + * The constant kMVKOSVersionUnsupported can be used for any of the values to cause the test + * to always fail on that OS, which is useful for indicating that functionality guarded by * this test is not supported on that OS. */ -inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, MVKOSVersion iOSMinVer, MVKOSVersion visionOSMinVer) { +static inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, + MVKOSVersion iOSMinVer, + MVKOSVersion visionOSMinVer) { #if MVK_MACOS return mvkOSVersionIsAtLeast(macOSMinVer); #endif +#if MVK_IOS_OR_TVOS + return mvkOSVersionIsAtLeast(iOSMinVer); +#endif #if MVK_VISIONOS return mvkOSVersionIsAtLeast(visionOSMinVer); -#elif MVK_IOS_OR_TVOS - return mvkOSVersionIsAtLeast(iOSMinVer); #endif } + +#pragma mark - +#pragma mark Timestamps + /** - * Returns a monotonic timestamp value for use in Vulkan and performance timestamping. + * Returns a monotonic tick value for use in Vulkan and performance timestamping. * - * The returned value corresponds to the number of CPU "ticks" since the app was initialized. - * - * Calling this value twice, subtracting the first value from the second, and then multiplying - * the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the - * number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds() - * can be used to perform this calculation. + * The returned value corresponds to the number of CPU ticks since an arbitrary + * point in the past, and does not increment while the system is asleep. */ uint64_t mvkGetTimestamp(); -/** Returns the number of nanoseconds between each increment of the value returned by mvkGetTimestamp(). 
*/ -double mvkGetTimestampPeriod(); +/** + * Returns the number of runtime nanoseconds since an arbitrary point in the past, + * excluding any time spent while the system is asleep. + * + * This value corresponds to the timestamps returned by Metal presentation timings. + */ +uint64_t mvkGetRuntimeNanoseconds(); + +/** + * Returns the number of nanoseconds since an arbitrary point in the past, + * including any time spent while the system is asleep. + */ +uint64_t mvkGetContinuousNanoseconds(); /** * Returns the number of nanoseconds elapsed between startTimestamp and endTimestamp, @@ -94,73 +111,27 @@ uint64_t mvkGetElapsedNanoseconds(uint64_t startTimestamp = 0, uint64_t endTimes */ double mvkGetElapsedMilliseconds(uint64_t startTimestamp = 0, uint64_t endTimestamp = 0); -/** Returns the current absolute time in nanoseconds. */ -uint64_t mvkGetAbsoluteTime(); - -/** Ensures the block is executed on the main thread. */ -void mvkDispatchToMainAndWait(dispatch_block_t block); - #pragma mark - #pragma mark Process environment /** - * Returns the value of the environment variable at the given name, - * or an empty string if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. + * Sets the value of the environment variable at the given name, into the + * std::string, and returns whether the environment variable was found. */ -std::string mvkGetEnvVar(std::string varName, bool* pWasFound = nullptr); +bool mvkGetEnvVar(const char* evName, std::string& evStr); + +/** + * Returns a pointer to a string containing the value of the environment variable at + * the given name, or returns the default value if the environment variable was not set. 
+ */ +const char* mvkGetEnvVarString(const char* evName, std::string& evStr, const char* defaultValue = ""); /** * Returns the value of the environment variable at the given name, - * or zero if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. + * or returns the default value if the environment variable was not set. */ -int64_t mvkGetEnvVarInt64(std::string varName, bool* pWasFound = nullptr); - -/** - * Returns the value of the environment variable at the given name, - * or false if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. - */ -bool mvkGetEnvVarBool(std::string varName, bool* pWasFound = nullptr); - -#define MVK_SET_FROM_ENV_OR_BUILD_BOOL(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - bool ev = mvkGetEnvVarBool(#EV, &wasFound); \ - cfgVal = wasFound ? ev : EV; \ - } while(false) - -#define MVK_SET_FROM_ENV_OR_BUILD_INT64(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \ - cfgVal = wasFound ? ev : EV; \ - } while(false) - -// Pointer cast permits cfgVal to be an enum var -#define MVK_SET_FROM_ENV_OR_BUILD_INT32(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \ - int64_t val = wasFound ? ev : EV; \ - *(int32_t*)&cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \ - } while(false) - -#define MVK_SET_FROM_ENV_OR_BUILD_STRING(cfgVal, EV, strObj) \ - do { \ - bool wasFound = false; \ - std::string ev = mvkGetEnvVar(#EV, &wasFound); \ - strObj = wasFound ? 
std::move(ev) : EV; \ - cfgVal = strObj.c_str(); \ - } while(false) +double mvkGetEnvVarNumber(const char* evName, double defaultValue = 0.0); #pragma mark - @@ -178,8 +149,12 @@ uint64_t mvkGetUsedMemorySize(); /** Returns the size of a page of host memory on this platform. */ uint64_t mvkGetHostMemoryPageSize(); + #pragma mark - #pragma mark Threading /** Returns the amount of avaliable CPU cores. */ uint32_t mvkGetAvaliableCPUCores(); + +/** Ensures the block is executed on the main thread. */ +void mvkDispatchToMainAndWait(dispatch_block_t block); diff --git a/Common/MVKOSExtensions.mm b/Common/MVKOSExtensions.mm index 4e2c974c..8d33f3d4 100644 --- a/Common/MVKOSExtensions.mm +++ b/Common/MVKOSExtensions.mm @@ -29,6 +29,10 @@ using namespace std; + +#pragma mark - +#pragma mark Operating System versions + MVKOSVersion mvkOSVersion() { static MVKOSVersion _mvkOSVersion = 0; if ( !_mvkOSVersion ) { @@ -38,64 +42,57 @@ MVKOSVersion mvkOSVersion() { return _mvkOSVersion; } -static uint64_t _mvkTimestampBase; -static double _mvkTimestampPeriod; + +#pragma mark - +#pragma mark Timestamps + static mach_timebase_info_data_t _mvkMachTimebase; -uint64_t mvkGetTimestamp() { return mach_absolute_time() - _mvkTimestampBase; } +uint64_t mvkGetTimestamp() { return mach_absolute_time(); } -double mvkGetTimestampPeriod() { return _mvkTimestampPeriod; } +uint64_t mvkGetRuntimeNanoseconds() { return mach_absolute_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } + +uint64_t mvkGetContinuousNanoseconds() { return mach_continuous_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } uint64_t mvkGetElapsedNanoseconds(uint64_t startTimestamp, uint64_t endTimestamp) { if (endTimestamp == 0) { endTimestamp = mvkGetTimestamp(); } - return (endTimestamp - startTimestamp) * _mvkTimestampPeriod; + return (endTimestamp - startTimestamp) * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } double mvkGetElapsedMilliseconds(uint64_t startTimestamp, uint64_t endTimestamp) { 
return mvkGetElapsedNanoseconds(startTimestamp, endTimestamp) / 1e6; } -uint64_t mvkGetAbsoluteTime() { return mach_continuous_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } - -// Initialize timestamping capabilities on app startup. -//Called automatically when the framework is loaded and initialized. +// Initialize timestamp capabilities on app startup. +// Called automatically when the framework is loaded and initialized. static bool _mvkTimestampsInitialized = false; __attribute__((constructor)) static void MVKInitTimestamps() { if (_mvkTimestampsInitialized ) { return; } _mvkTimestampsInitialized = true; - _mvkTimestampBase = mach_absolute_time(); mach_timebase_info(&_mvkMachTimebase); - _mvkTimestampPeriod = (double)_mvkMachTimebase.numer / (double)_mvkMachTimebase.denom; -} - -void mvkDispatchToMainAndWait(dispatch_block_t block) { - if (NSThread.isMainThread) { - block(); - } else { - dispatch_sync(dispatch_get_main_queue(), block); - } } #pragma mark - #pragma mark Process environment -string mvkGetEnvVar(string varName, bool* pWasFound) { +bool mvkGetEnvVar(const char* varName, string& evStr) { @autoreleasepool { NSDictionary* nsEnv = [[NSProcessInfo processInfo] environment]; - NSString* envStr = nsEnv[@(varName.c_str())]; - if (pWasFound) { *pWasFound = envStr != nil; } - return envStr ? envStr.UTF8String : ""; + NSString* nsStr = nsEnv[@(varName)]; + if (nsStr) { evStr = nsStr.UTF8String; } + return nsStr != nil; } } -int64_t mvkGetEnvVarInt64(string varName, bool* pWasFound) { - return strtoll(mvkGetEnvVar(varName, pWasFound).c_str(), NULL, 0); +const char* mvkGetEnvVarString(const char* varName, string& evStr, const char* defaultValue) { + return mvkGetEnvVar(varName, evStr) ? 
evStr.c_str() : defaultValue; } -bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) { - return mvkGetEnvVarInt64(varName, pWasFound) != 0; +double mvkGetEnvVarNumber(const char* varName, double defaultValue) { + string evStr; + return mvkGetEnvVar(varName, evStr) ? strtod(evStr.c_str(), nullptr) : defaultValue; } @@ -144,6 +141,7 @@ uint64_t mvkGetUsedMemorySize() { uint64_t mvkGetHostMemoryPageSize() { return sysconf(_SC_PAGESIZE); } + #pragma mark - #pragma mark Threading @@ -151,3 +149,11 @@ uint64_t mvkGetHostMemoryPageSize() { return sysconf(_SC_PAGESIZE); } uint32_t mvkGetAvaliableCPUCores() { return (uint32_t)[[NSProcessInfo processInfo] activeProcessorCount]; } + +void mvkDispatchToMainAndWait(dispatch_block_t block) { + if (NSThread.isMainThread) { + block(); + } else { + dispatch_sync(dispatch_get_main_queue(), block); + } +} diff --git a/Demos/Cube/Cube.xcodeproj/project.pbxproj b/Demos/Cube/Cube.xcodeproj/project.pbxproj index 44654e51..66fb7d92 100644 --- a/Demos/Cube/Cube.xcodeproj/project.pbxproj +++ b/Demos/Cube/Cube.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 52; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ diff --git a/Demos/Cube/iOS/DemoViewController.m b/Demos/Cube/iOS/DemoViewController.m index 9256927a..30112a2c 100644 --- a/Demos/Cube/iOS/DemoViewController.m +++ b/Demos/Cube/iOS/DemoViewController.m @@ -30,15 +30,9 @@ struct demo demo; } --(void) dealloc { - demo_cleanup(&demo); - [_displayLink release]; - [super dealloc]; -} - -/** Since this is a single-view app, init Vulkan when the view is loaded. */ --(void) viewDidLoad { - [super viewDidLoad]; +/** Since this is a single-view app, initialize Vulkan as view is appearing. 
*/ +-(void) viewWillAppear: (BOOL) animated { + [super viewWillAppear: animated]; self.view.contentScaleFactor = UIScreen.mainScreen.nativeScale; @@ -68,6 +62,13 @@ demo_resize(&demo); } +-(void) viewDidDisappear: (BOOL) animated { + [_displayLink invalidate]; + [_displayLink release]; + demo_cleanup(&demo); + [super viewDidDisappear: animated]; +} + @end diff --git a/Demos/Cube/macOS/DemoViewController.m b/Demos/Cube/macOS/DemoViewController.m index d8468bdc..76dc4d92 100644 --- a/Demos/Cube/macOS/DemoViewController.m +++ b/Demos/Cube/macOS/DemoViewController.m @@ -18,6 +18,7 @@ #import "DemoViewController.h" #import +#import #include #include "../../Vulkan-Tools/cube/cube.c" @@ -27,27 +28,34 @@ #pragma mark DemoViewController @implementation DemoViewController { - CVDisplayLinkRef _displayLink; + CVDisplayLinkRef _displayLink; struct demo demo; + uint32_t _maxFrameCount; + uint64_t _frameCount; + BOOL _stop; + BOOL _useDisplayLink; } --(void) dealloc { - demo_cleanup(&demo); - CVDisplayLinkRelease(_displayLink); - [super dealloc]; -} - -/** Since this is a single-view app, initialize Vulkan during view loading. */ --(void) viewDidLoad { - [super viewDidLoad]; +/** Since this is a single-view app, initialize Vulkan as view is appearing. */ +-(void) viewWillAppear { + [super viewWillAppear]; self.view.wantsLayer = YES; // Back the view with a layer created by the makeBackingLayer method. - // Enabling this will sync the rendering loop with the natural display link (60 fps). - // Disabling this will allow the rendering loop to run flat out, limited only by the rendering speed. - bool useDisplayLink = true; + // Enabling this will sync the rendering loop with the natural display link + // (monitor refresh rate, typically 60 fps). Disabling this will allow the + // rendering loop to run flat out, limited only by the rendering speed. + _useDisplayLink = YES; - VkPresentModeKHR vkPresentMode = useDisplayLink ? 
VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR; + // If this value is set to zero, the demo will render frames until the window is closed. + // If this value is not zero, it establishes a maximum number of frames that will be + // rendered, and once this count has been reached, the demo will stop rendering. + // Once rendering is finished, if _useDisplayLink is false, the demo will immediately + // clean up the Vulkan objects, or if _useDisplayLink is true, the demo will delay + // cleaning up Vulkan objects until the window is closed. + _maxFrameCount = 0; + + VkPresentModeKHR vkPresentMode = _useDisplayLink ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR; char vkPresentModeStr[64]; sprintf(vkPresentModeStr, "%d", vkPresentMode); @@ -55,19 +63,33 @@ int argc = sizeof(argv)/sizeof(char*); demo_main(&demo, self.view.layer, argc, argv); - if (useDisplayLink) { + _stop = NO; + _frameCount = 0; + if (_useDisplayLink) { CVDisplayLinkCreateWithActiveCGDisplays(&_displayLink); - CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, &demo); + CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, self); CVDisplayLinkStart(_displayLink); } else { dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - while(true) { + do { demo_draw(&demo); - } + _stop = _stop || (_maxFrameCount && ++_frameCount >= _maxFrameCount); + } while( !_stop ); + demo_cleanup(&demo); }); } } +-(void) viewDidDisappear { + _stop = YES; + if (_useDisplayLink) { + CVDisplayLinkRelease(_displayLink); + demo_cleanup(&demo); + } + + [super viewDidDisappear]; +} + #pragma mark Display loop callback function @@ -78,7 +100,11 @@ static CVReturn DisplayLinkCallback(CVDisplayLinkRef displayLink, CVOptionFlags flagsIn, CVOptionFlags* flagsOut, void* target) { - demo_draw((struct demo*)target); + DemoViewController* demoVC =(DemoViewController*)target; + if ( !demoVC->_stop ) { + demo_draw(&demoVC->demo); + demoVC->_stop = 
(demoVC->_maxFrameCount && ++demoVC->_frameCount >= demoVC->_maxFrameCount); + } return kCVReturnSuccess; } diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 47fa35ba..7f826eae 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -312,7 +312,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_16bit_storage` - `VK_KHR_8bit_storage` - `VK_KHR_bind_memory2` -- `VK_KHR_buffer_device_address` *(requires GPU Tier 2 argument buffers support)* +- `VK_KHR_buffer_device_address` + - *Requires GPU Tier 2 argument buffers support.* - `VK_KHR_copy_commands2` - `VK_KHR_create_renderpass2` - `VK_KHR_dedicated_allocation` @@ -322,7 +323,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_device_group_creation` - `VK_KHR_driver_properties` - `VK_KHR_dynamic_rendering` -- `VK_KHR_fragment_shader_barycentric` *(requires Metal 2.2 on Mac or Metal 2.3 on iOS)* +- `VK_KHR_fragment_shader_barycentric` + - *Requires Metal 2.2 on Mac or Metal 2.3 on iOS.* - `VK_KHR_get_memory_requirements2` - `VK_KHR_get_physical_device_properties2` - `VK_KHR_get_surface_capabilities2` @@ -337,70 +339,103 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_portability_subset` - `VK_KHR_push_descriptor` - `VK_KHR_relaxed_block_layout` -- `VK_KHR_sampler_mirror_clamp_to_edge` *(requires a Mac GPU or Apple family 7 GPU)* +- `VK_KHR_sampler_mirror_clamp_to_edge` + - *Requires a Mac GPU or Apple family 7 GPU.* - `VK_KHR_sampler_ycbcr_conversion` - `VK_KHR_separate_depth_stencil_layouts` - `VK_KHR_shader_draw_parameters` - `VK_KHR_shader_float_controls` - `VK_KHR_shader_float16_int8` - `VK_KHR_shader_non_semantic_info` -- `VK_KHR_shader_subgroup_extended_types` *(requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS)* +- `VK_KHR_shader_subgroup_extended_types` + - *Requires Metal 2.1 on Mac or 
Metal 2.2 and Apple family 4 on iOS.* - `VK_KHR_spirv_1_4` - `VK_KHR_storage_buffer_storage_class` - `VK_KHR_surface` - `VK_KHR_swapchain` - `VK_KHR_swapchain_mutable_format` +- `VK_KHR_synchronization2` - `VK_KHR_timeline_semaphore` - `VK_KHR_uniform_buffer_standard_layout` - `VK_KHR_variable_pointers` -- `VK_EXT_4444_formats` *(requires 16-bit formats and either native texture swizzling or manual swizzling to be enabled)* -- `VK_EXT_buffer_device_address` *(requires GPU Tier 2 argument buffers support)* -- `VK_EXT_calibrated_timestamps` *(requires Metal 2.2)* +- `VK_EXT_4444_formats` + - *Requires 16-bit formats and either native texture swizzling or manual swizzling to be enabled.* +- `VK_EXT_buffer_device_address` + - *Requires GPU Tier 2 argument buffers support.* +- `VK_EXT_calibrated_timestamps` + - *Requires Metal 2.2.* - `VK_EXT_debug_marker` - `VK_EXT_debug_report` - `VK_EXT_debug_utils` -- `VK_EXT_descriptor_indexing` *(initial release limited to Metal Tier 1: 96/128 textures, - 16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using - an Intel GPU, and if Metal argument buffers enabled in config)* +- `VK_EXT_descriptor_indexing` + - *Initial release limited to Metal Tier 1: 96/128 textures, + 16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using + an Intel GPU, and if Metal argument buffers enabled in config.* +- `VK_EXT_extended_dynamic_state` + - *Requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`.* +- `VK_EXT_extended_dynamic_state2` + - *Primitive restart is always enabled, as Metal does not support disabling it (`VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT`).* +- `VK_EXT_extended_dynamic_state3` + - *Metal does not support `VK_POLYGON_MODE_POINT`* - `VK_EXT_external_memory_host` -- `VK_EXT_fragment_shader_interlock` *(requires Metal 2.0 and Raster Order Groups)* +- `VK_EXT_fragment_shader_interlock` + - *Requires Metal 2.0 and Raster Order Groups.* +- 
`VK_EXT_hdr_metadata` + - *macOS only.* +- `VK_EXT_headless_surface` - `VK_EXT_host_query_reset` - `VK_EXT_image_robustness` - `VK_EXT_inline_uniform_block` - `VK_EXT_layer_settings` -- `VK_EXT_memory_budget` *(requires Metal 2.0)* +- `VK_EXT_memory_budget` + - *Requires Metal 2.0.* - `VK_EXT_metal_objects` - `VK_EXT_metal_surface` - `VK_EXT_pipeline_creation_cache_control` - `VK_EXT_pipeline_creation_feedback` -- `VK_EXT_post_depth_coverage` *(iOS and macOS, requires family 4 (A11) or better Apple GPU)* +- `VK_EXT_post_depth_coverage` + - *iOS and macOS, requires family 4 (A11) or better Apple GPU.* - `VK_EXT_private_data ` - `VK_EXT_robustness2` - `VK_EXT_sample_locations` - `VK_EXT_scalar_block_layout` - `VK_EXT_separate_stencil_usage` -- `VK_EXT_shader_atomic_float` *(requires Metal 3.0)* -- `VK_EXT_shader_demote_to_helper_invocation` *(requires Metal Shading Language 2.3)* -- `VK_EXT_shader_stencil_export` *(requires Mac GPU family 2 or iOS GPU family 5)* -- `VK_EXT_shader_subgroup_ballot` *(requires Mac GPU family 2 or Apple GPU family 4)* -- `VK_EXT_shader_subgroup_vote` *(requires Mac GPU family 2 or Apple GPU family 4)* +- `VK_EXT_shader_atomic_float` + - *Requires Metal 3.0.* +- `VK_EXT_shader_demote_to_helper_invocation` + - *Requires Metal Shading Language 2.3.* +- `VK_EXT_shader_stencil_export` + - *Requires Mac GPU family 2 or iOS GPU family 5.* +- `VK_EXT_shader_subgroup_ballot` + - *Requires Mac GPU family 2 or Apple GPU family 4.* +- `VK_EXT_shader_subgroup_vote` + - *Requires Mac GPU family 2 or Apple GPU family 4.* - `VK_EXT_shader_viewport_index_layer` -- `VK_EXT_subgroup_size_control` *(requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS)* +- `VK_EXT_subgroup_size_control` + - *Requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS.* - `VK_EXT_surface_maintenance1` - `VK_EXT_swapchain_colorspace` - `VK_EXT_swapchain_maintenance1` - `VK_EXT_vertex_attribute_divisor` -- `VK_EXT_texel_buffer_alignment` *(requires Metal 2.0)* 
-- `VK_EXT_texture_compression_astc_hdr` *(iOS and macOS, requires family 6 (A13) or better Apple GPU)* -- `VK_MVK_ios_surface` *(iOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)* -- `VK_MVK_macos_surface` *(macOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)* +- `VK_EXT_texel_buffer_alignment` + - *Requires Metal 2.0.* +- `VK_EXT_texture_compression_astc_hdr` + - *iOS and macOS, requires family 6 (A13) or better Apple GPU.* +- `VK_MVK_ios_surface` + - *Obsolete. Use `VK_EXT_metal_surface` instead.* +- `VK_MVK_macos_surface` + - *Obsolete. Use `VK_EXT_metal_surface` instead.* - `VK_AMD_gpu_shader_half_float` - `VK_AMD_negative_viewport_height` -- `VK_AMD_shader_image_load_store_lod` *(requires Apple GPU)* -- `VK_AMD_shader_trinary_minmax` *(requires Metal 2.1)* -- `VK_IMG_format_pvrtc` *(requires Apple GPU)* +- `VK_AMD_shader_image_load_store_lod` + - *Requires Apple GPU.* +- `VK_AMD_shader_trinary_minmax` + - *Requires Metal 2.1.* +- `VK_IMG_format_pvrtc` + - *Requires Apple GPU.* - `VK_INTEL_shader_integer_functions2` -- `VK_NV_fragment_shader_barycentric` *(requires Metal 2.2 on Mac or Metal 2.3 on iOS)* +- `VK_NV_fragment_shader_barycentric` + - *Requires Metal 2.2 on Mac or Metal 2.3 on iOS.* - `VK_NV_glsl_shader` In order to visibly display your content on *macOS*, *iOS*, or *tvOS*, you must enable the
@@ -588,9 +623,9 @@ vailable when you request it, resulting in frame delays and visual stuttering. ### Timestamping -On non-Apple Silicon devices (older Mac devices), the GPU can switch power and performance -states as required by usage. This affects the GPU timestamps retrievable through the Vulkan -API. As a result, the value of `VkPhysicalDeviceLimits::timestampPeriod` can vary over time. +On non-Apple GPUs (older Mac devices), the GPU can switch power and performance states as +required by usage. This affects the GPU timestamps retrievable through the Vulkan API. +As a result, the value of `VkPhysicalDeviceLimits::timestampPeriod` can vary over time. Consider calling `vkGetPhysicalDeviceProperties()`, when needed, and retrieve the current value of `VkPhysicalDeviceLimits::timestampPeriod`, to help you calibrate recent GPU timestamps queried through the Vulkan API. @@ -622,6 +657,8 @@ Known **MoltenVK** Limitations ------------------------------ This section documents the known limitations in this version of **MoltenVK**. + +- See [above](#interaction) for known limitations for specific Vulkan extensions. - On *macOS* versions prior to *macOS 10.15.6*, native host-coherent image device memory is not available. 
Because of this, changes made to `VkImage VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` device memory by the CPU @@ -645,4 +682,3 @@ This section documents the known limitations in this version of **MoltenVK**. use the *Vulkan Loader and Layers* from the *[Vulkan SDK](https://vulkan.lunarg.com/sdk/home)*. Refer to the *Vulkan SDK [Getting Started](https://vulkan.lunarg.com/doc/sdk/latest/mac/getting_started.html)* document for more info. - diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 566c95bc..5fd04e02 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -13,15 +13,57 @@ Copyright (c) 2015-2023 [The Brenwill Workshop Ltd.](http://www.brenwill.com) -MoltenVK 1.2.6 +MoltenVK 1.2.7 -------------- Released TBD - Add support for extensions: + - `VK_EXT_extended_dynamic_state3` *(Metal does not support `VK_POLYGON_MODE_POINT`)* + - `VK_EXT_headless_surface` - `VK_EXT_layer_settings` -- Fix rare case where vertex attribute buffers are not bound to Metal - when no other bindings change between pipelines. +- Fix regression that broke `VK_POLYGON_MODE_LINE`. +- Fix regression in marking rendering state dirty after `vkCmdClearAttachments()`. +- Reduce disk space consumed after running `fetchDependencies` script by removing intermediate file caches. +- Fix rare deadlock during launch via `dlopen()`. +- Fix initial value of `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple Silicon GPUs. +- Fix swapchain and surface bugs when windowing system is accessed from off the main thread. +- Update to latest SPIRV-Cross: + - MSL: Fix regression error in argument buffer runtime arrays. + - MSL: Work around broken cube texture gradients on Apple Silicon. 
+ + + +MoltenVK 1.2.6 +-------------- + +Released 2023/10/17 + +- Add support for extensions: + - `VK_KHR_synchronization2` + - `VK_EXT_extended_dynamic_state` *(requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`)* + - `VK_EXT_extended_dynamic_state2` +- Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. +- Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. +- Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. +- Fix deadlock when reporting debug message on `MVKInstance` destruction. +- Fix MSL code used in `vkCmdBlitImage()` on depth-stencil formats. +- Improve behavior of swapchain image presentation stalls caused by Metal regression. +- `VkPhysicalDeviceLimits::timestampPeriod` set to 1.0 on Apple GPUs, and calculated dynamically on non-Apple GPUs. +- Add `MVKConfiguration::timestampPeriodLowPassAlpha` and environment variable + `MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA`, to add a configurable lowpass filter + for varying `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple GPUs. +- Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. +- Deprecate `MVK_DEBUG` env var, and add `MVK_CONFIG_DEBUG` env var to replace it. +- Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. +- Update dependency libraries to match _Vulkan SDK 1.3.268_. +- Update to latest SPIRV-Cross: + - MSL: Workaround Metal 3.1 regression bug on recursive input structs. + - MSL: fix extraction of global variables, in case of atomics. + - MSL: Workaround bizarre crash on macOS. + - MSL: runtime array over argument buffers. + - MSL: Make rw texture fences optional. + - MSL: Prevent RAW hazards on read_write textures. 
@@ -31,6 +73,7 @@ MoltenVK 1.2.5 Released 2023/08/15 - Add support for extensions: + - `VK_KHR_deferred_host_operations` - `VK_KHR_incremental_present` - `VK_KHR_shader_non_semantic_info` - `VK_EXT_4444_formats` diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision index 590969e2..cd24745d 100644 --- a/ExternalRevisions/SPIRV-Cross_repo_revision +++ b/ExternalRevisions/SPIRV-Cross_repo_revision @@ -1 +1 @@ -bccaa94db814af33d8ef05c153e7c34d8bd4d685 +50e90dd74e0e43e243f12a70f0326d2cf8ed3945 diff --git a/ExternalRevisions/Vulkan-Headers_repo_revision b/ExternalRevisions/Vulkan-Headers_repo_revision index a8ebecec..d9d0ec41 100644 --- a/ExternalRevisions/Vulkan-Headers_repo_revision +++ b/ExternalRevisions/Vulkan-Headers_repo_revision @@ -1 +1 @@ -85c2334e92e215cce34e8e0ed8b2dce4700f4a50 +19a863ccce773ff393b186329478b1eb1a519fd3 diff --git a/ExternalRevisions/Vulkan-Tools_repo_revision b/ExternalRevisions/Vulkan-Tools_repo_revision index bf77fda0..6ecf5015 100644 --- a/ExternalRevisions/Vulkan-Tools_repo_revision +++ b/ExternalRevisions/Vulkan-Tools_repo_revision @@ -1 +1 @@ -300d9bf6b3cf7b237ee5e2c1d0ae10b9236f82d3 +1532001f7edae559af1988293eec90bc5e2607d5 diff --git a/ExternalRevisions/glslang_repo_revision b/ExternalRevisions/glslang_repo_revision index aba7fbb1..cd54f4f3 100644 --- a/ExternalRevisions/glslang_repo_revision +++ b/ExternalRevisions/glslang_repo_revision @@ -1 +1 @@ -76b52ebf77833908dc4c0dd6c70a9c357ac720bd +be564292f00c5bf0d7251c11f1c9618eb1117762 diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj index 202efa91..be7ca325 100644 --- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj +++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj @@ -46,7 +46,7 @@ 2FEA0A6724902F9F00EEF3AD /* MVKCommonEnvironment.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429D1FB4CF82009FCCB8 /* MVKCommonEnvironment.h */; }; 2FEA0A6824902F9F00EEF3AD /* MVKWatermark.h in Headers 
*/ = {isa = PBXBuildFile; fileRef = A98149491FB6A3F7005F00B4 /* MVKWatermark.h */; }; 2FEA0A6924902F9F00EEF3AD /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; - 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; + 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; 2FEA0A6B24902F9F00EEF3AD /* MVKCmdPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB76E1C7DFB4800632CA3 /* MVKCmdPipeline.h */; }; 2FEA0A6C24902F9F00EEF3AD /* MVKSmallVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */; }; 2FEA0A6D24902F9F00EEF3AD /* MVKPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB78D1C7DFB4800632CA3 /* MVKPipeline.h */; }; @@ -90,7 +90,7 @@ 2FEA0A9424902F9F00EEF3AD /* MVKCommandPool.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB77B1C7DFB4800632CA3 /* MVKCommandPool.mm */; }; 2FEA0A9524902F9F00EEF3AD /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; }; 2FEA0A9624902F9F00EEF3AD /* MVKCommandBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7791C7DFB4800632CA3 /* MVKCommandBuffer.mm */; }; - 2FEA0A9724902F9F00EEF3AD /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; + 2FEA0A9724902F9F00EEF3AD /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; 2FEA0A9824902F9F00EEF3AD /* MVKBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7801C7DFB4800632CA3 /* MVKBuffer.mm */; }; 2FEA0A9924902F9F00EEF3AD /* mvk_datatypes.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A91C7DFB4800632CA3 /* 
mvk_datatypes.mm */; }; 2FEA0A9A24902F9F00EEF3AD /* MVKExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A909F65E213B190700FCD6BE /* MVKExtensions.mm */; }; @@ -117,7 +117,7 @@ 2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A11C7DFB4800632CA3 /* MVKLayers.mm */; }; 2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7881C7DFB4800632CA3 /* MVKFramebuffer.mm */; }; 2FEA0AB124902F9F00EEF3AD /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; }; - 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; + 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; }; 2FEA0AB324902F9F00EEF3AD /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; }; 2FEA0AB424902F9F00EEF3AD /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; }; 45003E73214AD4E500E989CB /* MVKExtensions.def in Headers */ = {isa = PBXBuildFile; fileRef = 45003E6F214AD4C900E989CB /* MVKExtensions.def */; }; @@ -172,10 +172,10 @@ A94FB7C11C7DFB4800632CA3 /* MVKCmdQueries.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */; }; A94FB7C21C7DFB4800632CA3 /* MVKCmdQueries.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */; }; A94FB7C31C7DFB4800632CA3 /* MVKCmdQueries.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */; }; - A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; - 
A94FB7C51C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; - A94FB7C61C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; - A94FB7C71C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; + A94FB7C41C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; + A94FB7C51C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; + A94FB7C61C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; + A94FB7C71C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; A94FB7C81C7DFB4800632CA3 /* MVKCmdDraw.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */; }; A94FB7C91C7DFB4800632CA3 /* MVKCmdDraw.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */; }; A94FB7CA1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; }; @@ -331,6 +331,10 @@ A9B51BD8225E986A00AC74D2 /* MVKOSExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */; }; A9B51BD9225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; A9B51BDA225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; + A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */ = {isa = 
PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; A9C96DD01DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; }; A9C96DD11DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; }; A9C96DD21DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; }; @@ -356,8 +360,8 @@ A9E53DE62100B197002781DD /* NSString+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD42100B197002781DD /* NSString+MoltenVK.mm */; }; A9E53DE72100B197002781DD /* MTLTextureDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */; }; A9E53DE82100B197002781DD /* MTLTextureDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */; }; - A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; - A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; + A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* 
CAMetalLayer+MoltenVK.mm */; }; + A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; }; A9E53DF32100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */; }; A9E53DF42100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */; }; A9E53DF52100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DF22100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m */; }; @@ -416,7 +420,7 @@ DCFD7F0B2A45BC6E007BBBF7 /* MVKCommonEnvironment.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429D1FB4CF82009FCCB8 /* MVKCommonEnvironment.h */; }; DCFD7F0C2A45BC6E007BBBF7 /* MVKWatermark.h in Headers */ = {isa = PBXBuildFile; fileRef = A98149491FB6A3F7005F00B4 /* MVKWatermark.h */; }; DCFD7F0D2A45BC6E007BBBF7 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; - DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; + DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; DCFD7F0F2A45BC6E007BBBF7 /* MVKCmdPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB76E1C7DFB4800632CA3 /* MVKCmdPipeline.h */; }; DCFD7F102A45BC6E007BBBF7 /* MVKSmallVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */; }; DCFD7F112A45BC6E007BBBF7 /* MVKPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB78D1C7DFB4800632CA3 /* MVKPipeline.h */; }; @@ -462,7 +466,7 @@ DCFD7F3A2A45BC6E007BBBF7 /* 
MVKCommandPool.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB77B1C7DFB4800632CA3 /* MVKCommandPool.mm */; }; DCFD7F3B2A45BC6E007BBBF7 /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; }; DCFD7F3C2A45BC6E007BBBF7 /* MVKCommandBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7791C7DFB4800632CA3 /* MVKCommandBuffer.mm */; }; - DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; + DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; DCFD7F3E2A45BC6E007BBBF7 /* MVKBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7801C7DFB4800632CA3 /* MVKBuffer.mm */; }; DCFD7F3F2A45BC6E007BBBF7 /* MVKEnvironment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A9A5E9C525C0822700E9085E /* MVKEnvironment.cpp */; }; DCFD7F402A45BC6E007BBBF7 /* mvk_datatypes.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A91C7DFB4800632CA3 /* mvk_datatypes.mm */; }; @@ -491,7 +495,7 @@ DCFD7F572A45BC6E007BBBF7 /* MVKFramebuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7881C7DFB4800632CA3 /* MVKFramebuffer.mm */; }; DCFD7F582A45BC6E007BBBF7 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = 453638302508A4C6000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m */; }; DCFD7F592A45BC6E007BBBF7 /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; }; - DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; + DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; }; 
DCFD7F5B2A45BC6E007BBBF7 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; }; DCFD7F5C2A45BC6E007BBBF7 /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; }; /* End PBXBuildFile section */ @@ -591,8 +595,8 @@ A94FB76F1C7DFB4800632CA3 /* MVKCmdPipeline.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdPipeline.mm; sourceTree = ""; }; A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdQueries.h; sourceTree = ""; }; A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdQueries.mm; sourceTree = ""; }; - A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdRenderPass.h; sourceTree = ""; }; - A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdRenderPass.mm; sourceTree = ""; }; + A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdRendering.h; sourceTree = ""; }; + A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdRendering.mm; sourceTree = ""; }; A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdDraw.h; sourceTree = ""; }; A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdDraw.mm; sourceTree = ""; }; A94FB7761C7DFB4800632CA3 /* MVKCommand.h */ = {isa 
= PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCommand.h; sourceTree = ""; }; @@ -670,6 +674,7 @@ A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKOSExtensions.mm; sourceTree = ""; }; A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKOSExtensions.h; sourceTree = ""; }; A9B8EE0A1A98D796009C5A02 /* libMoltenVK.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libMoltenVK.a; sourceTree = BUILT_PRODUCTS_DIR; }; + A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKConfigMembers.def; sourceTree = ""; }; A9C83DCD24533E22003E5261 /* MVKCommandTypePools.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKCommandTypePools.def; sourceTree = ""; }; A9C86CB61C55B8350096CAF2 /* MoltenVKShaderConverter.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = MoltenVKShaderConverter.xcodeproj; path = ../MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj; sourceTree = ""; }; A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKMTLBufferAllocation.h; sourceTree = ""; }; @@ -686,7 +691,7 @@ A9E53DD32100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "MTLSamplerDescriptor+MoltenVK.h"; sourceTree = ""; }; A9E53DD42100B197002781DD /* NSString+MoltenVK.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "NSString+MoltenVK.mm"; sourceTree = ""; }; A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLTextureDescriptor+MoltenVK.m"; sourceTree = ""; }; - A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "CAMetalLayer+MoltenVK.m"; sourceTree = ""; }; + A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "CAMetalLayer+MoltenVK.mm"; sourceTree = ""; }; A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "MTLRenderPassDescriptor+MoltenVK.h"; sourceTree = ""; }; A9E53DF22100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLRenderPassDescriptor+MoltenVK.m"; sourceTree = ""; }; A9E53DFA21064F84002781DD /* MTLRenderPipelineDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLRenderPipelineDescriptor+MoltenVK.m"; sourceTree = ""; }; @@ -739,8 +744,8 @@ A94FB76F1C7DFB4800632CA3 /* MVKCmdPipeline.mm */, A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */, A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */, - A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */, - A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */, + A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */, + A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */, A94FB76C1C7DFB4800632CA3 /* MVKCmdTransfer.h */, A94FB76D1C7DFB4800632CA3 /* MVKCmdTransfer.mm */, A94FB7761C7DFB4800632CA3 /* MVKCommand.h */, @@ -843,6 +848,7 @@ 4553AEF62251617100E8EBCD /* MVKBlockObserver.m */, 45557A5121C9EFF3008868BD /* MVKCodec.h */, 45557A4D21C9EFF3008868BD /* MVKCodec.mm */, + A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */, 45557A5721CD83C3008868BD /* MVKDXTnCodec.def */, A9A5E9C525C0822700E9085E /* 
MVKEnvironment.cpp */, A98149431FB6A3F7005F00B4 /* MVKEnvironment.h */, @@ -883,7 +889,7 @@ isa = PBXGroup; children = ( A9E53DD12100B197002781DD /* CAMetalLayer+MoltenVK.h */, - A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */, + A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */, 453638312508A4C7000EFFD3 /* MTLRenderPassDepthAttachmentDescriptor+MoltenVK.h */, 4536382F2508A4C6000EFFD3 /* MTLRenderPassDepthAttachmentDescriptor+MoltenVK.m */, A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */, @@ -989,7 +995,7 @@ 2FEA0A6724902F9F00EEF3AD /* MVKCommonEnvironment.h in Headers */, 2FEA0A6824902F9F00EEF3AD /* MVKWatermark.h in Headers */, 2FEA0A6924902F9F00EEF3AD /* MVKOSExtensions.h in Headers */, - 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRenderPass.h in Headers */, + 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRendering.h in Headers */, 2FEA0A6B24902F9F00EEF3AD /* MVKCmdPipeline.h in Headers */, 2FEA0A6C24902F9F00EEF3AD /* MVKSmallVectorAllocator.h in Headers */, 2FEA0A6D24902F9F00EEF3AD /* MVKPipeline.h in Headers */, @@ -1008,6 +1014,7 @@ 2FEA0A7824902F9F00EEF3AD /* MVKDeviceMemory.h in Headers */, 2FEA0A7924902F9F00EEF3AD /* MVKMTLResourceBindings.h in Headers */, 2FEA0A7A24902F9F00EEF3AD /* MVKExtensions.def in Headers */, + A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */, 2FEA0A7B24902F9F00EEF3AD /* mvk_datatypes.hpp in Headers */, 2FEA0A7C24902F9F00EEF3AD /* MVKCommandEncodingPool.h in Headers */, 2FEA0A7D24902F9F00EEF3AD /* MVKResource.h in Headers */, @@ -1067,9 +1074,10 @@ A9F042A41FB4CF83009FCCB8 /* MVKCommonEnvironment.h in Headers */, A981495D1FB6A3F7005F00B4 /* MVKWatermark.h in Headers */, A9B51BD9225E986A00AC74D2 /* MVKOSExtensions.h in Headers */, - A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */, + A94FB7C41C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */, A94FB7BC1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DC24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, + A9C327562AAFBD390025EE79 
/* MVKConfigMembers.def in Headers */, A94FB7F81C7DFB4800632CA3 /* MVKPipeline.h in Headers */, A94FB7F01C7DFB4800632CA3 /* MVKImage.h in Headers */, 4553AEFD2251617100E8EBCD /* MVKBlockObserver.h in Headers */, @@ -1143,10 +1151,11 @@ A9F042A51FB4CF83009FCCB8 /* MVKCommonEnvironment.h in Headers */, A981495E1FB6A3F7005F00B4 /* MVKWatermark.h in Headers */, A9B51BDA225E986A00AC74D2 /* MVKOSExtensions.h in Headers */, - A94FB7C51C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */, + A94FB7C51C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */, A94FB7BD1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DD24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, A94FB7F91C7DFB4800632CA3 /* MVKPipeline.h in Headers */, + A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */, A94FB7F11C7DFB4800632CA3 /* MVKImage.h in Headers */, 4553AEFE2251617100E8EBCD /* MVKBlockObserver.h in Headers */, A94FB7B91C7DFB4800632CA3 /* MVKCmdTransfer.h in Headers */, @@ -1204,6 +1213,7 @@ DCFD7EFD2A45BC6E007BBBF7 /* MVKSync.h in Headers */, DCFD7EFE2A45BC6E007BBBF7 /* MVKDevice.h in Headers */, DCFD7EFF2A45BC6E007BBBF7 /* MVKSmallVector.h in Headers */, + A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */, DCFD7F002A45BC6E007BBBF7 /* MVKCommandPool.h in Headers */, DCFD7F012A45BC6E007BBBF7 /* MVKShaderModule.h in Headers */, DCFD7F022A45BC6E007BBBF7 /* MVKVulkanAPIObject.h in Headers */, @@ -1218,7 +1228,7 @@ DCFD7F0B2A45BC6E007BBBF7 /* MVKCommonEnvironment.h in Headers */, DCFD7F0C2A45BC6E007BBBF7 /* MVKWatermark.h in Headers */, DCFD7F0D2A45BC6E007BBBF7 /* MVKOSExtensions.h in Headers */, - DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRenderPass.h in Headers */, + DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRendering.h in Headers */, DCFD7F0F2A45BC6E007BBBF7 /* MVKCmdPipeline.h in Headers */, DCFD7F102A45BC6E007BBBF7 /* MVKSmallVectorAllocator.h in Headers */, DCFD7F112A45BC6E007BBBF7 /* MVKPipeline.h in Headers */, @@ -1664,7 +1674,7 @@ 2FEA0A9424902F9F00EEF3AD /* 
MVKCommandPool.mm in Sources */, 2FEA0A9524902F9F00EEF3AD /* MVKCmdDraw.mm in Sources */, 2FEA0A9624902F9F00EEF3AD /* MVKCommandBuffer.mm in Sources */, - 2FEA0A9724902F9F00EEF3AD /* MVKCmdRenderPass.mm in Sources */, + 2FEA0A9724902F9F00EEF3AD /* MVKCmdRendering.mm in Sources */, 2FEA0A9824902F9F00EEF3AD /* MVKBuffer.mm in Sources */, 2FEA0A9924902F9F00EEF3AD /* mvk_datatypes.mm in Sources */, 2FEA0A9A24902F9F00EEF3AD /* MVKExtensions.mm in Sources */, @@ -1693,7 +1703,7 @@ 2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */, 2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */, 2FEA0AB124902F9F00EEF3AD /* MVKMTLBufferAllocation.mm in Sources */, - 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.m in Sources */, + 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.mm in Sources */, 2FEA0AB324902F9F00EEF3AD /* MVKCmdDispatch.mm in Sources */, 2FEA0AB424902F9F00EEF3AD /* MVKCmdDebug.mm in Sources */, ); @@ -1724,7 +1734,7 @@ A94FB7D61C7DFB4800632CA3 /* MVKCommandPool.mm in Sources */, A94FB7CA1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */, A94FB7D21C7DFB4800632CA3 /* MVKCommandBuffer.mm in Sources */, - A94FB7C61C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */, + A94FB7C61C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */, A94FB7DE1C7DFB4800632CA3 /* MVKBuffer.mm in Sources */, A9A5E9C725C0822700E9085E /* MVKEnvironment.cpp in Sources */, A94FB82A1C7DFB4800632CA3 /* mvk_datatypes.mm in Sources */, @@ -1753,7 +1763,7 @@ A94FB7EE1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */, 453638382508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */, A9C96DD21DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */, - A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */, + A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */, A9096E5E1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */, A99C90F0229455B300A061DA /* MVKCmdDebug.mm in Sources */, ); @@ -1784,7 +1794,7 @@ A94FB7D71C7DFB4800632CA3 /* 
MVKCommandPool.mm in Sources */, A94FB7CB1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */, A94FB7D31C7DFB4800632CA3 /* MVKCommandBuffer.mm in Sources */, - A94FB7C71C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */, + A94FB7C71C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */, A94FB7DF1C7DFB4800632CA3 /* MVKBuffer.mm in Sources */, A9A5E9C925C0822700E9085E /* MVKEnvironment.cpp in Sources */, A94FB82B1C7DFB4800632CA3 /* mvk_datatypes.mm in Sources */, @@ -1813,7 +1823,7 @@ A94FB7EF1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */, 4536383A2508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */, A9C96DD31DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */, - A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */, + A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */, A9096E5F1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */, A99C90F1229455B300A061DA /* MVKCmdDebug.mm in Sources */, ); @@ -1844,7 +1854,7 @@ DCFD7F3A2A45BC6E007BBBF7 /* MVKCommandPool.mm in Sources */, DCFD7F3B2A45BC6E007BBBF7 /* MVKCmdDraw.mm in Sources */, DCFD7F3C2A45BC6E007BBBF7 /* MVKCommandBuffer.mm in Sources */, - DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRenderPass.mm in Sources */, + DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRendering.mm in Sources */, DCFD7F3E2A45BC6E007BBBF7 /* MVKBuffer.mm in Sources */, DCFD7F3F2A45BC6E007BBBF7 /* MVKEnvironment.cpp in Sources */, DCFD7F402A45BC6E007BBBF7 /* mvk_datatypes.mm in Sources */, @@ -1873,7 +1883,7 @@ DCFD7F572A45BC6E007BBBF7 /* MVKFramebuffer.mm in Sources */, DCFD7F582A45BC6E007BBBF7 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */, DCFD7F592A45BC6E007BBBF7 /* MVKMTLBufferAllocation.mm in Sources */, - DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.m in Sources */, + DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.mm in Sources */, DCFD7F5B2A45BC6E007BBBF7 /* MVKCmdDispatch.mm in Sources */, DCFD7F5C2A45BC6E007BBBF7 /* MVKCmdDebug.mm in Sources */, ); diff --git 
a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index 0a4c25e7..b9cd2f65 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -45,13 +45,13 @@ extern "C" { */ #define MVK_VERSION_MAJOR 1 #define MVK_VERSION_MINOR 2 -#define MVK_VERSION_PATCH 5 +#define MVK_VERSION_PATCH 7 #define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch)) #define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) -#define MVK_CONFIGURATION_API_VERSION 37 +#define MVK_CONFIGURATION_API_VERSION 38 /** Identifies the level of logging MoltenVK should be limited to outputting. */ typedef enum MVKConfigLogLevel { @@ -138,10 +138,11 @@ typedef enum MVKConfigCompressionAlgorithm { /** Identifies the style of activity performance logging to use. */ typedef enum MVKConfigActivityPerformanceLoggingStyle { - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF, + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. 
*/ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE = 3, /**< Log at the end of the VkDevice lifetime, but continue to accumulate across multiple VkDevices throughout the app process. This is useful for testing frameworks that create many VkDevices serially. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF, } MVKConfigActivityPerformanceLoggingStyle; /** @@ -786,6 +787,8 @@ typedef struct { /** * Controls when MoltenVK should log activity performance events. * + * The performanceTracking parameter must also be enabled. + * * The value of this parameter must be changed before creating a VkDevice, * for the change to take effect. * @@ -916,6 +919,9 @@ typedef struct { /** * Maximize the concurrent executing compilation tasks. * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * * The initial value or this parameter is set by the * MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION * runtime environment variable or MoltenVK compile-time build setting. @@ -923,6 +929,38 @@ typedef struct { */ VkBool32 shouldMaximizeConcurrentCompilation; + /** + * This parameter is ignored on Apple Silicon devices. + * + * Non-Apple GPUs can have a dynamic timestamp period, which varies over time according to GPU + * workload. Depending on how often the app samples the VkPhysicalDeviceLimits::timestampPeriod + * value using vkGetPhysicalDeviceProperties(), the app may want up-to-date, but potentially + * volatile values, or it may find average values more useful. + * + * The value of this parameter sets the alpha (A) value of a simple lowpass filter + * on the timestampPeriod value, of the form: + * + * TPout = (1 - A)TPout + (A * TPin) + * + * The alpha value can be set to a float between 0.0 and 1.0. 
Values of alpha closer to + * 0.0 cause the value of timestampPeriod to vary slowly over time and be less volatile, + * and values of alpha closer to 1.0 cause the value of timestampPeriod to vary quickly + * and be more volatile. + * + * Apps that query the timestampPeriod value infrequently will prefer low volatility, whereas + * apps that query frequently may prefer higher volatility, to track more recent changes. + * + * The value of this parameter can be changed at any time, and will affect subsequent queries. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this parameter is set to 1.0 by default, + * indicating that the timestampPeriod will vary relatively slowly, + * with the expectation that the app is querying this value infrequently. + */ + float timestampPeriodLowPassAlpha; + } MVKConfiguration; diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h index b0e2dac7..eabb5aa3 100644 --- a/MoltenVK/MoltenVK/API/mvk_datatypes.h +++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h @@ -383,6 +383,9 @@ MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport); /** Returns the Metal MTLScissorRect corresponding to the specified Vulkan VkRect2D. */ MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect); +/** Returns the Vulkan VkRect2D corresponding to the specified Metal MTLScissorRect. 
*/ +VkRect2D mvkVkRect2DFromMTLScissorRect(MTLScissorRect mtlScissorRect); + /** Returns the Metal MTLCompareFunction corresponding to the specified Vulkan VkCompareOp, */ MTLCompareFunction mvkMTLCompareFunctionFromVkCompareOp(VkCompareOp vkOp); @@ -414,13 +417,13 @@ MTLWinding mvkMTLWindingFromSpvExecutionMode(uint32_t spvMode); MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode(uint32_t spvMode); /** - * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPiplineStageFlags, + * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPipelineStageFlags2, * taking into consideration whether the barrier is to be placed before or after the specified pipeline stages. */ -MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, bool placeBarrierBefore); +MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore); -/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags. */ -MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess); +/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags2. */ +MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess); #pragma mark - #pragma mark Geometry conversions diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 87bc8ad9..dd0b31ca 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier; */ -#define MVK_PRIVATE_API_VERSION 37 +#define MVK_PRIVATE_API_VERSION 38 /** Identifies the type of rounding Metal uses for float to integer conversions in particular calculatons. 
*/ @@ -151,47 +151,60 @@ typedef struct { MTLArgumentBuffersTier argumentBuffersTier; /**< The argument buffer tier available on this device, as a Metal enumeration. */ VkBool32 needsSampleDrefLodArrayWorkaround; /**< If true, sampling from arrayed depth images with explicit LoD is broken and needs a workaround. */ VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */ + VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */ + VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */ } MVKPhysicalDeviceMetalFeatures; -/** MoltenVK performance of a particular type of activity. */ +/** + * MoltenVK performance of a particular type of activity. + * Durations are recorded in milliseconds. Memory sizes are recorded in kilobytes. + */ typedef struct { - uint32_t count; /**< The number of activities of this type. */ - double latestDuration; /**< The latest (most recent) duration of the activity, in milliseconds. */ - double averageDuration; /**< The average duration of the activity, in milliseconds. */ - double minimumDuration; /**< The minimum duration of the activity, in milliseconds. */ - double maximumDuration; /**< The maximum duration of the activity, in milliseconds. */ + uint32_t count; /**< The number of activities of this type. */ + double latest; /**< The latest (most recent) value of the activity. */ + double average; /**< The average value of the activity. */ + double minimum; /**< The minimum value of the activity. */ + double maximum; /**< The maximum value of the activity. */ } MVKPerformanceTracker; /** MoltenVK performance of shader compilation activities. */ typedef struct { - MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code. */ - MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code. 
*/ - MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary. */ - MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary. */ - MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache. */ - MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache. */ - MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed. */ - MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary. */ - MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction. */ - MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline. */ - MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code. */ + MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code, in milliseconds. */ + MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code, in milliseconds. */ + MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache, in milliseconds. */ + MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache, in milliseconds. */ + MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed, in milliseconds. */ + MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction, in milliseconds. 
*/ + MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline, in milliseconds. */ + MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code, in milliseconds. */ } MVKShaderCompilationPerformance; /** MoltenVK performance of pipeline cache activities. */ typedef struct { - MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream. */ - MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream. */ - MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream. */ + MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream, in milliseconds. */ + MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream, in milliseconds. */ + MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream, in milliseconds. */ } MVKPipelineCachePerformance; /** MoltenVK performance of queue activities. */ typedef struct { - MVKPerformanceTracker mtlQueueAccess; /** Create an MTLCommandQueue or access an existing cached instance. */ - MVKPerformanceTracker mtlCommandBufferCompletion; /** Completion of a MTLCommandBuffer on the GPU, from commit to completion callback. */ - MVKPerformanceTracker nextCAMetalDrawable; /** Retrieve next CAMetalDrawable from CAMetalLayer during presentation. */ - MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */ + MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */ + MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. 
*/ + MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */ + MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. */ + MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */ + MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */ + MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS), in milliseconds. */ } MVKQueuePerformance; +/** MoltenVK performance of device activities. */ +typedef struct { + MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated, in kilobytes. */ +} MVKDevicePerformance; + /** * MoltenVK performance. You can retrieve a copy of this structure using the vkGetPerformanceStatisticsMVK() function. * @@ -209,6 +222,7 @@ typedef struct { MVKShaderCompilationPerformance shaderCompilation; /** Shader compilations activities. */ MVKPipelineCachePerformance pipelineCache; /** Pipeline cache activities. */ MVKQueuePerformance queue; /** Queue activities. */ + MVKDevicePerformance device; /** Device activities. 
*/ } MVKPerformanceStatistics; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm index 1125963d..020f04b5 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm @@ -46,7 +46,7 @@ void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) { MTLRegion mtlThreadgroupCount = MTLRegionMake3D(_baseGroupX, _baseGroupY, _baseGroupZ, _groupCountX, _groupCountY, _groupCountZ); cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal id mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch); - auto* pipeline = (MVKComputePipeline*)cmdEncoder->_computePipelineState.getPipeline(); + auto* pipeline = cmdEncoder->_computePipelineState.getComputePipeline(); if (pipeline->allowsDispatchBase()) { if ([mtlEncoder respondsToSelector: @selector(setStageInRegion:)]) { // We'll use the stage-input region to pass the base along to the shader. diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h index c1e4a693..2b9696ab 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h @@ -37,10 +37,12 @@ class MVKCmdBindVertexBuffers : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - uint32_t startBinding, + uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets); + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides); void encode(MVKCommandEncoder* cmdEncoder) override; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index 20d27815..87515ba1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -30,20 +30,23 @@ template VkResult MVKCmdBindVertexBuffers::setContent(MVKCommandBuffer* cmdBuff, - uint32_t startBinding, + uint32_t firstBinding, uint32_t 
bindingCount, const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) { - + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides) { MVKDevice* mvkDvc = cmdBuff->getDevice(); _bindings.clear(); // Clear for reuse _bindings.reserve(bindingCount); MVKMTLBufferBinding b; for (uint32_t bindIdx = 0; bindIdx < bindingCount; bindIdx++) { MVKBuffer* mvkBuffer = (MVKBuffer*)pBuffers[bindIdx]; - b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(startBinding + bindIdx); + b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(firstBinding + bindIdx); b.mtlBuffer = mvkBuffer->getMTLBuffer(); b.offset = mvkBuffer->getMTLBufferOffset() + pOffsets[bindIdx]; + b.size = pSizes ? (uint32_t)pSizes[bindIdx] : 0; + b.stride = pStrides ? (uint32_t)pStrides[bindIdx] : 0; _bindings.push_back(b); } @@ -141,12 +144,11 @@ void MVKCmdDraw::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { - if (_vertexCount == 0 || _instanceCount == 0) { - // Nothing to do. - return; - } + if (_vertexCount == 0 || _instanceCount == 0) { return; } // Nothing to do. - auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + cmdEncoder->restartMetalRenderPassIfNeeded(); + + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. 
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -169,7 +171,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { } tessParams; uint32_t outControlPointCount = 0; if (pipeline->isTessellationPipeline()) { - tessParams.inControlPointCount = pipeline->getInputControlPointCount(); + tessParams.inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); tessParams.patchCount = mvkCeilingDivide(_vertexCount, tessParams.inControlPointCount) * _instanceCount; } @@ -296,13 +298,13 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType() vertexStart: _firstVertex vertexCount: _vertexCount instanceCount: instanceCount baseInstance: _firstInstance]; } else { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType() vertexStart: _firstVertex vertexCount: _vertexCount instanceCount: instanceCount]; @@ -366,12 +368,11 @@ void MVKCmdDrawIndexed::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { - if (_indexCount == 0 || _instanceCount == 0) { - // Nothing to do. - return; - } + if (_indexCount == 0 || _instanceCount == 0) { return; } // Nothing to do. 
- auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + cmdEncoder->restartMetalRenderPassIfNeeded(); + + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -398,7 +399,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { } tessParams; uint32_t outControlPointCount = 0; if (pipeline->isTessellationPipeline()) { - tessParams.inControlPointCount = pipeline->getInputControlPointCount(); + tessParams.inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); tessParams.patchCount = mvkCeilingDivide(_indexCount, tessParams.inControlPointCount) * _instanceCount; } @@ -530,7 +531,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer @@ -539,7 +540,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { baseVertex: _vertexOffset baseInstance: _firstInstance]; } else { - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer @@ -646,7 +647,9 @@ void 
MVKCmdDrawIndirect::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { - auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + cmdEncoder->restartMetalRenderPassIfNeeded(); + + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -683,7 +686,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { // encoding and execution. So we don't know how big to make the buffers. // We must assume an arbitrarily large number of vertices may be submitted. // But not too many, or we'll exhaust available VRAM. - inControlPointCount = pipeline->getInputControlPointCount(); + inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); @@ -925,7 +928,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->_graphicsResourcesState.beginMetalRenderPass(); cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass(); } else { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indirectBuffer: mtlIndBuff indirectBufferOffset: mtlIndBuffOfst]; mtlIndBuffOfst += needsInstanceAdjustment ? 
sizeof(MTLDrawPrimitivesIndirectArguments) : _mtlIndirectBufferStride; @@ -987,6 +990,7 @@ VkResult MVKCmdDrawIndexedIndirect::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->restartMetalRenderPassIfNeeded(); encode(cmdEncoder, cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding); } @@ -996,7 +1000,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI MVKIndexMTLBufferBinding ibb = ibbOrig; MVKIndexMTLBufferBinding ibbTriFan = ibb; - auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); MVKVertexAdjustments vtxAdjmts; vtxAdjmts.mtlIndexType = ibb.mtlIndexType; @@ -1031,7 +1035,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI // encoding and execution. So we don't know how big to make the buffers. // We must assume an arbitrarily large number of vertices may be submitted. // But not too many, or we'll exhaust available VRAM. 
- inControlPointCount = pipeline->getInputControlPointCount(); + inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); @@ -1312,7 +1316,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass(); } else { cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _directCmdFirstInstance); - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer indexBufferOffset: ibb.offset diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h index 8e177256..84bc923a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h @@ -30,6 +30,34 @@ class MVKDescriptorSet; class MVKDescriptorUpdateTemplate; +#pragma mark - +#pragma mark MVKCmdExecuteCommands + +/** + * Vulkan command to execute secondary command buffers. + * Template class to balance vector pre-allocations between very common low counts and fewer larger counts. + */ +template +class MVKCmdExecuteCommands : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCommandBuffers); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + MVKSmallVector _secondaryCommandBuffers; +}; + +// Concrete template class implementations. 
+typedef MVKCmdExecuteCommands<1> MVKCmdExecuteCommands1; +typedef MVKCmdExecuteCommands<16> MVKCmdExecuteCommandsMulti; + + #pragma mark - #pragma mark MVKCmdPipelineBarrier @@ -41,6 +69,9 @@ template class MVKCmdPipelineBarrier : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkDependencyInfo* pDependencyInfo); + VkResult setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -59,8 +90,6 @@ protected: bool coversTextures(); MVKSmallVector _barriers; - VkPipelineStageFlags _srcStageMask; - VkPipelineStageFlags _dstStageMask; VkDependencyFlags _dependencyFlags; }; @@ -281,34 +310,26 @@ protected: #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -/** Abstract Vulkan command to set or reset an event. */ -class MVKCmdSetResetEvent : public MVKCommand { +/** Vulkan command to set an event. */ +class MVKCmdSetEvent : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + const VkDependencyInfo* pDependencyInfo); + VkResult setContent(MVKCommandBuffer* cmdBuff, VkEvent event, VkPipelineStageFlags stageMask); -protected: - MVKEvent* _mvkEvent; - -}; - - -#pragma mark - -#pragma mark MVKCmdSetEvent - -/** Vulkan command to set an event. */ -class MVKCmdSetEvent : public MVKCmdSetResetEvent { - -public: void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -316,14 +337,19 @@ protected: #pragma mark MVKCmdResetEvent /** Vulkan command to reset an event. 
*/ -class MVKCmdResetEvent : public MVKCmdSetResetEvent { +class MVKCmdResetEvent : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask); + void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -339,6 +365,11 @@ template class MVKCmdWaitEvents : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos); + VkResult setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, const VkEvent* pEvents, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm index 3efcab53..1a30f550 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm @@ -26,9 +26,59 @@ #include "mvk_datatypes.hpp" +#pragma mark - +#pragma mark MVKCmdExecuteCommands + +template +VkResult MVKCmdExecuteCommands::setContent(MVKCommandBuffer* cmdBuff, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCommandBuffers) { + // Add clear values + _secondaryCommandBuffers.clear(); // Clear for reuse + _secondaryCommandBuffers.reserve(commandBuffersCount); + for (uint32_t cbIdx = 0; cbIdx < commandBuffersCount; cbIdx++) { + _secondaryCommandBuffers.push_back(MVKCommandBuffer::getMVKCommandBuffer(pCommandBuffers[cbIdx])); + } + cmdBuff->recordExecuteCommands(_secondaryCommandBuffers.contents()); + + return VK_SUCCESS; +} + +template +void MVKCmdExecuteCommands::encode(MVKCommandEncoder* cmdEncoder) { + for (auto& cb : _secondaryCommandBuffers) { cmdEncoder->encodeSecondary(cb); } +} + +template class MVKCmdExecuteCommands<1>; +template class MVKCmdExecuteCommands<16>; + + #pragma mark - #pragma mark MVKCmdPipelineBarrier +template +VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, + const 
VkDependencyInfo* pDependencyInfo) { + _dependencyFlags = pDependencyInfo->dependencyFlags; + + _barriers.clear(); // Clear for reuse + _barriers.reserve(pDependencyInfo->memoryBarrierCount + + pDependencyInfo->bufferMemoryBarrierCount + + pDependencyInfo->imageMemoryBarrierCount); + + for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pBufferMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pImageMemoryBarriers[i]); + } + + return VK_SUCCESS; +} + template VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, @@ -40,21 +90,19 @@ VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers) { - _srcStageMask = srcStageMask; - _dstStageMask = dstStageMask; _dependencyFlags = dependencyFlags; _barriers.clear(); // Clear for reuse _barriers.reserve(memoryBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount); for (uint32_t i = 0; i < memoryBarrierCount; i++) { - _barriers.emplace_back(pMemoryBarriers[i]); + _barriers.emplace_back(pMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - _barriers.emplace_back(pBufferMemoryBarriers[i]); + _barriers.emplace_back(pBufferMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - _barriers.emplace_back(pImageMemoryBarriers[i]); + _barriers.emplace_back(pImageMemoryBarriers[i], srcStageMask, dstStageMask); } return VK_SUCCESS; @@ -67,13 +115,9 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { // Calls below 
invoke MTLBlitCommandEncoder so must apply this first. // Check if pipeline barriers are available and we are in a renderpass. if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) { - MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_srcStageMask, false); - MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_dstStageMask, true); - - id resources[_barriers.size()]; - uint32_t rezCnt = 0; - for (auto& b : _barriers) { + MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.srcStageMask, false); + MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.dstStageMask, true); switch (b.type) { case MVKPipelineBarrier::Memory: { MTLBarrierScope scope = (mvkMTLBarrierScopeFromVkAccessFlags(b.srcAccessMask) | @@ -84,27 +128,30 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { break; } - case MVKPipelineBarrier::Buffer: - resources[rezCnt++] = b.mvkBuffer->getMTLBuffer(); + case MVKPipelineBarrier::Buffer: { + id mtlRez = b.mvkBuffer->getMTLBuffer(); + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: &mtlRez + count: 1 + afterStages: srcStages + beforeStages: dstStages]; break; - - case MVKPipelineBarrier::Image: - for (uint8_t planeIndex = 0; planeIndex < b.mvkImage->getPlaneCount(); planeIndex++) { - resources[rezCnt++] = b.mvkImage->getMTLTexture(planeIndex); - } + } + case MVKPipelineBarrier::Image: { + uint32_t plnCnt = b.mvkImage->getPlaneCount(); + id mtlRezs[plnCnt]; + for (uint8_t plnIdx = 0; plnIdx < plnCnt; plnIdx++) { + mtlRezs[plnIdx] = b.mvkImage->getMTLTexture(plnIdx); + } + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: mtlRezs + count: plnCnt + afterStages: srcStages + beforeStages: dstStages]; break; - + } default: break; } } - - if (rezCnt) { - [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: resources - count: rezCnt - afterStages: srcStages - beforeStages: dstStages]; - } } else if 
(cmdEncoder->getDevice()->_pMetalFeatures->textureBarriers) { #if !MVK_MACCAT if (coversTextures()) { [cmdEncoder->_mtlRenderEncoder textureBarrier]; } @@ -138,15 +185,15 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { for (auto& b : _barriers) { switch (b.type) { case MVKPipelineBarrier::Memory: - mvkDvc->applyMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + mvkDvc->applyMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Buffer: - b.mvkBuffer->applyBufferMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkBuffer->applyBufferMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Image: - b.mvkImage->applyImageMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkImage->applyImageMemoryBarrier(b, cmdEncoder, cmdUse); break; default: @@ -493,19 +540,23 @@ MVKCmdPushDescriptorSetWithTemplate::~MVKCmdPushDescriptorSetWithTemplate() { #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -VkResult MVKCmdSetResetEvent::setContent(MVKCommandBuffer* cmdBuff, - VkEvent event, - VkPipelineStageFlags stageMask) { +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags stageMask) { _mvkEvent = (MVKEvent*)event; return VK_SUCCESS; } +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + const VkDependencyInfo* pDependencyInfo) { + _mvkEvent = (MVKEvent*)event; -#pragma mark - -#pragma mark MVKCmdSetEvent + return VK_SUCCESS; +} void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, true); @@ -515,6 +566,14 @@ void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) { #pragma mark - #pragma mark MVKCmdResetEvent +VkResult MVKCmdResetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask) { + _mvkEvent = (MVKEvent*)event; + + return VK_SUCCESS; +} + void 
MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, false); } @@ -523,6 +582,20 @@ void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) { #pragma mark - #pragma mark MVKCmdWaitEvents +template +VkResult MVKCmdWaitEvents::setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos) { + _mvkEvents.clear(); // Clear for reuse + _mvkEvents.reserve(eventCount); + for (uint32_t i = 0; i < eventCount; i++) { + _mvkEvents.push_back((MVKEvent*)pEvents[i]); + } + + return VK_SUCCESS; +} + template VkResult MVKCmdWaitEvents::setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h index baa58863..6b3686e8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h @@ -85,7 +85,7 @@ class MVKCmdWriteTimestamp : public MVKCmdQuery { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query); @@ -94,7 +94,7 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - VkPipelineStageFlagBits _pipelineStage; + VkPipelineStageFlags2 _stage; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm index bc5ba9c6..aac431fb 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm @@ -77,13 +77,13 @@ void MVKCmdEndQuery::encode(MVKCommandEncoder* cmdEncoder) { #pragma mark MVKCmdWriteTimestamp VkResult MVKCmdWriteTimestamp::setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query) { VkResult rslt = MVKCmdQuery::setContent(cmdBuff, queryPool, query); - _pipelineStage = pipelineStage; + _stage = stage; 
cmdBuff->recordTimestampCommand(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h similarity index 62% rename from MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h rename to MoltenVK/MoltenVK/Commands/MVKCmdRendering.h index 66c7c304..16e4863b 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h @@ -1,5 +1,5 @@ /* - * MVKCmdRenderPass.h + * MVKCmdRendering.h * * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) * @@ -46,7 +46,6 @@ public: protected: - MVKSmallVector> _subpassSamplePositions; MVKRenderPass* _renderPass; MVKFramebuffer* _framebuffer; VkRect2D _renderArea; @@ -203,37 +202,28 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - MVKSmallVector _samplePositions; + MVKSmallVector _sampleLocations; }; #pragma mark - -#pragma mark MVKCmdExecuteCommands +#pragma mark MVKCmdSetSampleLocationsEnable -/** - * Vulkan command to execute secondary command buffers. - * Template class to balance vector pre-allocations between very common low counts and fewer larger counts. - */ -template -class MVKCmdExecuteCommands : public MVKCommand { +/** Vulkan command to dynamically enable custom sample locations. */ +class MVKCmdSetSampleLocationsEnable : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - uint32_t commandBuffersCount, - const VkCommandBuffer* pCommandBuffers); + VkBool32 sampleLocationsEnable); void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - MVKSmallVector _secondaryCommandBuffers; + VkBool32 _sampleLocationsEnable; }; -// Concrete template class implementations. 
-typedef MVKCmdExecuteCommands<1> MVKCmdExecuteCommands1; -typedef MVKCmdExecuteCommands<16> MVKCmdExecuteCommandsMulti; - #pragma mark - #pragma mark MVKCmdSetViewport @@ -295,25 +285,6 @@ typedef MVKCmdSetScissor<1> MVKCmdSetScissor1; typedef MVKCmdSetScissor MVKCmdSetScissorMulti; -#pragma mark - -#pragma mark MVKCmdSetLineWidth - -/** Vulkan command to set the line width. */ -class MVKCmdSetLineWidth : public MVKCommand { - -public: - VkResult setContent(MVKCommandBuffer* cmdBuff, - float lineWidth); - - void encode(MVKCommandEncoder* cmdEncoder) override; - -protected: - MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - - float _lineWidth; -}; - - #pragma mark - #pragma mark MVKCmdSetDepthBias @@ -337,6 +308,25 @@ protected: }; +#pragma mark - +#pragma mark MVKCmdSetDepthBiasEnable + +/** Vulkan command to dynamically enable or disable depth bias. */ +class MVKCmdSetDepthBiasEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthBiasEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _depthBiasEnable; +}; + + #pragma mark - #pragma mark MVKCmdSetBlendConstants @@ -352,31 +342,129 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - float _red; - float _green; - float _blue; - float _alpha; + float _blendConstants[4] = {}; }; #pragma mark - -#pragma mark MVKCmdSetDepthBounds +#pragma mark MVKCmdSetDepthTestEnable -/** Vulkan command to set depth bounds. */ -class MVKCmdSetDepthBounds : public MVKCommand { +/** Vulkan command to dynamically enable depth testing. 
*/ +class MVKCmdSetDepthTestEnable : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - float minDepthBounds, - float maxDepthBounds); + VkBool32 depthTestEnable); - void encode(MVKCommandEncoder* cmdEncoder) override; + void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - float _minDepthBounds; - float _maxDepthBounds; + VkBool32 _depthTestEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetDepthWriteEnable + +/** Vulkan command to dynamically enable depth writing. */ +class MVKCmdSetDepthWriteEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthWriteEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _depthWriteEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetDepthClipEnable + +/** Vulkan command to dynamically enable depth clip. */ +class MVKCmdSetDepthClipEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthClipEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _depthClipEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetDepthCompareOp + +/** Vulkan command to dynamically set the depth compare operation. */ +class MVKCmdSetDepthCompareOp : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkCompareOp depthCompareOp); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkCompareOp _depthCompareOp; +}; + + +#pragma mark - +#pragma mark MVKCmdSetStencilTestEnable + +/** Vulkan command to dynamically enable stencil testing. 
*/ +class MVKCmdSetStencilTestEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 stencilTestEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _stencilTestEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetStencilOp + +/** Vulkan command to dynamically set the stencil operations. */ +class MVKCmdSetStencilOp : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkStencilFaceFlags _faceMask; + VkStencilOp _failOp; + VkStencilOp _passOp; + VkStencilOp _depthFailOp; + VkCompareOp _compareOp; }; @@ -442,3 +530,136 @@ protected: uint32_t _stencilReference; }; + +#pragma mark - +#pragma mark MVKCmdSetCullMode + +/** Vulkan command to dynamically set the cull mode. */ +class MVKCmdSetCullMode : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkCullModeFlags cullMode); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkCullModeFlags _cullMode; +}; + + +#pragma mark - +#pragma mark MVKCmdSetFrontFace + +/** Vulkan command to dynamically set the front facing winding order. 
*/ +class MVKCmdSetFrontFace : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkFrontFace frontFace); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkFrontFace _frontFace; +}; + + +#pragma mark - +#pragma mark MVKCmdSetPatchControlPoints + +/** Vulkan command to dynamically set the number of patch control points. */ +class MVKCmdSetPatchControlPoints : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t patchControlPoints); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + uint32_t _patchControlPoints; +}; + + +#pragma mark - +#pragma mark MVKCmdSetPolygonMode + +/** Vulkan command to dynamically set the polygon mode. */ +class MVKCmdSetPolygonMode : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkPolygonMode polygonMode); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkPolygonMode _polygonMode; +}; + + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveTopology + +/** Vulkan command to dynamically set the primitive topology. */ +class MVKCmdSetPrimitiveTopology : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkPrimitiveTopology primitiveTopology); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkPrimitiveTopology _primitiveTopology; +}; + + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveRestartEnable + +/** Vulkan command to dynamically enable or disable primitive restart functionality. 
*/ +class MVKCmdSetPrimitiveRestartEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 primitiveRestartEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _primitiveRestartEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetRasterizerDiscardEnable + +/** Vulkan command to dynamically enable or disable rasterization. */ +class MVKCmdSetRasterizerDiscardEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 rasterizerDiscardEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _rasterizerDiscardEnable; +}; + diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm similarity index 58% rename from MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm rename to MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm index 08bbe851..a2492ace 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm @@ -1,5 +1,5 @@ /* - * MVKCmdRenderPass.mm + * MVKCmdRendering.mm * * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) * @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include "MVKCmdRenderPass.h" +#include "MVKCmdRendering.h" #include "MVKCommandBuffer.h" #include "MVKCommandPool.h" #include "MVKFramebuffer.h" @@ -36,30 +36,6 @@ VkResult MVKCmdBeginRenderPassBase::setContent(MVKCommandBuffer* cmdBuff, _renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass; _framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer; _renderArea = pRenderPassBegin->renderArea; - _subpassSamplePositions.clear(); - - for (const auto* next = (VkBaseInStructure*)pRenderPassBegin->pNext; next; next = next->pNext) { - switch (next->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT: { - // Build an array of arrays, one array of sample positions for each subpass index. - // For subpasses not included in VkRenderPassSampleLocationsBeginInfoEXT, the resulting array of samples will be empty. - _subpassSamplePositions.resize(_renderPass->getSubpassCount()); - auto* pRPSampLocnsInfo = (VkRenderPassSampleLocationsBeginInfoEXT*)next; - for (uint32_t spSLIdx = 0; spSLIdx < pRPSampLocnsInfo->postSubpassSampleLocationsCount; spSLIdx++) { - auto& spsl = pRPSampLocnsInfo->pPostSubpassSampleLocations[spSLIdx]; - uint32_t spIdx = spsl.subpassIndex; - auto& spSampPosns = _subpassSamplePositions[spIdx]; - for (uint32_t slIdx = 0; slIdx < spsl.sampleLocationsInfo.sampleLocationsCount; slIdx++) { - auto& sl = spsl.sampleLocationsInfo.pSampleLocations[slIdx]; - spSampPosns.push_back(MTLSamplePositionMake(sl.x, sl.y)); - } - } - break; - } - default: - break; - } - } cmdBuff->_currentSubpassInfo.beginRenderpass(_renderPass); @@ -86,15 +62,6 @@ VkResult MVKCmdBeginRenderPass::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdBeginRenderPass::encode(MVKCommandEncoder* cmdEncoder) { - - // Convert the sample position array of arrays to an array of array-references, - // so that it can be passed to the command encoder. 
- size_t spSPCnt = _subpassSamplePositions.size(); - MVKArrayRef spSPRefs[spSPCnt]; - for (uint32_t spSPIdx = 0; spSPIdx < spSPCnt; spSPIdx++) { - spSPRefs[spSPIdx] = _subpassSamplePositions[spSPIdx].contents(); - } - cmdEncoder->beginRenderpass(this, _contents, _renderPass, @@ -102,7 +69,7 @@ void MVKCmdBeginRenderPass::encode(MVKCommandEncoder* cmdEncoder) { _renderArea, _clearValues.contents(), _attachments.contents(), - MVKArrayRef(spSPRefs, spSPCnt)); + kMVKCommandUseBeginRenderPass); } template class MVKCmdBeginRenderPass<1, 0>; @@ -217,46 +184,31 @@ void MVKCmdEndRendering::encode(MVKCommandEncoder* cmdEncoder) { VkResult MVKCmdSetSampleLocations::setContent(MVKCommandBuffer* cmdBuff, const VkSampleLocationsInfoEXT* pSampleLocationsInfo) { - + _sampleLocations.clear(); for (uint32_t slIdx = 0; slIdx < pSampleLocationsInfo->sampleLocationsCount; slIdx++) { - auto& sl = pSampleLocationsInfo->pSampleLocations[slIdx]; - _samplePositions.push_back(MTLSamplePositionMake(sl.x, sl.y)); + _sampleLocations.push_back(pSampleLocationsInfo->pSampleLocations[slIdx]); } - return VK_SUCCESS; } void MVKCmdSetSampleLocations::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->setDynamicSamplePositions(_samplePositions.contents()); + cmdEncoder->_renderingState.setSampleLocations(_sampleLocations.contents(), true); } #pragma mark - -#pragma mark MVKCmdExecuteCommands - -template -VkResult MVKCmdExecuteCommands::setContent(MVKCommandBuffer* cmdBuff, - uint32_t commandBuffersCount, - const VkCommandBuffer* pCommandBuffers) { - // Add clear values - _secondaryCommandBuffers.clear(); // Clear for reuse - _secondaryCommandBuffers.reserve(commandBuffersCount); - for (uint32_t cbIdx = 0; cbIdx < commandBuffersCount; cbIdx++) { - _secondaryCommandBuffers.push_back(MVKCommandBuffer::getMVKCommandBuffer(pCommandBuffers[cbIdx])); - } - cmdBuff->recordExecuteCommands(_secondaryCommandBuffers.contents()); +#pragma mark MVKCmdSetSampleLocationsEnable +VkResult 
MVKCmdSetSampleLocationsEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 sampleLocationsEnable) { + _sampleLocationsEnable = sampleLocationsEnable; return VK_SUCCESS; } -template -void MVKCmdExecuteCommands::encode(MVKCommandEncoder* cmdEncoder) { - for (auto& cb : _secondaryCommandBuffers) { cmdEncoder->encodeSecondary(cb); } +void MVKCmdSetSampleLocationsEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setSampleLocationsEnable(_sampleLocationsEnable, true); } -template class MVKCmdExecuteCommands<1>; -template class MVKCmdExecuteCommands<16>; - #pragma mark - #pragma mark MVKCmdSetViewport @@ -267,7 +219,7 @@ VkResult MVKCmdSetViewport::setContent(MVKCommandBuffer* cmdBuff, uint32_t viewportCount, const VkViewport* pViewports) { _firstViewport = firstViewport; - _viewports.clear(); // Clear for reuse + _viewports.clear(); _viewports.reserve(viewportCount); for (uint32_t vpIdx = 0; vpIdx < viewportCount; vpIdx++) { _viewports.push_back(pViewports[vpIdx]); @@ -278,7 +230,7 @@ VkResult MVKCmdSetViewport::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdSetViewport::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_viewportState.setViewports(_viewports.contents(), _firstViewport, true); + cmdEncoder->_renderingState.setViewports(_viewports.contents(), _firstViewport, true); } template class MVKCmdSetViewport<1>; @@ -294,7 +246,7 @@ VkResult MVKCmdSetScissor::setContent(MVKCommandBuffer* cmdBuff, uint32_t scissorCount, const VkRect2D* pScissors) { _firstScissor = firstScissor; - _scissors.clear(); // Clear for reuse + _scissors.clear(); _scissors.reserve(scissorCount); for (uint32_t sIdx = 0; sIdx < scissorCount; sIdx++) { _scissors.push_back(pScissors[sIdx]); @@ -305,31 +257,13 @@ VkResult MVKCmdSetScissor::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdSetScissor::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_scissorState.setScissors(_scissors.contents(), _firstScissor, true); + 
cmdEncoder->_renderingState.setScissors(_scissors.contents(), _firstScissor, true); } template class MVKCmdSetScissor<1>; template class MVKCmdSetScissor; -#pragma mark - -#pragma mark MVKCmdSetLineWidth - -VkResult MVKCmdSetLineWidth::setContent(MVKCommandBuffer* cmdBuff, - float lineWidth) { - _lineWidth = lineWidth; - - // Validate - if (_lineWidth != 1.0 || cmdBuff->getDevice()->_enabledFeatures.wideLines) { - return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetLineWidth(): The current device does not support wide lines."); - } - - return VK_SUCCESS; -} - -void MVKCmdSetLineWidth::encode(MVKCommandEncoder* cmdEncoder) {} - - #pragma mark - #pragma mark MVKCmdSetDepthBias @@ -345,9 +279,23 @@ VkResult MVKCmdSetDepthBias::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetDepthBias::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_depthBiasState.setDepthBias(_depthBiasConstantFactor, - _depthBiasSlopeFactor, - _depthBiasClamp); + cmdEncoder->_renderingState.setDepthBias(_depthBiasConstantFactor, + _depthBiasSlopeFactor, + _depthBiasClamp); +} + + +#pragma mark - +#pragma mark MVKCmdSetDepthBiasEnable + +VkResult MVKCmdSetDepthBiasEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthBiasEnable) { + _depthBiasEnable = depthBiasEnable; + return VK_SUCCESS; +} + +void MVKCmdSetDepthBiasEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setDepthBiasEnable(_depthBiasEnable); } @@ -356,37 +304,105 @@ void MVKCmdSetDepthBias::encode(MVKCommandEncoder* cmdEncoder) { VkResult MVKCmdSetBlendConstants::setContent(MVKCommandBuffer* cmdBuff, const float blendConst[4]) { - _red = blendConst[0]; - _green = blendConst[1]; - _blue = blendConst[2]; - _alpha = blendConst[3]; - + mvkCopy(_blendConstants, blendConst, 4); return VK_SUCCESS; } void MVKCmdSetBlendConstants::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_blendColorState.setBlendColor(_red, _green, _blue, _alpha, true); + 
cmdEncoder->_renderingState.setBlendConstants(_blendConstants, true); } #pragma mark - -#pragma mark MVKCmdSetDepthBounds - -VkResult MVKCmdSetDepthBounds::setContent(MVKCommandBuffer* cmdBuff, - float minDepthBounds, - float maxDepthBounds) { - _minDepthBounds = minDepthBounds; - _maxDepthBounds = maxDepthBounds; - - // Validate - if (cmdBuff->getDevice()->_enabledFeatures.depthBounds) { - return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetDepthBounds(): The current device does not support setting depth bounds."); - } +#pragma mark MVKCmdSetDepthTestEnable +VkResult MVKCmdSetDepthTestEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthTestEnable) { + _depthTestEnable = depthTestEnable; return VK_SUCCESS; } -void MVKCmdSetDepthBounds::encode(MVKCommandEncoder* cmdEncoder) {} +void MVKCmdSetDepthTestEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setDepthTestEnable(_depthTestEnable); +} + + +#pragma mark - +#pragma mark MVKCmdSetDepthWriteEnable + +VkResult MVKCmdSetDepthWriteEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthWriteEnable) { + _depthWriteEnable = depthWriteEnable; + return VK_SUCCESS; +} + +void MVKCmdSetDepthWriteEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setDepthWriteEnable(_depthWriteEnable); +} + + +#pragma mark - +#pragma mark MVKCmdSetDepthClipEnable + +VkResult MVKCmdSetDepthClipEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthClipEnable) { + _depthClipEnable = depthClipEnable; + return VK_SUCCESS; +} + +void MVKCmdSetDepthClipEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setDepthClipEnable(_depthClipEnable, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetDepthCompareOp + +VkResult MVKCmdSetDepthCompareOp::setContent(MVKCommandBuffer* cmdBuff, + VkCompareOp depthCompareOp) { + _depthCompareOp = depthCompareOp; + return VK_SUCCESS; +} + +void 
MVKCmdSetDepthCompareOp::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setDepthCompareOp(_depthCompareOp); +} + + +#pragma mark - +#pragma mark MVKCmdSetStencilTestEnable + +VkResult MVKCmdSetStencilTestEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 stencilTestEnable) { + _stencilTestEnable = stencilTestEnable; + return VK_SUCCESS; +} + +void MVKCmdSetStencilTestEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setStencilTestEnable(_stencilTestEnable); +} + + +#pragma mark - +#pragma mark MVKCmdSetStencilOp + +VkResult MVKCmdSetStencilOp::setContent(MVKCommandBuffer* cmdBuff, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + _faceMask = faceMask; + _failOp = failOp; + _passOp = passOp; + _depthFailOp = depthFailOp; + _compareOp = compareOp; + return VK_SUCCESS; +} + +void MVKCmdSetStencilOp::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setStencilOp(_faceMask, _failOp, _passOp, _depthFailOp, _compareOp); +} #pragma mark - @@ -436,6 +452,103 @@ VkResult MVKCmdSetStencilReference::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetStencilReference::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_stencilReferenceValueState.setReferenceValues(_faceMask, _stencilReference); + cmdEncoder->_renderingState.setStencilReferenceValues(_faceMask, _stencilReference); } + +#pragma mark - +#pragma mark MVKCmdSetCullMode + +VkResult MVKCmdSetCullMode::setContent(MVKCommandBuffer* cmdBuff, + VkCullModeFlags cullMode) { + _cullMode = cullMode; + return VK_SUCCESS; +} + +void MVKCmdSetCullMode::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setCullMode(_cullMode, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetFrontFace + +VkResult MVKCmdSetFrontFace::setContent(MVKCommandBuffer* cmdBuff, + VkFrontFace frontFace) { + _frontFace = frontFace; + return VK_SUCCESS; +} + +void 
MVKCmdSetFrontFace::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setFrontFace(_frontFace, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetPatchControlPoints + +VkResult MVKCmdSetPatchControlPoints::setContent(MVKCommandBuffer* cmdBuff, + uint32_t patchControlPoints) { + _patchControlPoints = patchControlPoints; + return VK_SUCCESS; +} + +void MVKCmdSetPatchControlPoints::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setPatchControlPoints(_patchControlPoints, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetPolygonMode + +VkResult MVKCmdSetPolygonMode::setContent(MVKCommandBuffer* cmdBuff, + VkPolygonMode polygonMode) { + _polygonMode = polygonMode; + return VK_SUCCESS; +} + +void MVKCmdSetPolygonMode::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setPolygonMode(_polygonMode, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveTopology + +VkResult MVKCmdSetPrimitiveTopology::setContent(MVKCommandBuffer* cmdBuff, + VkPrimitiveTopology primitiveTopology) { + _primitiveTopology = primitiveTopology; + return VK_SUCCESS; +} + +void MVKCmdSetPrimitiveTopology::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setPrimitiveTopology(_primitiveTopology, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveRestartEnable + +VkResult MVKCmdSetPrimitiveRestartEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 primitiveRestartEnable) { + _primitiveRestartEnable = primitiveRestartEnable; + return VK_SUCCESS; +} + +void MVKCmdSetPrimitiveRestartEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setPrimitiveRestartEnable(_primitiveRestartEnable, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetRasterizerDiscardEnable + +VkResult MVKCmdSetRasterizerDiscardEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 rasterizerDiscardEnable) { + _rasterizerDiscardEnable = rasterizerDiscardEnable; + return VK_SUCCESS; +} + +void 
MVKCmdSetRasterizerDiscardEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setRasterizerDiscardEnable(_rasterizerDiscardEnable, true); +} diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 2c0ef546..52dcb78f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -504,11 +504,12 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com if (cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle && _srcImage->needsSwizzle()) { // Use a view that has a swizzle on it. - srcMTLTex = [[srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat - textureType:srcMTLTex.textureType - levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount) - slices:NSMakeRange(0, srcMTLTex.arrayLength) - swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())] autorelease]; + srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat + textureType:srcMTLTex.textureType + levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount) + slices:NSMakeRange(0, srcMTLTex.arrayLength) + swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [srcMTLTex release]; }]; } cmdEncoder->endCurrentMetalEncoding(); @@ -551,9 +552,7 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com textureType: MTLTextureType2DArray levels: NSMakeRange(0, srcMTLTex.mipmapLevelCount) slices: NSMakeRange(0, srcMTLTex.arrayLength)]; - [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { - [srcMTLTex release]; - }]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [srcMTLTex release]; }]; } blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat(dstPlaneIndex); blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter); @@ -655,9 +654,7 @@ void 
MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com #endif } id stencilMTLTex = [srcMTLTex newTextureViewWithPixelFormat: stencilFmt]; - [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { - [stencilMTLTex release]; - }]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [stencilMTLTex release]; }]; [mtlRendEnc setFragmentTexture: stencilMTLTex atIndex: 1]; } else { [mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 1]; @@ -1509,11 +1506,10 @@ void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { // Return to the previous rendering state on the next render activity cmdEncoder->_graphicsPipelineState.markDirty(); + cmdEncoder->_graphicsResourcesState.markDirty(); cmdEncoder->_depthStencilState.markDirty(); - cmdEncoder->_stencilReferenceValueState.markDirty(); - cmdEncoder->_depthBiasState.markDirty(); - cmdEncoder->_viewportState.markDirty(); - cmdEncoder->_scissorState.markDirty(); + cmdEncoder->_renderingState.markDirty(); + cmdEncoder->_occlusionQueryState.markDirty(); } template diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 76274dad..92d02e77 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -143,7 +143,7 @@ public: bool _needsVisibilityResultMTLBuffer; /** Called when a MVKCmdExecuteCommands is added to this command buffer. */ - void recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers); + void recordExecuteCommands(MVKArrayRef secondaryCommandBuffers); /** Called when a timestamp command is added. 
*/ void recordTimestampCommand(); @@ -182,7 +182,6 @@ protected: friend class MVKCommandEncoder; friend class MVKCommandPool; - MVKBaseObject* getBaseObject() override { return this; }; void propagateDebugName() override {} void init(const VkCommandBufferAllocateInfo* pAllocateInfo); bool canExecute(); @@ -251,21 +250,23 @@ public: const VkRect2D& renderArea, MVKArrayRef clearValues, MVKArrayRef attachments, - MVKArrayRef> subpassSamplePositions, - MVKCommandUse cmdUse = kMVKCommandUseBeginRenderPass); + MVKCommandUse cmdUse); /** Begins the next render subpass. */ void beginNextSubpass(MVKCommand* subpassCmd, VkSubpassContents renderpassContents); - /** Sets the dynamic custom sample positions to use when rendering. */ - void setDynamicSamplePositions(MVKArrayRef dynamicSamplePositions); - /** Begins dynamic rendering. */ void beginRendering(MVKCommand* rendCmd, const VkRenderingInfo* pRenderingInfo); /** Begins a Metal render pass for the current render subpass. */ void beginMetalRenderPass(MVKCommandUse cmdUse); + /** + * If a Metal render pass has started, and it needs to be restarted, + * then end the existing Metal render pass, and start a new one. + */ + void restartMetalRenderPassIfNeeded(); + /** If a render encoder is active, encodes store actions for all attachments to it. */ void encodeStoreActions(bool storeOverride = false); @@ -301,14 +302,11 @@ public: /** Encodes an operation to signal an event to a status. */ void signalEvent(MVKEvent* mvkEvent, bool status); - /** - * If a pipeline is currently bound, returns whether the current pipeline permits dynamic - * setting of the specified state. If no pipeline is currently bound, returns true. - */ - bool supportsDynamicState(VkDynamicState state); + /** Clips the rect to ensure it fits inside the render area. */ + VkRect2D clipToRenderArea(VkRect2D rect); /** Clips the scissor to ensure it fits inside the render area. 
*/ - VkRect2D clipToRenderArea(VkRect2D scissor); + MTLScissorRect clipToRenderArea(MTLScissorRect scissor); /** Called by each graphics draw command to establish any outstanding state just prior to performing the draw. */ void finalizeDrawState(MVKGraphicsStage stage); @@ -362,6 +360,9 @@ public: /** Returns the push constants associated with the specified shader stage. */ MVKPushConstantsCommandEncoderState* getPushConstants(VkShaderStageFlagBits shaderStage); + /** Encode the buffer binding as a vertex attribute buffer. */ + void encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride); + /** * Copy bytes into the Metal encoder at a Metal vertex buffer index, and optionally indicate * that this binding might override a desriptor binding. If so, the descriptor binding will @@ -436,37 +437,25 @@ public: id _mtlRenderEncoder; /** Tracks the current graphics pipeline bound to the encoder. */ - MVKPipelineCommandEncoderState _graphicsPipelineState; + MVKPipelineCommandEncoderState _graphicsPipelineState; + + /** Tracks the current graphics resources state of the encoder. */ + MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState; /** Tracks the current compute pipeline bound to the encoder. */ - MVKPipelineCommandEncoderState _computePipelineState; + MVKPipelineCommandEncoderState _computePipelineState; - /** Tracks the current viewport state of the encoder. */ - MVKViewportCommandEncoderState _viewportState; - - /** Tracks the current scissor state of the encoder. */ - MVKScissorCommandEncoderState _scissorState; - - /** Tracks the current depth bias state of the encoder. */ - MVKDepthBiasCommandEncoderState _depthBiasState; - - /** Tracks the current blend color state of the encoder. */ - MVKBlendColorCommandEncoderState _blendColorState; + /** Tracks the current compute resources state of the encoder. */ + MVKComputeResourcesCommandEncoderState _computeResourcesState; /** Tracks the current depth stencil state of the encoder. 
*/ MVKDepthStencilCommandEncoderState _depthStencilState; - /** Tracks the current stencil reference value state of the encoder. */ - MVKStencilReferenceValueCommandEncoderState _stencilReferenceValueState; + /** Tracks the current rendering states of the encoder. */ + MVKRenderingCommandEncoderState _renderingState; - /** Tracks the current graphics resources state of the encoder. */ - MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState; - - /** Tracks the current compute resources state of the encoder. */ - MVKComputeResourcesCommandEncoderState _computeResourcesState; - - /** The type of primitive that will be rendered. */ - MTLPrimitiveType _mtlPrimitiveType; + /** Tracks the occlusion query state of the encoder. */ + MVKOcclusionQueryCommandEncoderState _occlusionQueryState; /** The size of the threadgroup for the compute shader. */ MTLSize _mtlThreadgroupSize; @@ -495,7 +484,6 @@ protected: void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints); void encodeTimestampStageCounterSamples(); id getStageCountersMTLFence(); - MVKArrayRef getCustomSamplePositions(); NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse); template void retainIfImmediatelyEncoding(T& mtlEnc); template void endMetalEncoding(T& mtlEnc); @@ -511,8 +499,6 @@ protected: MVKSmallVector _timestampStageCounterQueries; MVKSmallVector _clearValues; MVKSmallVector _attachments; - MVKSmallVector _dynamicSamplePositions; - MVKSmallVector> _subpassSamplePositions; id _mtlComputeEncoder; id _mtlBlitEncoder; id _stageCountersMTLFence; @@ -521,7 +507,6 @@ protected: MVKPushConstantsCommandEncoderState _tessEvalPushConstants; MVKPushConstantsCommandEncoderState _fragmentPushConstants; MVKPushConstantsCommandEncoderState _computePushConstants; - MVKOcclusionQueryCommandEncoderState _occlusionQueryState; MVKPrefillMetalCommandBuffersStyle _prefillStyle; VkSubpassContents _subpassContents; uint32_t 
_renderSubpassIndex; @@ -536,9 +521,6 @@ protected: #pragma mark - #pragma mark Support functions -/** Returns a name, suitable for use as a MTLCommandBuffer label, based on the MVKCommandUse. */ -NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse); - /** Returns a name, suitable for use as a MTLRenderCommandEncoder label, based on the MVKCommandUse. */ NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index a0abfc71..7a3ac508 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -25,7 +25,7 @@ #include "MVKFoundation.h" #include "MTLRenderPassDescriptor+MoltenVK.h" #include "MVKCmdDraw.h" -#include "MVKCmdRenderPass.h" +#include "MVKCmdRendering.h" #include using namespace std; @@ -120,7 +120,7 @@ VkResult MVKCommandBuffer::begin(const VkCommandBufferBeginInfo* pBeginInfo) { if(_device->shouldPrefillMTLCommandBuffers() && !(_isSecondary || _supportsConcurrentExecution)) { @autoreleasepool { - _prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(0) retain]; // retained + _prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(kMVKCommandUseBeginCommandBuffer, 0) retain]; // retained auto prefillStyle = getMVKConfig().prefillMetalCommandBuffers; if (prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING || prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE ) { @@ -260,7 +260,7 @@ bool MVKCommandBuffer::canExecute() { } _wasExecuted = true; - return true; + return wasConfigurationSuccessful(); } // Return the number of bits set in the view mask, with a minimum value of 1. @@ -310,7 +310,7 @@ MVKCommandBuffer::~MVKCommandBuffer() { } // Promote the initial visibility buffer and indication of timestamp use from the secondary buffers. 
-void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers) { +void MVKCommandBuffer::recordExecuteCommands(MVKArrayRef secondaryCommandBuffers) { for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) { if (cmdBuff->_needsVisibilityResultMTLBuffer) { _needsVisibilityResultMTLBuffer = true; } if (cmdBuff->_hasStageCounterTimestampCommand) { _hasStageCounterTimestampCommand = true; } @@ -335,11 +335,19 @@ void MVKCommandBuffer::recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline) { #pragma mark - #pragma mark MVKCommandEncoder +// Activity performance tracking is put here to deliberately exclude when +// MVKConfiguration::prefillMetalCommandBuffers is set to immediate prefilling, +// because that would include app time between command submissions. void MVKCommandEncoder::encode(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); + beginEncoding(mtlCmdBuff, pEncodingContext); encodeCommands(_cmdBuffer->_head); endEncoding(); + + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); } void MVKCommandEncoder::beginEncoding(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { @@ -434,7 +442,6 @@ void MVKCommandEncoder::beginRendering(MVKCommand* rendCmd, const VkRenderingInf pRenderingInfo->renderArea, MVKArrayRef(clearValues, attCnt), MVKArrayRef(imageViews, attCnt), - MVKArrayRef>(), kMVKCommandUseBeginRendering); // If we've just created new transient objects, once retained by this encoder, @@ -454,7 +461,6 @@ void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd, const VkRect2D& renderArea, MVKArrayRef clearValues, MVKArrayRef attachments, - MVKArrayRef> subpassSamplePositions, MVKCommandUse cmdUse) { _pEncodingContext->setRenderingContext(renderPass, framebuffer); _renderArea = renderArea; @@ -463,13 +469,6 @@ void MVKCommandEncoder::beginRenderpass(MVKCommand* 
passCmd, _clearValues.assign(clearValues.begin(), clearValues.end()); _attachments.assign(attachments.begin(), attachments.end()); - // Copy the sample positions array of arrays, one array of sample positions for each subpass index. - _subpassSamplePositions.resize(subpassSamplePositions.size); - for (uint32_t spSPIdx = 0; spSPIdx < subpassSamplePositions.size; spSPIdx++) { - _subpassSamplePositions[spSPIdx].assign(subpassSamplePositions[spSPIdx].begin(), - subpassSamplePositions[spSPIdx].end()); - } - setSubpass(passCmd, subpassContents, 0, cmdUse); } @@ -510,10 +509,6 @@ void MVKCommandEncoder::beginNextMultiviewPass() { beginMetalRenderPass(kMVKCommandUseNextSubpass); } -void MVKCommandEncoder::setDynamicSamplePositions(MVKArrayRef dynamicSamplePositions) { - _dynamicSamplePositions.assign(dynamicSamplePositions.begin(), dynamicSamplePositions.end()); -} - // Retain encoders when prefilling, because prefilling may span multiple autorelease pools. template void MVKCommandEncoder::retainIfImmediatelyEncoding(T& mtlEnc) { @@ -528,7 +523,6 @@ void MVKCommandEncoder::endMetalEncoding(T& mtlEnc) { mtlEnc = nil; } - // Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder. void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { @@ -584,8 +578,8 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { // If no custom sample positions are established, size will be zero, // and Metal will default to using default sample postions. 
if (_pDeviceMetalFeatures->programmableSamplePositions) { - auto cstmSampPosns = getCustomSamplePositions(); - [mtlRPDesc setSamplePositions: cstmSampPosns.data count: cstmSampPosns.size]; + auto sampPosns = _renderingState.getSamplePositions(); + [mtlRPDesc setSamplePositions: sampPosns.data() count: sampPosns.size()]; } _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; @@ -599,29 +593,22 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { _graphicsPipelineState.beginMetalRenderPass(); _graphicsResourcesState.beginMetalRenderPass(); - _viewportState.beginMetalRenderPass(); - _scissorState.beginMetalRenderPass(); - _depthBiasState.beginMetalRenderPass(); - _blendColorState.beginMetalRenderPass(); + _depthStencilState.beginMetalRenderPass(); + _renderingState.beginMetalRenderPass(); _vertexPushConstants.beginMetalRenderPass(); _tessCtlPushConstants.beginMetalRenderPass(); _tessEvalPushConstants.beginMetalRenderPass(); _fragmentPushConstants.beginMetalRenderPass(); - _depthStencilState.beginMetalRenderPass(); - _stencilReferenceValueState.beginMetalRenderPass(); _occlusionQueryState.beginMetalRenderPass(); } -// If custom sample positions have been set, return them, otherwise return an empty array. -// For Metal, VkPhysicalDeviceSampleLocationsPropertiesEXT::variableSampleLocations is false. -// As such, Vulkan requires that sample positions must be established at the beginning of -// a renderpass, and that both pipeline and dynamic sample locations must be the same as those -// set for each subpass. Therefore, the only sample positions of use are those set for each -// subpass when the renderpass begins. The pipeline and dynamic sample positions are ignored. -MVKArrayRef MVKCommandEncoder::getCustomSamplePositions() { - return (_renderSubpassIndex < _subpassSamplePositions.size() - ? 
_subpassSamplePositions[_renderSubpassIndex].contents() - : MVKArrayRef()); +void MVKCommandEncoder::restartMetalRenderPassIfNeeded() { + if ( !_mtlRenderEncoder ) { return; } + + if (_renderingState.needsMetalRenderPassRestart()) { + encodeStoreActions(true); + beginMetalRenderPass(kMVKCommandUseRestartSubpass); + } } void MVKCommandEncoder::encodeStoreActions(bool storeOverride) { @@ -698,24 +685,23 @@ void MVKCommandEncoder::signalEvent(MVKEvent* mvkEvent, bool status) { mvkEvent->encodeSignal(_mtlCmdBuffer, status); } -bool MVKCommandEncoder::supportsDynamicState(VkDynamicState state) { - MVKGraphicsPipeline* gpl = (MVKGraphicsPipeline*)_graphicsPipelineState.getPipeline(); - return !gpl || gpl->supportsDynamicState(state); +VkRect2D MVKCommandEncoder::clipToRenderArea(VkRect2D rect) { + + uint32_t raLeft = max(_renderArea.offset.x, 0); + uint32_t raRight = raLeft + _renderArea.extent.width; + uint32_t raBottom = max(_renderArea.offset.y, 0); + uint32_t raTop = raBottom + _renderArea.extent.height; + + rect.offset.x = mvkClamp(rect.offset.x, raLeft, max(raRight - 1, raLeft)); + rect.offset.y = mvkClamp(rect.offset.y, raBottom, max(raTop - 1, raBottom)); + rect.extent.width = min(rect.extent.width, raRight - rect.offset.x); + rect.extent.height = min(rect.extent.height, raTop - rect.offset.y); + + return rect; } -VkRect2D MVKCommandEncoder::clipToRenderArea(VkRect2D scissor) { - - int32_t raLeft = _renderArea.offset.x; - int32_t raRight = raLeft + _renderArea.extent.width; - int32_t raBottom = _renderArea.offset.y; - int32_t raTop = raBottom + _renderArea.extent.height; - - scissor.offset.x = mvkClamp(scissor.offset.x, raLeft, max(raRight - 1, raLeft)); - scissor.offset.y = mvkClamp(scissor.offset.y, raBottom, max(raTop - 1, raBottom)); - scissor.extent.width = min(scissor.extent.width, raRight - scissor.offset.x); - scissor.extent.height = min(scissor.extent.height, raTop - scissor.offset.y); - - return scissor; +MTLScissorRect 
MVKCommandEncoder::clipToRenderArea(MTLScissorRect scissor) { + return mvkMTLScissorRectFromVkRect2D(clipToRenderArea(mvkVkRect2DFromMTLScissorRect(scissor))); } void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) { @@ -725,16 +711,12 @@ void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) { } _graphicsPipelineState.encode(stage); // Must do first..it sets others _graphicsResourcesState.encode(stage); // Before push constants, to allow them to override. - _viewportState.encode(stage); - _scissorState.encode(stage); - _depthBiasState.encode(stage); - _blendColorState.encode(stage); + _depthStencilState.encode(stage); + _renderingState.encode(stage); _vertexPushConstants.encode(stage); _tessCtlPushConstants.encode(stage); _tessEvalPushConstants.encode(stage); _fragmentPushConstants.encode(stage); - _depthStencilState.encode(stage); - _stencilReferenceValueState.encode(stage); _occlusionQueryState.encode(stage); } @@ -823,16 +805,12 @@ void MVKCommandEncoder::endMetalRenderEncoding() { _graphicsPipelineState.endMetalRenderPass(); _graphicsResourcesState.endMetalRenderPass(); - _viewportState.endMetalRenderPass(); - _scissorState.endMetalRenderPass(); - _depthBiasState.endMetalRenderPass(); - _blendColorState.endMetalRenderPass(); + _depthStencilState.endMetalRenderPass(); + _renderingState.endMetalRenderPass(); _vertexPushConstants.endMetalRenderPass(); _tessCtlPushConstants.endMetalRenderPass(); _tessEvalPushConstants.endMetalRenderPass(); _fragmentPushConstants.endMetalRenderPass(); - _depthStencilState.endMetalRenderPass(); - _stencilReferenceValueState.endMetalRenderPass(); _occlusionQueryState.endMetalRenderPass(); } @@ -924,6 +902,42 @@ void MVKCommandEncoder::setVertexBytes(id mtlEncoder, } } +void MVKCommandEncoder::encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride) { + if (_device->_pMetalFeatures->dynamicVertexStride) { +#if MVK_XCODE_15 + NSUInteger mtlStride = isDynamicStride ? 
b.stride : MTLAttributeStrideStatic; + if (b.isInline) { + [_mtlRenderEncoder setVertexBytes: b.mtlBytes + length: b.size + attributeStride: mtlStride + atIndex: b.index]; + } else if (b.justOffset) { + [_mtlRenderEncoder setVertexBufferOffset: b.offset + attributeStride: mtlStride + atIndex: b.index]; + } else { + [_mtlRenderEncoder setVertexBuffer: b.mtlBuffer + offset: b.offset + attributeStride: mtlStride + atIndex: b.index]; + } +#endif + } else { + if (b.isInline) { + [_mtlRenderEncoder setVertexBytes: b.mtlBytes + length: b.size + atIndex: b.index]; + } else if (b.justOffset) { + [_mtlRenderEncoder setVertexBufferOffset: b.offset + atIndex: b.index]; + } else { + [_mtlRenderEncoder setVertexBuffer: b.mtlBuffer + offset: b.offset + atIndex: b.index]; + } + } +} + void MVKCommandEncoder::setFragmentBytes(id mtlEncoder, const void* bytes, NSUInteger length, @@ -1123,39 +1137,35 @@ void MVKCommandEncoder::finishQueries() { MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer, MVKPrefillMetalCommandBuffersStyle prefillStyle) : MVKBaseDeviceObject(cmdBuffer->getDevice()), - _cmdBuffer(cmdBuffer), - _graphicsPipelineState(this), - _computePipelineState(this), - _viewportState(this), - _scissorState(this), - _depthBiasState(this), - _blendColorState(this), - _depthStencilState(this), - _stencilReferenceValueState(this), - _graphicsResourcesState(this), - _computeResourcesState(this), - _vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT), - _tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT), - _tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT), - _fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT), - _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT), - _occlusionQueryState(this), - _prefillStyle(prefillStyle){ + _cmdBuffer(cmdBuffer), + _graphicsPipelineState(this), + _graphicsResourcesState(this), + _computePipelineState(this), + _computeResourcesState(this), + _depthStencilState(this), + 
_renderingState(this), + _occlusionQueryState(this), + _vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT), + _tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT), + _tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT), + _fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT), + _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT), + _prefillStyle(prefillStyle){ - _pDeviceFeatures = &_device->_enabledFeatures; - _pDeviceMetalFeatures = _device->_pMetalFeatures; - _pDeviceProperties = _device->_pProperties; - _pDeviceMemoryProperties = _device->_pMemoryProperties; - _pActivatedQueries = nullptr; - _mtlCmdBuffer = nil; - _mtlRenderEncoder = nil; - _mtlComputeEncoder = nil; - _mtlComputeEncoderUse = kMVKCommandUseNone; - _mtlBlitEncoder = nil; - _mtlBlitEncoderUse = kMVKCommandUseNone; - _pEncodingContext = nullptr; - _stageCountersMTLFence = nil; - _flushCount = 0; + _pDeviceFeatures = &_device->_enabledFeatures; + _pDeviceMetalFeatures = _device->_pMetalFeatures; + _pDeviceProperties = _device->_pProperties; + _pDeviceMemoryProperties = _device->_pMemoryProperties; + _pActivatedQueries = nullptr; + _mtlCmdBuffer = nil; + _mtlRenderEncoder = nil; + _mtlComputeEncoder = nil; + _mtlComputeEncoderUse = kMVKCommandUseNone; + _mtlBlitEncoder = nil; + _mtlBlitEncoderUse = kMVKCommandUseNone; + _pEncodingContext = nullptr; + _stageCountersMTLFence = nil; + _flushCount = 0; } MVKCommandEncoder::~MVKCommandEncoder() { @@ -1169,19 +1179,6 @@ MVKCommandEncoder::~MVKCommandEncoder() { #pragma mark - #pragma mark Support functions -NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse) { - switch (cmdUse) { - case kMVKCommandUseEndCommandBuffer: return @"vkEndCommandBuffer (Prefilled) CommandBuffer"; - case kMVKCommandUseQueueSubmit: return @"vkQueueSubmit CommandBuffer"; - case kMVKCommandUseQueuePresent: return @"vkQueuePresentKHR CommandBuffer"; - case kMVKCommandUseQueueWaitIdle: return @"vkQueueWaitIdle CommandBuffer"; - 
case kMVKCommandUseDeviceWaitIdle: return @"vkDeviceWaitIdle CommandBuffer"; - case kMVKCommandUseAcquireNextImage: return @"vkAcquireNextImageKHR CommandBuffer"; - case kMVKCommandUseInvalidateMappedMemoryRanges: return @"vkInvalidateMappedMemoryRanges CommandBuffer"; - default: return @"Unknown Use CommandBuffer"; - } -} - NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse) { switch (cmdUse) { case kMVKCommandUseBeginRendering: return @"vkCmdBeginRendering RenderEncoder"; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 06152dd7..4ac895d4 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -21,6 +21,7 @@ #include "MVKMTLResourceBindings.h" #include "MVKCommandResourceFactory.h" #include "MVKDevice.h" +#include "MVKPipeline.h" #include "MVKDescriptor.h" #include "MVKSmallVector.h" #include "MVKBitArray.h" @@ -81,7 +82,7 @@ public: /** * If the content of this instance is dirty, marks this instance as no longer dirty * and calls the encodeImpl() function to encode the content onto the Metal encoder. - * Marking dirty is done in advance so that subclass encodeImpl() implementations + * Marking clean is done in advance so that subclass encodeImpl() implementations * can override to leave this instance in a dirty state. * Subclasses must override the encodeImpl() function to do the actual work. */ @@ -96,8 +97,21 @@ public: MVKCommandEncoderState(MVKCommandEncoder* cmdEncoder) : _cmdEncoder(cmdEncoder) {} protected: - virtual void encodeImpl(uint32_t stage) = 0; + enum StateScope { + Static = 0, + Dynamic, + Count + }; + + virtual void encodeImpl(uint32_t stage) = 0; MVKDevice* getDevice(); + bool isDynamicState(MVKRenderStateType state); + template T& getContent(T* iVarAry, bool isDynamic) { + return iVarAry[isDynamic ? 
StateScope::Dynamic : StateScope::Static]; + } + template T& getContent(T* iVarAry, MVKRenderStateType state) { + return getContent(iVarAry, isDynamicState(state)); + } MVKCommandEncoder* _cmdEncoder; bool _isDirty = false; @@ -108,20 +122,17 @@ protected: #pragma mark - #pragma mark MVKPipelineCommandEncoderState -/** Holds encoder state established by pipeline commands. */ +/** Abstract class to hold encoder state established by pipeline commands. */ class MVKPipelineCommandEncoderState : public MVKCommandEncoderState { public: + void bindPipeline(MVKPipeline* pipeline); - /** Binds the pipeline. */ - void bindPipeline(MVKPipeline* pipeline); - - /** Returns the currently bound pipeline. */ MVKPipeline* getPipeline(); + MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)getPipeline(); } + MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)getPipeline(); } - /** Constructs this instance for the specified command encoder. */ - MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} + MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} protected: void encodeImpl(uint32_t stage) override; @@ -130,62 +141,6 @@ protected: }; -#pragma mark - -#pragma mark MVKViewportCommandEncoderState - -/** Holds encoder state established by viewport commands. */ -class MVKViewportCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** - * Sets one or more of the viewports, starting at the first index. - * The isSettingDynamically indicates that the scissor is being changed dynamically, - * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. - */ - void setViewports(const MVKArrayRef viewports, - uint32_t firstViewport, - bool isSettingDynamically); - - /** Constructs this instance for the specified command encoder. 
*/ - MVKViewportCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - MVKSmallVector _viewports, _dynamicViewports; -}; - - -#pragma mark - -#pragma mark MVKScissorCommandEncoderState - -/** Holds encoder state established by viewport commands. */ -class MVKScissorCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** - * Sets one or more of the scissors, starting at the first index. - * The isSettingDynamically indicates that the scissor is being changed dynamically, - * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. - */ - void setScissors(const MVKArrayRef scissors, - uint32_t firstScissor, - bool isSettingDynamically); - - /** Constructs this instance for the specified command encoder. */ - MVKScissorCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - MVKSmallVector _scissors, _dynamicScissors; -}; - - #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -226,16 +181,29 @@ public: /** Sets the depth stencil state during pipeline binding. */ void setDepthStencilState(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo); - /** - * Sets the stencil compare mask value of the indicated faces - * to the specified value, from explicit dynamic command. - */ + /** Enables or disables depth testing, from explicit dynamic command. */ + void setDepthTestEnable(VkBool32 depthTestEnable); + + /** Enables or disables depth writing, from explicit dynamic command. */ + void setDepthWriteEnable(VkBool32 depthWriteEnable); + + /** Sets the depth compare operation, from explicit dynamic command. */ + void setDepthCompareOp(VkCompareOp depthCompareOp); + + /** Enables or disables stencil testing, from explicit dynamic command. 
*/ + void setStencilTestEnable(VkBool32 stencilTestEnable); + + /** Sets the stencil operations of the indicated faces from explicit dynamic command. */ + void setStencilOp(VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp); + + /** Sets the stencil compare mask value of the indicated faces from explicit dynamic command. */ void setStencilCompareMask(VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); - /** - * Sets the stencil write mask value of the indicated faces - * to the specified value, from explicit dynamic command. - */ + /** Sets the stencil write mask value of the indicated faces from explicit dynamic command. */ void setStencilWriteMask(VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); void beginMetalRenderPass() override; @@ -246,96 +214,124 @@ public: protected: void encodeImpl(uint32_t stage) override; - void setStencilState(MVKMTLStencilDescriptorData& stencilInfo, - const VkStencilOpState& vkStencil, - bool enabled); + MVKMTLDepthStencilDescriptorData& getData(MVKRenderStateType state) { return getContent(_depthStencilData, state); } + template void setContent(T& content, T value) { + if (content != value) { + content = value; + markDirty(); + } + } + void setStencilState(MVKMTLStencilDescriptorData& sData, const VkStencilOpState& vkStencil); + void setStencilOp(MVKMTLStencilDescriptorData& sData, VkStencilOp failOp, + VkStencilOp passOp, VkStencilOp depthFailOp, VkCompareOp compareOp); - MVKMTLDepthStencilDescriptorData _depthStencilData = kMVKMTLDepthStencilDescriptorDataDefault; + MVKMTLDepthStencilDescriptorData _depthStencilData[StateScope::Count]; + bool _depthTestEnabled[StateScope::Count] = {}; bool _hasDepthAttachment = false; bool _hasStencilAttachment = false; }; #pragma mark - -#pragma mark MVKStencilReferenceValueCommandEncoderState +#pragma mark MVKRenderingCommandEncoderState -/** Holds encoder state established by stencil reference values 
commands. */ -class MVKStencilReferenceValueCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** Sets the stencil references during pipeline binding. */ - void setReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo); - - /** Sets the stencil state from explicit dynamic command. */ - void setReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference); - - /** Constructs this instance for the specified command encoder. */ - MVKStencilReferenceValueCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - uint32_t _frontFaceValue = 0; - uint32_t _backFaceValue = 0; +struct MVKDepthBias { + float depthBiasConstantFactor; + float depthBiasSlopeFactor; + float depthBiasClamp; }; - -#pragma mark - -#pragma mark MVKDepthBiasCommandEncoderState - -/** Holds encoder state established by depth bias commands. */ -class MVKDepthBiasCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** Sets the depth bias during pipeline binding. */ - void setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo); - - /** Sets the depth bias dynamically. */ - void setDepthBias(float depthBiasConstantFactor, - float depthBiasSlopeFactor, - float depthBiasClamp); - - /** Constructs this instance for the specified command encoder. 
*/ - MVKDepthBiasCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - float _depthBiasConstantFactor = 0; - float _depthBiasClamp = 0; - float _depthBiasSlopeFactor = 0; - bool _isEnabled = false; +struct MVKStencilReference { + uint32_t frontFaceValue; + uint32_t backFaceValue; }; +struct MVKMTLViewports { + MTLViewport viewports[kMVKMaxViewportScissorCount]; + uint32_t viewportCount; +}; -#pragma mark - -#pragma mark MVKBlendColorCommandEncoderState - -/** Holds encoder state established by blend color commands. */ -class MVKBlendColorCommandEncoderState : public MVKCommandEncoderState { +struct MVKMTLScissors { + MTLScissorRect scissors[kMVKMaxViewportScissorCount]; + uint32_t scissorCount; +}; +/** Holds encoder state established by various rendering state commands. */ +class MVKRenderingCommandEncoderState : public MVKCommandEncoderState { public: + void setCullMode(VkCullModeFlags cullMode, bool isDynamic); - /** Sets the blend color, either as part of pipeline binding, or dynamically. */ - void setBlendColor(float red, float green, - float blue, float alpha, - bool isDynamic); + void setFrontFace(VkFrontFace frontFace, bool isDynamic); - /** Constructs this instance for the specified command encoder. 
*/ - MVKBlendColorCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} + void setPolygonMode(VkPolygonMode polygonMode, bool isDynamic); + + void setBlendConstants(float blendConstants[4], bool isDynamic); + + void setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo); + void setDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor, float depthBiasClamp); + void setDepthBiasEnable(VkBool32 depthBiasEnable); + void setDepthClipEnable(bool depthClip, bool isDynamic); + + void setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo); + void setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference); + + void setViewports(const MVKArrayRef viewports, uint32_t firstViewport, bool isDynamic); + void setScissors(const MVKArrayRef scissors, uint32_t firstScissor, bool isDynamic); + + void setPrimitiveRestartEnable(VkBool32 primitiveRestartEnable, bool isDynamic); + + void setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic); + + void setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic); + MTLPrimitiveType getPrimitiveType(); + + void setPatchControlPoints(uint32_t patchControlPoints, bool isDynamic); + uint32_t getPatchControlPoints(); + + void setSampleLocationsEnable(VkBool32 sampleLocationsEnable, bool isDynamic); + void setSampleLocations(const MVKArrayRef sampleLocations, bool isDynamic); + MVKArrayRef getSamplePositions(); + + void beginMetalRenderPass() override; + bool needsMetalRenderPassRestart(); + + bool isDirty(MVKRenderStateType state); + void markDirty() override; + + MVKRenderingCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} protected: - void encodeImpl(uint32_t stage) override; + void encodeImpl(uint32_t stage) override; + bool isDrawingTriangles(); + template void setContent(T* iVarAry, T* pVal, MVKRenderStateType state, bool isDynamic) { + auto* 
pIVar = &iVarAry[isDynamic ? StateScope::Dynamic : StateScope::Static]; + if( !mvkAreEqual(pVal, pIVar) ) { + *pIVar = *pVal; + _dirtyStates.enable(state); + _modifiedStates.enable(state); + MVKCommandEncoderState::markDirty(); // Avoid local markDirty() as it marks all states dirty. + } + } - float _red = 0; - float _green = 0; - float _blue = 0; - float _alpha = 0; + MVKSmallVector _mtlSampleLocations[StateScope::Count] = {}; + MVKMTLViewports _mtlViewports[StateScope::Count] = {}; + MVKMTLScissors _mtlScissors[StateScope::Count] = {}; + MVKColor32 _mtlBlendConstants[StateScope::Count] = {}; + MVKDepthBias _mtlDepthBias[StateScope::Count] = {}; + MVKStencilReference _mtlStencilReference[StateScope::Count] = {}; + MTLCullMode _mtlCullMode[StateScope::Count] = { MTLCullModeNone, MTLCullModeNone }; + MTLWinding _mtlFrontFace[StateScope::Count] = { MTLWindingClockwise, MTLWindingClockwise }; + MTLPrimitiveType _mtlPrimitiveTopology[StateScope::Count] = { MTLPrimitiveTypePoint, MTLPrimitiveTypePoint }; + MTLDepthClipMode _mtlDepthClipEnable[StateScope::Count] = { MTLDepthClipModeClip, MTLDepthClipModeClip }; + MTLTriangleFillMode _mtlPolygonMode[StateScope::Count] = { MTLTriangleFillModeFill, MTLTriangleFillModeFill }; + uint32_t _mtlPatchControlPoints[StateScope::Count] = {}; + MVKRenderStateFlags _dirtyStates; + MVKRenderStateFlags _modifiedStates; + bool _mtlSampleLocationsEnable[StateScope::Count] = {}; + bool _mtlDepthBiasEnable[StateScope::Count] = {}; + bool _mtlPrimitiveRestartEnable[StateScope::Count] = {}; + bool _mtlRasterizerDiscardEnable[StateScope::Count] = {}; + bool _cullBothFaces[StateScope::Count] = {}; }; @@ -457,7 +453,7 @@ protected: contents[index] = value; } - void assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef texBindings); + void assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef texBindings); void encodeMetalArgumentBuffer(MVKShaderStage stage); virtual void 
bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) = 0; @@ -547,7 +543,7 @@ public: const char* pStageName, bool fullImageViewSwizzle, std::function bindBuffer, - std::function)> bindImplicitBuffer, + std::function)> bindImplicitBuffer, std::function bindTexture, std::function bindSampler); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 044dd96e..c7246a51 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -25,20 +25,31 @@ using namespace std; +#define shouldUpdateFace(face) mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_##face##_BIT) + #pragma mark - #pragma mark MVKCommandEncoderState MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEncoder->getVulkanAPIObject(); }; + MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); } +bool MVKCommandEncoderState::isDynamicState(MVKRenderStateType state) { + auto* gpl = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + return !gpl || gpl->isDynamicState(state); +} + #pragma mark - #pragma mark MVKPipelineCommandEncoderState void MVKPipelineCommandEncoderState::bindPipeline(MVKPipeline* pipeline) { - if (pipeline != _pipeline) markDirty(); - _pipeline = pipeline; + if (pipeline == _pipeline) { return; } + + _pipeline = pipeline; + _pipeline->wasBound(_cmdEncoder); + markDirty(); } MVKPipeline* MVKPipelineCommandEncoderState::getPipeline() { return _pipeline; } @@ -51,112 +62,6 @@ void MVKPipelineCommandEncoderState::encodeImpl(uint32_t stage) { } -#pragma mark - -#pragma mark MVKViewportCommandEncoderState - -void MVKViewportCommandEncoderState::setViewports(const MVKArrayRef viewports, - uint32_t firstViewport, - bool isSettingDynamically) { - - size_t vpCnt = viewports.size; - uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; - if ((firstViewport + vpCnt > 
maxViewports) || - (firstViewport >= maxViewports) || - (isSettingDynamically && vpCnt == 0)) - return; - - auto& usingViewports = isSettingDynamically ? _dynamicViewports : _viewports; - - if (firstViewport + vpCnt > usingViewports.size()) { - usingViewports.resize(firstViewport + vpCnt); - } - - bool dirty; - bool mustSetDynamically = _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_VIEWPORT); - if (isSettingDynamically || (!mustSetDynamically && vpCnt > 0)) { - dirty = memcmp(&usingViewports[firstViewport], &viewports[0], vpCnt * sizeof(VkViewport)) != 0; - std::copy(viewports.begin(), viewports.end(), usingViewports.begin() + firstViewport); - } else { - dirty = !usingViewports.empty(); - usingViewports.clear(); - } - - if (dirty) markDirty(); -} - -void MVKViewportCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - auto& usingViewports = _viewports.size() > 0 ? _viewports : _dynamicViewports; - if (usingViewports.empty()) { return; } - - if (_cmdEncoder->_pDeviceFeatures->multiViewport) { - size_t vpCnt = usingViewports.size(); - MTLViewport mtlViewports[vpCnt]; - for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) { - mtlViewports[vpIdx] = mvkMTLViewportFromVkViewport(usingViewports[vpIdx]); - } -#if MVK_MACOS_OR_IOS - [_cmdEncoder->_mtlRenderEncoder setViewports: mtlViewports count: vpCnt]; -#endif - } else { - [_cmdEncoder->_mtlRenderEncoder setViewport: mvkMTLViewportFromVkViewport(usingViewports[0])]; - } -} - - -#pragma mark - -#pragma mark MVKScissorCommandEncoderState - -void MVKScissorCommandEncoderState::setScissors(const MVKArrayRef scissors, - uint32_t firstScissor, - bool isSettingDynamically) { - - size_t sCnt = scissors.size; - uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; - if ((firstScissor + sCnt > maxScissors) || - (firstScissor >= maxScissors) || - (isSettingDynamically && sCnt == 0)) - return; - - auto& usingScissors = isSettingDynamically ? 
_dynamicScissors : _scissors; - - if (firstScissor + sCnt > usingScissors.size()) { - usingScissors.resize(firstScissor + sCnt); - } - - bool dirty; - bool mustSetDynamically = _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_SCISSOR); - if (isSettingDynamically || (!mustSetDynamically && sCnt > 0)) { - dirty = memcmp(&usingScissors[firstScissor], &scissors[0], sCnt * sizeof(VkRect2D)) != 0; - std::copy(scissors.begin(), scissors.end(), usingScissors.begin() + firstScissor); - } else { - dirty = !usingScissors.empty(); - usingScissors.clear(); - } - - if (dirty) markDirty(); -} - -void MVKScissorCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - auto& usingScissors = _scissors.size() > 0 ? _scissors : _dynamicScissors; - if (usingScissors.empty()) { return; } - - if (_cmdEncoder->_pDeviceFeatures->multiViewport) { - size_t sCnt = usingScissors.size(); - MTLScissorRect mtlScissors[sCnt]; - for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) { - mtlScissors[sIdx] = mvkMTLScissorRectFromVkRect2D(_cmdEncoder->clipToRenderArea(usingScissors[sIdx])); - } -#if MVK_MACOS_OR_IOS - [_cmdEncoder->_mtlRenderEncoder setScissorRects: mtlScissors count: sCnt]; -#endif - } else { - [_cmdEncoder->_mtlRenderEncoder setScissorRect: mvkMTLScissorRectFromVkRect2D(_cmdEncoder->clipToRenderArea(usingScissors[0]))]; - } -} - - #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -165,7 +70,7 @@ void MVKPushConstantsCommandEncoderState:: setPushConstants(uint32_t offset, MVK // Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size. // Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader. 
size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment; - size_t pcSize = pushConstants.size; + size_t pcSize = pushConstants.size(); size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign); mvkEnsureSize(_pushConstants, pcBuffSize); copy(pushConstants.begin(), pushConstants.end(), _pushConstants.begin() + offset); @@ -245,7 +150,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { } bool MVKPushConstantsCommandEncoderState::isTessellating() { - MVKGraphicsPipeline* gp = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* gp = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); return gp ? gp->isTessellationPipeline() : false; } @@ -254,74 +159,84 @@ bool MVKPushConstantsCommandEncoderState::isTessellating() { #pragma mark MVKDepthStencilCommandEncoderState void MVKDepthStencilCommandEncoderState:: setDepthStencilState(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { - auto oldData = _depthStencilData; + auto& depthEnabled = _depthTestEnabled[StateScope::Static]; + auto oldDepthEnabled = depthEnabled; + depthEnabled = static_cast(vkDepthStencilInfo.depthTestEnable); - if (vkDepthStencilInfo.depthTestEnable) { - _depthStencilData.depthCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkDepthStencilInfo.depthCompareOp); - _depthStencilData.depthWriteEnabled = vkDepthStencilInfo.depthWriteEnable; - } else { - _depthStencilData.depthCompareFunction = kMVKMTLDepthStencilDescriptorDataDefault.depthCompareFunction; - _depthStencilData.depthWriteEnabled = kMVKMTLDepthStencilDescriptorDataDefault.depthWriteEnabled; - } + auto& dsData = _depthStencilData[StateScope::Static]; + auto oldData = dsData; + dsData.depthCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkDepthStencilInfo.depthCompareOp); + dsData.depthWriteEnabled = vkDepthStencilInfo.depthWriteEnable; - setStencilState(_depthStencilData.frontFaceStencilData, vkDepthStencilInfo.front, 
vkDepthStencilInfo.stencilTestEnable); - setStencilState(_depthStencilData.backFaceStencilData, vkDepthStencilInfo.back, vkDepthStencilInfo.stencilTestEnable); + dsData.stencilTestEnabled = static_cast(vkDepthStencilInfo.stencilTestEnable); + setStencilState(dsData.frontFaceStencilData, vkDepthStencilInfo.front); + setStencilState(dsData.backFaceStencilData, vkDepthStencilInfo.back); - if (!(oldData == _depthStencilData)) markDirty(); + if (depthEnabled != oldDepthEnabled || dsData != oldData) { markDirty(); } } -void MVKDepthStencilCommandEncoderState::setStencilState(MVKMTLStencilDescriptorData& stencilInfo, - const VkStencilOpState& vkStencil, - bool enabled) { - if ( !enabled ) { - stencilInfo = kMVKMTLStencilDescriptorDataDefault; - return; - } - - stencilInfo.enabled = true; - stencilInfo.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkStencil.compareOp); - stencilInfo.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.failOp); - stencilInfo.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.depthFailOp); - stencilInfo.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.passOp); - - if ( !_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) ) { - stencilInfo.readMask = vkStencil.compareMask; - } - if ( !_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) ) { - stencilInfo.writeMask = vkStencil.writeMask; - } +void MVKDepthStencilCommandEncoderState::setStencilState(MVKMTLStencilDescriptorData& sData, + const VkStencilOpState& vkStencil) { + sData.readMask = vkStencil.compareMask; + sData.writeMask = vkStencil.writeMask; + sData.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkStencil.compareOp); + sData.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.failOp); + sData.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.depthFailOp); + sData.depthStencilPassOperation = 
mvkMTLStencilOperationFromVkStencilOp(vkStencil.passOp); +} + +void MVKDepthStencilCommandEncoderState::setDepthTestEnable(VkBool32 depthTestEnable) { + setContent(_depthTestEnabled[StateScope::Dynamic], static_cast(depthTestEnable)); +} + +void MVKDepthStencilCommandEncoderState::setDepthWriteEnable(VkBool32 depthWriteEnable) { + setContent(_depthStencilData[StateScope::Dynamic].depthWriteEnabled, static_cast(depthWriteEnable)); +} + +void MVKDepthStencilCommandEncoderState::setDepthCompareOp(VkCompareOp depthCompareOp) { + setContent(_depthStencilData[StateScope::Dynamic].depthCompareFunction, + (uint8_t)mvkMTLCompareFunctionFromVkCompareOp(depthCompareOp)); +} + +void MVKDepthStencilCommandEncoderState::setStencilTestEnable(VkBool32 stencilTestEnable) { + setContent(_depthStencilData[StateScope::Dynamic].stencilTestEnabled, static_cast(stencilTestEnable)); +} + +void MVKDepthStencilCommandEncoderState::setStencilOp(MVKMTLStencilDescriptorData& sData, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + auto oldData = sData; + sData.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(compareOp); + sData.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(failOp); + sData.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(depthFailOp); + sData.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(passOp); + if (sData != oldData) { markDirty(); } +} + +void MVKDepthStencilCommandEncoderState::setStencilOp(VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + auto& dsData = _depthStencilData[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { setStencilOp(dsData.frontFaceStencilData, failOp, passOp, depthFailOp, compareOp); } + if (shouldUpdateFace(BACK)) { setStencilOp(dsData.backFaceStencilData, failOp, passOp, depthFailOp, compareOp); } } -// We don't check for dynamic state here, because if this 
is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. void MVKDepthStencilCommandEncoderState::setStencilCompareMask(VkStencilFaceFlags faceMask, - uint32_t stencilCompareMask) { - auto oldData = _depthStencilData; - - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) { - _depthStencilData.frontFaceStencilData.readMask = stencilCompareMask; - } - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) { - _depthStencilData.backFaceStencilData.readMask = stencilCompareMask; - } - - if (!(oldData == _depthStencilData)) markDirty(); + uint32_t stencilCompareMask) { + auto& dsData = _depthStencilData[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { setContent(dsData.frontFaceStencilData.readMask, stencilCompareMask); } + if (shouldUpdateFace(BACK)) { setContent(dsData.backFaceStencilData.readMask, stencilCompareMask); } } -// We don't check for dynamic state here, because if this is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. 
void MVKDepthStencilCommandEncoderState::setStencilWriteMask(VkStencilFaceFlags faceMask, - uint32_t stencilWriteMask) { - auto oldData = _depthStencilData; - - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) { - _depthStencilData.frontFaceStencilData.writeMask = stencilWriteMask; - } - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) { - _depthStencilData.backFaceStencilData.writeMask = stencilWriteMask; - } - - if (!(oldData == _depthStencilData)) markDirty(); + uint32_t stencilWriteMask) { + auto& dsData = _depthStencilData[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { setContent(dsData.frontFaceStencilData.writeMask, stencilWriteMask); } + if (shouldUpdateFace(BACK)) { setContent(dsData.backFaceStencilData.writeMask, stencilWriteMask); } } void MVKDepthStencilCommandEncoderState::beginMetalRenderPass() { @@ -337,132 +252,349 @@ void MVKDepthStencilCommandEncoderState::beginMetalRenderPass() { if (_hasStencilAttachment != prevHasStencilAttachment) { markDirty(); } } +// Combine static and dynamic depth/stencil data void MVKDepthStencilCommandEncoderState::encodeImpl(uint32_t stage) { - auto cmdEncPool = _cmdEncoder->getCommandEncodingPool(); - switch (stage) { - case kMVKGraphicsStageRasterization: { - // If renderpass does not have a depth or a stencil attachment, disable corresponding test - MVKMTLDepthStencilDescriptorData adjustedDSData = _depthStencilData; - adjustedDSData.disable(!_hasDepthAttachment, !_hasStencilAttachment); - [_cmdEncoder->_mtlRenderEncoder setDepthStencilState: cmdEncPool->getMTLDepthStencilState(adjustedDSData)]; - break; - } - default: // Do nothing on other stages - break; + if (stage != kMVKGraphicsStageRasterization) { return; } + + MVKMTLDepthStencilDescriptorData dsData; + + if (_hasDepthAttachment && getContent(_depthTestEnabled, DepthTestEnable)) { + dsData.depthCompareFunction = getData(DepthCompareOp).depthCompareFunction; + dsData.depthWriteEnabled = 
getData(DepthWriteEnable).depthWriteEnabled; + } + + if (_hasStencilAttachment && getData(StencilTestEnable).stencilTestEnabled) { + dsData.stencilTestEnabled = true; + + auto& frontFace = dsData.frontFaceStencilData; + auto& backFace = dsData.backFaceStencilData; + + const auto& srcRM = getData(StencilCompareMask); + frontFace.readMask = srcRM.frontFaceStencilData.readMask; + backFace.readMask = srcRM.backFaceStencilData.readMask; + + const auto& srcWM = getData(StencilWriteMask); + frontFace.writeMask = srcWM.frontFaceStencilData.writeMask; + backFace.writeMask = srcWM.backFaceStencilData.writeMask; + + const auto& srcSOp = getData(StencilOp); + frontFace.stencilCompareFunction = srcSOp.frontFaceStencilData.stencilCompareFunction; + frontFace.stencilFailureOperation = srcSOp.frontFaceStencilData.stencilFailureOperation; + frontFace.depthFailureOperation = srcSOp.frontFaceStencilData.depthFailureOperation; + frontFace.depthStencilPassOperation = srcSOp.frontFaceStencilData.depthStencilPassOperation; + + backFace.stencilCompareFunction = srcSOp.backFaceStencilData.stencilCompareFunction; + backFace.stencilFailureOperation = srcSOp.backFaceStencilData.stencilFailureOperation; + backFace.depthFailureOperation = srcSOp.backFaceStencilData.depthFailureOperation; + backFace.depthStencilPassOperation = srcSOp.backFaceStencilData.depthStencilPassOperation; + } + + [_cmdEncoder->_mtlRenderEncoder setDepthStencilState: _cmdEncoder->getCommandEncodingPool()->getMTLDepthStencilState(dsData)]; +} + + +#pragma mark - +#pragma mark MVKRenderingCommandEncoderState + +#define getMTLContent(state) getContent(_mtl##state, state) +#define setMTLContent(state) setContent(_mtl##state, &mtl##state, state, isDynamic) + +void MVKRenderingCommandEncoderState::setCullMode(VkCullModeFlags cullMode, bool isDynamic) { + auto mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(cullMode); + setMTLContent(CullMode); + _cullBothFaces[isDynamic ? 
StateScope::Dynamic : StateScope::Static] = (cullMode == VK_CULL_MODE_FRONT_AND_BACK); +} + +void MVKRenderingCommandEncoderState::setFrontFace(VkFrontFace frontFace, bool isDynamic) { + auto mtlFrontFace = mvkMTLWindingFromVkFrontFace(frontFace); + setMTLContent(FrontFace); +} + +void MVKRenderingCommandEncoderState::setPolygonMode(VkPolygonMode polygonMode, bool isDynamic) { + auto mtlPolygonMode = mvkMTLTriangleFillModeFromVkPolygonMode(polygonMode); + setMTLContent(PolygonMode); +} + +void MVKRenderingCommandEncoderState::setBlendConstants(float blendConstants[4], bool isDynamic) { + MVKColor32 mtlBlendConstants; + mvkCopy(mtlBlendConstants.float32, blendConstants, 4); + setMTLContent(BlendConstants); +} + +void MVKRenderingCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) { + bool isDynamic = false; + + bool mtlDepthBiasEnable = static_cast(vkRasterInfo.depthBiasEnable); + setMTLContent(DepthBiasEnable); + + MVKDepthBias mtlDepthBias = { + .depthBiasConstantFactor = vkRasterInfo.depthBiasConstantFactor, + .depthBiasSlopeFactor = vkRasterInfo.depthBiasSlopeFactor, + .depthBiasClamp = vkRasterInfo.depthBiasClamp + }; + setMTLContent(DepthBias); +} + +void MVKRenderingCommandEncoderState::setDepthBias(float depthBiasConstantFactor, + float depthBiasSlopeFactor, + float depthBiasClamp) { + bool isDynamic = true; + MVKDepthBias mtlDepthBias = { + .depthBiasConstantFactor = depthBiasConstantFactor, + .depthBiasSlopeFactor = depthBiasSlopeFactor, + .depthBiasClamp = depthBiasClamp + }; + setMTLContent(DepthBias); +} + +void MVKRenderingCommandEncoderState::setDepthBiasEnable(VkBool32 depthBiasEnable) { + bool isDynamic = true; + bool mtlDepthBiasEnable = static_cast(depthBiasEnable); + setMTLContent(DepthBiasEnable); +} + +void MVKRenderingCommandEncoderState::setDepthClipEnable(bool depthClip, bool isDynamic) { + auto mtlDepthClipEnable = depthClip ? 
MTLDepthClipModeClip : MTLDepthClipModeClamp; + setMTLContent(DepthClipEnable); +} + +void MVKRenderingCommandEncoderState::setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { + bool isDynamic = false; + MVKStencilReference mtlStencilReference = { + .frontFaceValue = vkDepthStencilInfo.front.reference, + .backFaceValue = vkDepthStencilInfo.back.reference + }; + setMTLContent(StencilReference); +} + +void MVKRenderingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference) { + bool isDynamic = true; + MVKStencilReference mtlStencilReference = _mtlStencilReference[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { mtlStencilReference.frontFaceValue = stencilReference; } + if (shouldUpdateFace(BACK)) { mtlStencilReference.backFaceValue = stencilReference; } + setMTLContent(StencilReference); +} + +void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef viewports, + uint32_t firstViewport, + bool isDynamic) { + uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; + if (firstViewport >= maxViewports) { return; } + + MVKMTLViewports mtlViewports = isDynamic ? _mtlViewports[StateScope::Dynamic] : _mtlViewports[StateScope::Static]; + size_t vpCnt = min((uint32_t)viewports.size(), maxViewports - firstViewport); + for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) { + mtlViewports.viewports[firstViewport + vpIdx] = mvkMTLViewportFromVkViewport(viewports[vpIdx]); + mtlViewports.viewportCount = max(mtlViewports.viewportCount, vpIdx + 1); + } + setMTLContent(Viewports); +} + +void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef scissors, + uint32_t firstScissor, + bool isDynamic) { + uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; + if (firstScissor >= maxScissors) { return; } + + MVKMTLScissors mtlScissors = isDynamic ? 
_mtlScissors[StateScope::Dynamic] : _mtlScissors[StateScope::Static]; + size_t sCnt = min((uint32_t)scissors.size(), maxScissors - firstScissor); + for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) { + mtlScissors.scissors[firstScissor + sIdx] = mvkMTLScissorRectFromVkRect2D(scissors[sIdx]); + mtlScissors.scissorCount = max(mtlScissors.scissorCount, sIdx + 1); + } + setMTLContent(Scissors); +} + +void MVKRenderingCommandEncoderState::setPrimitiveRestartEnable(VkBool32 primitiveRestartEnable, bool isDynamic) { + bool mtlPrimitiveRestartEnable = static_cast(primitiveRestartEnable); + setMTLContent(PrimitiveRestartEnable); +} + +void MVKRenderingCommandEncoderState::setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic) { + bool mtlRasterizerDiscardEnable = static_cast(rasterizerDiscardEnable); + setMTLContent(RasterizerDiscardEnable); +} + +// This value is retrieved, not encoded, so don't mark this encoder as dirty. +void MVKRenderingCommandEncoderState::setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic) { + getContent(_mtlPrimitiveTopology, isDynamic) = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(topology); +} + +MTLPrimitiveType MVKRenderingCommandEncoderState::getPrimitiveType() { + return getMTLContent(PrimitiveTopology); +} + +bool MVKRenderingCommandEncoderState::isDrawingTriangles() { + switch (getPrimitiveType()) { + case MTLPrimitiveTypeTriangle: return true; + case MTLPrimitiveTypeTriangleStrip: return true; + default: return false; } } - -#pragma mark - -#pragma mark MVKStencilReferenceValueCommandEncoderState - -void MVKStencilReferenceValueCommandEncoderState:: setReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { - - // If ref values are to be set dynamically, don't set them here. 
- if (_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { return; } - - if (_frontFaceValue != vkDepthStencilInfo.front.reference || _backFaceValue != vkDepthStencilInfo.back.reference) - markDirty(); - - _frontFaceValue = vkDepthStencilInfo.front.reference; - _backFaceValue = vkDepthStencilInfo.back.reference; +// This value is retrieved, not encoded, so don't mark this encoder as dirty. +void MVKRenderingCommandEncoderState::setPatchControlPoints(uint32_t patchControlPoints, bool isDynamic) { + getContent(_mtlPatchControlPoints, isDynamic) = patchControlPoints; } -// We don't check for dynamic state here, because if this is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. -void MVKStencilReferenceValueCommandEncoderState::setReferenceValues(VkStencilFaceFlags faceMask, - uint32_t stencilReference) { - bool dirty = false; - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) { - dirty |= (_frontFaceValue != stencilReference); - _frontFaceValue = stencilReference; - } - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) { - dirty |= (_backFaceValue != stencilReference); - _backFaceValue = stencilReference; - } - if (dirty) markDirty(); +uint32_t MVKRenderingCommandEncoderState::getPatchControlPoints() { + return getMTLContent(PatchControlPoints); } -void MVKStencilReferenceValueCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - [_cmdEncoder->_mtlRenderEncoder setStencilFrontReferenceValue: _frontFaceValue - backReferenceValue: _backFaceValue]; +void MVKRenderingCommandEncoderState::setSampleLocationsEnable(VkBool32 sampleLocationsEnable, bool isDynamic) { + bool slEnbl = static_cast(sampleLocationsEnable); + auto& mtlSampLocEnbl = getContent(_mtlSampleLocationsEnable, isDynamic); + + if (slEnbl == mtlSampLocEnbl) { return; } + + mtlSampLocEnbl = slEnbl; + + // This value is 
retrieved, not encoded, so don't mark this encoder as dirty. + _dirtyStates.enable(SampleLocationsEnable); } +void MVKRenderingCommandEncoderState::setSampleLocations(MVKArrayRef sampleLocations, bool isDynamic) { + auto& mtlSampPosns = getContent(_mtlSampleLocations, isDynamic); + size_t slCnt = sampleLocations.size(); -#pragma mark - -#pragma mark MVKDepthBiasCommandEncoderState + // When comparing new vs current, make use of fact that MTLSamplePosition & VkSampleLocationEXT have same memory footprint. + if (slCnt == mtlSampPosns.size() && + mvkAreEqual((MTLSamplePosition*)sampleLocations.data(), + mtlSampPosns.data(), slCnt)) { + return; + } -void MVKDepthBiasCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) { + mtlSampPosns.clear(); + for (uint32_t slIdx = 0; slIdx < slCnt; slIdx++) { + auto& sl = sampleLocations[slIdx]; + mtlSampPosns.push_back(MTLSamplePositionMake(mvkClamp(sl.x, kMVKMinSampleLocationCoordinate, kMVKMaxSampleLocationCoordinate), + mvkClamp(sl.y, kMVKMinSampleLocationCoordinate, kMVKMaxSampleLocationCoordinate))); + } - auto wasEnabled = _isEnabled; - _isEnabled = vkRasterInfo.depthBiasEnable; - - // If ref values are to be set dynamically, don't set them here. - if (_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_DEPTH_BIAS)) { return; } - - if (_isEnabled != wasEnabled || _depthBiasConstantFactor != vkRasterInfo.depthBiasConstantFactor - || _depthBiasSlopeFactor != vkRasterInfo.depthBiasSlopeFactor || _depthBiasClamp != vkRasterInfo.depthBiasClamp) { - - markDirty(); - _depthBiasConstantFactor = vkRasterInfo.depthBiasConstantFactor; - _depthBiasSlopeFactor = vkRasterInfo.depthBiasSlopeFactor; - _depthBiasClamp = vkRasterInfo.depthBiasClamp; - } + // This value is retrieved, not encoded, so don't mark this encoder as dirty. 
+ _dirtyStates.enable(SampleLocations); } -// We don't check for dynamic state here, because if this is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. -void MVKDepthBiasCommandEncoderState::setDepthBias(float depthBiasConstantFactor, - float depthBiasSlopeFactor, - float depthBiasClamp) { - - if (_depthBiasConstantFactor != depthBiasConstantFactor || _depthBiasSlopeFactor != depthBiasSlopeFactor - || _depthBiasClamp != depthBiasClamp) { - - markDirty(); - _depthBiasConstantFactor = depthBiasConstantFactor; - _depthBiasSlopeFactor = depthBiasSlopeFactor; - _depthBiasClamp = depthBiasClamp; - } +MVKArrayRef MVKRenderingCommandEncoderState::getSamplePositions() { + return getMTLContent(SampleLocationsEnable) ? getMTLContent(SampleLocations).contents() : MVKArrayRef(); } -void MVKDepthBiasCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - if (_isEnabled) { - [_cmdEncoder->_mtlRenderEncoder setDepthBias: _depthBiasConstantFactor - slopeScale: _depthBiasSlopeFactor - clamp: _depthBiasClamp]; - } else { - [_cmdEncoder->_mtlRenderEncoder setDepthBias: 0 slopeScale: 0 clamp: 0]; - } +// Return whether state is dirty, and mark it not dirty +bool MVKRenderingCommandEncoderState::isDirty(MVKRenderStateType state) { + bool rslt = _dirtyStates.isEnabled(state); + _dirtyStates.disable(state); + return rslt; } +// Don't force sample location & sample location enable to become dirty if they weren't already, because +// this may cause needsMetalRenderPassRestart() to trigger an unnecessary Metal renderpass restart. 
+void MVKRenderingCommandEncoderState::markDirty() { + MVKCommandEncoderState::markDirty(); -#pragma mark - -#pragma mark MVKBlendColorCommandEncoderState + bool wasSLDirty = _dirtyStates.isEnabled(SampleLocations); + bool wasSLEnblDirty = _dirtyStates.isEnabled(SampleLocationsEnable); + + _dirtyStates.enableAll(); -void MVKBlendColorCommandEncoderState::setBlendColor(float red, float green, - float blue, float alpha, - bool isDynamic) { - // Abort if we are using dynamic, but call is not dynamic. - if ( !isDynamic && _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS) ) { return; } - - if (_red != red || _green != green || _blue != blue || _alpha != alpha) { - markDirty(); - _red = red; - _green = green; - _blue = blue; - _alpha = alpha; - } + _dirtyStates.set(SampleLocations, wasSLDirty); + _dirtyStates.set(SampleLocationsEnable, wasSLEnblDirty); } -void MVKBlendColorCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - [_cmdEncoder->_mtlRenderEncoder setBlendColorRed: _red green: _green blue: _blue alpha: _alpha]; +// Don't call parent beginMetalRenderPass() because it +// will call local markDirty() which is too aggressive. +void MVKRenderingCommandEncoderState::beginMetalRenderPass() { + if (_isModified) { + _dirtyStates = _modifiedStates; + MVKCommandEncoderState::markDirty(); + } } +// Don't use || on isDirty calls, to ensure they both get called, so that the dirty flag of each will be cleared. 
+bool MVKRenderingCommandEncoderState::needsMetalRenderPassRestart() { + bool isSLDirty = isDirty(SampleLocations); + bool isSLEnblDirty = isDirty(SampleLocationsEnable); + return isSLDirty || isSLEnblDirty; +} + +#pragma mark Encoding + +void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { + if (stage != kMVKGraphicsStageRasterization) { return; } + + auto& rendEnc = _cmdEncoder->_mtlRenderEncoder; + + if (isDirty(PolygonMode)) { [rendEnc setTriangleFillMode: getMTLContent(PolygonMode)]; } + if (isDirty(CullMode)) { [rendEnc setCullMode: getMTLContent(CullMode)]; } + if (isDirty(FrontFace)) { [rendEnc setFrontFacingWinding: getMTLContent(FrontFace)]; } + if (isDirty(BlendConstants)) { + auto& bcFlt = getMTLContent(BlendConstants).float32; + [rendEnc setBlendColorRed: bcFlt[0] green: bcFlt[1] blue: bcFlt[2] alpha: bcFlt[3]]; + } + if (isDirty(DepthBiasEnable) || isDirty(DepthBias)) { + if (getMTLContent(DepthBiasEnable)) { + auto& db = getMTLContent(DepthBias); + [rendEnc setDepthBias: db.depthBiasConstantFactor + slopeScale: db.depthBiasSlopeFactor + clamp: db.depthBiasClamp]; + } else { + [rendEnc setDepthBias: 0 slopeScale: 0 clamp: 0]; + } + } + if (isDirty(DepthClipEnable) && _cmdEncoder->_pDeviceFeatures->depthClamp) { + [rendEnc setDepthClipMode: getMTLContent(DepthClipEnable)]; + } + + if (isDirty(StencilReference)) { + auto& sr = getMTLContent(StencilReference); + [rendEnc setStencilFrontReferenceValue: sr.frontFaceValue backReferenceValue: sr.backFaceValue]; + } + + // Validate + // In Metal, primitive restart cannot be disabled. + // Just issue warning here, as it is very likely the app is not actually expecting + // to use primitive restart at all, and is just setting this as a "just-in-case", + // and forcing an error here would be unexpected to the app (including CTS). 
+ auto mtlPrimType = getPrimitiveType(); + if (isDirty(PrimitiveRestartEnable) && !getMTLContent(PrimitiveRestartEnable) && + (mtlPrimType == MTLPrimitiveTypeTriangleStrip || mtlPrimType == MTLPrimitiveTypeLineStrip)) { + reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support disabling primitive restart."); + } + + if (isDirty(Viewports)) { + auto& mtlViewports = getMTLContent(Viewports); + if (_cmdEncoder->_pDeviceFeatures->multiViewport) { +#if MVK_MACOS_OR_IOS + [rendEnc setViewports: mtlViewports.viewports count: mtlViewports.viewportCount]; +#endif + } else { + [rendEnc setViewport: mtlViewports.viewports[0]]; + } + } + + // If rasterizing discard has been dynamically enabled, or culling has been dynamically + // set to front-and-back, emulate this by using zeroed scissor rectangles. + if (isDirty(Scissors)) { + static MTLScissorRect zeroRect = {}; + auto mtlScissors = getMTLContent(Scissors); + bool shouldDiscard = ((_mtlRasterizerDiscardEnable[StateScope::Dynamic] && isDynamicState(RasterizerDiscardEnable)) || + (isDrawingTriangles() && _cullBothFaces[StateScope::Dynamic] && isDynamicState(CullMode))); + for (uint32_t sIdx = 0; sIdx < mtlScissors.scissorCount; sIdx++) { + mtlScissors.scissors[sIdx] = shouldDiscard ? 
zeroRect : _cmdEncoder->clipToRenderArea(mtlScissors.scissors[sIdx]); + } + + if (_cmdEncoder->_pDeviceFeatures->multiViewport) { +#if MVK_MACOS_OR_IOS + [rendEnc setScissorRects: mtlScissors.scissors count: mtlScissors.scissorCount]; +#endif + } else { + [rendEnc setScissorRect: mtlScissors.scissors[0]]; + } + } +} + +#undef getMTLContent +#undef setMTLContent + #pragma mark - #pragma mark MVKResourcesCommandEncoderState @@ -488,7 +620,7 @@ void MVKResourcesCommandEncoderState::bindDescriptorSet(uint32_t descSetIndex, // Update dynamic buffer offsets uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexes().dynamicOffsetBufferIndex; uint32_t doCnt = descSet->getDynamicOffsetDescriptorCount(); - for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size; doIdx++) { + for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size(); doIdx++) { updateImplicitBuffer(_dynamicOffsets, baseDynOfstIdx + doIdx, dynamicOffsets[dynamicOffsetIndex++]); } @@ -594,7 +726,7 @@ void MVKResourcesCommandEncoderState::markDirty() { } // If a swizzle is needed for this stage, iterates all the bindings and logs errors for those that need texture swizzling. 
-void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef texBindings) { +void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef texBindings) { if (needsSwizzle) { for (auto& tb : texBindings) { VkComponentMapping vkcm = mvkUnpackSwizzle(tb.swizzle); @@ -684,7 +816,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag const char* pStageName, bool fullImageViewSwizzle, std::function bindBuffer, - std::function)> bindImplicitBuffer, + std::function)> bindImplicitBuffer, std::function bindTexture, std::function bindSampler) { @@ -772,11 +904,16 @@ void MVKGraphicsResourcesCommandEncoderState::markDirty() { } } +#if !MVK_XCODE_15 +static const NSUInteger MTLAttributeStrideStatic = NSUIntegerMax; +#endif + void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { - MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)getPipeline(); + auto* pipeline = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle; bool forTessellation = pipeline->isTessellationPipeline(); + bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride); if (stage == kMVKGraphicsStageVertex) { encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle, @@ -795,10 +932,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& 
b)->void { @@ -812,33 +949,24 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { } else if (!forTessellation && stage == kMVKGraphicsStageRasterization) { encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle, - [pipeline](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { + [pipeline, isDynamicVertexStride](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { // The app may have bound more vertex attribute buffers than used by the pipeline. // We must not bind those extra buffers to the shader because they might overwrite // any implicit buffers used by the pipeline. if (pipeline->isValidVertexBufferIndex(kMVKShaderStageVertex, b.index)) { - if (b.isInline) { - cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - b.mtlBytes, - b.size, - b.index); - } else { - if (b.justOffset) { - [cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset - atIndex: b.index]; - } else { - [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer - offset: b.offset - atIndex: b.index]; - } + cmdEncoder->encodeVertexAttributeBuffer(b, isDynamicVertexStride); - // Add any translated vertex bindings for this binding + // Add any translated vertex bindings for this binding + if ( !b.isInline ) { auto xltdVtxBindings = pipeline->getTranslatedVertexBindings(); for (auto& xltdBind : xltdVtxBindings) { if (b.index == pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.binding)) { - [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer - offset: b.offset + xltdBind.translationOffset - atIndex: pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)]; + MVKMTLBufferBinding bx = { + .mtlBuffer = b.mtlBuffer, + .offset = b.offset + xltdBind.translationOffset, + .stride = b.stride, + .index = static_cast(pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)) }; + cmdEncoder->encodeVertexAttributeBuffer(bx, isDynamicVertexStride); } } } @@ 
-846,10 +974,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { b.isDirty = true; // We haven't written it out, so leave dirty until next time. } }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -879,10 +1007,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -898,24 +1026,13 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { if (forTessellation && stage == kMVKGraphicsStageRasterization) { encodeBindings(kMVKShaderStageTessEval, "tessellation evaluation", fullImageViewSwizzle, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { - if (b.isInline) - cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - b.mtlBytes, - b.size, - b.index); - else if (b.justOffset) - [cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset - atIndex: b.index]; - else - [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer - offset: b.offset - atIndex: b.index]; + [isDynamicVertexStride](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { + cmdEncoder->encodeVertexAttributeBuffer(b, isDynamicVertexStride); }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const 
MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -945,10 +1062,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setFragmentBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h index f2cf1e66..e2325857 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h @@ -23,7 +23,7 @@ #include "MVKCommandEncodingPool.h" #include "MVKCommand.h" #include "MVKCmdPipeline.h" -#include "MVKCmdRenderPass.h" +#include "MVKCmdRendering.h" #include "MVKCmdDispatch.h" #include "MVKCmdDraw.h" #include "MVKCmdTransfer.h" @@ -82,7 +82,7 @@ public: * Returns a retained MTLCommandBuffer created from the indexed queue * within the queue family for which this command pool was created. */ - id getMTLCommandBuffer(uint32_t queueIndex); + id getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex); /** Release any held but unused memory back to the system. 
*/ void trim(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm index be4713f3..656740b0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm @@ -77,8 +77,8 @@ void MVKCommandPool::freeCommandBuffers(uint32_t commandBufferCount, } } -id MVKCommandPool::getMTLCommandBuffer(uint32_t queueIndex) { - return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(kMVKCommandUseEndCommandBuffer, true); +id MVKCommandPool::getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex) { + return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(cmdUse, true); } // Clear the command type pool member variables. diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h index 84fa37b6..99fcb384 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h @@ -210,27 +210,24 @@ namespace std { * change as early as possible. */ typedef struct MVKMTLStencilDescriptorData { - bool enabled; /**< Indicates whether stencil testing for this face is enabled. */ + uint32_t readMask; /**< The bit-mask to apply when comparing the stencil buffer value to the reference value. */ + uint32_t writeMask; /**< The bit-mask to apply when writing values to the stencil buffer. */ uint8_t stencilCompareFunction; /**< The stencil compare function (interpreted as MTLCompareFunction). */ uint8_t stencilFailureOperation; /**< The operation to take when the stencil test fails (interpreted as MTLStencilOperation). */ uint8_t depthFailureOperation; /**< The operation to take when the stencil test passes, but the depth test fails (interpreted as MTLStencilOperation). */ uint8_t depthStencilPassOperation; /**< The operation to take when both the stencil and depth tests pass (interpreted as MTLStencilOperation). 
*/ - uint32_t readMask; /**< The bit-mask to apply when comparing the stencil buffer value to the reference value. */ - uint32_t writeMask; /**< The bit-mask to apply when writing values to the stencil buffer. */ + + bool operator==(const MVKMTLStencilDescriptorData& rhs) const { return mvkAreEqual(this, &rhs); } + bool operator!=(const MVKMTLStencilDescriptorData& rhs) const { return !(*this == rhs); } MVKMTLStencilDescriptorData() { - - // Start with all zeros to ensure memory comparisons will work, - // even if the structure contains alignment gaps. - mvkClear(this); - - enabled = false; + mvkClear(this); // Clear all memory to ensure memory comparisons will work. + mvkEnableAllFlags(readMask); + mvkEnableAllFlags(writeMask); stencilCompareFunction = MTLCompareFunctionAlways; stencilFailureOperation = MTLStencilOperationKeep; depthFailureOperation = MTLStencilOperationKeep; depthStencilPassOperation = MTLStencilOperationKeep; - readMask = static_cast(~0); - writeMask = static_cast(~0); } } MVKMTLStencilDescriptorData; @@ -247,34 +244,32 @@ const MVKMTLStencilDescriptorData kMVKMTLStencilDescriptorDataDefault; * change as early as possible. */ typedef struct MVKMTLDepthStencilDescriptorData { - uint8_t depthCompareFunction; /**< The depth compare function (interpreted as MTLCompareFunction). */ - bool depthWriteEnabled; /**< Indicates whether depth writing is enabled. */ MVKMTLStencilDescriptorData frontFaceStencilData; MVKMTLStencilDescriptorData backFaceStencilData; + uint8_t depthCompareFunction; /**< The depth compare function (interpreted as MTLCompareFunction). */ + bool depthWriteEnabled; /**< Indicates whether depth writing is enabled. */ + bool stencilTestEnabled; /**< Indicates whether stencil testing is enabled. 
*/ bool operator==(const MVKMTLDepthStencilDescriptorData& rhs) const { return mvkAreEqual(this, &rhs); } + bool operator!=(const MVKMTLDepthStencilDescriptorData& rhs) const { return !(*this == rhs); } std::size_t hash() const { return mvkHash((uint64_t*)this, sizeof(*this) / sizeof(uint64_t)); } - - /** Disable depth and/or stencil testing. */ - void disable(bool disableDepth, bool disableStencil) { - if (disableDepth) { - depthCompareFunction = MTLCompareFunctionAlways; - depthWriteEnabled = false; - } - if (disableStencil) { - frontFaceStencilData = kMVKMTLStencilDescriptorDataDefault; - backFaceStencilData = kMVKMTLStencilDescriptorDataDefault; - } + void disableDepth() { + depthCompareFunction = MTLCompareFunctionAlways; + depthWriteEnabled = false; + } + void disableStencil() { + stencilTestEnabled = false; + frontFaceStencilData = kMVKMTLStencilDescriptorDataDefault; + backFaceStencilData = kMVKMTLStencilDescriptorDataDefault; } MVKMTLDepthStencilDescriptorData() { - // Start with all zeros to ensure memory comparisons will work, - // even if the structure contains alignment gaps. - mvkClear(this); - disable(true, true); + mvkClear(this); // Clear all memory to ensure memory comparisons will work. 
+ disableDepth(); + disableStencil(); } } __attribute__((aligned(sizeof(uint64_t)))) MVKMTLDepthStencilDescriptorData; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index b3003507..1e301734 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -286,7 +286,7 @@ id MVKCommandResourceFactory::newBlitFragFunction(MVKRPSKeyBlitImg& [msl appendLineMVK: @" constant TexSubrez& subRez [[buffer(0)]]) {"]; [msl appendLineMVK: @" FragmentOutputs out;"]; if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_DEPTH_BIT))) { - [msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod)).%c;", coordArg, sliceArg, swizzleArg[0]]; + [msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod));", coordArg, sliceArg]; [msl appendLineMVK]; } if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT))) { @@ -433,9 +433,10 @@ id MVKCommandResourceFactory::newMTLDepthStencilState(bool } id MVKCommandResourceFactory::newMTLDepthStencilState(MVKMTLDepthStencilDescriptorData& dsData) { - MTLStencilDescriptor* fsDesc = newMTLStencilDescriptor(dsData.frontFaceStencilData); // temp retain - MTLStencilDescriptor* bsDesc = newMTLStencilDescriptor(dsData.backFaceStencilData); // temp retain - MTLDepthStencilDescriptor* dsDesc = [MTLDepthStencilDescriptor new]; // temp retain + bool testStencil = dsData.stencilTestEnabled; + auto* fsDesc = testStencil ? newMTLStencilDescriptor(dsData.frontFaceStencilData) : nil; // temp retain + auto* bsDesc = testStencil ? 
newMTLStencilDescriptor(dsData.backFaceStencilData) : nil; // temp retain + auto* dsDesc = [MTLDepthStencilDescriptor new]; // temp retain dsDesc.depthCompareFunction = (MTLCompareFunction)dsData.depthCompareFunction; dsDesc.depthWriteEnabled = dsData.depthWriteEnabled; dsDesc.frontFaceStencil = fsDesc; @@ -443,16 +444,14 @@ id MVKCommandResourceFactory::newMTLDepthStencilState(MVKM id dss = [getMTLDevice() newDepthStencilStateWithDescriptor: dsDesc]; - [fsDesc release]; // temp release - [bsDesc release]; // temp release - [dsDesc release]; // temp release + [fsDesc release]; // temp release + [bsDesc release]; // temp release + [dsDesc release]; // temp release return dss; } MTLStencilDescriptor* MVKCommandResourceFactory::newMTLStencilDescriptor(MVKMTLStencilDescriptorData& sData) { - if ( !sData.enabled ) { return nil; } - MTLStencilDescriptor* sDesc = [MTLStencilDescriptor new]; // retained sDesc.stencilCompareFunction = (MTLCompareFunction)sData.stencilCompareFunction; sDesc.stencilFailureOperation = (MTLStencilOperation)sData.stencilFailureOperation; @@ -623,7 +622,7 @@ id MVKCommandResourceFactory::newFunctionNamed(const char* funcName NSString* nsFuncName = [[NSString alloc] initWithUTF8String: funcName]; // temp retained id mtlFunc = [_mtlLibrary newFunctionWithName: nsFuncName]; // retained [nsFuncName release]; // temp release - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); return mtlFunc; } @@ -636,7 +635,7 @@ id MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode, id mtlLib = [getMTLDevice() newLibraryWithSource: mslSrcCode options: getDevice()->getMTLCompileOptions() error: &err]; // temp retain - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + 
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); if (err) { reportError(VK_ERROR_INITIALIZATION_FAILED, @@ -645,7 +644,7 @@ id MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode, } else { startTime = _device->getPerformanceTimestamp(); mtlFunc = [mtlLib newFunctionWithName: funcName]; - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); } [mtlLib release]; // temp release @@ -689,7 +688,7 @@ void MVKCommandResourceFactory::initMTLLibrary() { options: getDevice()->getMTLCompileOptions() error: &err]; // retained MVKAssert( !err, "Could not compile command shaders (Error code %li):\n%s", (long)err.code, err.localizedDescription.UTF8String); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); } } diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def index f8f208b6..6703a0ba 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def +++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def @@ -81,18 +81,31 @@ MVK_CMD_TYPE_POOL(EndRenderPass) MVK_CMD_TYPE_POOLS_FROM_3_THRESHOLDS(BeginRendering, 1, 2, 4) MVK_CMD_TYPE_POOL(EndRendering) MVK_CMD_TYPE_POOL(SetSampleLocations) +MVK_CMD_TYPE_POOL(SetSampleLocationsEnable) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ExecuteCommands, 1) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindDescriptorSetsStatic, 1, 4) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BindDescriptorSetsDynamic, 4) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetViewport, 1) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetScissor, 1) -MVK_CMD_TYPE_POOL(SetLineWidth) -MVK_CMD_TYPE_POOL(SetDepthBias) MVK_CMD_TYPE_POOL(SetBlendConstants) 
-MVK_CMD_TYPE_POOL(SetDepthBounds) +MVK_CMD_TYPE_POOL(SetDepthBias) +MVK_CMD_TYPE_POOL(SetDepthBiasEnable) +MVK_CMD_TYPE_POOL(SetDepthTestEnable) +MVK_CMD_TYPE_POOL(SetDepthWriteEnable) +MVK_CMD_TYPE_POOL(SetDepthClipEnable) +MVK_CMD_TYPE_POOL(SetDepthCompareOp) +MVK_CMD_TYPE_POOL(SetStencilTestEnable) +MVK_CMD_TYPE_POOL(SetStencilOp) MVK_CMD_TYPE_POOL(SetStencilCompareMask) MVK_CMD_TYPE_POOL(SetStencilWriteMask) MVK_CMD_TYPE_POOL(SetStencilReference) +MVK_CMD_TYPE_POOL(SetCullMode) +MVK_CMD_TYPE_POOL(SetFrontFace) +MVK_CMD_TYPE_POOL(SetPrimitiveTopology) +MVK_CMD_TYPE_POOL(SetPrimitiveRestartEnable) +MVK_CMD_TYPE_POOL(SetPolygonMode) +MVK_CMD_TYPE_POOL(SetPatchControlPoints) +MVK_CMD_TYPE_POOL(SetRasterizerDiscardEnable) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2) MVK_CMD_TYPE_POOL(BindIndexBuffer) MVK_CMD_TYPE_POOL(Draw) diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h index 474a0a16..2be98144 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h @@ -99,7 +99,6 @@ public: protected: friend class MVKMTLBufferAllocation; - MVKBaseObject* getBaseObject() override { return this; }; MVKMTLBufferAllocation* newObject() override; void returnAllocationUnlocked(MVKMTLBufferAllocation* ba); void returnAllocation(MVKMTLBufferAllocation* ba); diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h index de72f06d..e0637011 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h @@ -67,6 +67,7 @@ typedef struct MVKMTLBufferBinding { union { id mtlBuffer = nil; id mtlResource; const void* mtlBytes; }; // aliases VkDeviceSize offset = 0; uint32_t size = 0; + uint32_t stride = 0; uint16_t index = 0; bool justOffset = false; bool isDirty = true; @@ -78,14 +79,16 @@ typedef struct MVKMTLBufferBinding 
{ void update(const MVKMTLBufferBinding &other) { if (mtlBuffer != other.mtlBuffer || size != other.size || other.isInline) { mtlBuffer = other.mtlBuffer; + offset = other.offset; size = other.size; + stride = other.stride; isInline = other.isInline; - offset = other.offset; justOffset = false; isOverridden = false; isDirty = true; - } else if (offset != other.offset) { + } else if (offset != other.offset || stride != other.stride) { offset = other.offset; + stride = other.stride; justOffset = !isOverridden && (!isDirty || justOffset); isOverridden = false; isDirty = true; @@ -112,8 +115,10 @@ typedef struct MVKPipelineBarrier { } MVKPipelineBarrierType; MVKPipelineBarrierType type = None; - VkAccessFlags srcAccessMask = 0; - VkAccessFlags dstAccessMask = 0; + VkPipelineStageFlags2 srcStageMask = 0; + VkAccessFlags2 srcAccessMask = 0; + VkPipelineStageFlags2 dstStageMask = 0; + VkAccessFlags2 dstAccessMask = 0; uint8_t srcQueueFamilyIndex = 0; uint8_t dstQueueFamilyIndex = 0; union { MVKBuffer* mvkBuffer = nullptr; MVKImage* mvkImage; MVKResource* mvkResource; }; @@ -136,15 +141,29 @@ typedef struct MVKPipelineBarrier { bool isBufferBarrier() { return type == Buffer; } bool isImageBarrier() { return type == Image; } - MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkMemoryBarrier2& vkBarrier) : type(Memory), + srcStageMask(vkBarrier.srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), dstAccessMask(vkBarrier.dstAccessMask) {} - MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier) : - type(Buffer), + MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Memory), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask) + {} + + MVKPipelineBarrier(const VkBufferMemoryBarrier2& vkBarrier) : + type(Buffer), + 
srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), @@ -153,9 +172,45 @@ typedef struct MVKPipelineBarrier { size(vkBarrier.size) {} - MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier) : - type(Image), + MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Buffer), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkBuffer((MVKBuffer*)vkBarrier.buffer), + offset(vkBarrier.offset), + size(vkBarrier.size) + {} + + MVKPipelineBarrier(const VkImageMemoryBarrier2& vkBarrier) : + type(Image), + srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkImage((MVKImage*)vkBarrier.image), + newLayout(vkBarrier.newLayout), + aspectMask(vkBarrier.subresourceRange.aspectMask), + baseArrayLayer(vkBarrier.subresourceRange.baseArrayLayer), + layerCount(vkBarrier.subresourceRange.layerCount), + baseMipLevel(vkBarrier.subresourceRange.baseMipLevel), + levelCount(vkBarrier.subresourceRange.levelCount) + {} + + MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Image), + srcStageMask(srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), 
dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h index 2e338ce7..95fdf681 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h @@ -52,16 +52,12 @@ public: VkResult bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo); /** Applies the specified global memory barrier. */ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; /** Applies the specified buffer memory barrier. */ - void applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); @@ -95,9 +91,7 @@ protected: friend class MVKDeviceMemory; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &overlapOffset, VkDeviceSize &overlapSize); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm index a99f4f0f..e14ff7d5 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm @@ -94,25 +94,21 @@ VkResult MVKBuffer::bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo) { return bindDeviceMemory((MVKDeviceMemory*)pBindInfo->memory, pBindInfo->memoryOffset); } -void MVKBuffer::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - 
MVKPipelineBarrier& barrier, +void MVKBuffer::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()]; } #endif } -void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()]; } #endif @@ -120,11 +116,9 @@ void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, // Returns whether the specified buffer memory barrier requires a sync between this // buffer and host memory for the purpose of the host reading texture memory. 
-bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS - return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && + return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) && isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer)); #endif @@ -238,6 +232,7 @@ MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) : } void MVKBuffer::initExternalMemory(VkExternalMemoryHandleTypeFlags handleTypes) { + if ( !handleTypes ) { return; } if (mvkIsOnlyAnyFlagEnabled(handleTypes, VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR)) { _externalMemoryHandleTypes = handleTypes; auto& xmProps = getPhysicalDevice()->getExternalBufferProperties(VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index a3f02ea8..ac83d697 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -729,7 +729,7 @@ void MVKBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder, MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) { MVKMTLBufferBinding bb; - NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size > dynamicOffsetIndex + NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size() > dynamicOffsetIndex ? 
dynamicOffsets[dynamicOffsetIndex++] : 0); if (_mvkBuffer) { bb.mtlBuffer = _mvkBuffer->getMTLBuffer(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index cb6f9b51..dc0edb95 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -53,7 +53,6 @@ class MVKSemaphore; class MVKTimelineSemaphore; class MVKDeferredOperation; class MVKEvent; -class MVKSemaphoreImpl; class MVKQueryPool; class MVKShaderModule; class MVKPipelineCache; @@ -74,16 +73,22 @@ class MVKPrivateDataSlot; /** The buffer index to use for vertex content. */ -const static uint32_t kMVKVertexContentBufferIndex = 0; +static constexpr uint32_t kMVKVertexContentBufferIndex = 0; // Parameters to define the sizing of inline collections -const static uint32_t kMVKQueueFamilyCount = 4; -const static uint32_t kMVKQueueCountPerQueueFamily = 1; // Must be 1. See comments in MVKPhysicalDevice::getQueueFamilies() -const static uint32_t kMVKMinSwapchainImageCount = 2; -const static uint32_t kMVKMaxSwapchainImageCount = 3; -const static uint32_t kMVKMaxColorAttachmentCount = 8; -const static uint32_t kMVKMaxViewportScissorCount = 16; -const static uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers; +static constexpr uint32_t kMVKQueueFamilyCount = 4; +static constexpr uint32_t kMVKQueueCountPerQueueFamily = 1; // Must be 1. 
See comments in MVKPhysicalDevice::getQueueFamilies() +static constexpr uint32_t kMVKMinSwapchainImageCount = 2; +static constexpr uint32_t kMVKMaxSwapchainImageCount = 3; +static constexpr uint32_t kMVKMaxColorAttachmentCount = 8; +static constexpr uint32_t kMVKMaxViewportScissorCount = 16; +static constexpr uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers; +static constexpr uint32_t kMVKMaxSampleCount = 8; +static constexpr uint32_t kMVKSampleLocationCoordinateGridSize = 16; +static constexpr float kMVKMinSampleLocationCoordinate = 0.0; +static constexpr float kMVKMaxSampleLocationCoordinate = (float)(kMVKSampleLocationCoordinateGridSize - 1) / (float)kMVKSampleLocationCoordinateGridSize; +static constexpr VkExtent2D kMVKSampleLocationPixelGridSize = { 1, 1 }; +static constexpr VkExtent2D kMVKSampleLocationPixelGridSizeNotSupported = { 0, 0 }; #if !MVK_XCODE_12 typedef NSUInteger MTLTimestamp; @@ -398,11 +403,12 @@ protected: uint64_t getRecommendedMaxWorkingSetSize(); uint64_t getCurrentAllocatedSize(); uint32_t getMaxSamplerCount(); + uint32_t getMaxPerSetDescriptorCount(); void initExternalMemoryProperties(); void initExtensions(); void initCounterSets(); bool needsCounterSetRetained(); - void updateTimestampsAndPeriod(); + void updateTimestampPeriod(); MVKArrayRef getQueueFamilies(); void initPipelineCacheUUID(); uint32_t getHighestGPUCapability(); @@ -440,6 +446,11 @@ protected: #pragma mark - #pragma mark MVKDevice +typedef enum { + MVKActivityPerformanceValueTypeDuration, + MVKActivityPerformanceValueTypeByteCount, +} MVKActivityPerformanceValueType; + typedef struct MVKMTLBlitEncoder { id mtlBlitEncoder = nil; id mtlCmdBuffer = nil; @@ -677,43 +688,45 @@ public: void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); /** Applies the specified global memory barrier to all resource issued by this device. 
*/ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); /** * If performance is being tracked, returns a monotonic timestamp value for use performance timestamping. - * * The returned value corresponds to the number of CPU "ticks" since the app was initialized. * - * Calling this value twice, subtracting the first value from the second, and then multiplying - * the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the - * number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds() - * can be used to perform this calculation. + * Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds() + * to determine the number of nanoseconds or milliseconds between the two calls. */ uint64_t getPerformanceTimestamp() { return _isPerformanceTracking ? mvkGetTimestamp() : 0; } /** - * If performance is being tracked, adds the performance for an activity with a duration - * interval between the start and end times, to the given performance statistics. + * If performance is being tracked, adds the performance for an activity with a duration interval + * between the start and end times, measured in milliseconds, to the given performance statistics. * * If endTime is zero or not supplied, the current time is used. */ - void addActivityPerformance(MVKPerformanceTracker& activityTracker, + void addPerformanceInterval(MVKPerformanceTracker& perfTracker, uint64_t startTime, uint64_t endTime = 0) { if (_isPerformanceTracking) { - updateActivityPerformance(activityTracker, startTime, endTime); - - // Log call not locked. Very minor chance that the tracker data will be updated during log call, - // resulting in an inconsistent report. 
Not worth taking lock perf hit for rare inline reporting. - if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { - logActivityPerformance(activityTracker, _performanceStatistics, true); - } + updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime)); } }; + /** + * If performance is being tracked, adds the performance for an activity + * with a kilobyte count, to the given performance statistics. + */ + void addPerformanceByteCount(MVKPerformanceTracker& perfTracker, uint64_t byteCount) { + if (_isPerformanceTracking) { + updateActivityPerformance(perfTracker, double(byteCount / KIBI)); + } + }; + + /** Updates the given performance statistic. */ + void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); + /** Populates the specified statistics structure from the current activity performance statistics. */ void getPerformanceStatistics(MVKPerformanceStatistics* pPerf); @@ -885,8 +898,10 @@ protected: template void enableFeatures(S* pRequested, VkBool32* pEnabledBools, const VkBool32* pRequestedBools, const VkBool32* pAvailableBools, uint32_t count); void enableExtensions(const VkDeviceCreateInfo* pCreateInfo); const char* getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); - void logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); - void updateActivityPerformance(MVKPerformanceTracker& activity, uint64_t startTime, uint64_t endTime); + MVKActivityPerformanceValueType getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); + void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); + void logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); + void logActivityByteCount(MVKPerformanceTracker& activity, 
MVKPerformanceStatistics& perfStats, bool isInline = false); void getDescriptorVariableDescriptorCountLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport, VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport); @@ -908,7 +923,6 @@ protected: id _defaultMTLSamplerState = nil; id _dummyBlitMTLBuffer = nil; uint32_t _globalVisibilityQueryCount = 0; - MVKConfigActivityPerformanceLoggingStyle _activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; bool _isPerformanceTracking = false; bool _isCurrentlyAutoGPUCapturing = false; bool _isUsingMetalArgumentBuffers = false; @@ -952,13 +966,9 @@ public: bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; }; /** Constructs an instance for the specified device. */ - MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } - - virtual ~MVKDeviceTrackingMixin() {} + MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } protected: - virtual MVKBaseObject* getBaseObject() = 0; - MVKDevice* _device; }; @@ -973,9 +983,6 @@ public: /** Constructs an instance for the specified device. */ MVKBaseDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {} - -protected: - MVKBaseObject* getBaseObject() override { return this; }; }; @@ -992,10 +999,6 @@ public: /** Constructs an instance for the specified device. 
 */ MVKVulkanAPIDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {} - -protected: - MVKBaseObject* getBaseObject() override { return this; }; - }; @@ -1048,7 +1051,6 @@ public: protected: T* newObject() override { return new T(_device); } - MVKBaseObject* getBaseObject() override { return this; }; }; @@ -1056,6 +1058,15 @@ protected: #pragma mark - #pragma mark Support functions +/** + * Returns an autoreleased array containing the MTLDevices available on this system, + * sorted according to power, with higher power GPU's at the front of the array. + * This ensures that a lazy app that simply grabs the first GPU will get a high-power + * one by default. If MVKConfiguration::forceLowPowerGPU is enabled, the returned + * array will only include low-power devices. The instance may be a nullptr. + */ +NSArray<id<MTLDevice>>* mvkGetAvailableMTLDevicesArray(MVKInstance* instance); + /** Returns the registry ID of the specified device, or zero if the device does not have a registry ID. */ uint64_t mvkGetRegistryID(id<MTLDevice> mtlDevice); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index ff575907..f1b4f2a2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -75,9 +75,6 @@ static const uint32_t kAMDRadeonRX5500DeviceId = 0x7340; static const uint32_t kAMDRadeonRX6800DeviceId = 0x73bf; static const uint32_t kAMDRadeonRX6700DeviceId = 0x73df; -static const VkExtent2D kMetalSamplePositionGridSize = { 1, 1 }; -static const VkExtent2D kMetalSamplePositionGridSizeNotSupported = { 0, 0 }; - static const uint32_t kMaxTimeDomains = 2; #pragma clang diagnostic pop @@ -131,9 +128,9 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { .shaderInputAttachmentArrayDynamicIndexing = _metalFeatures.arrayOfTextures, .shaderUniformTexelBufferArrayDynamicIndexing = _metalFeatures.arrayOfTextures, .shaderStorageTexelBufferArrayDynamicIndexing = 
_metalFeatures.arrayOfTextures, - .shaderUniformBufferArrayNonUniformIndexing = false, + .shaderUniformBufferArrayNonUniformIndexing = true, .shaderSampledImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers, - .shaderStorageBufferArrayNonUniformIndexing = false, + .shaderStorageBufferArrayNonUniformIndexing = true, .shaderStorageImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures, .shaderInputAttachmentArrayNonUniformIndexing = _metalFeatures.arrayOfTextures, .shaderUniformTexelBufferArrayNonUniformIndexing = _metalFeatures.arrayOfTextures, @@ -320,6 +317,11 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: { + auto* synch2Features = (VkPhysicalDeviceSynchronization2Features*)next; + synch2Features->synchronization2 = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: { auto* astcHDRFeatures = (VkPhysicalDeviceTextureCompressionASTCHDRFeatures*)next; astcHDRFeatures->textureCompressionASTC_HDR = _metalFeatures.astcHDRTextures; @@ -382,6 +384,53 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { formatFeatures->formatA4B4G4R4 = canSupport4444; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: { + auto* extDynState = (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT*)next; + extDynState->extendedDynamicState = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: { + auto* extDynState2 = (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT*)next; + extDynState2->extendedDynamicState2 = true; + extDynState2->extendedDynamicState2LogicOp = false; + extDynState2->extendedDynamicState2PatchControlPoints = true; + break; + } + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: { + auto* extDynState3 = (VkPhysicalDeviceExtendedDynamicState3FeaturesEXT*)next; + extDynState3->extendedDynamicState3TessellationDomainOrigin = false; + extDynState3->extendedDynamicState3DepthClampEnable = true; + extDynState3->extendedDynamicState3PolygonMode = true; + extDynState3->extendedDynamicState3RasterizationSamples = false; + extDynState3->extendedDynamicState3SampleMask = false; + extDynState3->extendedDynamicState3AlphaToCoverageEnable = false; + extDynState3->extendedDynamicState3AlphaToOneEnable = false; + extDynState3->extendedDynamicState3LogicOpEnable = false; + extDynState3->extendedDynamicState3ColorBlendEnable = false; + extDynState3->extendedDynamicState3ColorBlendEquation = false; + extDynState3->extendedDynamicState3ColorWriteMask = false; + extDynState3->extendedDynamicState3RasterizationStream = false; + extDynState3->extendedDynamicState3ConservativeRasterizationMode = false; + extDynState3->extendedDynamicState3ExtraPrimitiveOverestimationSize = false; + extDynState3->extendedDynamicState3DepthClipEnable = true; + extDynState3->extendedDynamicState3SampleLocationsEnable = true; + extDynState3->extendedDynamicState3ColorBlendAdvanced = false; + extDynState3->extendedDynamicState3ProvokingVertexMode = false; + extDynState3->extendedDynamicState3LineRasterizationMode = false; + extDynState3->extendedDynamicState3LineStippleEnable = false; + extDynState3->extendedDynamicState3DepthClipNegativeOneToOne = false; + extDynState3->extendedDynamicState3ViewportWScalingEnable = false; + extDynState3->extendedDynamicState3ViewportSwizzle = false; + extDynState3->extendedDynamicState3CoverageToColorEnable = false; + extDynState3->extendedDynamicState3CoverageToColorLocation = false; + extDynState3->extendedDynamicState3CoverageModulationMode = false; + extDynState3->extendedDynamicState3CoverageModulationTableEnable = false; + 
extDynState3->extendedDynamicState3CoverageModulationTable = false; + extDynState3->extendedDynamicState3CoverageReductionMode = false; + extDynState3->extendedDynamicState3RepresentativeFragmentTestEnable = false; + extDynState3->extendedDynamicState3ShadingRateImageEnable = false; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: { auto* interlockFeatures = (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT*)next; interlockFeatures->fragmentShaderSampleInterlock = _metalFeatures.rasterOrderGroups; @@ -451,7 +500,7 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { } void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) { - updateTimestampsAndPeriod(); + updateTimestampPeriod(); *properties = _properties; } @@ -476,9 +525,7 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { supportedProps11.maxMultiviewViewCount = 32; supportedProps11.maxMultiviewInstanceIndex = canUseInstancingForMultiview() ? uintMax / 32 : uintMax; supportedProps11.protectedNoFault = false; - supportedProps11.maxPerSetDescriptors = 4 * (_metalFeatures.maxPerStageBufferCount + - _metalFeatures.maxPerStageTextureCount + - _metalFeatures.maxPerStageSamplerCount); + supportedProps11.maxPerSetDescriptors = getMaxPerSetDescriptorCount(); supportedProps11.maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize; // Create a SSOT for these Vulkan 1.2 properties, which can be queried via two mechanisms here. 
@@ -730,11 +777,11 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: { auto* sampLocnProps = (VkPhysicalDeviceSampleLocationsPropertiesEXT*)next; sampLocnProps->sampleLocationSampleCounts = _metalFeatures.supportedSampleCounts; - sampLocnProps->maxSampleLocationGridSize = kMetalSamplePositionGridSize; - sampLocnProps->sampleLocationCoordinateRange[0] = 0.0; - sampLocnProps->sampleLocationCoordinateRange[1] = (15.0 / 16.0); - sampLocnProps->sampleLocationSubPixelBits = 4; - sampLocnProps->variableSampleLocations = VK_FALSE; + sampLocnProps->maxSampleLocationGridSize = kMVKSampleLocationPixelGridSize; + sampLocnProps->sampleLocationCoordinateRange[0] = kMVKMinSampleLocationCoordinate; + sampLocnProps->sampleLocationCoordinateRange[1] = kMVKMaxSampleLocationCoordinate; + sampLocnProps->sampleLocationSubPixelBits = mvkPowerOfTwoExponent(kMVKSampleLocationCoordinateGridSize); + sampLocnProps->variableSampleLocations = true; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { @@ -843,8 +890,8 @@ void MVKPhysicalDevice::getMultisampleProperties(VkSampleCountFlagBits samples, VkMultisamplePropertiesEXT* pMultisampleProperties) { if (pMultisampleProperties) { pMultisampleProperties->maxSampleLocationGridSize = (mvkIsOnlyAnyFlagEnabled(samples, _metalFeatures.supportedSampleCounts) - ? kMetalSamplePositionGridSize - : kMetalSamplePositionGridSizeNotSupported); + ? kMVKSampleLocationPixelGridSize + : kMVKSampleLocationPixelGridSizeNotSupported); } } @@ -1155,8 +1202,8 @@ VkResult MVKPhysicalDevice::getSurfaceSupport(uint32_t queueFamilyIndex, isHeadless = getMTLDevice().isHeadless; #endif - // If this device is headless or the surface does not have a CAMetalLayer, it is not supported. - *pSupported = !(isHeadless || (surface->getCAMetalLayer() == nil)); + // If this device is headless, the surface must be headless. 
+ *pSupported = isHeadless ? surface->isHeadless() : wasConfigurationSuccessful(); return *pSupported ? VK_SUCCESS : surface->getConfigurationResult(); } @@ -1215,13 +1262,12 @@ VkResult MVKPhysicalDevice::getSurfaceCapabilities( const VkPhysicalDeviceSurfac // The CAlayer underlying the surface must be a CAMetalLayer. MVKSurface* surface = (MVKSurface*)pSurfaceInfo->surface; - CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } VkSurfaceCapabilitiesKHR& surfCaps = pSurfaceCapabilities->surfaceCapabilities; surfCaps.minImageCount = _metalFeatures.minSwapchainImageCount; surfCaps.maxImageCount = _metalFeatures.maxSwapchainImageCount; - surfCaps.currentExtent = mvkGetNaturalExtent(mtlLayer); + surfCaps.currentExtent = surface->getNaturalExtent(); surfCaps.minImageExtent = { 1, 1 }; surfCaps.maxImageExtent = { _properties.limits.maxImageDimension2D, _properties.limits.maxImageDimension2D }; surfCaps.maxImageArrayLayers = 1; @@ -1300,9 +1346,7 @@ VkResult MVKPhysicalDevice::getSurfaceFormats(MVKSurface* surface, uint32_t* pCount, VkSurfaceFormatKHR* pSurfaceFormats) { - // The layer underlying the surface view must be a CAMetalLayer. - CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } #define addSurfFmt(MTL_FMT) \ do { \ @@ -1425,9 +1469,7 @@ VkResult MVKPhysicalDevice::getSurfacePresentModes(MVKSurface* surface, uint32_t* pCount, VkPresentModeKHR* pPresentModes) { - // The layer underlying the surface view must be a CAMetalLayer. 
- CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } #define ADD_VK_PRESENT_MODE(VK_PM) \ do { \ @@ -1455,9 +1497,7 @@ VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface, uint32_t* pRectCount, VkRect2D* pRects) { - // The layer underlying the surface view must be a CAMetalLayer. - CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } if ( !pRects ) { *pRectCount = 1; @@ -1469,7 +1509,7 @@ VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface, *pRectCount = 1; pRects[0].offset = { 0, 0 }; - pRects[0].extent = mvkGetNaturalExtent(mtlLayer); + pRects[0].extent = surface->getNaturalExtent(); return VK_SUCCESS; } @@ -1525,7 +1565,7 @@ MVKArrayRef MVKPhysicalDevice::getQueueFamilies() { VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties) { auto qFams = getQueueFamilies(); - uint32_t qfCnt = uint32_t(qFams.size); + uint32_t qfCnt = uint32_t(qFams.size()); // If properties aren't actually being requested yet, simply update the returned count if ( !pQueueFamilyProperties ) { @@ -1570,21 +1610,25 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount, // If needed, update the timestamp period for this device, using a crude lowpass filter to level out // wild temporary changes, particularly during initial queries before much GPU activity has occurred. // On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes. 
-void MVKPhysicalDevice::updateTimestampsAndPeriod() { - if (_properties.vendorID == kAppleVendorId) { return; } +void MVKPhysicalDevice::updateTimestampPeriod() { + if (_properties.vendorID != kAppleVendorId && + [_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) { - MTLTimestamp earlierCPUTs = _prevCPUTimestamp; - MTLTimestamp earlierGPUTs = _prevGPUTimestamp; - [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; - double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; - double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; - if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero - float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; + MTLTimestamp earlierGPUTs = _prevGPUTimestamp; + [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; + double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; + double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; - // Basic lowpass filter Y = (1 - a)Y + a*X. - // The lower a is, the slower Y will change over time. - static const float a = 0.05; - _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); + // Don't update period the first time through, or if no time elapsed. + if (earlierCPUTs && elapsedCPUNanos && elapsedGPUTicks) { + // Basic lowpass filter TPout = (1 - A)TPout + (A * TPin). + // The lower A is, the slower TPout will change over time. 
+ auto& vkTsp = _properties.limits.timestampPeriod; + float a = getMVKConfig().timestampPeriodLowPassAlpha; + float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; + vkTsp = ((1.0 - a) * vkTsp) + (a * tsPeriod); + } } } @@ -1689,10 +1733,15 @@ void MVKPhysicalDevice::initMetalFeatures() { _metalFeatures.minSwapchainImageCount = kMVKMinSwapchainImageCount; _metalFeatures.maxSwapchainImageCount = kMVKMaxSwapchainImageCount; - _metalFeatures.vertexStrideAlignment = 4; - _metalFeatures.maxPerStageStorageTextureCount = 8; + _metalFeatures.vertexStrideAlignment = supportsMTLGPUFamily(Apple5) ? 1 : 4; + +#if MVK_XCODE_15 + // Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU. + _metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2)); +#endif + // GPU-specific features switch (_properties.vendorID) { case kAMDVendorId: @@ -1703,6 +1752,7 @@ void MVKPhysicalDevice::initMetalFeatures() { if (!mvkOSVersionIsAtLeast(14.0, 17.0, 1.0)) { _metalFeatures.needsSampleDrefLodArrayWorkaround = true; } + _metalFeatures.needsCubeGradWorkaround = true; // fallthrough case kIntelVendorId: case kNVVendorId: @@ -2183,6 +2233,8 @@ void MVKPhysicalDevice::initMetalFeatures() { if ([_mtlDevice respondsToSelector: @selector(argumentBuffersSupport)]) { _metalFeatures.argumentBuffersTier = _mtlDevice.argumentBuffersSupport; + } else { + _metalFeatures.argumentBuffersTier = MTLArgumentBuffersTier1; } #define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \ @@ -2343,7 +2395,7 @@ void MVKPhysicalDevice::initFeatures() { mvkClear(&_vulkan12FeaturesNoExt); // Start with everything cleared _vulkan12FeaturesNoExt.samplerMirrorClampToEdge = _metalFeatures.samplerMirrorClampToEdge; _vulkan12FeaturesNoExt.drawIndirectCount = false; - _vulkan12FeaturesNoExt.descriptorIndexing = true; + _vulkan12FeaturesNoExt.descriptorIndexing = 
_metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers; _vulkan12FeaturesNoExt.samplerFilterMinmax = false; _vulkan12FeaturesNoExt.shaderOutputViewportIndex = _features.multiViewport; _vulkan12FeaturesNoExt.shaderOutputLayer = _metalFeatures.layeredRendering; @@ -2404,7 +2456,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.maxVertexInputAttributes = 31; _properties.limits.maxVertexInputBindings = 31; - _properties.limits.maxVertexInputBindingStride = (2 * KIBI); + _properties.limits.maxVertexInputBindingStride = supportsMTLGPUFamily(Apple2) ? kMVKUndefinedLargeUInt32 : (4 * KIBI); _properties.limits.maxVertexInputAttributeOffset = _properties.limits.maxVertexInputBindingStride - 1; _properties.limits.maxPerStageDescriptorSamplers = _metalFeatures.maxPerStageSamplerCount; @@ -2613,7 +2665,10 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.optimalBufferCopyRowPitchAlignment = 1; _properties.limits.timestampComputeAndGraphics = VK_TRUE; - _properties.limits.timestampPeriod = mvkGetTimestampPeriod(); // Will be 1.0 on Apple Silicon + + // On non-Apple GPU's, this can vary over time, and is calculated based on actual GPU activity. 
+ _properties.limits.timestampPeriod = 1.0; + updateTimestampPeriod(); _properties.limits.pointSizeRange[0] = 1; switch (_properties.vendorID) { @@ -2633,7 +2688,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.pointSizeGranularity = 1; _properties.limits.lineWidthRange[0] = 1; _properties.limits.lineWidthRange[1] = 1; - _properties.limits.lineWidthGranularity = 1; + _properties.limits.lineWidthGranularity = 0; _properties.limits.standardSampleLocations = VK_TRUE; _properties.limits.strictLines = _properties.vendorID == kIntelVendorId || _properties.vendorID == kNVVendorId; @@ -2689,7 +2744,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.maxComputeWorkGroupCount[1] = kMVKUndefinedLargeUInt32; _properties.limits.maxComputeWorkGroupCount[2] = kMVKUndefinedLargeUInt32; - _properties.limits.maxDrawIndexedIndexValue = numeric_limits::max() - 1; // Support both fullDrawIndexUint32 and automatic primitive restart. + _properties.limits.maxDrawIndexedIndexValue = numeric_limits::max(); _properties.limits.maxDrawIndirectCount = kMVKUndefinedLargeUInt32; @@ -3056,32 +3111,23 @@ uint64_t MVKPhysicalDevice::getVRAMSize() { } } +// If possible, retrieve from the MTLDevice, otherwise from available memory size, or a fixed conservative estimate. uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() { -#if MVK_MACOS +#if MVK_XCODE_15 || MVK_MACOS if ( [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) { return _mtlDevice.recommendedMaxWorkingSetSize; } #endif -#if MVK_IOS_OR_TVOS - // GPU and CPU use shared memory. Estimate the current free memory in the system. uint64_t freeMem = mvkGetAvailableMemorySize(); - if (freeMem) { return freeMem; } -#endif - - return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory + return freeMem ? freeMem : 256 * MEBI; } +// If possible, retrieve from the MTLDevice, otherwise use the current memory used by this process. 
uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() { if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) { return _mtlDevice.currentAllocatedSize; } -#if MVK_IOS_OR_TVOS - // We can use the current memory used by this process as a reasonable approximation. return mvkGetUsedMemorySize(); -#endif -#if MVK_MACOS - return 0; -#endif } // When using argument buffers, Metal imposes a hard limit on the number of MTLSamplerState @@ -3096,6 +3142,13 @@ uint32_t MVKPhysicalDevice::getMaxSamplerCount() { } } +// Vulkan imposes a minimum maximum of 1024 descriptors per set. +uint32_t MVKPhysicalDevice::getMaxPerSetDescriptorCount() { + return max(4 * (_metalFeatures.maxPerStageBufferCount + + _metalFeatures.maxPerStageTextureCount + + _metalFeatures.maxPerStageSamplerCount), 1024u); +} + void MVKPhysicalDevice::initExternalMemoryProperties() { // Common @@ -3149,6 +3202,9 @@ void MVKPhysicalDevice::initExtensions() { pWritableExtns->vk_KHR_fragment_shader_barycentric.enabled = false; pWritableExtns->vk_NV_fragment_shader_barycentric.enabled = false; } + if (!_metalFeatures.arrayOfTextures || !_metalFeatures.arrayOfSamplers) { + pWritableExtns->vk_EXT_descriptor_indexing.enabled = false; + } // The relevant functions are not available if not built with Xcode 14. 
#if MVK_XCODE_14 @@ -3249,31 +3305,14 @@ bool MVKPhysicalDevice::needsCounterSetRetained() { } void MVKPhysicalDevice::logGPUInfo() { - string devTypeStr; - switch (_properties.deviceType) { - case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: - devTypeStr = "Discrete"; - break; - case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: - devTypeStr = "Integrated"; - break; - case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: - devTypeStr = "Virtual"; - break; - case VK_PHYSICAL_DEVICE_TYPE_CPU: - devTypeStr = "CPU Emulation"; - break; - default: - devTypeStr = "Unknown"; - break; - } - string logMsg = "GPU device:"; logMsg += "\n\t\tmodel: %s"; logMsg += "\n\t\ttype: %s"; logMsg += "\n\t\tvendorID: %#06x"; logMsg += "\n\t\tdeviceID: %#06x"; logMsg += "\n\t\tpipelineCacheUUID: %s"; + logMsg += "\n\t\tGPU memory available: %llu MB"; + logMsg += "\n\t\tGPU memory used: %llu MB"; logMsg += "\n\tsupports the following Metal Versions, GPU's and Feature Sets:"; logMsg += "\n\t\tMetal Shading Language %s"; @@ -3356,9 +3395,29 @@ void MVKPhysicalDevice::logGPUInfo() { } #endif + string devTypeStr; + switch (_properties.deviceType) { + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: + devTypeStr = "Discrete"; + break; + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: + devTypeStr = "Integrated"; + break; + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: + devTypeStr = "Virtual"; + break; + case VK_PHYSICAL_DEVICE_TYPE_CPU: + devTypeStr = "CPU Emulation"; + break; + default: + devTypeStr = "Unknown"; + break; + } + NSUUID* nsUUID = [[NSUUID alloc] initWithUUIDBytes: _properties.pipelineCacheUUID]; // temp retain - MVKLogInfo(logMsg.c_str(), _properties.deviceName, devTypeStr.c_str(), + MVKLogInfo(logMsg.c_str(), getName(), devTypeStr.c_str(), _properties.vendorID, _properties.deviceID, nsUUID.UUIDString.UTF8String, + getRecommendedMaxWorkingSetSize() / MEBI, getCurrentAllocatedSize() / MEBI, SPIRVToMSLConversionOptions::printMSLVersion(_metalFeatures.mslVersion).c_str()); [nsUUID release]; // temp release } @@ 
-3366,7 +3425,11 @@ void MVKPhysicalDevice::logGPUInfo() { MVKPhysicalDevice::~MVKPhysicalDevice() { mvkDestroyContainerContents(_queueFamilies); [_timestampMTLCounterSet release]; + + uint64_t memUsed = getCurrentAllocatedSize(); // Retrieve before releasing MTLDevice [_mtlDevice release]; + + MVKLogInfo("Destroyed VkPhysicalDevice for GPU %s with %llu MB of GPU memory still allocated.", getName(), memUsed / MEBI); } @@ -3375,12 +3438,13 @@ MVKPhysicalDevice::~MVKPhysicalDevice() { // Returns core device commands and enabled extension device commands. PFN_vkVoidFunction MVKDevice::getProcAddr(const char* pName) { - MVKEntryPoint* pMVKPA = _physicalDevice->_mvkInstance->getEntryPoint(pName); - uint32_t apiVersion = _physicalDevice->_mvkInstance->_appInfo.apiVersion; + MVKInstance* pMVKInst = _physicalDevice->_mvkInstance; + MVKEntryPoint* pMVKPA = pMVKInst->getEntryPoint(pName); + uint32_t apiVersion = pMVKInst->_appInfo.apiVersion; - bool isSupported = (pMVKPA && // Command exists and... - pMVKPA->isDevice && // ...is a device command and... - pMVKPA->isEnabled(apiVersion, _enabledExtensions)); // ...is a core or enabled extension command. + bool isSupported = (pMVKPA && // Command exists and... + pMVKPA->isDevice && // ...is a device command and... + pMVKPA->isEnabled(apiVersion, _enabledExtensions, &pMVKInst->_enabledExtensions)); // ...is a core or enabled extension command. return isSupported ? 
pMVKPA->functionPointer : nullptr; } @@ -3442,7 +3506,7 @@ void MVKDevice::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateI for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { descriptorCount += pCreateInfo->pBindings[i].descriptorCount; } - pSupport->supported = (descriptorCount < ((_physicalDevice->_metalFeatures.maxPerStageBufferCount + _physicalDevice->_metalFeatures.maxPerStageTextureCount + _physicalDevice->_metalFeatures.maxPerStageSamplerCount) * 2)); + pSupport->supported = (descriptorCount < _physicalDevice->getMaxPerSetDescriptorCount()); // Check whether the layout has a variable-count descriptor, and if so, whether we can support it. for (auto* next = (VkBaseOutStructure*)pSupport->pNext; next; next = next->pNext) { @@ -3601,14 +3665,14 @@ void MVKDevice::getCalibratedTimestamps(uint32_t timestampCount, MTLTimestamp cpuStamp, gpuStamp; uint64_t cpuStart, cpuEnd; - cpuStart = mvkGetAbsoluteTime(); + cpuStart = mvkGetContinuousNanoseconds(); [getMTLDevice() sampleTimestamps: &cpuStamp gpuTimestamp: &gpuStamp]; // Sample again to calculate the maximum deviation. Note that the // -[MTLDevice sampleTimestamps:gpuTimestamp:] method guarantees that CPU // timestamps are in nanoseconds. We don't want to call the method again, // because that could result in an expensive syscall to query the GPU time- // stamp. 
- cpuEnd = mvkGetAbsoluteTime(); + cpuEnd = mvkGetContinuousNanoseconds(); for (uint32_t tsIdx = 0; tsIdx < timestampCount; ++tsIdx) { switch (pTimestampInfos[tsIdx].timeDomain) { case VK_TIME_DOMAIN_DEVICE_EXT: @@ -4172,43 +4236,63 @@ void MVKDevice::removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t val mvkRemoveFirstOccurance(_awaitingTimelineSem4s, make_pair(sem4, value)); } -void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKDevice::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { - if (!mvkIsAnyFlagEnabled(dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || + if (!mvkIsAnyFlagEnabled(barrier.dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || !mvkIsAnyFlagEnabled(barrier.dstAccessMask, VK_ACCESS_HOST_READ_BIT) ) { return; } lock_guard lock(_rezLock); for (auto& rez : _resources) { - rez->applyMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + rez->applyMemoryBarrier(barrier, cmdEncoder, cmdUse); } } -void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, - uint64_t startTime, uint64_t endTime) { - - double currInterval = mvkGetElapsedMilliseconds(startTime, endTime); +void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue) { lock_guard lock(_perfLock); - activity.latestDuration = currInterval; - activity.minimumDuration = ((activity.minimumDuration == 0.0) - ? currInterval : - min(currInterval, activity.minimumDuration)); - activity.maximumDuration = max(currInterval, activity.maximumDuration); - double totalInterval = (activity.averageDuration * activity.count++) + currInterval; - activity.averageDuration = totalInterval / activity.count; + activity.latest = currentValue; + activity.minimum = ((activity.minimum == 0.0) + ? 
currentValue : + min(currentValue, activity.minimum)); + activity.maximum = max(currentValue, activity.maximum); + double total = (activity.average * activity.count++) + currentValue; + activity.average = total / activity.count; + + if (_isPerformanceTracking && getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { + logActivityInline(activity, _performanceStatistics); + } } -void MVKDevice::logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { - MVKLogInfo("%s%s%s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d", - (isInline ? "" : " "), +void MVKDevice::logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { + if (getActivityPerformanceValueType(activity, _performanceStatistics) == MVKActivityPerformanceValueTypeByteCount) { + logActivityByteCount(activity, _performanceStatistics, true); + } else { + logActivityDuration(activity, _performanceStatistics, true); + } +} +void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { + const char* fmt = (isInline + ? "%s performance avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d" + : " %-45s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d"); + MVKLogInfo(fmt, getActivityPerformanceDescription(activity, perfStats), - (isInline ? " performance" : ""), - activity.averageDuration, - activity.latestDuration, - activity.minimumDuration, - activity.maximumDuration, + activity.average, + activity.latest, + activity.minimum, + activity.maximum, + activity.count); +} + +void MVKDevice::logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { + const char* fmt = (isInline + ? 
"%s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d" + : " %-45s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d"); + MVKLogInfo(fmt, + getActivityPerformanceDescription(activity, perfStats), + uint64_t(activity.average) / KIBI, + uint64_t(activity.latest) / KIBI, + uint64_t(activity.minimum) / KIBI, + uint64_t(activity.maximum) / KIBI, activity.count); } @@ -4218,49 +4302,71 @@ void MVKDevice::logPerformanceSummary() { MVKPerformanceStatistics perfStats; getPerformanceStatistics(&perfStats); - logActivityPerformance(perfStats.queue.frameInterval, perfStats); - logActivityPerformance(perfStats.queue.nextCAMetalDrawable, perfStats); - logActivityPerformance(perfStats.queue.mtlCommandBufferCompletion, perfStats); - logActivityPerformance(perfStats.queue.mtlQueueAccess, perfStats); - logActivityPerformance(perfStats.shaderCompilation.hashShaderCode, perfStats); - logActivityPerformance(perfStats.shaderCompilation.spirvToMSL, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslCompile, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslLoad, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslCompress, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslDecompress, perfStats); - logActivityPerformance(perfStats.shaderCompilation.shaderLibraryFromCache, perfStats); - logActivityPerformance(perfStats.shaderCompilation.functionRetrieval, perfStats); - logActivityPerformance(perfStats.shaderCompilation.functionSpecialization, perfStats); - logActivityPerformance(perfStats.shaderCompilation.pipelineCompile, perfStats); - logActivityPerformance(perfStats.pipelineCache.sizePipelineCache, perfStats); - logActivityPerformance(perfStats.pipelineCache.readPipelineCache, perfStats); - logActivityPerformance(perfStats.pipelineCache.writePipelineCache, perfStats); +#define logDuration(s) logActivityDuration(perfStats.s, perfStats) +#define logByteCount(s) 
logActivityByteCount(perfStats.s, perfStats) + + logDuration(queue.frameInterval); + logDuration(queue.retrieveMTLCommandBuffer); + logDuration(queue.commandBufferEncoding); + logDuration(queue.submitCommandBuffers); + logDuration(queue.mtlCommandBufferExecution); + logDuration(queue.retrieveCAMetalDrawable); + logDuration(queue.presentSwapchains); + logDuration(shaderCompilation.hashShaderCode); + logDuration(shaderCompilation.spirvToMSL); + logDuration(shaderCompilation.mslCompile); + logDuration(shaderCompilation.mslLoad); + logDuration(shaderCompilation.mslCompress); + logDuration(shaderCompilation.mslDecompress); + logDuration(shaderCompilation.shaderLibraryFromCache); + logDuration(shaderCompilation.functionRetrieval); + logDuration(shaderCompilation.functionSpecialization); + logDuration(shaderCompilation.pipelineCompile); + logDuration(pipelineCache.sizePipelineCache); + logDuration(pipelineCache.readPipelineCache); + logDuration(pipelineCache.writePipelineCache); + logByteCount(device.gpuMemoryAllocated); +#undef logDuration +#undef logByteCount } const char* MVKDevice::getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { - if (&activity == &perfStats.shaderCompilation.hashShaderCode) { return "Hash shader SPIR-V code"; } - if (&activity == &perfStats.shaderCompilation.spirvToMSL) { return "Convert SPIR-V to MSL source code"; } - if (&activity == &perfStats.shaderCompilation.mslCompile) { return "Compile MSL source code into a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.mslLoad) { return "Load pre-compiled MSL code into a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.mslCompress) { return "Compress MSL source code after compiling a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.mslDecompress) { return "Decompress MSL source code during pipeline cache write"; } - if (&activity == &perfStats.shaderCompilation.shaderLibraryFromCache) { return "Retrieve 
shader library from the cache"; } - if (&activity == &perfStats.shaderCompilation.functionRetrieval) { return "Retrieve a MTLFunction from a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.functionSpecialization) { return "Specialize a retrieved MTLFunction"; } - if (&activity == &perfStats.shaderCompilation.pipelineCompile) { return "Compile MTLFunctions into a pipeline"; } - if (&activity == &perfStats.pipelineCache.sizePipelineCache) { return "Calculate cache size required to write MSL to pipeline cache"; } - if (&activity == &perfStats.pipelineCache.readPipelineCache) { return "Read MSL from pipeline cache"; } - if (&activity == &perfStats.pipelineCache.writePipelineCache) { return "Write MSL to pipeline cache"; } - if (&activity == &perfStats.queue.mtlQueueAccess) { return "Access MTLCommandQueue"; } - if (&activity == &perfStats.queue.mtlCommandBufferCompletion) { return "Complete MTLCommandBuffer"; } - if (&activity == &perfStats.queue.nextCAMetalDrawable) { return "Retrieve a CAMetalDrawable from CAMetalLayer"; } - if (&activity == &perfStats.queue.frameInterval) { return "Frame interval"; } - return "Unknown performance activity"; +#define ifActivityReturnName(s, n) if (&activity == &perfStats.s) return n + ifActivityReturnName(shaderCompilation.hashShaderCode, "Hash shader SPIR-V code"); + ifActivityReturnName(shaderCompilation.spirvToMSL, "Convert SPIR-V to MSL source code"); + ifActivityReturnName(shaderCompilation.mslCompile, "Compile MSL into a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslLoad, "Load pre-compiled MSL into a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslCompress, "Compress MSL after compiling a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslDecompress, "Decompress MSL for pipeline cache write"); + ifActivityReturnName(shaderCompilation.shaderLibraryFromCache, "Retrieve shader library from the cache"); + ifActivityReturnName(shaderCompilation.functionRetrieval, "Retrieve a MTLFunction 
from a MTLLibrary"); + ifActivityReturnName(shaderCompilation.functionSpecialization, "Specialize a retrieved MTLFunction"); + ifActivityReturnName(shaderCompilation.pipelineCompile, "Compile MTLFunctions into a pipeline"); + ifActivityReturnName(pipelineCache.sizePipelineCache, "Calculate pipeline cache size"); + ifActivityReturnName(pipelineCache.readPipelineCache, "Read MSL from pipeline cache"); + ifActivityReturnName(pipelineCache.writePipelineCache, "Write MSL to pipeline cache"); + ifActivityReturnName(queue.retrieveMTLCommandBuffer, "Retrieve a MTLCommandBuffer"); + ifActivityReturnName(queue.commandBufferEncoding, "Encode VkCommandBuffer to MTLCommandBuffer"); + ifActivityReturnName(queue.submitCommandBuffers, "vkQueueSubmit() encoding to MTLCommandBuffers"); + ifActivityReturnName(queue.mtlCommandBufferExecution, "Execute a MTLCommandBuffer on GPU"); + ifActivityReturnName(queue.retrieveCAMetalDrawable, "Retrieve a CAMetalDrawable"); + ifActivityReturnName(queue.presentSwapchains, "Present swapchains in on GPU"); + ifActivityReturnName(queue.frameInterval, "Frame interval"); + ifActivityReturnName(device.gpuMemoryAllocated, "GPU memory allocated"); + return "Unknown performance activity"; +#undef ifActivityReturnName +} + +MVKActivityPerformanceValueType MVKDevice::getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { + if (&activity == &perfStats.device.gpuMemoryAllocated) return MVKActivityPerformanceValueTypeByteCount; + return MVKActivityPerformanceValueTypeDuration; } void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) { - lock_guard lock(_perfLock); + addPerformanceByteCount(_performanceStatistics.device.gpuMemoryAllocated, + _physicalDevice->getCurrentAllocatedSize()); + lock_guard lock(_perfLock); if (pPerf) { *pPerf = _performanceStatistics; } } @@ -4597,33 +4703,15 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo 
startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice()); MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s", - _pProperties->deviceName, - _enabledExtensions.getEnabledCount(), - _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); + getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); } +// Perf stats that last the duration of the app process. +static MVKPerformanceStatistics _processPerformanceStatistics = {}; + void MVKDevice::initPerformanceTracking() { - _isPerformanceTracking = getMVKConfig().performanceTracking; - _activityPerformanceLoggingStyle = getMVKConfig().activityPerformanceLoggingStyle; - - _performanceStatistics.shaderCompilation.hashShaderCode = {}; - _performanceStatistics.shaderCompilation.spirvToMSL = {}; - _performanceStatistics.shaderCompilation.mslCompile = {}; - _performanceStatistics.shaderCompilation.mslLoad = {}; - _performanceStatistics.shaderCompilation.mslCompress = {}; - _performanceStatistics.shaderCompilation.mslDecompress = {}; - _performanceStatistics.shaderCompilation.shaderLibraryFromCache = {}; - _performanceStatistics.shaderCompilation.functionRetrieval = {}; - _performanceStatistics.shaderCompilation.functionSpecialization = {}; - _performanceStatistics.shaderCompilation.pipelineCompile = {}; - _performanceStatistics.pipelineCache.sizePipelineCache = {}; - _performanceStatistics.pipelineCache.writePipelineCache = {}; - _performanceStatistics.pipelineCache.readPipelineCache = {}; - _performanceStatistics.queue.mtlQueueAccess = {}; - _performanceStatistics.queue.mtlCommandBufferCompletion = {}; - _performanceStatistics.queue.nextCAMetalDrawable = {}; - _performanceStatistics.queue.frameInterval = {}; + _performanceStatistics = _processPerformanceStatistics; } void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo) { @@ -4920,9 +5008,16 
@@ void MVKDevice::reservePrivateData(const VkDeviceCreateInfo* pCreateInfo) { } MVKDevice::~MVKDevice() { - if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) { - MVKLogInfo("Device activity performance summary:"); - logPerformanceSummary(); + if (_isPerformanceTracking) { + auto perfLogStyle = getMVKConfig().activityPerformanceLoggingStyle; + if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) { + MVKLogInfo("Device activity performance summary:"); + logPerformanceSummary(); + } else if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE) { + MVKLogInfo("Process activity performance summary:"); + logPerformanceSummary(); + _processPerformanceStatistics = _performanceStatistics; + } } for (auto& queues : _queuesByQueueFamilyIndex) { @@ -4938,12 +5033,58 @@ MVKDevice::~MVKDevice() { stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE); mvkDestroyContainerContents(_privateDataSlots); + + MVKLogInfo("Destroyed VkDevice on GPU %s with %d Vulkan extensions enabled.", + getName(), _enabledExtensions.getEnabledCount()); } #pragma mark - #pragma mark Support functions +NSArray>* mvkGetAvailableMTLDevicesArray(MVKInstance* instance) { + NSMutableArray* mtlDevs = [NSMutableArray array]; // autoreleased + +#if MVK_MACOS + NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease]; + bool forceLowPower = mvkGetMVKConfig(instance).forceLowPowerGPU; + + // Populate the array of appropriate MTLDevices + for (id md in rawMTLDevs) { + if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; } + } + + // Sort by power + [mtlDevs sortUsingComparator: ^(id md1, id md2) { + BOOL md1IsLP = md1.isLowPower; + BOOL md2IsLP = md2.isLowPower; + + if (md1IsLP == md2IsLP) { + // If one device is headless and the other one is not, select the + // one that is not headless first. 
+ BOOL md1IsHeadless = md1.isHeadless; + BOOL md2IsHeadless = md2.isHeadless; + if (md1IsHeadless == md2IsHeadless ) { + return NSOrderedSame; + } + return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending; + } + + return md2IsLP ? NSOrderedAscending : NSOrderedDescending; + }]; + + // If the survey found at least one device, return the array. + if (mtlDevs.count) { return mtlDevs; } + +#endif // MVK_MACOS + + // For other OS's, or for macOS if the survey returned empty, use the default device. + id md = [MTLCreateSystemDefaultDevice() autorelease]; + if (md) { [mtlDevs addObject: md]; } + + return mtlDevs; // retained +} + uint64_t mvkGetRegistryID(id mtlDevice) { return [mtlDevice respondsToSelector: @selector(registryID)] ? mtlDevice.registryID : 0; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index c0bbb481..d3856e8c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -55,6 +55,7 @@ MVK_DEVICE_FEATURE(ShaderAtomicInt64, SHADER_ATOMIC_INT64, MVK_DEVICE_FEATURE(ShaderFloat16Int8, SHADER_FLOAT16_INT8, 2) MVK_DEVICE_FEATURE(ShaderSubgroupExtendedTypes, SHADER_SUBGROUP_EXTENDED_TYPES, 1) MVK_DEVICE_FEATURE(SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, 2) +MVK_DEVICE_FEATURE(Synchronization2, SYNCHRONIZATION_2, 1) MVK_DEVICE_FEATURE(TextureCompressionASTCHDR, TEXTURE_COMPRESSION_ASTC_HDR, 1) MVK_DEVICE_FEATURE(TimelineSemaphore, TIMELINE_SEMAPHORE, 1) MVK_DEVICE_FEATURE(UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, 1) @@ -63,6 +64,9 @@ MVK_DEVICE_FEATURE(VulkanMemoryModel, VULKAN_MEMORY_MODEL, MVK_DEVICE_FEATURE_EXTN(FragmentShaderBarycentric, FRAGMENT_SHADER_BARYCENTRIC, KHR, 1) MVK_DEVICE_FEATURE_EXTN(PortabilitySubset, PORTABILITY_SUBSET, KHR, 15) MVK_DEVICE_FEATURE_EXTN(4444Formats, 4444_FORMATS, EXT, 2) +MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, 
EXT, 1) +MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, EXT, 3) +MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, EXT, 31) MVK_DEVICE_FEATURE_EXTN(FragmentShaderInterlock, FRAGMENT_SHADER_INTERLOCK, EXT, 3) MVK_DEVICE_FEATURE_EXTN(PipelineCreationCacheControl, PIPELINE_CREATION_CACHE_CONTROL, EXT, 1) MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, EXT, 3) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 572e8f06..058876d6 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -31,6 +31,7 @@ class MVKImage; class MVKImageView; class MVKSwapchain; +class MVKQueue; class MVKCommandEncoder; @@ -73,9 +74,7 @@ protected: bool overlaps(VkSubresourceLayout& imgLayout, VkDeviceSize offset, VkDeviceSize size); void propagateDebugName(); MVKImageMemoryBinding* getMemoryBinding() const; - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); void pullFromDeviceOnCompletion(MVKCommandEncoder* cmdEncoder, @@ -118,9 +117,7 @@ public: VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset) override; /** Applies the specified global memory barrier. 
*/ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; @@ -132,9 +129,7 @@ protected: friend MVKImage; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size); @@ -250,9 +245,7 @@ public: virtual VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo); /** Applies the specified image memory barrier. */ - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); @@ -385,14 +378,8 @@ class MVKSwapchainImage : public MVKImage { public: - /** Binds this resource to the specified offset within the specified memory allocation. */ VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset, uint8_t planeIndex) override; -#pragma mark Metal - - /** Returns the Metal texture used by the CAMetalDrawable underlying this image. */ - id getMTLTexture(uint8_t planeIndex) override; - #pragma mark Construction @@ -406,11 +393,10 @@ public: protected: friend class MVKPeerSwapchainImage; - virtual id getCAMetalDrawable() = 0; void detachSwapchain(); + std::mutex _detachmentLock; MVKSwapchain* _swapchain; - std::mutex _swapchainLock; uint32_t _swapchainIndex; }; @@ -429,6 +415,7 @@ typedef struct MVKSwapchainImageAvailability { /** Presentation info. 
*/ typedef struct { MVKPresentableSwapchainImage* presentableImage; + MVKQueue* queue; // The queue on which the vkQueuePresentKHR() command was executed. MVKFence* fence; // VK_EXT_swapchain_maintenance1 fence signaled when resources can be destroyed uint64_t desiredPresentTime; // VK_GOOGLE_display_timing desired presentation time in nanoseconds uint32_t presentID; // VK_GOOGLE_display_timing presentID @@ -451,35 +438,46 @@ public: #pragma mark Metal - /** Presents the contained drawable to the OS. */ - void presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); + id getMTLTexture(uint8_t planeIndex) override; + /** Presents the contained drawable to the OS. */ + VkResult presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); + + /** Called when the presentation begins. */ + void beginPresentation(const MVKImagePresentInfo& presentInfo); + + /** Called via callback when the presentation completes. */ + void endPresentation(const MVKImagePresentInfo& presentInfo, + const MVKSwapchainSignaler& signaler, + uint64_t actualPresentTime = 0); #pragma mark Construction MVKPresentableSwapchainImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo, MVKSwapchain* swapchain, uint32_t swapchainIndex); + void destroy() override; + ~MVKPresentableSwapchainImage() override; protected: friend MVKSwapchain; - id getCAMetalDrawable() override; - void addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo); + id getCAMetalDrawable(); + void addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo, MVKSwapchainSignaler signaler); void releaseMetalDrawable(); MVKSwapchainImageAvailability getAvailability(); - void makeAvailable(const MVKSwapchainSignaler& signaler); void makeAvailable(); - void acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); - void renderWatermark(id mtlCmdBuff); + VkResult acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); + MVKSwapchainSignaler 
getPresentationSignaler(); - id _mtlDrawable; - id _presentingMTLCmdBuff; + id _mtlDrawable = nil; + id _mtlTextureHeadless = nil; MVKSwapchainImageAvailability _availability; MVKSmallVector _availabilitySignalers; - MVKSwapchainSignaler _preSignaler; + MVKSwapchainSignaler _preSignaler = {}; std::mutex _availabilityLock; + uint64_t _presentationStartTime = 0; }; @@ -491,7 +489,8 @@ class MVKPeerSwapchainImage : public MVKSwapchainImage { public: - /** Binds this resource according to the specified bind information. */ + id getMTLTexture(uint8_t planeIndex) override; + VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo) override; @@ -501,10 +500,6 @@ public: const VkImageCreateInfo* pCreateInfo, MVKSwapchain* swapchain, uint32_t swapchainIndex); - -protected: - id getCAMetalDrawable() override; - }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 89b98d91..740dc167 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -19,13 +19,16 @@ #include "MVKImage.h" #include "MVKQueue.h" #include "MVKSwapchain.h" +#include "MVKSurface.h" #include "MVKCommandBuffer.h" #include "MVKCmdDebug.h" #include "MVKFoundation.h" #include "MVKOSExtensions.h" #include "MVKCodec.h" + #import "MTLTextureDescriptor+MoltenVK.h" #import "MTLSamplerDescriptor+MoltenVK.h" +#import "CAMetalLayer+MoltenVK.h" using namespace std; using namespace SPIRV_CROSS_NAMESPACE; @@ -303,9 +306,7 @@ MVKImageMemoryBinding* MVKImagePlane::getMemoryBinding() const { return (_image->_memoryBindings.size() > 1) ? 
_image->_memoryBindings[_planeIndex] : _image->_memoryBindings[0]; } -void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImagePlane::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { @@ -322,7 +323,7 @@ void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, : (layerStart + barrier.layerCount)); MVKImageMemoryBinding* memBind = getMemoryBinding(); - bool needsSync = memBind->needsHostReadSync(srcStageMask, dstStageMask, barrier); + bool needsSync = memBind->needsHostReadSync(barrier); bool needsPull = ((!memBind->_mtlTexelBuffer || memBind->_ownsTexelBuffer) && memBind->isMemoryHostCoherent() && barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL && @@ -443,13 +444,11 @@ VkResult MVKImageMemoryBinding::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDevi return _deviceMemory ? _deviceMemory->addImageMemoryBinding(this) : VK_SUCCESS; } -void MVKImageMemoryBinding::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImageMemoryBinding::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if (needsHostReadSync(barrier)) { for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: _image->_planes[planeIndex]->_mtlTexture]; } @@ -468,9 +467,7 @@ void MVKImageMemoryBinding::propagateDebugName() { // Returns whether the specified image memory barrier requires a sync between this // texture and host memory for the purpose of the host reading texture memory. 
-bool MVKImageMemoryBinding::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) && @@ -624,15 +621,13 @@ bool MVKImage::getIsValidViewFormat(VkFormat viewFormat) { #pragma mark Resource memory -void MVKImage::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImage::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { for (uint8_t planeIndex = 0; planeIndex < _planes.size(); planeIndex++) { if ( !_hasChromaSubsampling || mvkIsAnyFlagEnabled(barrier.aspectMask, (VK_IMAGE_ASPECT_PLANE_0_BIT << planeIndex)) ) { - _planes[planeIndex]->applyImageMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + _planes[planeIndex]->applyImageMemoryBarrier(barrier, cmdEncoder, cmdUse); } } } @@ -1149,6 +1144,7 @@ bool MVKImage::validateLinear(const VkImageCreateInfo* pCreateInfo, bool isAttac } void MVKImage::initExternalMemory(VkExternalMemoryHandleTypeFlags handleTypes) { + if ( !handleTypes ) { return; } if (mvkIsOnlyAnyFlagEnabled(handleTypes, VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR)) { auto& xmProps = getPhysicalDevice()->getExternalImageProperties(VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR); for(auto& memoryBinding : _memoryBindings) { @@ -1175,12 +1171,6 @@ VkResult MVKSwapchainImage::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSi } -#pragma mark Metal - -// Overridden to always retrieve the MTLTexture directly from the CAMetalDrawable. 
-id MVKSwapchainImage::getMTLTexture(uint8_t planeIndex) { return [getCAMetalDrawable() texture]; } - - #pragma mark Construction MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device, @@ -1192,8 +1182,9 @@ MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device, } void MVKSwapchainImage::detachSwapchain() { - lock_guard lock(_swapchainLock); + lock_guard lock(_detachmentLock); _swapchain = nullptr; + _device = nullptr; } void MVKSwapchainImage::destroy() { @@ -1217,58 +1208,57 @@ MVKSwapchainImageAvailability MVKPresentableSwapchainImage::getAvailability() { return _availability; } -// If present, signal the semaphore for the first waiter for the given image. -static void signalPresentationSemaphore(const MVKSwapchainSignaler& signaler, id mtlCmdBuff) { - if (signaler.semaphore) { signaler.semaphore->encodeDeferredSignal(mtlCmdBuff, signaler.semaphoreSignalToken); } -} - -// Signal either or both of the semaphore and fence in the specified tracker pair. -static void signal(const MVKSwapchainSignaler& signaler, id mtlCmdBuff) { - if (signaler.semaphore) { signaler.semaphore->encodeDeferredSignal(mtlCmdBuff, signaler.semaphoreSignalToken); } - if (signaler.fence) { signaler.fence->signal(); } -} - // Tell the semaphore and fence that they are being tracked for future signaling. -static void markAsTracked(const MVKSwapchainSignaler& signaler) { +static void track(const MVKSwapchainSignaler& signaler) { if (signaler.semaphore) { signaler.semaphore->retain(); } if (signaler.fence) { signaler.fence->retain(); } } -// Tell the semaphore and fence that they are no longer being tracked for future signaling. 
-static void unmarkAsTracked(const MVKSwapchainSignaler& signaler) { +static void signal(MVKSemaphore* semaphore, uint64_t semaphoreSignalToken, id mtlCmdBuff) { + if (semaphore) { semaphore->encodeDeferredSignal(mtlCmdBuff, semaphoreSignalToken); } +} + +static void signal(MVKFence* fence) { + if (fence) { fence->signal(); } +} + +// Signal the semaphore and fence and tell them that they are no longer being tracked for future signaling. +static void signalAndUntrack(const MVKSwapchainSignaler& signaler) { + signal(signaler.semaphore, signaler.semaphoreSignalToken, nil); if (signaler.semaphore) { signaler.semaphore->release(); } + + signal(signaler.fence); if (signaler.fence) { signaler.fence->release(); } } -static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { - signal(signaler, nil); - unmarkAsTracked(signaler); -} +VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { + + // Now that this image is being acquired, release the existing drawable and its texture. + // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. + releaseMetalDrawable(); -void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { lock_guard lock(_availabilityLock); // Upon acquisition, update acquisition ID immediately, to move it to the back of the chain, // so other images will be preferred if either all images are available or no images are available. _availability.acquisitionID = _swapchain->getNextAcquisitionID(); - // Now that this image is being acquired, release the existing drawable and its texture. - // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. - releaseMetalDrawable(); - auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? 
semaphore->deferSignal() : 0}; if (_availability.isAvailable) { _availability.isAvailable = false; - // If signalling through a MTLEvent, and there's no command buffer presenting me, use an ephemeral MTLCommandBuffer. + // If signalling through a MTLEvent, signal through an ephemeral MTLCommandBuffer. // Another option would be to use MTLSharedEvent in MVKSemaphore, but that might // impose unacceptable performance costs to handle this particular case. @autoreleasepool { MVKSemaphore* mvkSem = signaler.semaphore; - id mtlCmdBuff = (mvkSem && mvkSem->isUsingCommandEncoding() - ? _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage) - : nil); - signal(signaler, mtlCmdBuff); + id mtlCmdBuff = nil; + if (mvkSem && mvkSem->isUsingCommandEncoding()) { + mtlCmdBuff = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage); + if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } + } + signal(signaler.semaphore, signaler.semaphoreSignalToken, mtlCmdBuff); + signal(signaler.fence); [mtlCmdBuff commit]; } @@ -1276,45 +1266,65 @@ void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* s } else { _availabilitySignalers.push_back(signaler); } - markAsTracked(signaler); + track(signaler); + + return getConfigurationResult(); } +// Calling nextDrawable may result in a nil drawable, or a drawable with no pixel format. +// Attempt several times to retrieve a good drawable, and set an error to trigger the +// swapchain to be re-established if one cannot be retrieved. 
id MVKPresentableSwapchainImage::getCAMetalDrawable() { - while ( !_mtlDrawable ) { - @autoreleasepool { // Reclaim auto-released drawable object before end of loop - uint64_t startTime = _device->getPerformanceTimestamp(); - _mtlDrawable = [_swapchain->_mtlLayer.nextDrawable retain]; - if ( !_mtlDrawable ) { MVKLogError("CAMetalDrawable could not be acquired."); } + if (_mtlTextureHeadless) { return nil; } // If headless, there is no drawable. - _device->addActivityPerformance(_device->_performanceStatistics.queue.nextCAMetalDrawable, startTime); + if ( !_mtlDrawable ) { + @autoreleasepool { + bool hasInvalidFormat = false; + uint32_t attemptCnt = _swapchain->getImageCount(); // Attempt a reasonable number of times + for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { + uint64_t startTime = _device->getPerformanceTimestamp(); + _mtlDrawable = [_swapchain->getCAMetalLayer().nextDrawable retain]; // retained + _device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat; + if (hasInvalidFormat) { releaseMetalDrawable(); } + } + if (hasInvalidFormat) { + setConfigurationResult(reportError(VK_ERROR_OUT_OF_DATE_KHR, "CAMetalDrawable with valid format could not be acquired after %d attempts.", attemptCnt)); + } else if ( !_mtlDrawable ) { + setConfigurationResult(reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt)); + } } } return _mtlDrawable; } +// If not headless, retrieve the MTLTexture directly from the CAMetalDrawable. +id MVKPresentableSwapchainImage::getMTLTexture(uint8_t planeIndex) { + return _mtlTextureHeadless ? _mtlTextureHeadless : getCAMetalDrawable().texture; +} + // Present the drawable and make myself available only once the command buffer has completed. // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. 
-void MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, - MVKImagePresentInfo presentInfo) { - lock_guard lock(_availabilityLock); - - _swapchain->willPresentSurface(getMTLTexture(0), mtlCmdBuff); +VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, + MVKImagePresentInfo presentInfo) { + _swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff); // According to Apple, it is more performant to call MTLDrawable present from within a // MTLCommandBuffer scheduled-handler than it is to call MTLCommandBuffer presentDrawable:. // But get current drawable now, intead of in handler, because a new drawable might be acquired by then. // Attach present handler before presenting to avoid race condition. id mtlDrwbl = getCAMetalDrawable(); + MVKSwapchainSignaler signaler = getPresentationSignaler(); [mtlCmdBuff addScheduledHandler: ^(id mcb) { + + addPresentedHandler(mtlDrwbl, presentInfo, signaler); + // Try to do any present mode transitions as late as possible in an attempt // to avoid visual disruptions on any presents already on the queue. if (presentInfo.presentMode != VK_PRESENT_MODE_MAX_ENUM_KHR) { mtlDrwbl.layer.displaySyncEnabledMVK = (presentInfo.presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); } - if (presentInfo.hasPresentTime) { - addPresentedHandler(mtlDrwbl, presentInfo); - } if (presentInfo.desiredPresentTime) { [mtlDrwbl presentAtTime: (double)presentInfo.desiredPresentTime * 1.0e-9]; } else { @@ -1322,7 +1332,30 @@ void MVKPresentableSwapchainImage::presentCAMetalDrawable(id m } }]; - MVKSwapchainSignaler signaler; + // Ensure this image, the drawable, and the present fence are not destroyed while + // awaiting MTLCommandBuffer completion. We retain the drawable separately because + // a new drawable might be acquired by this image by then. + // Signal the fence from this callback, because the last one or two presentation + // completion callbacks can occasionally stall. 
+ retain(); + [mtlDrwbl retain]; + auto* fence = presentInfo.fence; + if (fence) { fence->retain(); } + [mtlCmdBuff addCompletedHandler: ^(id mcb) { + signal(fence); + if (fence) { fence->release(); } + [mtlDrwbl release]; + release(); + }]; + + signal(signaler.semaphore, signaler.semaphoreSignalToken, mtlCmdBuff); + + return getConfigurationResult(); +} + +MVKSwapchainSignaler MVKPresentableSwapchainImage::getPresentationSignaler() { + lock_guard lock(_availabilityLock); + // Mark this image as available if no semaphores or fences are waiting to be signaled. _availability.isAvailable = _availabilitySignalers.empty(); if (_availability.isAvailable) { @@ -1331,93 +1364,91 @@ void MVKPresentableSwapchainImage::presentCAMetalDrawable(id m // when an app uses a single semaphore or fence for more than one swapchain image. // Because the semaphore or fence will be signaled by more than one image, it will // get out of sync, and the final use of the image would not be signaled as a result. - signaler = _preSignaler; + return _preSignaler; } else { // If this image is not yet available, extract and signal the first semaphore and fence. + MVKSwapchainSignaler signaler; auto sigIter = _availabilitySignalers.begin(); signaler = *sigIter; _availabilitySignalers.erase(sigIter); + return signaler; } - - // Ensure this image, the drawable, and the present fence are not destroyed while - // awaiting MTLCommandBuffer completion. We retain the drawable separately because - // a new drawable might be acquired by this image by then. - retain(); - [mtlDrwbl retain]; - auto* fence = presentInfo.fence; - if (fence) { fence->retain(); } - [mtlCmdBuff addCompletedHandler: ^(id mcb) { - [mtlDrwbl release]; - makeAvailable(signaler); - release(); - if (fence) { - fence->signal(); - fence->release(); - } - }]; - - signalPresentationSemaphore(signaler, mtlCmdBuff); } -// Pass MVKImagePresentInfo by value because it may not exist when the callback runs. 
+// Pass MVKImagePresentInfo & MVKSwapchainSignaler by value because they may not exist when the callback runs. void MVKPresentableSwapchainImage::addPresentedHandler(id mtlDrawable, - MVKImagePresentInfo presentInfo) { + MVKImagePresentInfo presentInfo, + MVKSwapchainSignaler signaler) { + beginPresentation(presentInfo); + #if !MVK_OS_SIMULATOR if ([mtlDrawable respondsToSelector: @selector(addPresentedHandler:)]) { - retain(); // Ensure this image is not destroyed while awaiting presentation - [mtlDrawable addPresentedHandler: ^(id drawable) { - // Since we're in a callback, it's possible that the swapchain has been released by now. - // Lock the swapchain, and test if it is present before doing anything with it. - lock_guard cblock(_swapchainLock); - if (_swapchain) { _swapchain->recordPresentTime(presentInfo, drawable.presentedTime * 1.0e9); } - release(); + [mtlDrawable addPresentedHandler: ^(id mtlDrwbl) { + endPresentation(presentInfo, signaler, mtlDrwbl.presentedTime * 1.0e9); }]; - return; - } + } else #endif - - // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, - // treat it as if the present happened when requested. - // Since this function may be called in a callback, it's possible that - // the swapchain has been released by the time this function runs. - // Lock the swapchain, and test if it is present before doing anything with it. - lock_guard lock(_swapchainLock); - if (_swapchain) {_swapchain->recordPresentTime(presentInfo); } + { + // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, + // treat it as if the present happened when requested. + endPresentation(presentInfo, signaler); + } } -// Resets the MTLTexture and CAMetalDrawable underlying this image. 
+// Ensure this image and the swapchain are not destroyed while awaiting presentation +void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) { + retain(); + _swapchain->beginPresentation(presentInfo); + _presentationStartTime = getDevice()->getPerformanceTimestamp(); +} + +void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo, + const MVKSwapchainSignaler& signaler, + uint64_t actualPresentTime) { + + // If the presentation time is not available, use the current nanosecond runtime clock, + // which should be reasonably accurate (sub-ms) to the presentation time. The presentation + // time will not be available if the presentation did not actually happen, such as when + // running headless, or on a test harness that is not attached to the windowing system. + if (actualPresentTime == 0) { actualPresentTime = mvkGetRuntimeNanoseconds(); } + + { // Scope to avoid deadlock if release() is run within detachment lock + // If I have become detached from the swapchain, it means the swapchain, and possibly the + // VkDevice, have been destroyed by the time of this callback, so do not reference them. + lock_guard lock(_detachmentLock); + if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } + if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } + } + + // Makes an image available for acquisition by the app. + // If any semaphores are waiting to be signaled when this image becomes available, the + // earliest semaphore is signaled, and this image remains unavailable for other uses. + signalAndUntrack(signaler); + release(); +} + +// Releases the CAMetalDrawable underlying this image. 
void MVKPresentableSwapchainImage::releaseMetalDrawable() { - for (uint8_t planeIndex = 0; planeIndex < _planes.size(); ++planeIndex) { - _planes[planeIndex]->releaseMTLTexture(); - } [_mtlDrawable release]; _mtlDrawable = nil; } -// Makes an image available for acquisition by the app. -// If any semaphores are waiting to be signaled when this image becomes available, the -// earliest semaphore is signaled, and this image remains unavailable for other uses. -void MVKPresentableSwapchainImage::makeAvailable(const MVKSwapchainSignaler& signaler) { - lock_guard lock(_availabilityLock); - - signalAndUnmarkAsTracked(signaler); -} - // Signal, untrack, and release any signalers that are tracking. +// Release the drawable before the lock, as it may trigger completion callback. void MVKPresentableSwapchainImage::makeAvailable() { + releaseMetalDrawable(); lock_guard lock(_availabilityLock); if ( !_availability.isAvailable ) { - signalAndUnmarkAsTracked(_preSignaler); + signalAndUntrack(_preSignaler); for (auto& sig : _availabilitySignalers) { - signalAndUnmarkAsTracked(sig); + signalAndUntrack(sig); } _availabilitySignalers.clear(); _availability.isAvailable = true; } } - #pragma mark Construction MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device, @@ -1426,17 +1457,34 @@ MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device, uint32_t swapchainIndex) : MVKSwapchainImage(device, pCreateInfo, swapchain, swapchainIndex) { - _mtlDrawable = nil; - _availability.acquisitionID = _swapchain->getNextAcquisitionID(); _availability.isAvailable = true; - _preSignaler = MVKSwapchainSignaler{nullptr, nullptr, 0}; + + if (swapchain->isHeadless()) { + @autoreleasepool { + MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: getMTLPixelFormat() + width: pCreateInfo->extent.width + height: pCreateInfo->extent.height + mipmapped: NO]; + mtlTexDesc.usageMVK = MTLTextureUsageRenderTarget; + 
mtlTexDesc.storageModeMVK = MTLStorageModePrivate; + + _mtlTextureHeadless = [[getMTLDevice() newTextureWithDescriptor: mtlTexDesc] retain]; // retained + } + } +} + + +void MVKPresentableSwapchainImage::destroy() { + releaseMetalDrawable(); + [_mtlTextureHeadless release]; + _mtlTextureHeadless = nil; + MVKSwapchainImage::destroy(); } // Unsignaled signalers will exist if this image is acquired more than it is presented. // Ensure they are signaled and untracked so the fences and semaphores will be released. MVKPresentableSwapchainImage::~MVKPresentableSwapchainImage() { - releaseMetalDrawable(); makeAvailable(); } @@ -1464,8 +1512,8 @@ VkResult MVKPeerSwapchainImage::bindDeviceMemory2(const VkBindImageMemoryInfo* p #pragma mark Metal -id MVKPeerSwapchainImage::getCAMetalDrawable() { - return ((MVKSwapchainImage*)_swapchain->getPresentableImage(_swapchainIndex))->getCAMetalDrawable(); +id MVKPeerSwapchainImage::getMTLTexture(uint8_t planeIndex) { + return ((MVKSwapchainImage*)_swapchain->getPresentableImage(_swapchainIndex))->getMTLTexture(planeIndex); } @@ -1627,6 +1675,14 @@ VkResult MVKImageViewPlane::initSwizzledMTLPixelFormat(const VkImageViewCreateIn adjustAnyComponentSwizzleValue(a, R, A, B, G, R); break; + case VK_FORMAT_B4G4R4A4_UNORM_PACK16: + // Metal doesn't support this directly, so use a swizzle to get the ordering right. 
+ adjustAnyComponentSwizzleValue(r, B, B, G, R, A); + adjustAnyComponentSwizzleValue(g, G, B, G, R, A); + adjustAnyComponentSwizzleValue(b, R, B, G, R, A); + adjustAnyComponentSwizzleValue(a, A, B, G, R, A); + break; + default: break; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h index 04aea66e..da63fea4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h @@ -41,9 +41,14 @@ typedef struct MVKEntryPoint { bool isDevice; bool isCore() { return !ext1Name && !ext2Name; } - bool isEnabled(uint32_t enabledVersion, const MVKExtensionList& extList) { - return ((isCore() && MVK_VULKAN_API_VERSION_CONFORM(enabledVersion) >= apiVersion) || - extList.isEnabled(ext1Name) || extList.isEnabled(ext2Name)); + bool isEnabled(uint32_t enabledVersion, const MVKExtensionList& extList, const MVKExtensionList* instExtList = nullptr) { + bool isAPISupported = MVK_VULKAN_API_VERSION_CONFORM(enabledVersion) >= apiVersion; + auto isExtnSupported = [this, isAPISupported](const MVKExtensionList& extList) { + return extList.isEnabled(this->ext1Name) && (isAPISupported || !this->ext2Name || extList.isEnabled(this->ext2Name)); + }; + return ((isCore() && isAPISupported) || + isExtnSupported(extList) || + (instExtList && isExtnSupported(*instExtList))); } } MVKEntryPoint; @@ -115,6 +120,9 @@ public: MVKSurface* createSurface(const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator); + MVKSurface* createSurface(const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator); + MVKSurface* createSurface(const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator); @@ -181,9 +189,8 @@ protected: void propagateDebugName() override {} void initProcAddrs(); - void initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo); void initMVKConfig(const VkInstanceCreateInfo* pCreateInfo); - NSArray>* 
getAvailableMTLDevicesArray(); + void initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo); VkDebugReportFlagsEXT getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel); VkDebugUtilsMessageSeverityFlagBitsEXT getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel); VkDebugUtilsMessageTypeFlagsEXT getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel); @@ -197,6 +204,7 @@ protected: MVKSmallVector _debugReportCallbacks; MVKSmallVector _debugUtilMessengers; std::unordered_map _entryPoints; + std::string _autoGPUCaptureOutputFilepath; std::mutex _dcbLock; bool _hasDebugReportCallbacks; bool _hasDebugUtilsMessengers; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index b952aefe..2a98ef62 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -102,6 +102,11 @@ MVKSurface* MVKInstance::createSurface(const VkMetalSurfaceCreateInfoEXT* pCreat return new MVKSurface(this, pCreateInfo, pAllocator); } +MVKSurface* MVKInstance::createSurface(const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator) { + return new MVKSurface(this, pCreateInfo, pAllocator); +} + MVKSurface* MVKInstance::createSurface(const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator) { return new MVKSurface(this, pCreateInfo, pAllocator); @@ -238,94 +243,37 @@ void MVKInstance::debugReportMessage(MVKVulkanAPIObject* mvkAPIObj, MVKConfigLog VkDebugReportFlagsEXT MVKInstance::getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return VK_DEBUG_REPORT_DEBUG_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_INFO: - return VK_DEBUG_REPORT_INFORMATION_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_REPORT_WARNING_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_REPORT_ERROR_BIT_EXT; 
+ case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_REPORT_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_REPORT_WARNING_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_REPORT_INFORMATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_REPORT_DEBUG_BIT_EXT; + default: return VK_DEBUG_REPORT_ERROR_BIT_EXT; } } VkDebugUtilsMessageSeverityFlagBitsEXT MVKInstance::getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_INFO: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; + default: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; } } VkDebugUtilsMessageTypeFlagsEXT MVKInstance::getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - case MVK_CONFIG_LOG_LEVEL_INFO: - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; + case 
MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; + default: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; } } #pragma mark Object Creation -// Returns an autoreleased array containing the MTLDevices available on this system, sorted according -// to power, with higher power GPU's at the front of the array. This ensures that a lazy app that simply -// grabs the first GPU will get a high-power one by default. If MVKConfiguration::forceLowPowerGPU is set, -// the returned array will only include low-power devices. -NSArray>* MVKInstance::getAvailableMTLDevicesArray() { - NSMutableArray* mtlDevs = [NSMutableArray array]; - -#if MVK_MACOS - NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease]; - if (rawMTLDevs) { - bool forceLowPower = getMVKConfig().forceLowPowerGPU; - - // Populate the array of appropriate MTLDevices - for (id md in rawMTLDevs) { - if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; } - } - - // Sort by power - [mtlDevs sortUsingComparator: ^(id md1, id md2) { - BOOL md1IsLP = md1.isLowPower; - BOOL md2IsLP = md2.isLowPower; - - if (md1IsLP == md2IsLP) { - // If one device is headless and the other one is not, select the - // one that is not headless first. - BOOL md1IsHeadless = md1.isHeadless; - BOOL md2IsHeadless = md2.isHeadless; - if (md1IsHeadless == md2IsHeadless ) { - return NSOrderedSame; - } - return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending; - } - - return md2IsLP ? 
NSOrderedAscending : NSOrderedDescending; - }]; - - } -#endif // MVK_MACOS - -#if MVK_IOS_OR_TVOS - id md = [MTLCreateSystemDefaultDevice() autorelease]; - if (md) { [mtlDevs addObject: md]; } -#endif // MVK_IOS_OR_TVOS - - return mtlDevs; // retained -} - MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExtensions(this) { initDebugCallbacks(pCreateInfo); // Do before any creation activities @@ -347,7 +295,7 @@ MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExte // This effort creates a number of autoreleased instances of Metal // and other Obj-C classes, so wrap it all in an autorelease pool. @autoreleasepool { - NSArray>* mtlDevices = getAvailableMTLDevicesArray(); + NSArray>* mtlDevices = mvkGetAvailableMTLDevicesArray(this); _physicalDevices.reserve(mtlDevices.count); for (id mtlDev in mtlDevices) { _physicalDevices.push_back(new MVKPhysicalDevice(this, mtlDev)); @@ -398,20 +346,13 @@ void MVKInstance::initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo) { } } -#define STR(NAME) #NAME -#define CHECK_CONFIG(name, configSetting, type) \ - if(mvkStringsAreEqual(pSetting->pSettingName, STR(MVK_CONFIG_##name))) { \ - _mvkConfig.configSetting = *(type*)(pSetting->pValues); \ - continue; \ - } - -// If the VK_EXT_layer_settings extension is enabled, initialize the local +// If the VK_EXT_layer_settings extension is enabled, initialize the local // MVKConfiguration from the global version built from environment variables. 
void MVKInstance::initMVKConfig(const VkInstanceCreateInfo* pCreateInfo) { if ( !_enabledExtensions.vk_EXT_layer_settings.enabled ) { return; } - _mvkConfig = getMVKConfig(); + _mvkConfig = mvkConfig(); VkLayerSettingsCreateInfoEXT* pLSCreateInfo = nil; for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) { @@ -429,42 +370,15 @@ void MVKInstance::initMVKConfig(const VkInstanceCreateInfo* pCreateInfo) { for (uint32_t lsIdx = 0; lsIdx < pLSCreateInfo->settingCount; lsIdx++) { const auto* pSetting = &pLSCreateInfo->pSettings[lsIdx]; - CHECK_CONFIG(DEBUG, debugMode, VkBool32); - CHECK_CONFIG(SHADER_CONVERSION_FLIP_VERTEX_Y, shaderConversionFlipVertexY, VkBool32); - CHECK_CONFIG(SYNCHRONOUS_QUEUE_SUBMITS, synchronousQueueSubmits, VkBool32); - CHECK_CONFIG(PREFILL_METAL_COMMAND_BUFFERS, prefillMetalCommandBuffers, MVKPrefillMetalCommandBuffersStyle); - CHECK_CONFIG(MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE, maxActiveMetalCommandBuffersPerQueue, uint32_t); - CHECK_CONFIG(SUPPORT_LARGE_QUERY_POOLS, supportLargeQueryPools, VkBool32); - CHECK_CONFIG(PRESENT_WITH_COMMAND_BUFFER, presentWithCommandBuffer, VkBool32); - CHECK_CONFIG(SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST, swapchainMinMagFilterUseNearest, VkBool32); - CHECK_CONFIG(METAL_COMPILE_TIMEOUT, metalCompileTimeout, uint64_t); - CHECK_CONFIG(PERFORMANCE_TRACKING, performanceTracking, VkBool32); - CHECK_CONFIG(PERFORMANCE_LOGGING_FRAME_COUNT, performanceLoggingFrameCount, uint32_t); - CHECK_CONFIG(ACTIVITY_PERFORMANCE_LOGGING_STYLE, activityPerformanceLoggingStyle, MVKConfigActivityPerformanceLoggingStyle); - CHECK_CONFIG(DISPLAY_WATERMARK, displayWatermark, VkBool32); - CHECK_CONFIG(SPECIALIZED_QUEUE_FAMILIES, specializedQueueFamilies, VkBool32); - CHECK_CONFIG(SWITCH_SYSTEM_GPU, switchSystemGPU, VkBool32); - CHECK_CONFIG(FULL_IMAGE_VIEW_SWIZZLE, fullImageViewSwizzle, VkBool32); - CHECK_CONFIG(DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX, defaultGPUCaptureScopeQueueFamilyIndex, 
VkBool32); - CHECK_CONFIG(DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX, defaultGPUCaptureScopeQueueIndex, VkBool32); - CHECK_CONFIG(FAST_MATH_ENABLED, fastMathEnabled, MVKConfigFastMath); - CHECK_CONFIG(LOG_LEVEL, logLevel, MVKConfigLogLevel); - CHECK_CONFIG(TRACE_VULKAN_CALLS, traceVulkanCalls, MVKConfigTraceVulkanCalls); - CHECK_CONFIG(FORCE_LOW_POWER_GPU, forceLowPowerGPU, VkBool32); - CHECK_CONFIG(VK_SEMAPHORE_SUPPORT_STYLE, semaphoreSupportStyle, MVKVkSemaphoreSupportStyle); - CHECK_CONFIG(AUTO_GPU_CAPTURE_SCOPE, autoGPUCaptureScope, MVKConfigAutoGPUCaptureScope); - CHECK_CONFIG(AUTO_GPU_CAPTURE_OUTPUT_FILE, autoGPUCaptureOutputFilepath, const char*); - CHECK_CONFIG(TEXTURE_1D_AS_2D, texture1DAs2D, VkBool32); - CHECK_CONFIG(PREALLOCATE_DESCRIPTORS, preallocateDescriptors, VkBool32); - CHECK_CONFIG(USE_COMMAND_POOLING, useCommandPooling, VkBool32); - CHECK_CONFIG(USE_MTLHEAP, useMTLHeap, VkBool32); - CHECK_CONFIG(API_VERSION_TO_ADVERTISE, apiVersionToAdvertise, uint32_t); - CHECK_CONFIG(ADVERTISE_EXTENSIONS, advertiseExtensions, uint32_t); - CHECK_CONFIG(RESUME_LOST_DEVICE, resumeLostDevice, VkBool32); - CHECK_CONFIG(USE_METAL_ARGUMENT_BUFFERS, useMetalArgumentBuffers, MVKUseMetalArgumentBuffers); - CHECK_CONFIG(SHADER_COMPRESSION_ALGORITHM, shaderSourceCompressionAlgorithm, MVKConfigCompressionAlgorithm); - CHECK_CONFIG(SHOULD_MAXIMIZE_CONCURRENT_COMPILATION, shouldMaximizeConcurrentCompilation, VkBool32); +#define STR(name) #name +#define MVK_CONFIG_MEMBER(member, mbrType, name) \ + if(mvkStringsAreEqual(pSetting->pSettingName, STR(MVK_CONFIG_##name))) { \ + _mvkConfig.member = *(mbrType*)(pSetting->pValues); \ + continue; \ + } +#include "MVKConfigMembers.def" } + mvkSetConfig(_mvkConfig, _mvkConfig, _autoGPUCaptureOutputFilepath); } #define ADD_ENTRY_POINT_MAP(name, func, api, ext1, ext2, isDev) \ @@ -507,8 +421,8 @@ void MVKInstance::initMVKConfig(const VkInstanceCreateInfo* pCreateInfo) { #define ADD_INST_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, 
VK_##EXT##_EXTENSION_NAME, nullptr, false) #define ADD_DVC_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, VK_##EXT##_EXTENSION_NAME, nullptr, true) -#define ADD_INST_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, false) -#define ADD_DVC_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, true) +#define ADD_INST_EXT2_ENTRY_POINT(func, API, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_API_VERSION_##API, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, false) +#define ADD_DVC_EXT2_ENTRY_POINT(func, API, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_API_VERSION_##API, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, true) // Add an open function, not tied to core or an extension. #define ADD_INST_OPEN_ENTRY_POINT(func) ADD_ENTRY_POINT(func, 0, nullptr, nullptr, false) @@ -553,21 +467,23 @@ void MVKInstance::initProcAddrs() { ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfacePresentModesKHR, KHR_SURFACE); ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfaceCapabilities2KHR, KHR_GET_SURFACE_CAPABILITIES_2); ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfaceFormats2KHR, KHR_GET_SURFACE_CAPABILITIES_2); + ADD_INST_EXT_ENTRY_POINT(vkCreateHeadlessSurfaceEXT, EXT_HEADLESS_SURFACE); + ADD_INST_EXT_ENTRY_POINT(vkCreateMetalSurfaceEXT, EXT_METAL_SURFACE); ADD_INST_EXT_ENTRY_POINT(vkCreateDebugReportCallbackEXT, EXT_DEBUG_REPORT); ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, EXT_DEBUG_REPORT); ADD_INST_EXT_ENTRY_POINT(vkDebugReportMessageEXT, EXT_DEBUG_REPORT); - ADD_INST_EXT_ENTRY_POINT(vkSetDebugUtilsObjectNameEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkSetDebugUtilsObjectTagEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkQueueBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkQueueEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkQueueInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); 
- ADD_INST_EXT_ENTRY_POINT(vkCmdBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkCmdEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkCmdInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + // n.b. Despite that VK_EXT_debug_utils is an instance extension, these functions are device functions. + ADD_DVC_EXT_ENTRY_POINT(vkSetDebugUtilsObjectNameEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkSetDebugUtilsObjectTagEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkQueueBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkQueueEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkQueueInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkCmdBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkCmdEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkCmdInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkCreateDebugUtilsMessengerEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugUtilsMessengerEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkSubmitDebugUtilsMessageEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkCreateMetalSurfaceEXT, EXT_METAL_SURFACE); #ifdef VK_USE_PLATFORM_IOS_MVK ADD_INST_EXT_ENTRY_POINT(vkCreateIOSSurfaceMVK, MVK_IOS_SURFACE); @@ -762,16 +678,16 @@ void MVKInstance::initProcAddrs() { ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdResetEvent2, KHR, KHR_SYNCHRONIZATION_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdResolveImage2, KHR, KHR_COPY_COMMANDS_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetCullMode, EXT, EXT_EXTENDED_DYNAMIC_STATE); - ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBiasEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); + ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBiasEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBoundsTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthCompareOp, EXT, EXT_EXTENDED_DYNAMIC_STATE); 
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthWriteEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetEvent2, KHR, KHR_SYNCHRONIZATION_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetFrontFace, EXT, EXT_EXTENDED_DYNAMIC_STATE); - ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveRestartEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); + ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveRestartEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveTopology, EXT, EXT_EXTENDED_DYNAMIC_STATE); - ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetRasterizerDiscardEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); + ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetRasterizerDiscardEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetScissorWithCount, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetStencilOp, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetStencilTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); @@ -796,16 +712,16 @@ void MVKInstance::initProcAddrs() { ADD_DVC_EXT_ENTRY_POINT(vkMapMemory2KHR, KHR_MAP_MEMORY_2); ADD_DVC_EXT_ENTRY_POINT(vkUnmapMemory2KHR, KHR_MAP_MEMORY_2); ADD_DVC_EXT_ENTRY_POINT(vkCmdPushDescriptorSetKHR, KHR_PUSH_DESCRIPTOR); - ADD_DVC_EXT2_ENTRY_POINT(vkCmdPushDescriptorSetWithTemplateKHR, KHR_PUSH_DESCRIPTOR, KHR_DESCRIPTOR_UPDATE_TEMPLATE); + ADD_DVC_EXT2_ENTRY_POINT(vkCmdPushDescriptorSetWithTemplateKHR, 1_1, KHR_PUSH_DESCRIPTOR, KHR_DESCRIPTOR_UPDATE_TEMPLATE); ADD_DVC_EXT_ENTRY_POINT(vkCreateSwapchainKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkDestroySwapchainKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkGetSwapchainImagesKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkAcquireNextImageKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkQueuePresentKHR, KHR_SWAPCHAIN); - 
ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupPresentCapabilitiesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); - ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); - ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); - ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupPresentCapabilitiesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); ADD_DVC_EXT_ENTRY_POINT(vkGetCalibratedTimestampsEXT, EXT_CALIBRATED_TIMESTAMPS); ADD_DVC_EXT_ENTRY_POINT(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, EXT_CALIBRATED_TIMESTAMPS); ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectTagEXT, EXT_DEBUG_MARKER); @@ -825,6 +741,29 @@ void MVKInstance::initProcAddrs() { ADD_DVC_EXT_ENTRY_POINT(vkReleaseSwapchainImagesEXT, EXT_SWAPCHAIN_MAINTENANCE_1); ADD_DVC_EXT_ENTRY_POINT(vkGetRefreshCycleDurationGOOGLE, GOOGLE_DISPLAY_TIMING); ADD_DVC_EXT_ENTRY_POINT(vkGetPastPresentationTimingGOOGLE, GOOGLE_DISPLAY_TIMING); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEXT, EXT_EXTENDED_DYNAMIC_STATE_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetPatchControlPointsEXT, EXT_EXTENDED_DYNAMIC_STATE_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetAlphaToCoverageEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetAlphaToOneEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendAdvancedEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendEquationEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + 
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorWriteMaskEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetConservativeRasterizationModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClampEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClipEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClipNegativeOneToOneEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetExtraPrimitiveOverestimationSizeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLineRasterizationModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLineStippleEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetPolygonModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetProvokingVertexModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetRasterizationSamplesEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetRasterizationStreamEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleLocationsEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleMaskEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetTessellationDomainOriginEXT, EXT_EXTENDED_DYNAMIC_STATE_3); } void MVKInstance::logVersions() { @@ -850,6 +789,11 @@ MVKInstance::~MVKInstance() { _useCreationCallbacks = true; mvkDestroyContainerContents(_physicalDevices); + // Since this message may invoke debug callbacks, do it before locking callbacks. 
+ MVKLogInfo("Destroying VkInstance for Vulkan version %s with %d Vulkan extensions enabled.", + mvkGetVulkanVersionString(_appInfo.apiVersion).c_str(), + _enabledExtensions.getEnabledCount()); + lock_guard lock(_dcbLock); mvkDestroyContainerContents(_debugReportCallbacks); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 505e894c..6827b5b9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -132,7 +132,10 @@ public: /** Returns the debug report object type of this object. */ VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT; } - /** Binds this pipeline to the specified command encoder. */ + /** Called when the pipeline has been bound to the command encoder. */ + virtual void wasBound(MVKCommandEncoder* cmdEncoder) {} + + /** Encodes this pipeline to the command encoder. */ virtual void encode(MVKCommandEncoder* cmdEncoder, uint32_t stage = 0) = 0; /** Binds the push constants to a command encoder. */ @@ -218,6 +221,56 @@ struct MVKStagedDescriptorBindingUse { MVKBitArray stages[4] = {}; }; +/** Enumeration identifying different state content types. */ +enum MVKRenderStateType { + Unknown = 0, + BlendConstants, + CullMode, + DepthBias, + DepthBiasEnable, + DepthBounds, + DepthBoundsTestEnable, + DepthClipEnable, + DepthCompareOp, + DepthTestEnable, + DepthWriteEnable, + FrontFace, + LineWidth, + LogicOp, + LogicOpEnable, + PatchControlPoints, + PolygonMode, + PrimitiveRestartEnable, + PrimitiveTopology, + RasterizerDiscardEnable, + SampleLocations, + SampleLocationsEnable, + Scissors, + StencilCompareMask, + StencilOp, + StencilReference, + StencilTestEnable, + StencilWriteMask, + VertexStride, + Viewports, + MVKRenderStateTypeCount +}; + +/** Boolean tracking of rendering state. 
*/ +struct MVKRenderStateFlags { + void enable(MVKRenderStateType rs) { if (rs) { mvkEnableFlags(_stateFlags, getFlagMask(rs)); } } + void disable(MVKRenderStateType rs) { if (rs) { mvkDisableFlags(_stateFlags, getFlagMask(rs)); } } + void set(MVKRenderStateType rs, bool val) { val? enable(rs) : disable(rs); } + void enableAll() { mvkEnableAllFlags(_stateFlags); } + void disableAll() { mvkDisableAllFlags(_stateFlags); } + bool isEnabled(MVKRenderStateType rs) { return mvkIsAnyFlagEnabled(_stateFlags, getFlagMask(rs)); } +protected: + uint32_t getFlagMask(MVKRenderStateType rs) { return rs ? (1u << (rs - 1u)) : 0; } // Ignore Unknown type + + uint32_t _stateFlags = 0; + static_assert(sizeof(_stateFlags) * 8 >= MVKRenderStateTypeCount - 1, "_stateFlags is too small to support the number of flags in MVKRenderStateType."); // Ignore Unknown type +}; + /** Represents an Vulkan graphics pipeline. */ class MVKGraphicsPipeline : public MVKPipeline { @@ -226,18 +279,16 @@ public: /** Returns the number and order of stages in this pipeline. Draws commands must encode this pipeline once per stage. */ void getStages(MVKPiplineStages& stages); - /** Binds this pipeline to the specified command encoder. */ + virtual void wasBound(MVKCommandEncoder* cmdEncoder) override; + void encode(MVKCommandEncoder* cmdEncoder, uint32_t stage = 0) override; - /** Returns whether this pipeline permits dynamic setting of the specifie state. */ - bool supportsDynamicState(VkDynamicState state); + /** Returns whether this pipeline permits dynamic setting of the state. */ + bool isDynamicState(MVKRenderStateType state) { return _dynamicState.isEnabled(state); } /** Returns whether this pipeline has tessellation shaders. */ bool isTessellationPipeline() { return _tessInfo.patchControlPoints > 0; } - /** Returns the number of input tessellation patch control points. 
*/ - uint32_t getInputControlPointCount() { return _tessInfo.patchControlPoints; } - /** Returns the number of output tessellation patch control points. */ uint32_t getOutputControlPointCount() { return _outputControlPointCount; } @@ -271,9 +322,6 @@ public: /** Returns true if the tessellation control shader needs a buffer to store its per-patch output. */ bool needsTessCtlPatchOutputBuffer() { return _needsTessCtlPatchOutputBuffer; } - /** Returns whether this pipeline has custom sample positions enabled. */ - bool isUsingCustomSamplePositions() { return _isUsingCustomSamplePositions; } - /** Returns the Vulkan primitive topology. */ VkPrimitiveTopology getVkPrimitiveTopology() { return _vkPrimitiveTopology; } @@ -286,9 +334,6 @@ public: */ bool isValidVertexBufferIndex(MVKShaderStage stage, uint32_t mtlBufferIndex); - /** Returns the custom samples used by this pipeline. */ - MVKArrayRef getCustomSamplePositions() { return _customSamplePositions.contents(); } - /** Returns the Metal vertex buffer index to use for the specified vertex attribute binding number. 
*/ uint32_t getMetalBufferIndexForVertexAttributeBinding(uint32_t binding) { return _device->getMetalBufferIndexForVertexAttributeBinding(binding); } @@ -320,7 +365,8 @@ protected: id getOrCompilePipeline(MTLComputePipelineDescriptor* plDesc, id& plState, const char* compilerType); bool compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, MVKMTLFunction* pVtxFunctions, VkPipelineCreationFeedback* pVertexFB); bool compileTessControlStageState(MTLComputePipelineDescriptor* tcPLDesc, VkPipelineCreationFeedback* pTessCtlFB); - void initCustomSamplePositions(const VkGraphicsPipelineCreateInfo* pCreateInfo); + void initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo); + void initSampleLocations(const VkGraphicsPipelineCreateInfo* pCreateInfo); void initMTLRenderPipelineState(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, VkPipelineCreationFeedback* pPipelineFB, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo* pTessCtlSS, VkPipelineCreationFeedback* pTessCtlFB, const VkPipelineShaderStageCreateInfo* pTessEvalSS, VkPipelineCreationFeedback* pTessEvalFB, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB); void initShaderConversionConfig(SPIRVToMSLConversionConfiguration& shaderConfig, const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData); void initReservedVertexAttributeBufferCount(const VkGraphicsPipelineCreateInfo* pCreateInfo); @@ -356,11 +402,11 @@ protected: VkPipelineTessellationStateCreateInfo _tessInfo; VkPipelineRasterizationStateCreateInfo _rasterInfo; VkPipelineDepthStencilStateCreateInfo _depthStencilInfo; + MVKRenderStateFlags _dynamicState; MVKSmallVector _viewports; MVKSmallVector _scissors; - MVKSmallVector _dynamicState; - MVKSmallVector _customSamplePositions; + MVKSmallVector _sampleLocations; MVKSmallVector 
_translatedVertexBindings; MVKSmallVector _zeroDivisorVertexBindings; MVKSmallVector _mtlArgumentEncoders; @@ -374,11 +420,7 @@ protected: id _mtlTessControlStageState = nil; id _mtlPipelineState = nil; - float _blendConstants[4] = { 0.0, 0.0, 0.0, 1.0 }; - MTLCullMode _mtlCullMode; - MTLWinding _mtlFrontWinding; - MTLTriangleFillMode _mtlFillMode; - MTLDepthClipMode _mtlDepthClipMode; + float _blendConstants[4] = {}; MVKShaderImplicitRezBinding _reservedVertexAttributeBufferCount; MVKShaderImplicitRezBinding _viewRangeBufferIndex; MVKShaderImplicitRezBinding _outputBufferIndex; @@ -387,6 +429,8 @@ protected: uint32_t _tessCtlPatchOutputBufferIndex = 0; uint32_t _tessCtlLevelBufferIndex = 0; + bool _primitiveRestartEnable = true; + bool _hasRasterInfo = false; bool _needsVertexSwizzleBuffer = false; bool _needsVertexBufferSizeBuffer = false; bool _needsVertexDynamicOffsetBuffer = false; @@ -407,7 +451,7 @@ protected: bool _needsFragmentViewRangeBuffer = false; bool _isRasterizing = false; bool _isRasterizingColor = false; - bool _isUsingCustomSamplePositions = false; + bool _sampleLocationsEnable = false; }; @@ -419,7 +463,6 @@ class MVKComputePipeline : public MVKPipeline { public: - /** Binds this pipeline to the specified command encoder. */ void encode(MVKCommandEncoder* cmdEncoder, uint32_t = 0) override; /** Returns if this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). 
*/ diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 7b1d467b..da283549 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -49,7 +49,7 @@ void MVKPipelineLayout::bindDescriptorSets(MVKCommandEncoder* cmdEncoder, MVKArrayRef dynamicOffsets) { if (!cmdEncoder) { clearConfigurationResult(); } uint32_t dynamicOffsetIndex = 0; - size_t dsCnt = descriptorSets.size; + size_t dsCnt = descriptorSets.size(); for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) { MVKDescriptorSet* descSet = descriptorSets[dsIdx]; uint32_t dslIdx = firstSet + dsIdx; @@ -229,6 +229,13 @@ MVKPipeline::MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVK #pragma mark - #pragma mark MVKGraphicsPipeline +// Set retrieve-only rendering state when pipeline is bound, as it's too late at draw command. +void MVKGraphicsPipeline::wasBound(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setPatchControlPoints(_tessInfo.patchControlPoints, false); + cmdEncoder->_renderingState.setSampleLocations(_sampleLocations.contents(), false); + cmdEncoder->_renderingState.setSampleLocationsEnable(_sampleLocationsEnable, false); +} + void MVKGraphicsPipeline::getStages(MVKPiplineStages& stages) { if (isTessellationPipeline()) { stages.push_back(kMVKGraphicsStageVertex); @@ -292,24 +299,21 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage) // Depth stencil state - Cleared _depthStencilInfo values will disable depth testing cmdEncoder->_depthStencilState.setDepthStencilState(_depthStencilInfo); - cmdEncoder->_stencilReferenceValueState.setReferenceValues(_depthStencilInfo); // Rasterization - cmdEncoder->_blendColorState.setBlendColor(_blendConstants[0], _blendConstants[1], - _blendConstants[2], _blendConstants[3], false); - cmdEncoder->_depthBiasState.setDepthBias(_rasterInfo); - cmdEncoder->_viewportState.setViewports(_viewports.contents(), 0, 
false); - cmdEncoder->_scissorState.setScissors(_scissors.contents(), 0, false); - cmdEncoder->_mtlPrimitiveType = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(_vkPrimitiveTopology); - - [mtlCmdEnc setCullMode: _mtlCullMode]; - [mtlCmdEnc setFrontFacingWinding: _mtlFrontWinding]; - [mtlCmdEnc setTriangleFillMode: _mtlFillMode]; - - if (_device->_enabledFeatures.depthClamp) { - [mtlCmdEnc setDepthClipMode: _mtlDepthClipMode]; - } - + cmdEncoder->_renderingState.setPrimitiveTopology(_vkPrimitiveTopology, false); + cmdEncoder->_renderingState.setPrimitiveRestartEnable(_primitiveRestartEnable, false); + cmdEncoder->_renderingState.setBlendConstants(_blendConstants, false); + cmdEncoder->_renderingState.setStencilReferenceValues(_depthStencilInfo); + cmdEncoder->_renderingState.setViewports(_viewports.contents(), 0, false); + cmdEncoder->_renderingState.setScissors(_scissors.contents(), 0, false); + if (_hasRasterInfo) { + cmdEncoder->_renderingState.setCullMode(_rasterInfo.cullMode, false); + cmdEncoder->_renderingState.setFrontFace(_rasterInfo.frontFace, false); + cmdEncoder->_renderingState.setPolygonMode(_rasterInfo.polygonMode, false); + cmdEncoder->_renderingState.setDepthBias(_rasterInfo); + cmdEncoder->_renderingState.setDepthClipEnable( !_rasterInfo.depthClampEnable, false ); + } break; } @@ -320,21 +324,6 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage) cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer); } -bool MVKGraphicsPipeline::supportsDynamicState(VkDynamicState state) { - for (auto& ds : _dynamicState) { - if (state == ds) { - // Some dynamic states have other restrictions - switch (state) { - case VK_DYNAMIC_STATE_DEPTH_BIAS: - return _rasterInfo.depthBiasEnable; - default: - return true; - } - } - } - return false; -} - static const char vtxCompilerType[] = "Vertex stage pipeline for tessellation"; bool 
MVKGraphicsPipeline::compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, @@ -414,6 +403,10 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, const VkGraphicsPipelineCreateInfo* pCreateInfo) : MVKPipeline(device, pipelineCache, (MVKPipelineLayout*)pCreateInfo->layout, pCreateInfo->flags, parent) { + + // Extract dynamic state first, as it can affect many configurations. + initDynamicState(pCreateInfo); + // Determine rasterization early, as various other structs are validated and interpreted in this context. const VkPipelineRenderingCreateInfo* pRendInfo = getRenderingCreateInfo(pCreateInfo); _isRasterizing = !isRasterizationDisabled(pCreateInfo); @@ -509,17 +502,12 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, initMTLRenderPipelineState(pCreateInfo, reflectData, pPipelineFB, pVertexSS, pVertexFB, pTessCtlSS, pTessCtlFB, pTessEvalSS, pTessEvalFB, pFragmentSS, pFragmentFB); if ( !_hasValidMTLPipelineStates ) { return; } - // Track dynamic state - const VkPipelineDynamicStateCreateInfo* pDS = pCreateInfo->pDynamicState; - if (pDS) { - for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) { - _dynamicState.push_back(pDS->pDynamicStates[i]); - } - } - // Blending - must ignore allowed bad pColorBlendState pointer if rasterization disabled or no color attachments if (_isRasterizingColor && pCreateInfo->pColorBlendState) { - memcpy(&_blendConstants, &pCreateInfo->pColorBlendState->blendConstants, sizeof(_blendConstants)); + mvkCopy(_blendConstants, pCreateInfo->pColorBlendState->blendConstants, 4); + } else { + static float defaultBlendConstants[4] = { 0, 0.0, 0.0, 1.0 }; + mvkCopy(_blendConstants, defaultBlendConstants, 4); } // Topology @@ -527,27 +515,13 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, ? pCreateInfo->pInputAssemblyState->topology : VK_PRIMITIVE_TOPOLOGY_POINT_LIST); + _primitiveRestartEnable = pCreateInfo->pInputAssemblyState ? 
pCreateInfo->pInputAssemblyState->primitiveRestartEnable : true; + // Rasterization - _mtlCullMode = MTLCullModeNone; - _mtlFrontWinding = MTLWindingCounterClockwise; - _mtlFillMode = MTLTriangleFillModeFill; - _mtlDepthClipMode = MTLDepthClipModeClip; - bool hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState); - if (hasRasterInfo) { - _mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(_rasterInfo.cullMode); - _mtlFrontWinding = mvkMTLWindingFromVkFrontFace(_rasterInfo.frontFace); - _mtlFillMode = mvkMTLTriangleFillModeFromVkPolygonMode(_rasterInfo.polygonMode); - if (_rasterInfo.depthClampEnable) { - if (_device->_enabledFeatures.depthClamp) { - _mtlDepthClipMode = MTLDepthClipModeClamp; - } else { - setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device does not support depth clamping.")); - } - } - } + _hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState); // Must run after _isRasterizing and _dynamicState are populated - initCustomSamplePositions(pCreateInfo); + initSampleLocations(pCreateInfo); // Depth stencil content - clearing will disable depth and stencil testing // Must ignore allowed bad pDepthStencilState pointer if rasterization disabled or no depth or stencil attachment format @@ -557,26 +531,84 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, // Viewports and scissors - must ignore allowed bad pViewportState pointer if rasterization is disabled auto pVPState = _isRasterizing ? pCreateInfo->pViewportState : nullptr; if (pVPState) { - uint32_t vpCnt = pVPState->viewportCount; + + // If viewports are dynamic, ignore them here. + uint32_t vpCnt = (pVPState->pViewports && !isDynamicState(Viewports)) ? pVPState->viewportCount : 0; _viewports.reserve(vpCnt); for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) { - // If viewport is dyanamic, we still add a dummy so that the count will be tracked. 
- VkViewport vp; - if ( !supportsDynamicState(VK_DYNAMIC_STATE_VIEWPORT) ) { vp = pVPState->pViewports[vpIdx]; } - _viewports.push_back(vp); + _viewports.push_back(pVPState->pViewports[vpIdx]); } - uint32_t sCnt = pVPState->scissorCount; + // If scissors are dynamic, ignore them here. + uint32_t sCnt = (pVPState->pScissors && !isDynamicState(Scissors)) ? pVPState->scissorCount : 0; _scissors.reserve(sCnt); for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) { - // If scissor is dyanamic, we still add a dummy so that the count will be tracked. - VkRect2D sc; - if ( !supportsDynamicState(VK_DYNAMIC_STATE_SCISSOR) ) { sc = pVPState->pScissors[sIdx]; } - _scissors.push_back(sc); + _scissors.push_back(pVPState->pScissors[sIdx]); } } } +static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { + switch (vkDynamicState) { + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: return BlendConstants; + case VK_DYNAMIC_STATE_CULL_MODE: return CullMode; + case VK_DYNAMIC_STATE_DEPTH_BIAS: return DepthBias; + case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE: return DepthBiasEnable; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS: return DepthBounds; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE: return DepthBoundsTestEnable; + case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT: return DepthClipEnable; + case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT: return DepthClipEnable; + case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP: return DepthCompareOp; + case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: return DepthTestEnable; + case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: return DepthWriteEnable; + case VK_DYNAMIC_STATE_FRONT_FACE: return FrontFace; + case VK_DYNAMIC_STATE_LINE_WIDTH: return LineWidth; + case VK_DYNAMIC_STATE_LOGIC_OP_EXT: return LogicOp; + case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: return LogicOpEnable; + case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: return PatchControlPoints; + case VK_DYNAMIC_STATE_POLYGON_MODE_EXT: return PolygonMode; + case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE: return 
PrimitiveRestartEnable; + case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: return PrimitiveTopology; + case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE: return RasterizerDiscardEnable; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: return SampleLocations; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT: return SampleLocationsEnable; + case VK_DYNAMIC_STATE_SCISSOR: return Scissors; + case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: return Scissors; + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: return StencilCompareMask; + case VK_DYNAMIC_STATE_STENCIL_OP: return StencilOp; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: return StencilReference; + case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE: return StencilTestEnable; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: return StencilWriteMask; + case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: return VertexStride; + case VK_DYNAMIC_STATE_VIEWPORT: return Viewports; + case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT: return Viewports; + default: return Unknown; + } +} + +// This is executed first during pipeline creation. Do not depend on any internal state here. +void MVKGraphicsPipeline::initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo) { + const auto* pDS = pCreateInfo->pDynamicState; + if ( !pDS ) { return; } + + for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) { + auto dynStateType = getRenderStateType(pDS->pDynamicStates[i]); + bool isDynamic = true; + + // Some dynamic states have other restrictions + switch (dynStateType) { + case VertexStride: + isDynamic = _device->_pMetalFeatures->dynamicVertexStride; + if ( !isDynamic ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device and platform does not support VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE (macOS 14.0 or iOS/tvOS 17.0, plus either Apple4 or Mac2 GPU).")); } + break; + default: + break; + } + + if (isDynamic) { _dynamicState.enable(dynStateType); } + } +} + // Either returns an existing pipeline state or compiles a new one. 
id MVKGraphicsPipeline::getOrCompilePipeline(MTLRenderPipelineDescriptor* plDesc, id& plState) { @@ -603,7 +635,7 @@ id MVKGraphicsPipeline::getOrCompilePipeline(MTLCompute } // Must run after _isRasterizing and _dynamicState are populated -void MVKGraphicsPipeline::initCustomSamplePositions(const VkGraphicsPipelineCreateInfo* pCreateInfo) { +void MVKGraphicsPipeline::initSampleLocations(const VkGraphicsPipelineCreateInfo* pCreateInfo) { // Must ignore allowed bad pMultisampleState pointer if rasterization disabled if ( !(_isRasterizing && pCreateInfo->pMultisampleState) ) { return; } @@ -612,12 +644,9 @@ void MVKGraphicsPipeline::initCustomSamplePositions(const VkGraphicsPipelineCrea switch (next->sType) { case VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT: { auto* pSampLocnsCreateInfo = (VkPipelineSampleLocationsStateCreateInfoEXT*)next; - _isUsingCustomSamplePositions = pSampLocnsCreateInfo->sampleLocationsEnable; - if (_isUsingCustomSamplePositions && !supportsDynamicState(VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT)) { - for (uint32_t slIdx = 0; slIdx < pSampLocnsCreateInfo->sampleLocationsInfo.sampleLocationsCount; slIdx++) { - auto& sl = pSampLocnsCreateInfo->sampleLocationsInfo.pSampleLocations[slIdx]; - _customSamplePositions.push_back(MTLSamplePositionMake(sl.x, sl.y)); - } + _sampleLocationsEnable = pSampLocnsCreateInfo->sampleLocationsEnable; + for (uint32_t slIdx = 0; slIdx < pSampLocnsCreateInfo->sampleLocationsInfo.sampleLocationsCount; slIdx++) { + _sampleLocations.push_back(pSampLocnsCreateInfo->sampleLocationsInfo.pSampleLocations[slIdx]); } break; } @@ -1311,6 +1340,10 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto return true; } +#if !MVK_XCODE_15 +static const NSUInteger MTLBufferLayoutStrideDynamic = NSUIntegerMax; +#endif + template bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, @@ -1328,31 +1361,30 @@ bool 
MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, } // Vertex buffer bindings - uint32_t vbCnt = pVI->vertexBindingDescriptionCount; + bool isVtxStrideStatic = !isDynamicState(VertexStride); uint32_t maxBinding = 0; + uint32_t vbCnt = pVI->vertexBindingDescriptionCount; for (uint32_t i = 0; i < vbCnt; i++) { const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i]; if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) { - // Vulkan allows any stride, but Metal only allows multiples of 4. - // TODO: We could try to expand the buffer to the required alignment in that case. - VkDeviceSize mtlVtxStrideAlignment = _device->_pMetalFeatures->vertexStrideAlignment; - if ((pVKVB->stride % mtlVtxStrideAlignment) != 0) { - setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", mtlVtxStrideAlignment)); + // Vulkan allows any stride, but Metal requires multiples of 4 on older GPUs. + if (isVtxStrideStatic && (pVKVB->stride % _device->_pMetalFeatures->vertexStrideAlignment) != 0) { + setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", _device->_pMetalFeatures->vertexStrideAlignment)); return false; } maxBinding = max(pVKVB->binding, maxBinding); uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); auto vbDesc = inputDesc.layouts[vbIdx]; - if (pVKVB->stride == 0) { + if (isVtxStrideStatic && pVKVB->stride == 0) { // Stride can't be 0, it will be set later to attributes' maximum offset + size // to prevent it from being larger than the underlying buffer permits. vbDesc.stride = 0; vbDesc.stepFunction = (decltype(vbDesc.stepFunction))MTLStepFunctionConstant; vbDesc.stepRate = 0; } else { - vbDesc.stride = pVKVB->stride; + vbDesc.stride = isVtxStrideStatic ? 
pVKVB->stride : MTLBufferLayoutStrideDynamic; vbDesc.stepFunction = (decltype(vbDesc.stepFunction))mvkMTLStepFunctionFromVkVertexInputRate(pVKVB->inputRate, isTessellationPipeline()); vbDesc.stepRate = 1; } @@ -1386,52 +1418,54 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, if (shaderConfig.isShaderInputLocationUsed(pVKVA->location)) { uint32_t vaBinding = pVKVA->binding; uint32_t vaOffset = pVKVA->offset; + auto vaDesc = inputDesc.attributes[pVKVA->location]; + auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format); // Vulkan allows offsets to exceed the buffer stride, but Metal doesn't. // If this is the case, fetch a translated artificial buffer binding, using the same MTLBuffer, // but that is translated so that the reduced VA offset fits into the binding stride. - const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions; - uint32_t attrSize = 0; - for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) { - if (pVKVB->binding == pVKVA->binding) { - attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format); - if (pVKVB->stride == 0) { - // The step is set to constant, but we need to change stride to be non-zero for metal. - // Look for the maximum offset + size to set as the stride. - uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); - auto vbDesc = inputDesc.layouts[vbIdx]; - uint32_t strideLowBound = vaOffset + attrSize; - if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound; - } else if (vaOffset && vaOffset + attrSize > pVKVB->stride) { - // Move vertex attribute offset into the stride. This vertex attribute may be - // combined with other vertex attributes into the same translated buffer binding. - // But if the reduced offset combined with the vertex attribute size still won't - // fit into the buffer binding stride, force the vertex attribute offset to zero, - // effectively dedicating this vertex attribute to its own buffer binding. 
- uint32_t origOffset = vaOffset; - vaOffset %= pVKVB->stride; - if (vaOffset + attrSize > pVKVB->stride) { - vaOffset = 0; + if (isVtxStrideStatic) { + const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions; + uint32_t attrSize = 0; + for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) { + if (pVKVB->binding == pVKVA->binding) { + attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format); + if (pVKVB->stride == 0) { + // The step is set to constant, but we need to change stride to be non-zero for metal. + // Look for the maximum offset + size to set as the stride. + uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); + auto vbDesc = inputDesc.layouts[vbIdx]; + uint32_t strideLowBound = vaOffset + attrSize; + if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound; + } else if (vaOffset && vaOffset + attrSize > pVKVB->stride) { + // Move vertex attribute offset into the stride. This vertex attribute may be + // combined with other vertex attributes into the same translated buffer binding. + // But if the reduced offset combined with the vertex attribute size still won't + // fit into the buffer binding stride, force the vertex attribute offset to zero, + // effectively dedicating this vertex attribute to its own buffer binding. + uint32_t origOffset = vaOffset; + vaOffset %= pVKVB->stride; + if (vaOffset + attrSize > pVKVB->stride) { + vaOffset = 0; + } + vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding); + if (zeroDivisorBindings.count(pVKVB->binding)) { + zeroDivisorBindings.insert(vaBinding); + } } - vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding); - if (zeroDivisorBindings.count(pVKVB->binding)) { - zeroDivisorBindings.insert(vaBinding); - } + break; } - break; + } + if (pVKVB->stride && attrSize > pVKVB->stride) { + /* Metal does not support overlapping loads. 
Truncate format vector length to prevent an assertion + * and hope it's not used by the shader. */ + MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride); + reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). Changing descriptor format from %s to %s.", + attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat)); + mtlFormat = (decltype(vaDesc.format))newFormat; } } - auto vaDesc = inputDesc.attributes[pVKVA->location]; - auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format); - if (pVKVB->stride && attrSize > pVKVB->stride) { - /* Metal does not support overlapping loads. Truncate format vector length to prevent an assertion - * and hope it's not used by the shader. */ - MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride); - reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). 
Changing descriptor format from %s to %s.", - attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat)); - mtlFormat = (decltype(vaDesc.format))newFormat; - } vaDesc.format = mtlFormat; vaDesc.bufferIndex = (decltype(vaDesc.bufferIndex))getMetalBufferIndexForVertexAttributeBinding(vaBinding); vaDesc.offset = vaOffset; @@ -1607,7 +1641,7 @@ void MVKGraphicsPipeline::addFragmentOutputToPipeline(MTLRenderPipelineDescripto // Multisampling - must ignore allowed bad pMultisampleState pointer if rasterization disabled if (_isRasterizing && pCreateInfo->pMultisampleState) { - plDesc.sampleCount = mvkSampleCountFromVkSampleCountFlagBits(pCreateInfo->pMultisampleState->rasterizationSamples); + plDesc.rasterSampleCount = mvkSampleCountFromVkSampleCountFlagBits(pCreateInfo->pMultisampleState->rasterizationSamples); plDesc.alphaToCoverageEnabled = pCreateInfo->pMultisampleState->alphaToCoverageEnable; plDesc.alphaToOneEnabled = pCreateInfo->pMultisampleState->alphaToOneEnable; @@ -1646,6 +1680,7 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff; shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; + shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = _device->_pMetalFeatures->needsCubeGradWorkaround; MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout; layout->populateShaderConversionConfig(shaderConfig); @@ -1701,6 +1736,7 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu shaderConfig.options.mslOptions.multiview = mvkIsMultiview(pRendInfo->viewMask); shaderConfig.options.mslOptions.multiview_layered_rendering = getPhysicalDevice()->canUseInstancingForMultiview(); shaderConfig.options.mslOptions.view_index_from_device_index = 
mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT); + shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); #if MVK_MACOS shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute; #endif @@ -1897,17 +1933,22 @@ void MVKGraphicsPipeline::addPrevStageOutputToShaderConversionConfig(SPIRVToMSLC } } -// We render points if either the topology or polygon fill mode dictate it +// We render points if either the static topology or static polygon fill mode dictate it bool MVKGraphicsPipeline::isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo) { - return ((pCreateInfo->pInputAssemblyState && (pCreateInfo->pInputAssemblyState->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST)) || - (pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT))); + return ((pCreateInfo->pInputAssemblyState && + (pCreateInfo->pInputAssemblyState->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) && + !isDynamicState(PrimitiveTopology)) || + (pCreateInfo->pRasterizationState && + (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT) && + !isDynamicState(PolygonMode))); } -// We disable rasterization if either rasterizerDiscard is enabled or the cull mode dictates it. +// We disable rasterization if either static rasterizerDiscard is enabled or the static cull mode dictates it. 
bool MVKGraphicsPipeline::isRasterizationDisabled(const VkGraphicsPipelineCreateInfo* pCreateInfo) { return (pCreateInfo->pRasterizationState && - (pCreateInfo->pRasterizationState->rasterizerDiscardEnable || - ((pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK) && pCreateInfo->pInputAssemblyState && + ((pCreateInfo->pRasterizationState->rasterizerDiscardEnable && !isDynamicState(RasterizerDiscardEnable)) || + ((pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK) && !isDynamicState(CullMode) && + pCreateInfo->pInputAssemblyState && (mvkMTLPrimitiveTopologyClassFromVkPrimitiveTopology(pCreateInfo->pInputAssemblyState->topology) == MTLPrimitiveTopologyClassTriangle)))); } @@ -2105,6 +2146,7 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI shaderConfig.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageCompute]; shaderConfig.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute]; shaderConfig.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute]; + shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderConfig, pSS->pSpecializationInfo, this, pStageFB); if ( !func.getMTLFunction() ) { @@ -2269,7 +2311,7 @@ VkResult MVKPipelineCache::writeDataImpl(size_t* pDataSize, void* pData) { // Serializes the data in this cache to a stream void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) { #if MVK_USE_CEREAL - MVKPerformanceTracker& activityTracker = isCounting + MVKPerformanceTracker& perfTracker = isCounting ? 
_device->_performanceStatistics.pipelineCache.sizePipelineCache : _device->_performanceStatistics.pipelineCache.writePipelineCache; @@ -2297,7 +2339,7 @@ void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) { writer(cacheIter.getShaderConversionConfig()); writer(cacheIter.getShaderConversionResultInfo()); writer(cacheIter.getCompressedMSL()); - _device->addActivityPerformance(activityTracker, startTime); + _device->addPerformanceInterval(perfTracker, startTime); } } @@ -2366,7 +2408,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) { // Add the shader library to the staging cache. MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey); - _device->addActivityPerformance(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime); slCache->addShaderLibrary(&shaderConversionConfig, resultInfo, compressedMSL); break; @@ -2475,7 +2517,9 @@ namespace SPIRV_CROSS_NAMESPACE { opt.force_sample_rate_shading, opt.manual_helper_invocation_updates, opt.check_discarded_frag_stores, - opt.sample_dref_lod_array_as_grad); + opt.sample_dref_lod_array_as_grad, + opt.replace_recursive_inputs, + opt.agx_manual_cube_grad_fixup); } template diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h index 479965b4..5d23225b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h @@ -20,7 +20,7 @@ #include "MVKBaseObject.h" #include "MVKOSExtensions.h" -#include "mvk_datatypes.h" +#include "mvk_datatypes.hpp" #include #include diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm index 828bca38..f11bec5c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm @@ -529,6 +529,7 @@ 
MTLClearColor MVKPixelFormats::getMTLClearColor(VkClearValue vkClearValue, VkFor #define OFFSET_SNORM(COLOR, BIT_WIDTH) OFFSET_NORM(-1.0, COLOR, BIT_WIDTH - 1) switch (vkFormat) { case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_B4G4R4A4_UNORM_PACK16: case VK_FORMAT_A4R4G4B4_UNORM_PACK16: case VK_FORMAT_A4B4G4R4_UNORM_PACK16: OFFSET_UNORM(red, 4) @@ -831,7 +832,7 @@ void MVKPixelFormats::initVkFormatCapabilities() { addVkFormatDesc( R4G4_UNORM_PACK8, Invalid, Invalid, Invalid, Invalid, 1, 1, 1, ColorFloat ); addVkFormatDesc( R4G4B4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat ); - addVkFormatDesc( B4G4R4A4_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat ); + addVkFormatDescSwizzled( B4G4R4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, B, G, R, A ); addVkFormatDescSwizzled( A4R4G4B4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, G, B, A, R ); addVkFormatDescSwizzled( A4B4G4R4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, A, B, G, R ); @@ -1482,26 +1483,21 @@ void MVKPixelFormats::addMTLVertexFormatCapabilities(id mtlDevice, } } -// If supporting a physical device, retrieve the MTLDevice from it, -// otherwise create a temp copy of the system default MTLDevice. +// If supporting a physical device, retrieve the MTLDevice from it, otherwise +// retrieve the array of physical GPU devices, and use the first one. +// Retrieving the GPUs creates a number of autoreleased instances of Metal +// and other Obj-C classes, so wrap it all in an autorelease pool. void MVKPixelFormats::modifyMTLFormatCapabilities() { if (_physicalDevice) { modifyMTLFormatCapabilities(_physicalDevice->getMTLDevice()); } else { -#if MVK_IOS_OR_TVOS - id mtlDevice = MTLCreateSystemDefaultDevice(); // temp retained -#endif -#if MVK_MACOS - NSArray>* mtlDevices = MTLCopyAllDevices(); // temp retained - id mtlDevice = [mtlDevices count] > 0 ? 
[mtlDevices[0] retain] : MTLCreateSystemDefaultDevice(); // temp retained - [mtlDevices release]; // temp release -#endif - modifyMTLFormatCapabilities(mtlDevice); - [mtlDevice release]; // release temp instance + @autoreleasepool { + auto* mtlDevs = mvkGetAvailableMTLDevicesArray(nullptr); + if (mtlDevs.count) { modifyMTLFormatCapabilities(mtlDevs[0]); } + } } } - // Mac Catalyst does not support feature sets, so we redefine them to GPU families in MVKDevice.h. #if MVK_MACCAT #define addFeatSetMTLPixFmtCaps(FEAT_SET, MTL_FMT, CAPS) \ diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h index 80c3a357..5f44a95f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h @@ -56,7 +56,7 @@ public: virtual void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder); /** Finishes the specified queries and marks them as available. */ - virtual void finishQueries(const MVKArrayRef queries); + virtual void finishQueries(MVKArrayRef queries); /** Resets the results and availability status of the specified queries. 
*/ virtual void resetResults(uint32_t firstQuery, uint32_t queryCount, MVKCommandEncoder* cmdEncoder); @@ -212,7 +212,7 @@ class MVKTimestampQueryPool : public MVKGPUCounterQueryPool { public: void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override; - void finishQueries(const MVKArrayRef queries) override; + void finishQueries(MVKArrayRef queries) override; #pragma mark Construction diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm index 5275d74e..0cb42a2b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm @@ -52,7 +52,7 @@ void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) { } // Mark queries as available -void MVKQueryPool::finishQueries(const MVKArrayRef queries) { +void MVKQueryPool::finishQueries(MVKArrayRef queries) { lock_guard lock(_availabilityLock); for (uint32_t qry : queries) { if (_availability[qry] == DeviceAvailable) { @@ -379,9 +379,9 @@ void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncod } // If not using MTLCounterSampleBuffer, update timestamp values, then mark queries as available -void MVKTimestampQueryPool::finishQueries(const MVKArrayRef queries) { +void MVKTimestampQueryPool::finishQueries(MVKArrayRef queries) { if ( !_mtlCounterBuffer ) { - uint64_t ts = mvkGetTimestamp(); + uint64_t ts = mvkGetElapsedNanoseconds(); for (uint32_t qry : queries) { _timestamps[qry] = ts; } } MVKQueryPool::finishQueries(queries); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index bcefd2f3..086410e8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -86,10 +86,14 @@ public: /** Returns a pointer to the Vulkan instance. */ MVKInstance* getInstance() override { return _device->getInstance(); } + /** Return the name of this queue. 
*/ + const std::string& getName() { return _name; } + #pragma mark Queue submissions /** Submits the specified command buffers to the queue. */ - VkResult submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + template + VkResult submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse); /** Submits the specified presentation command to the queue. */ VkResult submit(const VkPresentInfoKHR* pPresentInfo); @@ -97,10 +101,6 @@ public: /** Block the current thread until this queue is idle. */ VkResult waitIdle(MVKCommandUse cmdUse); - /** Return the name of this queue. */ - const std::string& getName() { return _name; } - - #pragma mark Metal /** Returns the Metal queue underlying this queue. */ @@ -135,36 +135,53 @@ protected: friend class MVKQueueCommandBufferSubmission; friend class MVKQueuePresentSurfaceSubmission; - MVKBaseObject* getBaseObject() override { return this; }; void propagateDebugName() override; void initName(); void initExecQueue(); void initMTLCommandQueue(); - void initGPUCaptureScopes(); void destroyExecQueue(); VkResult submit(MVKQueueSubmission* qSubmit); NSString* getMTLCommandBufferLabel(MVKCommandUse cmdUse); + void handleMTLCommandBufferError(id mtlCmdBuff); MVKQueueFamily* _queueFamily; - uint32_t _index; - float _priority; - dispatch_queue_t _execQueue; - id _mtlQueue; std::string _name; - NSString* _mtlCmdBuffLabelEndCommandBuffer; - NSString* _mtlCmdBuffLabelQueueSubmit; - NSString* _mtlCmdBuffLabelQueuePresent; - NSString* _mtlCmdBuffLabelDeviceWaitIdle; - NSString* _mtlCmdBuffLabelQueueWaitIdle; - NSString* _mtlCmdBuffLabelAcquireNextImage; - NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges; - MVKGPUCaptureScope* _submissionCaptureScope; + dispatch_queue_t _execQueue; + id _mtlQueue = nil; + NSString* _mtlCmdBuffLabelBeginCommandBuffer = nil; + NSString* _mtlCmdBuffLabelQueueSubmit = nil; + NSString* _mtlCmdBuffLabelQueuePresent = nil; + NSString* 
_mtlCmdBuffLabelDeviceWaitIdle = nil; + NSString* _mtlCmdBuffLabelQueueWaitIdle = nil; + NSString* _mtlCmdBuffLabelAcquireNextImage = nil; + NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; + MVKGPUCaptureScope* _submissionCaptureScope = nil; + float _priority; + uint32_t _index; }; #pragma mark - #pragma mark MVKQueueSubmission +typedef struct MVKSemaphoreSubmitInfo { +private: + MVKSemaphore* _semaphore; +public: + uint64_t value; + VkPipelineStageFlags2 stageMask; + uint32_t deviceIndex; + + void encodeWait(id mtlCmdBuff); + void encodeSignal(id mtlCmdBuff); + MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo); + MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, VkPipelineStageFlags stageMask); + MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other); + MVKSemaphoreSubmitInfo& operator=(const MVKSemaphoreSubmitInfo& other); + ~MVKSemaphoreSubmitInfo(); + +} MVKSemaphoreSubmitInfo; + /** This is an abstract class for an operation that can be submitted to an MVKQueue. */ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { @@ -178,11 +195,16 @@ public: * * Upon completion of this function, no further calls should be made to this instance. 
*/ - virtual void execute() = 0; + virtual VkResult execute() = 0; + + MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos); MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores); + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask); ~MVKQueueSubmission() override; @@ -190,15 +212,25 @@ protected: friend class MVKQueue; virtual void finish() = 0; + MVKDevice* getDevice() { return _queue->getDevice(); } MVKQueue* _queue; - MVKSmallVector> _waitSemaphores; + MVKSmallVector _waitSemaphores; }; #pragma mark - #pragma mark MVKQueueCommandBufferSubmission +typedef struct MVKCommandBufferSubmitInfo { + MVKCommandBuffer* commandBuffer; + uint32_t deviceMask; + + MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo); + MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer); + +} MVKCommandBufferSubmitInfo; + /** * Submits an empty set of command buffers to the queue. * Used for fence-only command submissions. 
@@ -206,9 +238,17 @@ protected: class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { public: - void execute() override; + VkResult execute() override; - MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); + + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); ~MVKQueueCommandBufferSubmission() override; @@ -217,16 +257,16 @@ protected: id getActiveMTLCommandBuffer(); void setActiveMTLCommandBuffer(id mtlCmdBuff); - void commitActiveMTLCommandBuffer(bool signalCompletion = false); + VkResult commitActiveMTLCommandBuffer(bool signalCompletion = false); void finish() override; virtual void submitCommandBuffers() {} MVKCommandEncodingContext _encodingContext; - MVKSmallVector> _signalSemaphores; - MVKFence* _fence; - id _activeMTLCommandBuffer; - MVKCommandUse _commandUse; - bool _emulatedWaitDone; //Used to track if we've already waited for emulated semaphores. + MVKSmallVector _signalSemaphores; + MVKFence* _fence = nullptr; + id _activeMTLCommandBuffer = nil; + MVKCommandUse _commandUse = kMVKCommandUseNone; + bool _emulatedWaitDone = false; //Used to track if we've already waited for emulated semaphores. 
}; @@ -238,25 +278,20 @@ template class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission { public: - MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence) : - MVKQueueCommandBufferSubmission(queue, pSubmit, fence, kMVKCommandUseQueueSubmit) { + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); - // pSubmit can be null if just tracking the fence alone - if (pSubmit) { - uint32_t cbCnt = pSubmit->commandBufferCount; - _cmdBuffers.reserve(cbCnt); - for (uint32_t i = 0; i < cbCnt; i++) { - MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); - _cmdBuffers.push_back(cb); - setConfigurationResult(cb->getConfigurationResult()); - } - } - } + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); protected: void submitCommandBuffers() override; - MVKSmallVector _cmdBuffers; + MVKSmallVector _cmdBuffers; }; @@ -267,7 +302,7 @@ protected: class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission { public: - void execute() override; + VkResult execute() override; MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 786e979f..92a99a33 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -18,6 +18,7 @@ #include "MVKInstance.h" #include "MVKQueue.h" +#include "MVKSurface.h" #include "MVKSwapchain.h" #include "MVKSync.h" #include "MVKFoundation.h" @@ -68,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); } // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased. 
// This is critical for apps that don't use standard OS autoreleasing runloop threading. -static inline void execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { qSubmit->execute(); } } +static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } } // Executes the submmission, either immediately, or by dispatching to an execution queue. // Submissions to the execution queue are wrapped in a dedicated autoreleasepool. @@ -79,43 +80,50 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) { if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils - VkResult rslt = qSubmit->getConfigurationResult(); // Extract result before submission to avoid race condition with early destruction + // Extract result before submission to avoid race condition with early destruction + // Submit regardless of config result, to ensure submission semaphores and fences are signalled. + // The submissions will ensure a misconfiguration will be safe to execute. 
+ VkResult rslt = qSubmit->getConfigurationResult(); if (_execQueue) { dispatch_async(_execQueue, ^{ execute(qSubmit); } ); } else { - execute(qSubmit); + rslt = execute(qSubmit); } return rslt; } -VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse) { +static inline uint32_t getCommandBufferCount(const VkSubmitInfo2* pSubmitInfo) { return pSubmitInfo->commandBufferInfoCount; } +static inline uint32_t getCommandBufferCount(const VkSubmitInfo* pSubmitInfo) { return pSubmitInfo->commandBufferCount; } + +template +VkResult MVKQueue::submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse) { // Fence-only submission if (submitCount == 0 && fence) { - return submit(new MVKQueueCommandBufferSubmission(this, nullptr, fence, cmdUse)); + return submit(new MVKQueueCommandBufferSubmission(this, (S*)nullptr, fence, cmdUse)); } VkResult rslt = VK_SUCCESS; for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) { VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? 
fence : VK_NULL_HANDLE; // last one gets the fence - const VkSubmitInfo* pVkSub = &pSubmits[sIdx]; + const S* pVkSub = &pSubmits[sIdx]; MVKQueueCommandBufferSubmission* mvkSub; - uint32_t cbCnt = pVkSub->commandBufferCount; + uint32_t cbCnt = getCommandBufferCount(pVkSub); if (cbCnt <= 1) { - mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 16) { - mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 32) { - mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 64) { - mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 128) { - mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 256) { - mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil, cmdUse); } else { - mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil, cmdUse); } VkResult subRslt = submit(mvkSub); @@ -124,33 +132,30 @@ VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, Vk return rslt; } +// Concrete implementations of templated MVKQueue::submit(). 
+template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence, MVKCommandUse cmdUse); +template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) { return submit(new MVKQueuePresentSurfaceSubmission(this, pPresentInfo)); } -// Create an empty submit struct and fence, submit to queue and wait on fence. VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + VkResult rslt = _device->getConfigurationResult(); + if (rslt != VK_SUCCESS) { return rslt; } - VkFenceCreateInfo vkFenceInfo = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - }; + auto* mtlCmdBuff = getMTLCommandBuffer(cmdUse); + [mtlCmdBuff commit]; + [mtlCmdBuff waitUntilCompleted]; - // The MVKFence is retained by the command submission, and may outlive this function while - // the command submission finishes, so we can't allocate MVKFence locally on the stack. 
- MVKFence* mvkFence = new MVKFence(_device, &vkFenceInfo); - VkFence vkFence = (VkFence)mvkFence; - submit(0, nullptr, vkFence, cmdUse); - VkResult rslt = mvkWaitForFences(_device, 1, &vkFence, false); - mvkFence->destroy(); - return rslt; + return VK_SUCCESS; } id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) { id mtlCmdBuff = nil; + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); #if MVK_XCODE_12 if ([_mtlQueue respondsToSelector: @selector(commandBufferWithDescriptor:)]) { MTLCommandBufferDescriptor* mtlCmdBuffDesc = [MTLCommandBufferDescriptor new]; // temp retain @@ -167,53 +172,126 @@ id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool re } else { mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences]; } - setLabelIfNotNil(mtlCmdBuff, getMTLCommandBufferLabel(cmdUse)); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); + NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse); + setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel); + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { handleMTLCommandBufferError(mtlCB); }]; + + if ( !mtlCmdBuff ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "%s could not be acquired.", mtlCmdBuffLabel.UTF8String); } return mtlCmdBuff; } NSString* MVKQueue::getMTLCommandBufferLabel(MVKCommandUse cmdUse) { -#define CASE_GET_LABEL(cmdUse) \ - case kMVKCommandUse ##cmdUse: \ - if ( !_mtlCmdBuffLabel ##cmdUse ) { _mtlCmdBuffLabel ##cmdUse = [[NSString stringWithFormat: @"%@ on Queue %d-%d", mvkMTLCommandBufferLabel(kMVKCommandUse ##cmdUse), _queueFamily->getIndex(), _index] retain]; } \ - return _mtlCmdBuffLabel ##cmdUse +#define CASE_GET_LABEL(cu) \ + case kMVKCommandUse ##cu: \ + if ( !_mtlCmdBuffLabel ##cu ) { _mtlCmdBuffLabel ##cu = [[NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(kMVKCommandUse ##cu), _queueFamily->getIndex(), _index] retain]; } \ + return 
_mtlCmdBuffLabel ##cu switch (cmdUse) { - CASE_GET_LABEL(EndCommandBuffer); + CASE_GET_LABEL(BeginCommandBuffer); CASE_GET_LABEL(QueueSubmit); CASE_GET_LABEL(QueuePresent); CASE_GET_LABEL(QueueWaitIdle); CASE_GET_LABEL(DeviceWaitIdle); CASE_GET_LABEL(AcquireNextImage); CASE_GET_LABEL(InvalidateMappedMemoryRanges); - default: return mvkMTLCommandBufferLabel(cmdUse); + default: + MVKAssert(false, "Uncached MTLCommandBuffer label for command use %s.", mvkVkCommandName(cmdUse)); + return [NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(cmdUse), _queueFamily->getIndex(), _index]; } #undef CASE_GET_LABEL } +#if MVK_XCODE_12 +static const char* mvkStringFromMTLCommandEncoderErrorState(MTLCommandEncoderErrorState errState) { + switch (errState) { + case MTLCommandEncoderErrorStateUnknown: return "unknown"; + case MTLCommandEncoderErrorStateAffected: return "affected"; + case MTLCommandEncoderErrorStateCompleted: return "completed"; + case MTLCommandEncoderErrorStateFaulted: return "faulted"; + case MTLCommandEncoderErrorStatePending: return "pending"; + } + return "unknown"; +} +#endif + +void MVKQueue::handleMTLCommandBufferError(id mtlCmdBuff) { + if (mtlCmdBuff.status != MTLCommandBufferStatusError) { return; } + + // If a command buffer error has occurred, report the error. If the error affects + // the physical device, always mark both the device and physical device as lost. + // If the error is local to this command buffer, optionally mark the device (but not the + // physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice. + VkResult vkErr = VK_ERROR_UNKNOWN; + bool markDeviceLoss = !getMVKConfig().resumeLostDevice; + bool markPhysicalDeviceLoss = false; + switch (mtlCmdBuff.error.code) { + case MTLCommandBufferErrorBlacklisted: + case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement. 
+#if MVK_MACOS && !MVK_MACCAT + case MTLCommandBufferErrorDeviceRemoved: +#endif + vkErr = VK_ERROR_DEVICE_LOST; + markDeviceLoss = true; + markPhysicalDeviceLoss = true; + break; + case MTLCommandBufferErrorTimeout: + vkErr = VK_TIMEOUT; + break; +#if MVK_XCODE_13 + case MTLCommandBufferErrorStackOverflow: +#endif + case MTLCommandBufferErrorPageFault: + case MTLCommandBufferErrorOutOfMemory: + default: + vkErr = VK_ERROR_OUT_OF_DEVICE_MEMORY; + break; + } + reportError(vkErr, "MTLCommandBuffer \"%s\" execution failed (code %li): %s", + mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : "", + mtlCmdBuff.error.code, mtlCmdBuff.error.localizedDescription.UTF8String); + if (markDeviceLoss) { getDevice()->markLost(markPhysicalDeviceLoss); } + +#if MVK_XCODE_12 + if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) { + if (NSArray>* mtlEncInfo = mtlCmdBuff.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) { + MVKLogInfo("Encoders for %p \"%s\":", mtlCmdBuff, mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : ""); + for (id enc in mtlEncInfo) { + MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromMTLCommandEncoderErrorState(enc.errorState)); + if (enc.debugSignposts.count > 0) { + MVKLogInfo(" Debug signposts:"); + for (NSString* signpost in enc.debugSignposts) { + MVKLogInfo(" - %s", signpost.UTF8String); + } + } + } + } + } + if ([mtlCmdBuff respondsToSelector: @selector(logs)]) { + bool isFirstMsg = true; + for (id log in mtlCmdBuff.logs) { + if (isFirstMsg) { + MVKLogInfo("Shader log messages:"); + isFirstMsg = false; + } + MVKLogInfo("%s", log.description.UTF8String); + } + } +#endif +} #pragma mark Construction #define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED -MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority) - : MVKDeviceTrackingMixin(device) { - +MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority) : MVKDeviceTrackingMixin(device) { _queueFamily = 
queueFamily; _index = index; _priority = priority; - _mtlCmdBuffLabelEndCommandBuffer = nil; - _mtlCmdBuffLabelQueueSubmit = nil; - _mtlCmdBuffLabelQueuePresent = nil; - _mtlCmdBuffLabelDeviceWaitIdle = nil; - _mtlCmdBuffLabelQueueWaitIdle = nil; - _mtlCmdBuffLabelAcquireNextImage = nil; - _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; - initName(); initExecQueue(); initMTLCommandQueue(); - initGPUCaptureScopes(); } void MVKQueue::initName() { @@ -236,23 +314,15 @@ void MVKQueue::initExecQueue() { } } -// Retrieves and initializes the Metal command queue. +// Retrieves and initializes the Metal command queue and Xcode GPU capture scopes void MVKQueue::initMTLCommandQueue() { - uint64_t startTime = _device->getPerformanceTimestamp(); _mtlQueue = _queueFamily->getMTLCommandQueue(_index); // not retained (cached in queue family) - _device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, startTime); -} -// Initializes Xcode GPU capture scopes -void MVKQueue::initGPUCaptureScopes() { _submissionCaptureScope = new MVKGPUCaptureScope(this); - if (_queueFamily->getIndex() == getMVKConfig().defaultGPUCaptureScopeQueueFamilyIndex && _index == getMVKConfig().defaultGPUCaptureScopeQueueIndex) { - getDevice()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME, _mtlQueue); _submissionCaptureScope->makeDefault(); - } _submissionCaptureScope->beginScope(); // Allow Xcode to capture the first frame if desired. 
} @@ -261,7 +331,7 @@ MVKQueue::~MVKQueue() { destroyExecQueue(); _submissionCaptureScope->destroy(); - [_mtlCmdBuffLabelEndCommandBuffer release]; + [_mtlCmdBuffLabelBeginCommandBuffer release]; [_mtlCmdBuffLabelQueueSubmit release]; [_mtlCmdBuffLabelQueuePresent release]; [_mtlCmdBuffLabelDeviceWaitIdle release]; @@ -282,23 +352,89 @@ void MVKQueue::destroyExecQueue() { #pragma mark - #pragma mark MVKQueueSubmission +void MVKSemaphoreSubmitInfo::encodeWait(id mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeWait(mtlCmdBuff, value); } +} + +void MVKSemaphoreSubmitInfo::encodeSignal(id mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeSignal(mtlCmdBuff, value); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo) : + _semaphore((MVKSemaphore*)semaphoreSubmitInfo.semaphore), + value(semaphoreSubmitInfo.value), + stageMask(semaphoreSubmitInfo.stageMask), + deviceIndex(semaphoreSubmitInfo.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, + VkPipelineStageFlags stageMask) : + _semaphore((MVKSemaphore*)semaphore), + value(0), + stageMask(stageMask), + deviceIndex(0) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other) : + _semaphore(other._semaphore), + value(other.value), + stageMask(other.stageMask), + deviceIndex(other.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo& MVKSemaphoreSubmitInfo::operator=(const MVKSemaphoreSubmitInfo& other) { + // Retain new object first in case it's the same object + if (other._semaphore) {other._semaphore->retain(); } + if (_semaphore) { _semaphore->release(); } + _semaphore = other._semaphore; + + value = other.value; + stageMask = other.stageMask; + deviceIndex = other.deviceIndex; + return *this; +} + +MVKSemaphoreSubmitInfo::~MVKSemaphoreSubmitInfo() { + if 
(_semaphore) { _semaphore->release(); } +} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBufferInfo.commandBuffer)), + deviceMask(commandBufferInfo.deviceMask) {} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBuffer)), + deviceMask(0) {} + +MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) { + _queue = queue; + _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + + _waitSemaphores.reserve(waitSemaphoreInfoCount); + for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) { + _waitSemaphores.emplace_back(pWaitSemaphoreSubmitInfos[i]); + } +} + MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores) { + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask) { _queue = queue; _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). _waitSemaphores.reserve(waitSemaphoreCount); for (uint32_t i = 0; i < waitSemaphoreCount; i++) { - auto* sem4 = (MVKSemaphore*)pWaitSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = 0; - _waitSemaphores.emplace_back(sem4, sem4Val); + _waitSemaphores.emplace_back(pWaitSemaphores[i], pWaitDstStageMask ? 
pWaitDstStageMask[i] : 0); } } MVKQueueSubmission::~MVKQueueSubmission() { - for (auto s : _waitSemaphores) { s.first->release(); } _queue->release(); } @@ -306,22 +442,22 @@ MVKQueueSubmission::~MVKQueueSubmission() { #pragma mark - #pragma mark MVKQueueCommandBufferSubmission -void MVKQueueCommandBufferSubmission::execute() { +VkResult MVKQueueCommandBufferSubmission::execute() { _queue->_submissionCaptureScope->beginScope(); // If using encoded semaphore waiting, do so now. - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(getActiveMTLCommandBuffer(), ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); } // Submit each command buffer. submitCommandBuffers(); // If using encoded semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(getActiveMTLCommandBuffer(), ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(getActiveMTLCommandBuffer()); } // Commit the last MTLCommandBuffer. // Nothing after this because callback might destroy this instance before this function ends. - commitActiveMTLCommandBuffer(true); + return commitActiveMTLCommandBuffer(true); } // Returns the active MTLCommandBuffer, lazily retrieving it from the queue if needed. @@ -341,24 +477,11 @@ void MVKQueueCommandBufferSubmission::setActiveMTLCommandBuffer(idencodeWait(nil, ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(nil); } _emulatedWaitDone = true; } @@ -380,72 +503,29 @@ void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCo // If we need to signal completion, use getActiveMTLCommandBuffer() to ensure at least // one MTLCommandBuffer is used, otherwise if this instance has no content, it will not - // finish(), signal the fence and semaphores ,and be destroyed. + // finish(), signal the fence and semaphores, and be destroyed. 
// Use temp var for MTLCommandBuffer commit and release because completion callback // may destroy this instance before this function ends. id mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer; _activeMTLCommandBuffer = nil; - MVKDevice* mvkDev = _queue->getDevice(); + MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { - if (mtlCB.status == MTLCommandBufferStatusError) { - // If a command buffer error has occurred, report the error. If the error affects - // the physical device, always mark both the device and physical device as lost. - // If the error is local to this command buffer, optionally mark the device (but not the - // physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice. - getVulkanAPIObject()->reportError(VK_ERROR_DEVICE_LOST, "MTLCommandBuffer \"%s\" execution failed (code %li): %s", mtlCB.label ? mtlCB.label.UTF8String : "", mtlCB.error.code, mtlCB.error.localizedDescription.UTF8String); - switch (mtlCB.error.code) { - case MTLCommandBufferErrorBlacklisted: - case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement. -#if MVK_MACOS && !MVK_MACCAT - case MTLCommandBufferErrorDeviceRemoved: -#endif - mvkDev->markLost(true); - break; - default: - if ( !getMVKConfig().resumeLostDevice ) { mvkDev->markLost(); } - break; - } -#if MVK_XCODE_12 - if (getMVKConfig().debugMode) { - if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) { - if (NSArray>* mtlEncInfo = mtlCB.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) { - MVKLogInfo("Encoders for %p \"%s\":", mtlCB, mtlCB.label ? 
mtlCB.label.UTF8String : ""); - for (id enc in mtlEncInfo) { - MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromErrorState(enc.errorState)); - if (enc.debugSignposts.count > 0) { - MVKLogInfo(" Debug signposts:"); - for (NSString* signpost in enc.debugSignposts) { - MVKLogInfo(" - %s", signpost.UTF8String); - } - } - } - } - } - } -#endif - } -#if MVK_XCODE_12 - if (getMVKConfig().debugMode && [mtlCB respondsToSelector: @selector(logs)]) { - bool isFirstMsg = true; - for (id log in mtlCB.logs) { - if (isFirstMsg) { - MVKLogInfo("Shader log messages:"); - isFirstMsg = false; - } - MVKLogInfo("%s", log.description.UTF8String); - } - } -#endif - - // Ensure finish() is the last thing the completetion callback does. - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferCompletion, startTime); - if (signalCompletion) { this->finish(); } + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); + if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; + // Retrieve the result before committing MTLCommandBuffer, because finish() will destroy this instance. + VkResult rslt = mtlCmdBuff ? getConfigurationResult() : VK_ERROR_OUT_OF_POOL_MEMORY; [mtlCmdBuff commit]; [mtlCmdBuff release]; // retained + + // If we need to signal completion, but an error occurred and the MTLCommandBuffer + // was not created, call the finish() function directly. + if (signalCompletion && !mtlCmdBuff) { finish(); } + + return rslt; } // Be sure to retain() any API objects referenced in this function, and release() them in the @@ -460,7 +540,7 @@ void MVKQueueCommandBufferSubmission::finish() { _queue->_submissionCaptureScope->endScope(); // If using inline semaphore signaling, do so now. 
- for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(nil, ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(nil); } // If a fence exists, signal it. if (_fence) { _fence->signal(); } @@ -472,18 +552,53 @@ void MVKQueueCommandBufferSubmission::finish() { // be destroyed on the waiting thread before this submission is done with them. We therefore // retain() each here to ensure they live long enough for this submission to finish using them. MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue, - const VkSubmitInfo* pSubmit, + const VkSubmitInfo2* pSubmit, VkFence fence, MVKCommandUse cmdUse) : MVKQueueSubmission(queue, - (pSubmit ? pSubmit->waitSemaphoreCount : 0), - (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), - _commandUse(cmdUse), - _emulatedWaitDone(false) { + pSubmit ? pSubmit->waitSemaphoreInfoCount : 0, + pSubmit ? pSubmit->pWaitSemaphoreInfos : nullptr), + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } + + // pSubmit can be null if just tracking the fence alone + if (pSubmit) { + uint32_t ssCnt = pSubmit->signalSemaphoreInfoCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphoreInfos[i]); + } + } +} + +// On device loss, the fence and signal semaphores may be signalled early, and they might then +// be destroyed on the waiting thread before this submission is done with them. We therefore +// retain() each here to ensure they live long enough for this submission to finish using them. +MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueSubmission(queue, + pSubmit ? pSubmit->waitSemaphoreCount : 0, + pSubmit ? pSubmit->pWaitSemaphores : nullptr, + pSubmit ? 
pSubmit->pWaitDstStageMask : nullptr), + + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } // pSubmit can be null if just tracking the fence alone if (pSubmit) { - VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; + uint32_t ssCnt = pSubmit->signalSemaphoreCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphores[i], 0); + } + + VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; for (const auto* next = (const VkBaseInStructure*)pSubmit->pNext; next; next = next->pNext) { switch (next->sType) { case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: @@ -494,37 +609,66 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue } } if (pTimelineSubmit) { - // Presentation doesn't support timeline semaphores, so handle wait values here. - uint32_t wsCnt = pTimelineSubmit->waitSemaphoreValueCount; - for (uint32_t i = 0; i < wsCnt; i++) { - _waitSemaphores[i].second = pTimelineSubmit->pWaitSemaphoreValues[i]; + uint32_t wsvCnt = pTimelineSubmit->waitSemaphoreValueCount; + for (uint32_t i = 0; i < wsvCnt; i++) { + _waitSemaphores[i].value = pTimelineSubmit->pWaitSemaphoreValues[i]; } + + uint32_t ssvCnt = pTimelineSubmit->signalSemaphoreValueCount; + for (uint32_t i = 0; i < ssvCnt; i++) { + _signalSemaphores[i].value = pTimelineSubmit->pSignalSemaphoreValues[i]; + } } - uint32_t ssCnt = pSubmit->signalSemaphoreCount; - _signalSemaphores.reserve(ssCnt); - for (uint32_t i = 0; i < ssCnt; i++) { - auto* sem4 = (MVKSemaphore*)pSubmit->pSignalSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = pTimelineSubmit ? 
pTimelineSubmit->pSignalSemaphoreValues[i] : 0; - _signalSemaphores.emplace_back(sem4, sem4Val); - } } - - _fence = (MVKFence*)fence; - if (_fence) { _fence->retain(); } - - _activeMTLCommandBuffer = nil; } MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() { if (_fence) { _fence->release(); } - for (auto s : _signalSemaphores) { s.first->release(); } } template void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { - for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); + + for (auto& cbInfo : _cmdBuffers) { cbInfo.commandBuffer->submit(this, &_encodingContext); } + + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); +} + +template +MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { + + if (pSubmit) { + uint32_t cbCnt = pSubmit->commandBufferInfoCount; + _cmdBuffers.reserve(cbCnt); + for (uint32_t i = 0; i < cbCnt; i++) { + _cmdBuffers.emplace_back(pSubmit->pCommandBufferInfos[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); + } + } +} + +template +MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { + + if (pSubmit) { + uint32_t cbCnt = pSubmit->commandBufferCount; + _cmdBuffers.reserve(cbCnt); + for (uint32_t i = 0; i < cbCnt; i++) { + _cmdBuffers.emplace_back(pSubmit->pCommandBuffers[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); + } + } } @@ -534,24 +678,34 @@ void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { // If the 
semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting. // If the semaphores are not encodable, wait on them inline after presenting. // The semaphores know what to do. -void MVKQueuePresentSurfaceSubmission::execute() { - id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent); - [mtlCmdBuff enqueue]; - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(mtlCmdBuff, 0); } +VkResult MVKQueuePresentSurfaceSubmission::execute() { + // MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early. + // Although testing could not determine which objects were being lost, queue present MTLCommandBuffers + // are used only once per frame, and retain so few objects, that blanket retention is still performant. + id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true); - // Add completion handler that will destroy this submission only once the MTLCommandBuffer - // is finished with the resources retained here, including the wait semaphores. - // Completion handlers are also added in presentCAMetalDrawable() to retain the swapchain images. - [mtlCmdBuff addCompletedHandler: ^(id mcb) { - this->finish(); - }]; - - for (int i = 0; i < _presentInfo.size(); i++ ) { - _presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]); + for (auto& ws : _waitSemaphores) { + ws.encodeWait(mtlCmdBuff); // Encoded semaphore waits + ws.encodeWait(nil); // Inline semaphore waits } - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, 0); } - [mtlCmdBuff commit]; + for (int i = 0; i < _presentInfo.size(); i++ ) { + setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i])); + } + + if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } // Check after images may set error. 
+ + // Add completion callback to the MTLCommandBuffer to call finish(), + // or if the MTLCommandBuffer could not be created, call finish() directly. + // Retrieve the result first, because finish() will destroy this instance. + VkResult rslt = getConfigurationResult(); + if (mtlCmdBuff) { + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { this->finish(); }]; + [mtlCmdBuff commit]; + } else { + finish(); + } + return rslt; } void MVKQueuePresentSurfaceSubmission::finish() { @@ -563,7 +717,7 @@ void MVKQueuePresentSurfaceSubmission::finish() { cs->beginScope(); if (_queue->_queueFamily->getIndex() == getMVKConfig().defaultGPUCaptureScopeQueueFamilyIndex && _queue->_index == getMVKConfig().defaultGPUCaptureScopeQueueIndex) { - _queue->getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME); + getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME); } this->destroy(); @@ -571,7 +725,7 @@ void MVKQueuePresentSurfaceSubmission::finish() { MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo) - : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) { + : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores, nullptr) { const VkPresentTimesInfoGOOGLE* pPresentTimesInfo = nullptr; const VkSwapchainPresentFenceInfoEXT* pPresentFenceInfo = nullptr; @@ -623,6 +777,7 @@ MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* que for (uint32_t scIdx = 0; scIdx < scCnt; scIdx++) { MVKSwapchain* mvkSC = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx]; MVKImagePresentInfo presentInfo = {}; // Start with everything zeroed + presentInfo.queue = _queue; presentInfo.presentableImage = mvkSC->getPresentableImage(pPresentInfo->pImageIndices[scIdx]); presentInfo.presentMode = pPresentModes ? pPresentModes[scIdx] : VK_PRESENT_MODE_MAX_ENUM_KHR; presentInfo.fence = pFences ? 
(MVKFence*)pFences[scIdx] : nullptr; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index 534ec018..cb9c8e44 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -116,8 +116,8 @@ public: void populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, uint32_t passIdx, MVKFramebuffer* framebuffer, - const MVKArrayRef attachments, - const MVKArrayRef clearValues, + MVKArrayRef attachments, + MVKArrayRef clearValues, bool isRenderingEntireAttachment, bool loadOverride = false); @@ -126,7 +126,7 @@ public: * when the render area is smaller than the full framebuffer size. */ void populateClearAttachments(MVKClearAttachments& clearAtts, - const MVKArrayRef clearValues); + MVKArrayRef clearValues); /** * Populates the specified vector with VkClearRects for clearing views of a specified multiview @@ -140,11 +140,11 @@ public: /** If a render encoder is active, sets the store actions for all attachments to it. */ void encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, - const MVKArrayRef attachments, + MVKArrayRef attachments, bool storeOverride = false); /** Resolves any resolve attachments that cannot be handled by native Metal subpass resolve behavior. */ - void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef attachments); + void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef attachments); MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo, const VkRenderPassInputAttachmentAspectCreateInfo* pInputAspects, @@ -265,6 +265,22 @@ protected: #pragma mark - #pragma mark MVKRenderPass +/** Collects together VkSubpassDependency and VkMemoryBarrier2. 
*/ +typedef struct MVKSubpassDependency { + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags2 srcStageMask; + VkPipelineStageFlags2 dstStageMask; + VkAccessFlags2 srcAccessMask; + VkAccessFlags2 dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; + + MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset); + MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar); + +} MVKSubpassDependency; + /** Represents a Vulkan render pass. */ class MVKRenderPass : public MVKVulkanAPIDeviceObject { @@ -308,7 +324,7 @@ protected: MVKSmallVector _attachments; MVKSmallVector _subpasses; - MVKSmallVector _subpassDependencies; + MVKSmallVector _subpassDependencies; VkRenderingFlags _renderingFlags = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 762d72d9..a742690d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -138,8 +138,8 @@ uint32_t MVKRenderSubpass::getViewCountUpToMetalPass(uint32_t passIdx) const { void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, uint32_t passIdx, MVKFramebuffer* framebuffer, - const MVKArrayRef attachments, - const MVKArrayRef clearValues, + MVKArrayRef attachments, + MVKArrayRef clearValues, bool isRenderingEntireAttachment, bool loadOverride) { MVKPixelFormats* pixFmts = _renderPass->getPixelFormats(); @@ -279,7 +279,7 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, - const MVKArrayRef attachments, + MVKArrayRef attachments, bool storeOverride) { if (!cmdEncoder->_mtlRenderEncoder) { return; } if (!_renderPass->getDevice()->_pMetalFeatures->deferredStoreActions) { return; } @@ -308,7 +308,7 @@ void 
MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder, } void MVKRenderSubpass::populateClearAttachments(MVKClearAttachments& clearAtts, - const MVKArrayRef clearValues) { + MVKArrayRef clearValues) { uint32_t caCnt = getColorAttachmentCount(); for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { uint32_t attIdx = _colorAttachments[caIdx].attachment; @@ -394,7 +394,7 @@ MVKMTLFmtCaps MVKRenderSubpass::getRequiredFormatCapabilitiesForAttachmentAt(uin return caps; } -void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef attachments) { +void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef attachments) { MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); size_t raCnt = _resolveAttachments.size(); for (uint32_t raIdx = 0; raIdx < raCnt; raIdx++) { @@ -904,6 +904,26 @@ MVKAttachmentDescription::MVKAttachmentDescription(MVKRenderPass* renderPass, #pragma mark - #pragma mark MVKRenderPass +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(spDep.srcStageMask), + dstStageMask(spDep.dstStageMask), + srcAccessMask(spDep.srcAccessMask), + dstAccessMask(spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(viewOffset) {} + +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(pMemBar ? pMemBar->srcStageMask : spDep.srcStageMask), + dstStageMask(pMemBar ? pMemBar->dstStageMask : spDep.dstStageMask), + srcAccessMask(pMemBar ? pMemBar->srcAccessMask : spDep.srcAccessMask), + dstAccessMask(pMemBar ? 
pMemBar->dstAccessMask : spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(spDep.viewOffset) {} + VkExtent2D MVKRenderPass::getRenderAreaGranularity() { if (_device->_pMetalFeatures->tileBasedDeferredRendering) { // This is the tile area. @@ -954,19 +974,7 @@ MVKRenderPass::MVKRenderPass(MVKDevice* device, } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - VkSubpassDependency2 dependency = { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, - .pNext = nullptr, - .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass, - .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass, - .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask, - .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask, - .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask, - .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask, - .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags, - .viewOffset = viewOffsets ? viewOffsets[i] : 0, - }; - _subpassDependencies.push_back(dependency); + _subpassDependencies.emplace_back(pCreateInfo->pDependencies[i], viewOffsets ? 
viewOffsets[i] : 0); } // Link attachments to subpasses @@ -991,7 +999,19 @@ MVKRenderPass::MVKRenderPass(MVKDevice* device, } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - _subpassDependencies.push_back(pCreateInfo->pDependencies[i]); + auto& spDep = pCreateInfo->pDependencies[i]; + + const VkMemoryBarrier2* pMemoryBarrier2 = nullptr; + for (auto* next = (const VkBaseInStructure*)spDep.pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER_2: + pMemoryBarrier2 = (const VkMemoryBarrier2*)next; + break; + default: + break; + } + } + _subpassDependencies.emplace_back(spDep, pMemoryBarrier2); } // Link attachments to subpasses diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h index a1c3da6b..5b9c47fd 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h @@ -60,9 +60,7 @@ public: } /** Applies the specified global memory barrier. 
*/ - virtual void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + virtual void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h index 87418edd..be4f2545 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h @@ -43,7 +43,7 @@ using namespace mvk; typedef struct MVKMTLFunction { SPIRVToMSLConversionResultInfo shaderConversionResults; MTLSize threadGroupSize; - inline id getMTLFunction() { return _mtlFunction; } + id getMTLFunction() { return _mtlFunction; } MVKMTLFunction(id mtlFunc, const SPIRVToMSLConversionResultInfo scRslts, MTLSize tgSize); MVKMTLFunction(const MVKMTLFunction& other); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index 3e7b5c12..81dd886d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -36,10 +36,11 @@ MVKMTLFunction::MVKMTLFunction(const MVKMTLFunction& other) { } MVKMTLFunction& MVKMTLFunction::operator=(const MVKMTLFunction& other) { - if (_mtlFunction != other._mtlFunction) { - [_mtlFunction release]; - _mtlFunction = [other._mtlFunction retain]; // retained - } + // Retain new object first in case it's the same object + [other._mtlFunction retain]; + [_mtlFunction release]; + _mtlFunction = other._mtlFunction; + shaderConversionResults = other.shaderConversionResults; threadGroupSize = other.threadGroupSize; return *this; @@ -80,7 +81,7 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe uint64_t startTime = pShaderFeedback ? 
mvkGetTimestamp() : mvkDev->getPerformanceTimestamp(); id mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease]; - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); if (pShaderFeedback) { if (mtlFunc) { mvkEnableFlags(pShaderFeedback->flags, VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT); @@ -156,7 +157,7 @@ void MVKShaderLibrary::compressMSL(const string& msl) { MVKDevice* mvkDev = _owner->getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); _compressedMSL.compress(msl, getMVKConfig().shaderSourceCompressionAlgorithm); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime); } // Decompresses the cached MSL into the string. @@ -164,7 +165,7 @@ void MVKShaderLibrary::decompressMSL(string& msl) { MVKDevice* mvkDev = _owner->getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); _compressedMSL.decompress(msl); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime); } MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, @@ -207,7 +208,7 @@ MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, handleCompilationError(err, "Compiled shader module creation"); [shdrData release]; } - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime); } MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) { @@ -283,7 +284,7 @@ MVKShaderLibrary* 
MVKShaderLibraryCache::findShaderLibrary(SPIRVToMSLConversionC if (slPair.first.matches(*pShaderConfig)) { pShaderConfig->alignWith(slPair.first); MVKDevice* mvkDev = _owner->getDevice(); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime); if (pShaderFeedback) { pShaderFeedback->duration += mvkGetElapsedNanoseconds(startTime); } @@ -363,7 +364,7 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig, GLSLToSPIRVConversionResult glslConversionResult; uint64_t startTime = _device->getPerformanceTimestamp(); bool wasConverted = _glslConverter.convert(getMVKGLSLConversionShaderStage(pShaderConfig), glslConversionResult, shouldLogCode, false); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime); if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", glslConversionResult.resultLog.c_str()); } @@ -376,7 +377,7 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig, uint64_t startTime = _device->getPerformanceTimestamp(); bool wasConverted = _spvConverter.convert(*pShaderConfig, conversionResult, shouldLogCode, shouldLogCode, shouldLogEstimatedGLSL); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime); if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", conversionResult.resultLog.c_str()); } @@ -436,7 +437,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(pCreateInfo->pCode, spvCount); - 
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _spvConverter.setSPIRV(pCreateInfo->pCode, spvCount); @@ -450,7 +451,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); SPIRVToMSLConversionResult conversionResult; conversionResult.msl = pMSLCode; @@ -466,7 +467,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _directMSLLibrary = new MVKShaderLibrary(this, (void*)(pMSLCode), mslCodeLen); @@ -479,7 +480,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(pGLSL, codeSize); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _glslConverter.setGLSL(pGLSL, glslLen); } else { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h index 0bcceb5d..453eac66 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h @@ -24,17 +24,8 @@ #import 
#import -#ifdef VK_USE_PLATFORM_IOS_MVK -# define PLATFORM_VIEW_CLASS UIView -# import -#endif - -#ifdef VK_USE_PLATFORM_MACOS_MVK -# define PLATFORM_VIEW_CLASS NSView -# import -#endif - class MVKInstance; +class MVKSwapchain; @class MVKBlockObserver; @@ -55,12 +46,17 @@ public: /** Returns a pointer to the Vulkan instance. */ MVKInstance* getInstance() override { return _mvkInstance; } - /** Returns the CAMetalLayer underlying this surface. */ - inline CAMetalLayer* getCAMetalLayer() { - std::lock_guard lock(_layerLock); - return _mtlCAMetalLayer; - } + /** Returns the CAMetalLayer underlying this surface. */ + CAMetalLayer* getCAMetalLayer(); + /** Returns the extent of this surface. */ + VkExtent2D getExtent(); + + /** Returns the extent for which the underlying CAMetalLayer will not need to be scaled when composited. */ + VkExtent2D getNaturalExtent(); + + /** Returns whether this surface is headless. */ + bool isHeadless() { return !_mtlCAMetalLayer && wasConfigurationSuccessful(); } #pragma mark Construction @@ -68,6 +64,10 @@ public: const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator); + MVKSurface(MVKInstance* mvkInstance, + const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator); + MVKSurface(MVKInstance* mvkInstance, const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator); @@ -75,13 +75,18 @@ public: ~MVKSurface() override; protected: + friend class MVKSwapchain; + void propagateDebugName() override {} - void initLayerObserver(); + void setActiveSwapchain(MVKSwapchain* swapchain); + void initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName, bool isHeadless); void releaseLayer(); - MVKInstance* _mvkInstance; - CAMetalLayer* _mtlCAMetalLayer; - MVKBlockObserver* _layerObserver; std::mutex _layerLock; + MVKInstance* _mvkInstance = nullptr; + CAMetalLayer* _mtlCAMetalLayer = nil; + MVKBlockObserver* _layerObserver = nil; + MVKSwapchain* 
_activeSwapchain = nullptr; + VkExtent2D _headlessExtent = {0xFFFFFFFF, 0xFFFFFFFF}; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm index 1309d73d..04855718 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm @@ -17,11 +17,26 @@ */ #include "MVKSurface.h" +#include "MVKSwapchain.h" #include "MVKInstance.h" #include "MVKFoundation.h" #include "MVKOSExtensions.h" +#include "mvk_datatypes.hpp" + +#import "CAMetalLayer+MoltenVK.h" #import "MVKBlockObserver.h" +#ifdef VK_USE_PLATFORM_IOS_MVK +# define PLATFORM_VIEW_CLASS UIView +# import +#endif + +#ifdef VK_USE_PLATFORM_MACOS_MVK +# define PLATFORM_VIEW_CLASS NSView +# import +#endif + + // We need to double-dereference the name to first convert to the platform symbol, then to a string. #define STR_PLATFORM(NAME) #NAME #define STR(NAME) STR_PLATFORM(NAME) @@ -29,54 +44,68 @@ #pragma mark MVKSurface +CAMetalLayer* MVKSurface::getCAMetalLayer() { + std::lock_guard lock(_layerLock); + return _mtlCAMetalLayer; +} + +VkExtent2D MVKSurface::getExtent() { + return _mtlCAMetalLayer ? mvkVkExtent2DFromCGSize(_mtlCAMetalLayer.drawableSize) : _headlessExtent; +} + +VkExtent2D MVKSurface::getNaturalExtent() { + return _mtlCAMetalLayer ? mvkVkExtent2DFromCGSize(_mtlCAMetalLayer.naturalDrawableSizeMVK) : _headlessExtent; +} + +// Per spec, headless surface extent is set from the swapchain. 
+void MVKSurface::setActiveSwapchain(MVKSwapchain* swapchain) { + _activeSwapchain = swapchain; + _headlessExtent = swapchain->getImageExtent(); +} + MVKSurface::MVKSurface(MVKInstance* mvkInstance, const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { + initLayer((CAMetalLayer*)pCreateInfo->pLayer, "vkCreateMetalSurfaceEXT", false); +} - _mtlCAMetalLayer = (CAMetalLayer*)[pCreateInfo->pLayer retain]; - initLayerObserver(); +MVKSurface::MVKSurface(MVKInstance* mvkInstance, + const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { + initLayer(nil, "vkCreateHeadlessSurfaceEXT", true); } // pCreateInfo->pView can be either a CAMetalLayer or a view (NSView/UIView). MVKSurface::MVKSurface(MVKInstance* mvkInstance, const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { + MVKLogWarn("%s() is deprecated. Use vkCreateMetalSurfaceEXT() from the VK_EXT_metal_surface extension.", STR(vkCreate_PLATFORM_SurfaceMVK)); // Get the platform object contained in pView - id obj = (id)pCreateInfo->pView; - // If it's a view (NSView/UIView), extract the layer, otherwise assume it's already a CAMetalLayer. + id obj = (id)pCreateInfo->pView; if ([obj isKindOfClass: [PLATFORM_VIEW_CLASS class]]) { - if ( !NSThread.isMainThread ) { - MVKLogInfo("%s(): You are not calling this function from the main thread. %s should only be accessed from the main thread. 
When using this function outside the main thread, consider passing the CAMetalLayer itself in %s::pView, instead of the %s.", - STR(vkCreate_PLATFORM_SurfaceMVK), STR(PLATFORM_VIEW_CLASS), STR(Vk_PLATFORM_SurfaceCreateInfoMVK), STR(PLATFORM_VIEW_CLASS)); - } - obj = ((PLATFORM_VIEW_CLASS*)obj).layer; + __block id layer; + mvkDispatchToMainAndWait(^{ layer = ((PLATFORM_VIEW_CLASS*)obj).layer; }); + obj = layer; } // Confirm that we were provided with a CAMetalLayer - if ([obj isKindOfClass: [CAMetalLayer class]]) { - _mtlCAMetalLayer = (CAMetalLayer*)[obj retain]; // retained - } else { - setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, - "%s(): On-screen rendering requires a layer of type CAMetalLayer.", - STR(vkCreate_PLATFORM_SurfaceMVK))); - _mtlCAMetalLayer = nil; - } - - initLayerObserver(); + initLayer([obj isKindOfClass: CAMetalLayer.class] ? (CAMetalLayer*)obj : nil, STR(vkCreate_PLATFORM_SurfaceMVK), false); } -// Sometimes, the owning view can replace its CAMetalLayer. In that case, the client needs to recreate the surface. -void MVKSurface::initLayerObserver() { +void MVKSurface::initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName, bool isHeadless) { - _layerObserver = nil; - if ( ![_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) { return; } + _mtlCAMetalLayer = [mtlLayer retain]; // retained + if ( !_mtlCAMetalLayer && !isHeadless ) { setConfigurationResult(reportError(VK_ERROR_SURFACE_LOST_KHR, "%s(): On-screen rendering requires a layer of type CAMetalLayer.", vkFuncName)); } - _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { - if ( ![path isEqualToString: @"layer"] ) { return; } - this->releaseLayer(); - } forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"]; + // Sometimes, the owning view can replace its CAMetalLayer. + // When that happens, the app needs to recreate the surface. 
+ if ([_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]]) { + _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { + if ([path isEqualToString: @"layer"]) { this->releaseLayer(); } + } forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"]; + } } void MVKSurface::releaseLayer() { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h index 523a5807..d8eb535f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h @@ -23,13 +23,10 @@ #include "MVKSmallVector.h" #include -#import "CAMetalLayer+MoltenVK.h" #import class MVKWatermark; -@class MVKBlockObserver; - #pragma mark - #pragma mark MVKSwapchain @@ -45,11 +42,20 @@ public: /** Returns the debug report object type of this object. */ VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT; } + /** Returns the CAMetalLayer underlying the surface used by this swapchain. */ + CAMetalLayer* getCAMetalLayer(); + + /** Returns whether the surface is headless. */ + bool isHeadless(); + /** Returns the number of images in this swapchain. */ - inline uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); } + uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); } + + /** Returns the size of the images in this swapchain. */ + VkExtent2D getImageExtent() { return _imageExtent; } /** Returns the image at the specified index. */ - inline MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; } + MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; } /** * Returns the array of presentable images associated with this swapchain. @@ -76,19 +82,8 @@ public: /** Releases swapchain images. 
*/ VkResult releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo); - /** Returns whether the parent surface is now lost and this swapchain must be recreated. */ - bool getIsSurfaceLost() { return _surfaceLost; } - - /** Returns whether this swapchain is optimally sized for the surface. */ - bool hasOptimalSurface(); - /** Returns the status of the surface. Surface loss takes precedence over sub-optimal errors. */ - VkResult getSurfaceStatus() { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; } - if ( !hasOptimalSurface() ) { return VK_SUBOPTIMAL_KHR; } - return VK_SUCCESS; - } + VkResult getSurfaceStatus(); /** Adds HDR metadata to this swapchain. */ void setHDRMetadataEXT(const VkHdrMetadataEXT& metadata); @@ -118,45 +113,29 @@ protected: VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo, uint32_t imgCnt); void initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt); - void releaseLayer(); - void releaseUndisplayedSurfaces(); + bool getIsSurfaceLost(); + bool hasOptimalSurface(); uint64_t getNextAcquisitionID(); - void willPresentSurface(id mtlTexture, id mtlCmdBuff); void renderWatermark(id mtlTexture, id mtlCmdBuff); void markFrameInterval(); - void recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + void beginPresentation(const MVKImagePresentInfo& presentInfo); + void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + void forceUnpresentedImageCompletion(); - CAMetalLayer* _mtlLayer = nil; + MVKSurface* _surface = nullptr; MVKWatermark* _licenseWatermark = nullptr; MVKSmallVector _presentableImages; MVKSmallVector _compatiblePresentModes; static const int kMaxPresentationHistory = 60; VkPastPresentationTimingGOOGLE _presentTimingHistory[kMaxPresentationHistory]; std::atomic _currentAcquisitionID = 0; - MVKBlockObserver* 
_layerObserver = nil; std::mutex _presentHistoryLock; - std::mutex _layerLock; uint64_t _lastFrameTime = 0; - VkExtent2D _mtlLayerDrawableExtent = {0, 0}; + VkExtent2D _imageExtent = {0, 0}; + std::atomic _unpresentedImageCount = 0; uint32_t _currentPerfLogFrameCount = 0; uint32_t _presentHistoryCount = 0; uint32_t _presentHistoryIndex = 0; uint32_t _presentHistoryHeadIndex = 0; - std::atomic _surfaceLost = false; bool _isDeliberatelyScaled = false; }; - - -#pragma mark - -#pragma mark Support functions - -/** - * Returns the natural extent of the CAMetalLayer. - * - * The natural extent is the size of the bounds property of the layer, - * multiplied by the contentsScale property of the layer, rounded - * to nearest integer using half-to-even rounding. - */ -static inline VkExtent2D mvkGetNaturalExtent(CAMetalLayer* mtlLayer) { - return mvkVkExtent2DFromCGSize(mtlLayer.naturalDrawableSizeMVK); -} diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index e6f3e5c2..e1fa5619 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -26,9 +26,11 @@ #include "MVKWatermarkTextureContent.h" #include "MVKWatermarkShaderSource.h" #include "mvk_datatypes.hpp" +#include + +#import "CAMetalLayer+MoltenVK.h" #import "MVKBlockObserver.h" -#include using namespace std; @@ -47,6 +49,10 @@ void MVKSwapchain::propagateDebugName() { } } +CAMetalLayer* MVKSwapchain::getCAMetalLayer() { return _surface->getCAMetalLayer(); } + +bool MVKSwapchain::isHeadless() { return _surface->isHeadless(); } + VkResult MVKSwapchain::getImages(uint32_t* pCount, VkImage* pSwapchainImages) { // Get the number of surface images @@ -95,9 +101,8 @@ VkResult MVKSwapchain::acquireNextImage(uint64_t timeout, // Return the index of the image with the shortest wait, // and signal the semaphore and fence when it's available *pImageIndex = minWaitImage->_swapchainIndex; - 
minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence); - - return getSurfaceStatus(); + VkResult rslt = minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence); + return rslt ? rslt : getSurfaceStatus(); } VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo) { @@ -105,38 +110,39 @@ VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pRel getPresentableImage(pReleaseInfo->pImageIndices[imgIdxIdx])->makeAvailable(); } - return VK_SUCCESS; + return _surface->getConfigurationResult(); } uint64_t MVKSwapchain::getNextAcquisitionID() { return ++_currentAcquisitionID; } -// Releases any surfaces that are not currently being displayed, -// so they can be used by a different swapchain. -void MVKSwapchain::releaseUndisplayedSurfaces() {} +bool MVKSwapchain::getIsSurfaceLost() { + VkResult surfRslt = _surface->getConfigurationResult(); + setConfigurationResult(surfRslt); + return surfRslt != VK_SUCCESS; +} +VkResult MVKSwapchain::getSurfaceStatus() { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; } + if ( !hasOptimalSurface() ) { return VK_SUBOPTIMAL_KHR; } + return VK_SUCCESS; +} -// This swapchain is optimally sized for the surface if the app has specified deliberate -// swapchain scaling, or the CAMetalLayer drawableSize has not changed since the swapchain -// was created, and the CAMetalLayer will not need to be scaled when composited. +// This swapchain is optimally sized for the surface if the app has specified +// deliberate swapchain scaling, or the surface extent has not changed since the +// swapchain was created, and the surface will not need to be scaled when composited. 
bool MVKSwapchain::hasOptimalSurface() { if (_isDeliberatelyScaled) { return true; } - VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(_mtlLayer.drawableSize); - return (mvkVkExtent2DsAreEqual(drawExtent, _mtlLayerDrawableExtent) && - mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(_mtlLayer))); + VkExtent2D surfExtent = _surface->getExtent(); + return (mvkVkExtent2DsAreEqual(surfExtent, _imageExtent) && + mvkVkExtent2DsAreEqual(surfExtent, _surface->getNaturalExtent())); } #pragma mark Rendering -// Called automatically when a swapchain image is about to be presented to the surface by the queue. -// Activities include marking the frame interval and rendering the watermark if needed. -void MVKSwapchain::willPresentSurface(id mtlTexture, id mtlCmdBuff) { - markFrameInterval(); - renderWatermark(mtlTexture, mtlCmdBuff); -} - -// If the product has not been fully licensed, renders the watermark image to the surface. +// Renders the watermark image to the surface. void MVKSwapchain::renderWatermark(id mtlTexture, id mtlCmdBuff) { if (getMVKConfig().displayWatermark) { if ( !_licenseWatermark ) { @@ -158,22 +164,22 @@ void MVKSwapchain::renderWatermark(id mtlTexture, idaddActivityPerformance(_device->_performanceStatistics.queue.frameInterval, prevFrameTime, _lastFrameTime); + _device->updateActivityPerformance(_device->_performanceStatistics.queue.frameInterval, mvkGetElapsedMilliseconds(prevFrameTime, _lastFrameTime)); - uint32_t perfLogCntLimit = getMVKConfig().performanceLoggingFrameCount; - if ((perfLogCntLimit > 0) && (++_currentPerfLogFrameCount >= perfLogCntLimit)) { + auto& mvkCfg = getMVKConfig(); + bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; + if (shouldLogOnFrames && (mvkCfg.performanceLoggingFrameCount > 0) && (++_currentPerfLogFrameCount >= mvkCfg.performanceLoggingFrameCount)) { _currentPerfLogFrameCount = 0; MVKLogInfo("Performance 
statistics reporting every: %d frames, avg FPS: %.2f, elapsed time: %.3f seconds:", - perfLogCntLimit, - (1000.0 / _device->_performanceStatistics.queue.frameInterval.averageDuration), + mvkCfg.performanceLoggingFrameCount, + (1000.0 / _device->_performanceStatistics.queue.frameInterval.average), mvkGetElapsedMilliseconds() / 1000.0); if (getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) { _device->logPerformanceSummary(); @@ -181,6 +187,125 @@ void MVKSwapchain::markFrameInterval() { } } +VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + + auto* screen = getCAMetalLayer().screenMVK; // Will be nil if headless +#if MVK_MACOS && !MVK_MACCAT + double framesPerSecond = 60; + if (screen) { + CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue]; + CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId); + framesPerSecond = CGDisplayModeGetRefreshRate(mode); + CGDisplayModeRelease(mode); +#if MVK_XCODE_13 + if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)]) + framesPerSecond = [screen maximumFramesPerSecond]; +#endif + // Builtin panels, e.g., on MacBook, report a zero refresh rate. 
+ if (framesPerSecond == 0) + framesPerSecond = 60.0; + } +#elif MVK_IOS_OR_TVOS || MVK_MACCAT + NSInteger framesPerSecond = 60; + if ([screen respondsToSelector: @selector(maximumFramesPerSecond)]) { + framesPerSecond = screen.maximumFramesPerSecond; + } +#elif MVK_VISIONOS + NSInteger framesPerSecond = 90; // TODO: See if this can be obtained from OS instead +#endif + + pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond; + return VK_SUCCESS; +} + +VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + + VkResult res = VK_SUCCESS; + + std::lock_guard lock(_presentHistoryLock); + if (pPresentationTimings == nullptr) { + *pCount = _presentHistoryCount; + } else { + uint32_t countRemaining = std::min(_presentHistoryCount, *pCount); + uint32_t outIndex = 0; + + res = (*pCount >= _presentHistoryCount) ? 
VK_SUCCESS : VK_INCOMPLETE; + *pCount = countRemaining; + + while (countRemaining > 0) { + pPresentationTimings[outIndex] = _presentTimingHistory[_presentHistoryHeadIndex]; + countRemaining--; + _presentHistoryCount--; + _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; + outIndex++; + } + } + + return res; +} + +void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) { + _unpresentedImageCount++; +} + +void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { + _unpresentedImageCount--; + + std::lock_guard lock(_presentHistoryLock); + + markFrameInterval(); + if (_presentHistoryCount < kMaxPresentationHistory) { + _presentHistoryCount++; + } else { + _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; + } + + _presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID; + _presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime; + _presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime; + // These details are not available in Metal + _presentTimingHistory[_presentHistoryIndex].earliestPresentTime = actualPresentTime; + _presentTimingHistory[_presentHistoryIndex].presentMargin = 0; + _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; +} + +// Because of a regression in Metal, the most recent one or two presentations may not complete +// and call back. To work around this, if there are any uncompleted presentations, change the +// drawableSize of the CAMetalLayer, which will trigger presentation completion and callbacks. +// The drawableSize will be set to a correct size by the next swapchain created on the same surface. 
+void MVKSwapchain::forceUnpresentedImageCompletion() { + if (_unpresentedImageCount) { + getCAMetalLayer().drawableSize = { 1,1 }; + } +} + +void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { + auto* mtlLayer = getCAMetalLayer(); + if (!pRegion || pRegion->rectangleCount == 0) { + [mtlLayer setNeedsDisplay]; + return; + } + + for (uint32_t i = 0; i < pRegion->rectangleCount; ++i) { + CGRect cgRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[i]); +#if MVK_MACOS + // VK_KHR_incremental_present specifies an upper-left origin, but macOS by default + // uses a lower-left origin. + cgRect.origin.y = mtlLayer.bounds.size.height - cgRect.origin.y; +#endif + // We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them + // in points, which is pixels / contentsScale. + CGFloat scaleFactor = mtlLayer.contentsScale; + cgRect.origin.x /= scaleFactor; + cgRect.origin.y /= scaleFactor; + cgRect.size.width /= scaleFactor; + cgRect.size.height /= scaleFactor; + [mtlLayer setNeedsDisplayInRect:cgRect]; + } +} + #if MVK_MACOS struct CIE1931XY { uint16_t x; @@ -237,19 +362,32 @@ void MVKSwapchain::setHDRMetadataEXT(const VkHdrMetadataEXT& metadata) { CAEDRMetadata* caMetadata = [CAEDRMetadata HDR10MetadataWithDisplayInfo: colorVolData contentInfo: lightLevelData opticalOutputScale: 1]; - _mtlLayer.EDRMetadata = caMetadata; + auto* mtlLayer = getCAMetalLayer(); + mtlLayer.EDRMetadata = caMetadata; + mtlLayer.wantsExtendedDynamicRangeContent = YES; [caMetadata release]; [colorVolData release]; [lightLevelData release]; - _mtlLayer.wantsExtendedDynamicRangeContent = YES; #endif } #pragma mark Construction -MVKSwapchain::MVKSwapchain(MVKDevice* device, - const VkSwapchainCreateInfoKHR* pCreateInfo) : MVKVulkanAPIDeviceObject(device) { +MVKSwapchain::MVKSwapchain(MVKDevice* device, const VkSwapchainCreateInfoKHR* pCreateInfo) + : MVKVulkanAPIDeviceObject(device), + _surface((MVKSurface*)pCreateInfo->surface), + 
_imageExtent(pCreateInfo->imageExtent) { + + // Check if oldSwapchain is properly set + auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; + if (oldSwapchain == _surface->_activeSwapchain) { + _surface->setActiveSwapchain(this); + } else { + setConfigurationResult(reportError(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR, "vkCreateSwapchainKHR(): pCreateInfo->oldSwapchain does not match the VkSwapchain that is in use by the surface")); + return; + } + memset(_presentTimingHistory, 0, sizeof(_presentTimingHistory)); // Retrieve the scaling and present mode structs if they are supplied. @@ -280,10 +418,6 @@ MVKSwapchain::MVKSwapchain(MVKDevice* device, } } - // If applicable, release any surfaces (not currently being displayed) from the old swapchain. - MVKSwapchain* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; - if (oldSwapchain) { oldSwapchain->releaseUndisplayedSurfaces(); } - uint32_t imgCnt = mvkClamp(pCreateInfo->minImageCount, _device->_pMetalFeatures->minSwapchainImageCount, _device->_pMetalFeatures->maxSwapchainImageCount); @@ -333,85 +467,86 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo, uint32_t imgCnt) { - MVKSurface* mvkSrfc = (MVKSurface*)pCreateInfo->surface; - _mtlLayer = mvkSrfc->getCAMetalLayer(); - if ( !_mtlLayer ) { - setConfigurationResult(mvkSrfc->getConfigurationResult()); - _surfaceLost = true; - return; - } + auto* mtlLayer = getCAMetalLayer(); + if ( !mtlLayer || getIsSurfaceLost() ) { return; } auto minMagFilter = getMVKConfig().swapchainMinMagFilterUseNearest ? 
kCAFilterNearest : kCAFilterLinear; - _mtlLayer.device = getMTLDevice(); - _mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); - _mtlLayer.maximumDrawableCountMVK = imgCnt; - _mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); - _mtlLayer.minificationFilter = minMagFilter; - _mtlLayer.magnificationFilter = minMagFilter; - _mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo); - _mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT)); - // Remember the extent to later detect if it has changed under the covers, - // and set the drawable size of the CAMetalLayer from the extent. - _mtlLayerDrawableExtent = pCreateInfo->imageExtent; - _mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_mtlLayerDrawableExtent); + mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_imageExtent); + mtlLayer.device = getMTLDevice(); + mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); + mtlLayer.maximumDrawableCountMVK = imgCnt; + mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); + mtlLayer.minificationFilter = minMagFilter; + mtlLayer.magnificationFilter = minMagFilter; + mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo); + mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT)); + + // Because of a regression in Metal, the most recent one or two presentations may not + // complete and call back. 
Changing the CAMetalLayer drawableSize will force any incomplete + // presentations on the oldSwapchain to complete and call back, but if the drawableSize + // is not changing from the previous, we force those completions first. + auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; + if (oldSwapchain && mvkVkExtent2DsAreEqual(pCreateInfo->imageExtent, _surface->getExtent())) { + oldSwapchain->forceUnpresentedImageCompletion(); + } if (pCreateInfo->compositeAlpha != VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) { - _mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; } switch (pCreateInfo->imageColorSpace) { case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = 
YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_BT709_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_BT2020_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; #if MVK_XCODE_12 case VK_COLOR_SPACE_HDR10_ST2084_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_HDR10_HLG_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; #endif case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_PASS_THROUGH_EXT: - _mtlLayer.colorspace = nil; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspace = nil; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; 
break; default: setConfigurationResult(reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "vkCreateSwapchainKHR(): Metal does not support VkColorSpaceKHR value %d.", pCreateInfo->imageColorSpace)); @@ -421,22 +556,6 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, // TODO: set additional CAMetalLayer properties before extracting drawables: // - presentsWithTransaction // - drawsAsynchronously - - if ( [_mtlLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) { - // Sometimes, the owning view can replace its CAMetalLayer. In that case, the client - // needs to recreate the swapchain, or no content will be displayed. - _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { - if ( ![path isEqualToString: @"layer"] ) { return; } - this->releaseLayer(); - } forObject: _mtlLayer.delegate atKeyPath: @"layer"]; - } -} - -void MVKSwapchain::releaseLayer() { - std::lock_guard lock(_layerLock); - _surfaceLost = true; - [_layerObserver release]; - _layerObserver = nil; } // Initializes the array of images used for the surface of this swapchain. 
@@ -460,13 +579,12 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo } VkExtent2D imgExtent = pCreateInfo->imageExtent; - VkImageCreateInfo imgInfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = VK_NULL_HANDLE, .imageType = VK_IMAGE_TYPE_2D, - .format = getPixelFormats()->getVkFormat(_mtlLayer.pixelFormat), - .extent = { imgExtent.width, imgExtent.height, 1 }, + .format = pCreateInfo->imageFormat, + .extent = mvkVkExtent3DFromVkExtent2D(imgExtent), .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, @@ -492,133 +610,34 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo _presentableImages.push_back(_device->createPresentableSwapchainImage(&imgInfo, this, imgIdx, nullptr)); } - NSString* screenName = @"Main Screen"; + auto* mtlLayer = getCAMetalLayer(); + if (mtlLayer) { + NSString* screenName = @"Main Screen"; #if MVK_MACOS && !MVK_MACCAT - if ([_mtlLayer.screenMVK respondsToSelector:@selector(localizedName)]) { - screenName = _mtlLayer.screenMVK.localizedName; - } -#endif - MVKLogInfo("Created %d swapchain images with initial size (%d, %d) and contents scale %.1f for screen %s.", - imgCnt, imgExtent.width, imgExtent.height, _mtlLayer.contentsScale, screenName.UTF8String); -} - -VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - -#if MVK_VISIONOS - // TODO: See if this can be obtained from OS instead - NSInteger framesPerSecond = 90; -#elif MVK_IOS_OR_TVOS || MVK_MACCAT - NSInteger framesPerSecond = 60; - UIScreen* screen = _mtlLayer.screenMVK; - if ([screen respondsToSelector: @selector(maximumFramesPerSecond)]) { - framesPerSecond = screen.maximumFramesPerSecond; - } -#elif MVK_MACOS && !MVK_MACCAT - NSScreen* screen = _mtlLayer.screenMVK; - CGDirectDisplayID displayId = [[[screen deviceDescription] 
objectForKey:@"NSScreenNumber"] unsignedIntValue]; - CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId); - double framesPerSecond = CGDisplayModeGetRefreshRate(mode); - CGDisplayModeRelease(mode); -#if MVK_XCODE_13 - if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)]) - framesPerSecond = [screen maximumFramesPerSecond]; -#endif - - // Builtin panels, e.g., on MacBook, report a zero refresh rate. - if (framesPerSecond == 0) - framesPerSecond = 60.0; -#endif - - pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond; - return VK_SUCCESS; -} - -VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - - VkResult res = VK_SUCCESS; - - std::lock_guard lock(_presentHistoryLock); - if (pPresentationTimings == nullptr) { - *pCount = _presentHistoryCount; - } else { - uint32_t countRemaining = std::min(_presentHistoryCount, *pCount); - uint32_t outIndex = 0; - - res = (*pCount >= _presentHistoryCount) ? 
VK_SUCCESS : VK_INCOMPLETE; - *pCount = countRemaining; - - while (countRemaining > 0) { - pPresentationTimings[outIndex] = _presentTimingHistory[_presentHistoryHeadIndex]; - countRemaining--; - _presentHistoryCount--; - _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; - outIndex++; + auto* screen = mtlLayer.screenMVK; + if ([screen respondsToSelector:@selector(localizedName)]) { + screenName = screen.localizedName; } - } - - return res; -} - -void MVKSwapchain::recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { - std::lock_guard lock(_presentHistoryLock); - if (_presentHistoryCount < kMaxPresentationHistory) { - _presentHistoryCount++; - } else { - _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; - } - - // If actual present time is not available, use desired time instead, and if that - // hasn't been set, use the current time, which should be reasonably accurate (sub-ms), - // since we are here as part of the addPresentedHandler: callback. 
- if (actualPresentTime == 0) { actualPresentTime = presentInfo.desiredPresentTime; } - if (actualPresentTime == 0) { actualPresentTime = CACurrentMediaTime() * 1.0e9; } - - _presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID; - _presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime; - _presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime; - // These details are not available in Metal - _presentTimingHistory[_presentHistoryIndex].earliestPresentTime = actualPresentTime; - _presentTimingHistory[_presentHistoryIndex].presentMargin = 0; - _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; -} - -void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { - if (!pRegion || pRegion->rectangleCount == 0) { - [_mtlLayer setNeedsDisplay]; - return; - } - - for (uint32_t i = 0; i < pRegion->rectangleCount; ++i) { - CGRect cgRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[i]); -#if MVK_MACOS - // VK_KHR_incremental_present specifies an upper-left origin, but macOS by default - // uses a lower-left origin. - cgRect.origin.y = _mtlLayer.bounds.size.height - cgRect.origin.y; #endif - // We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them - // in points, which is pixels / contentsScale. 
- CGFloat scaleFactor = _mtlLayer.contentsScale; - cgRect.origin.x /= scaleFactor; - cgRect.origin.y /= scaleFactor; - cgRect.size.width /= scaleFactor; - cgRect.size.height /= scaleFactor; - [_mtlLayer setNeedsDisplayInRect:cgRect]; + MVKLogInfo("Created %d swapchain images with size (%d, %d) and contents scale %.1f in layer %s (%p) on screen %s.", + imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, mtlLayer.name.UTF8String, mtlLayer, screenName.UTF8String); + } else { + MVKLogInfo("Created %d swapchain images with size (%d, %d) on headless surface.", imgCnt, imgExtent.width, imgExtent.height); } } -// A retention loop exists between the swapchain and its images. The swapchain images -// retain the swapchain because they can be in flight when the app destroys the swapchain. -// Release the images now, when the app destroys the swapchain, so they will be destroyed when -// no longer held by the presentation flow, and will in turn release the swapchain for destruction. void MVKSwapchain::destroy() { + // If this swapchain was not replaced by a new swapchain, remove this swapchain + // from the surface, and force any outstanding presentations to complete. + if (_surface->_activeSwapchain == this) { + _surface->_activeSwapchain = nullptr; + forceUnpresentedImageCompletion(); + } for (auto& img : _presentableImages) { _device->destroyPresentableSwapchainImage(img, NULL); } MVKVulkanAPIDeviceObject::destroy(); } MVKSwapchain::~MVKSwapchain() { if (_licenseWatermark) { _licenseWatermark->destroy(); } - releaseLayer(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h index ad87f715..4e3f5122 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h @@ -63,6 +63,9 @@ public: /** Returns whether this instance is in a reserved state. */ bool isReserved(); + /** Returns the number of outstanding reservations. 
*/ + uint32_t getReservationCount(); + /** * Blocks processing on the current thread until any or all (depending on configuration) outstanding * reservations have been released, or until the specified timeout interval in nanoseconds expires. @@ -89,20 +92,19 @@ public: * require a separate call to the release() function to cause the semaphore to stop blocking. */ MVKSemaphoreImpl(bool waitAll = true, uint32_t reservationCount = 0) - : _shouldWaitAll(waitAll), _reservationCount(reservationCount) {} + : _reservationCount(reservationCount), _shouldWaitAll(waitAll) {} - /** Destructor. */ ~MVKSemaphoreImpl(); private: bool operator()(); - inline bool isClear() { return _reservationCount == 0; } // Not thread-safe + bool isClear() { return _reservationCount == 0; } // Not thread-safe std::mutex _lock; std::condition_variable _blocker; - bool _shouldWaitAll; uint32_t _reservationCount; + bool _shouldWaitAll; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm index d4ebe72f..50e4601f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm @@ -50,6 +50,11 @@ bool MVKSemaphoreImpl::isReserved() { return !isClear(); } +uint32_t MVKSemaphoreImpl::getReservationCount() { + lock_guard lock(_lock); + return _reservationCount; +} + bool MVKSemaphoreImpl::wait(uint64_t timeout, bool reserveAgain) { unique_lock lock(_lock); @@ -123,7 +128,7 @@ uint64_t MVKSemaphoreMTLEvent::deferSignal() { } void MVKSemaphoreMTLEvent::encodeDeferredSignal(id mtlCmdBuff, uint64_t deferToken) { - if (mtlCmdBuff) { [mtlCmdBuff encodeSignalEvent: _mtlEvent value: deferToken]; } + [mtlCmdBuff encodeSignalEvent: _mtlEvent value: deferToken]; } MVKSemaphoreMTLEvent::MVKSemaphoreMTLEvent(MVKDevice* device, @@ -583,7 +588,7 @@ void MVKMetalCompiler::compile(unique_lock& lock, dispatch_block_t block) if (_compileError) { handleError(); } - mvkDev->addActivityPerformance(*_pPerformanceTracker, _startTime); + 
mvkDev->addPerformanceInterval(*_pPerformanceTracker, _startTime); } void MVKMetalCompiler::handleError() { diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index ee292a76..c9917b79 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -91,6 +91,7 @@ MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain_mutable_format, KHR_SWAPCHAIN_MUTABLE_FORMAT, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(KHR_synchronization2, KHR_SYNCHRONIZATION_2, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_timeline_semaphore, KHR_TIMELINE_SEMAPHORE, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_uniform_buffer_standard_layout, KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_variable_pointers, KHR_VARIABLE_POINTERS, DEVICE, 10.11, 8.0, 1.0) @@ -102,9 +103,13 @@ MVK_EXTENSION(EXT_debug_marker, EXT_DEBUG_MARKER, MVK_EXTENSION(EXT_debug_report, EXT_DEBUG_REPORT, INSTANCE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_debug_utils, EXT_DEBUG_UTILS, INSTANCE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_descriptor_indexing, EXT_DESCRIPTOR_INDEXING, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(EXT_extended_dynamic_state, EXT_EXTENDED_DYNAMIC_STATE, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(EXT_extended_dynamic_state2, EXT_EXTENDED_DYNAMIC_STATE_2, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(EXT_extended_dynamic_state3, EXT_EXTENDED_DYNAMIC_STATE_3, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_external_memory_host, EXT_EXTERNAL_MEMORY_HOST, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, DEVICE, 10.13, 11.0, 1.0) MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, DEVICE, 10.15, MVK_NA, MVK_NA) +MVK_EXTENSION(EXT_headless_surface, EXT_HEADLESS_SURFACE, INSTANCE, 10.11, 8.0, 
1.0) MVK_EXTENSION(EXT_host_query_reset, EXT_HOST_QUERY_RESET, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_image_robustness, EXT_IMAGE_ROBUSTNESS, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_inline_uniform_block, EXT_INLINE_UNIFORM_BLOCK, DEVICE, 10.11, 8.0, 1.0) diff --git a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h index 61a5c439..c78128ba 100644 --- a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h +++ b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h @@ -23,12 +23,10 @@ #import #if MVK_IOS_OR_TVOS || MVK_MACCAT -# define PLATFORM_SCREEN_CLASS UIScreen # include #endif #if MVK_MACOS && !MVK_MACCAT -# define PLATFORM_SCREEN_CLASS NSScreen # include #endif @@ -76,9 +74,16 @@ */ @property(nonatomic, readwrite) CFStringRef colorspaceNameMVK; -#if !MVK_VISIONOS +#if MVK_IOS_OR_TVOS || MVK_MACCAT /** Returns the screen on which this layer is rendering. */ -@property(nonatomic, readonly) PLATFORM_SCREEN_CLASS* screenMVK; +@property(nonatomic, readonly) UIScreen* screenMVK; +#endif + +#if MVK_MACOS && !MVK_MACCAT +/** Returns the screen on which this layer is rendering. 
*/ +@property(nonatomic, readonly) NSScreen* screenMVK; + +@property(nonatomic, readonly) NSScreen* privateScreenMVKImpl; #endif @end diff --git a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.mm similarity index 92% rename from MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m rename to MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.mm index 9a8b10d8..380a9150 100644 --- a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m +++ b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.mm @@ -18,6 +18,7 @@ #include "CAMetalLayer+MoltenVK.h" +#include "MVKOSExtensions.h" #if MVK_MACOS && !MVK_MACCAT # include @@ -88,6 +89,13 @@ #if MVK_MACOS && !MVK_MACCAT -(NSScreen*) screenMVK { + __block NSScreen* screen; + mvkDispatchToMainAndWait(^{ screen = self.privateScreenMVKImpl; }); + return screen; +} + +// Search for the screen currently displaying the layer, and default to the main screen if it can't be found. +-(NSScreen*) privateScreenMVKImpl { // If this layer has a delegate that is an NSView, and the view is in a window, retrieve the screen from the window. if ([self.delegate isKindOfClass: NSView.class]) { NSWindow* window = ((NSView*)self.delegate).window; diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h index d45f4078..356dce42 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h @@ -57,7 +57,7 @@ public: void reportMessage(MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object, which may be nil. + * Report a message, on behalf of the object, which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. 
@@ -65,14 +65,19 @@ public: static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object, which may be nil. + * Report a Vulkan result message. This includes logging to a standard system logging stream, + * and some subclasses will also forward the message to their VkInstance for output to the + * Vulkan debug report messaging API. + */ + VkResult reportResult(VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5); + + /** + * Report a Vulkan result message, on behalf of the object, which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. - * - * This is the core reporting implementation. Other similar functions delegate here. */ - static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0); + static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5); /** * Report a Vulkan error message. This includes logging to a standard system logging stream, @@ -90,19 +95,29 @@ public: static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object. which may be nil. + * Report a Vulkan warning message. This includes logging to a standard system logging stream, + * and some subclasses will also forward the message to their VkInstance for output to the + * Vulkan debug report messaging API. + */ + VkResult reportWarning(VkResult vkRslt, const char* format, ...) __printflike(3, 4); + + /** + * Report a Vulkan warning message, on behalf of the object, which may be nil. 
* Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. - * - * This is the core reporting implementation. Other similar functions delegate here. */ - static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) __printflike(3, 0); + static VkResult reportWarning(MVKBaseObject* mvkObj, VkResult vkRslt, const char* format, ...) __printflike(3, 4); /** Destroys this object. Default behaviour simply deletes it. Subclasses may override to delay deletion. */ virtual void destroy() { delete this; } - virtual ~MVKBaseObject() {} + virtual ~MVKBaseObject() {} + +protected: + static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(4, 0); + static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0); + }; @@ -135,7 +150,7 @@ public: * Called when this instance has been retained as a reference by another object, * indicating that this instance will not be deleted until that reference is released. */ - void retain() { _refCount++; } + void retain() { _refCount.fetch_add(1, std::memory_order_relaxed); } /** * Called when this instance has been released as a reference from another object. @@ -146,7 +161,7 @@ public: * Note that the destroy() function is called on the BaseClass. * Releasing will not call any overridden destroy() function in a descendant class. */ - void release() { if (--_refCount == 0) { BaseClass::destroy(); } } + void release() { if (_refCount.fetch_sub(1, std::memory_order_acq_rel) == 1) { BaseClass::destroy(); } } /** * Marks this instance as destroyed. 
If all previous references to this instance @@ -158,15 +173,10 @@ public: MVKReferenceCountingMixin() : _refCount(1) {} /** Copy starts with fresh reference counts. */ - MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) { - _refCount = 1; - } + MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) : _refCount(1) {} - /** Copy starts with fresh reference counts. */ - MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) { - _refCount = 1; - return *this; - } + /** Don't overwrite refcounted objects. */ + MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) = delete; protected: std::atomic _refCount; @@ -202,3 +212,15 @@ public: protected: VkResult _configurationResult = VK_SUCCESS; }; + + +#pragma mark - +#pragma mark Support functions + +/** + * If the object is not a nullptr, returns the MoltenVK configuration info for the + * VkInstance that created the object, otherwise returns the global configuration info. + */ +static inline const MVKConfiguration& mvkGetMVKConfig(MVKBaseObject* mvkObj) { + return mvkObj ? 
mvkObj->getMVKConfig() : mvkConfig(); +} diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm index 7783e9ab..c309cc13 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm @@ -27,24 +27,19 @@ using namespace std; -static const char* getReportingLevelString(MVKConfigLogLevel logLevel) { - switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return "mvk-debug"; - case MVK_CONFIG_LOG_LEVEL_INFO: - return "mvk-info"; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return "mvk-warn"; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return "mvk-error"; - } -} - - #pragma mark - #pragma mark MVKBaseObject +static const char* getReportingLevelString(MVKConfigLogLevel logLevel) { + switch (logLevel) { + case MVK_CONFIG_LOG_LEVEL_ERROR: return "mvk-error"; + case MVK_CONFIG_LOG_LEVEL_WARNING: return "mvk-warn"; + case MVK_CONFIG_LOG_LEVEL_INFO: return "mvk-info"; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return "mvk-debug"; + default: return "mvk-unknown"; + } +} + string MVKBaseObject::getClassName() { return mvk::getTypeName(this); } const MVKConfiguration& MVKBaseObject::getMVKConfig() { @@ -109,10 +104,43 @@ void MVKBaseObject::reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLe free(redoBuff); } +VkResult MVKBaseObject::reportResult(VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(this, vkErr, logLevel, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) 
{ + va_list args; + va_start(args, format); + VkResult rslt = reportResult(mvkObj, vkErr, logLevel, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) { + + // Prepend the result code to the format string + const char* vkRsltName = mvkVkResultName(vkRslt); + size_t rsltLen = strlen(vkRsltName) + strlen(format) + 4; + char fmtStr[rsltLen]; + snprintf(fmtStr, rsltLen, "%s: %s", vkRsltName, format); + + // Report the message + va_list lclArgs; + va_copy(lclArgs, args); + reportMessage(mvkObj, logLevel, fmtStr, lclArgs); + va_end(lclArgs); + + return vkRslt; +} + VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) { va_list args; va_start(args, format); - VkResult rslt = reportError(this, vkErr, format, args); + VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args); va_end(args); return rslt; } @@ -120,25 +148,23 @@ VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) { VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) { va_list args; va_start(args, format); - VkResult rslt = reportError(mvkObj, vkErr, format, args); + VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args); va_end(args); return rslt; } -// This is the core reporting implementation. Other similar functions delegate here. 
-VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) { - - // Prepend the error code to the format string - const char* vkRsltName = mvkVkResultName(vkErr); - size_t rsltLen = strlen(vkRsltName) + strlen(format) + 4; - char fmtStr[rsltLen]; - snprintf(fmtStr, rsltLen, "%s: %s", vkRsltName, format); - - // Report the error - va_list lclArgs; - va_copy(lclArgs, args); - reportMessage(mvkObj, MVK_CONFIG_LOG_LEVEL_ERROR, fmtStr, lclArgs); - va_end(lclArgs); - - return vkErr; +VkResult MVKBaseObject::reportWarning(VkResult vkErr, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportWarning(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args); + va_end(args); + return rslt; } diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h index 46bf41cd..a1441bc5 100755 --- a/MoltenVK/MoltenVK/Utility/MVKBitArray.h +++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h @@ -90,7 +90,11 @@ public: * and optionally clears that bit. If no bits are set, returns the size() of this bit array. 
*/ size_t getIndexOfFirstSetBit(size_t startIndex, bool shouldClear) { - size_t startSecIdx = std::max(getIndexOfSection(startIndex), _clearedSectionCount); + size_t startSecIdx = getIndexOfSection(startIndex); + if (startSecIdx < _clearedSectionCount) { + startSecIdx = _clearedSectionCount; + startIndex = 0; + } size_t bitIdx = startSecIdx << SectionMaskSize; size_t secCnt = getSectionCount(); for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) { @@ -101,6 +105,7 @@ public: if (shouldClear) { clearBit(bitIdx); } return std::min(bitIdx, _bitCount); } + startIndex = 0; } return std::min(bitIdx, _bitCount); } diff --git a/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def new file mode 100644 index 00000000..1b5cf69f --- /dev/null +++ b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def @@ -0,0 +1,86 @@ +/* + * MVKConfigMembers.def + * + * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// The items in the list below describe the members of the MVKConfiguration struct. +// When a new member is added to the MVKConfiguration struct, a corresponding description +// must be added here. 
+// +// To use this file, define the macro: +// +// MVK_CONFIG_MEMBER(member, mbrType, name) +// +// and if strings are handled differently: +// +// MVK_CONFIG_MEMBER_STRING(member, mbrType, name) +// +// then #include this file inline with your code. +// +// The name parameter is the name of the configuration parameter, which is used as the name +// of the environment variable, and build setting, that sets the config value, and is entered +// here without the "MVK_CONFIG_" prefix. + + +#ifndef MVK_CONFIG_MEMBER +#error MVK_CONFIG_MEMBER must be defined before including this file +#endif + +#ifndef MVK_CONFIG_MEMBER_STRING +# define MVK_CONFIG_MEMBER_STRING(member, mbrType, name) MVK_CONFIG_MEMBER(member, mbrType, name) +#endif + +MVK_CONFIG_MEMBER(debugMode, VkBool32, DEBUG) +MVK_CONFIG_MEMBER(shaderConversionFlipVertexY, VkBool32, SHADER_CONVERSION_FLIP_VERTEX_Y) +MVK_CONFIG_MEMBER(synchronousQueueSubmits, VkBool32, SYNCHRONOUS_QUEUE_SUBMITS) +MVK_CONFIG_MEMBER(prefillMetalCommandBuffers, MVKPrefillMetalCommandBuffersStyle, PREFILL_METAL_COMMAND_BUFFERS) +MVK_CONFIG_MEMBER(maxActiveMetalCommandBuffersPerQueue, uint32_t, MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE) +MVK_CONFIG_MEMBER(supportLargeQueryPools, VkBool32, SUPPORT_LARGE_QUERY_POOLS) +MVK_CONFIG_MEMBER(presentWithCommandBuffer, VkBool32, PRESENT_WITH_COMMAND_BUFFER) +MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MAG_FILTER_USE_NEAREST) // Deprecated legacy renaming +MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST) +MVK_CONFIG_MEMBER(metalCompileTimeout, uint64_t, METAL_COMPILE_TIMEOUT) +MVK_CONFIG_MEMBER(performanceTracking, VkBool32, PERFORMANCE_TRACKING) +MVK_CONFIG_MEMBER(performanceLoggingFrameCount, uint32_t, PERFORMANCE_LOGGING_FRAME_COUNT) +MVK_CONFIG_MEMBER(activityPerformanceLoggingStyle, MVKConfigActivityPerformanceLoggingStyle, ACTIVITY_PERFORMANCE_LOGGING_STYLE) +MVK_CONFIG_MEMBER(displayWatermark, VkBool32, 
DISPLAY_WATERMARK) +MVK_CONFIG_MEMBER(specializedQueueFamilies, VkBool32, SPECIALIZED_QUEUE_FAMILIES) +MVK_CONFIG_MEMBER(switchSystemGPU, VkBool32, SWITCH_SYSTEM_GPU) +MVK_CONFIG_MEMBER(fullImageViewSwizzle, VkBool32, FULL_IMAGE_VIEW_SWIZZLE) +MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueFamilyIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX) +MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX) +MVK_CONFIG_MEMBER(fastMathEnabled, MVKConfigFastMath, FAST_MATH_ENABLED) +MVK_CONFIG_MEMBER(logLevel, MVKConfigLogLevel, LOG_LEVEL) +MVK_CONFIG_MEMBER(traceVulkanCalls, MVKConfigTraceVulkanCalls, TRACE_VULKAN_CALLS) +MVK_CONFIG_MEMBER(forceLowPowerGPU, VkBool32, FORCE_LOW_POWER_GPU) +MVK_CONFIG_MEMBER(semaphoreUseMTLFence, VkBool32, ALLOW_METAL_FENCES) // Deprecated legacy +MVK_CONFIG_MEMBER(semaphoreSupportStyle, MVKVkSemaphoreSupportStyle, VK_SEMAPHORE_SUPPORT_STYLE) +MVK_CONFIG_MEMBER(autoGPUCaptureScope, MVKConfigAutoGPUCaptureScope, AUTO_GPU_CAPTURE_SCOPE) +MVK_CONFIG_MEMBER_STRING(autoGPUCaptureOutputFilepath, char*, AUTO_GPU_CAPTURE_OUTPUT_FILE) +MVK_CONFIG_MEMBER(texture1DAs2D, VkBool32, TEXTURE_1D_AS_2D) +MVK_CONFIG_MEMBER(preallocateDescriptors, VkBool32, PREALLOCATE_DESCRIPTORS) +MVK_CONFIG_MEMBER(useCommandPooling, VkBool32, USE_COMMAND_POOLING) +MVK_CONFIG_MEMBER(useMTLHeap, VkBool32, USE_MTLHEAP) +MVK_CONFIG_MEMBER(apiVersionToAdvertise, uint32_t, API_VERSION_TO_ADVERTISE) +MVK_CONFIG_MEMBER(advertiseExtensions, uint32_t, ADVERTISE_EXTENSIONS) +MVK_CONFIG_MEMBER(resumeLostDevice, VkBool32, RESUME_LOST_DEVICE) +MVK_CONFIG_MEMBER(useMetalArgumentBuffers, MVKUseMetalArgumentBuffers, USE_METAL_ARGUMENT_BUFFERS) +MVK_CONFIG_MEMBER(shaderSourceCompressionAlgorithm, MVKConfigCompressionAlgorithm, SHADER_COMPRESSION_ALGORITHM) +MVK_CONFIG_MEMBER(shouldMaximizeConcurrentCompilation, VkBool32, SHOULD_MAXIMIZE_CONCURRENT_COMPILATION) +MVK_CONFIG_MEMBER(timestampPeriodLowPassAlpha, float, 
TIMESTAMP_PERIOD_LOWPASS_ALPHA) + +#undef MVK_CONFIG_MEMBER +#undef MVK_CONFIG_MEMBER_STRING diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp index d8dd7341..e8f0870c 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp @@ -18,57 +18,42 @@ #include "MVKEnvironment.h" #include "MVKOSExtensions.h" +#include "MVKFoundation.h" +// Return the expected size of MVKConfiguration, based on contents of MVKConfigMembers.def. +static constexpr uint32_t getExpectedMVKConfigurationSize() { +#define MVK_CONFIG_MEMBER(member, mbrType, name) cfgSize += sizeof(mbrType); + uint32_t cfgSize = 0; +#include "MVKConfigMembers.def" + return cfgSize; +} + static bool _mvkConfigInitialized = false; static void mvkInitConfigFromEnvVars() { + static_assert(getExpectedMVKConfigurationSize() == sizeof(MVKConfiguration), "MVKConfigMembers.def does not match the members of MVKConfiguration."); + _mvkConfigInitialized = true; MVKConfiguration evCfg; std::string evGPUCapFileStrObj; - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.debugMode, MVK_CONFIG_DEBUG); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shaderConversionFlipVertexY, MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.synchronousQueueSubmits, MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.prefillMetalCommandBuffers, MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.maxActiveMetalCommandBuffersPerQueue, MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.supportLargeQueryPools, MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.presentWithCommandBuffer, MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST); // Deprecated legacy env var - 
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST); - MVK_SET_FROM_ENV_OR_BUILD_INT64 (evCfg.metalCompileTimeout, MVK_CONFIG_METAL_COMPILE_TIMEOUT); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.performanceTracking, MVK_CONFIG_PERFORMANCE_TRACKING); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.performanceLoggingFrameCount, MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.activityPerformanceLoggingStyle, MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.displayWatermark, MVK_CONFIG_DISPLAY_WATERMARK); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.specializedQueueFamilies, MVK_CONFIG_SPECIALIZED_QUEUE_FAMILIES); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.switchSystemGPU, MVK_CONFIG_SWITCH_SYSTEM_GPU); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.fullImageViewSwizzle, MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueFamilyIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.fastMathEnabled, MVK_CONFIG_FAST_MATH_ENABLED); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.logLevel, MVK_CONFIG_LOG_LEVEL); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.traceVulkanCalls, MVK_CONFIG_TRACE_VULKAN_CALLS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.forceLowPowerGPU, MVK_CONFIG_FORCE_LOW_POWER_GPU); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLFence, MVK_ALLOW_METAL_FENCES); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.semaphoreSupportStyle, MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.autoGPUCaptureScope, MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE); - MVK_SET_FROM_ENV_OR_BUILD_STRING(evCfg.autoGPUCaptureOutputFilepath, MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE, evGPUCapFileStrObj); - MVK_SET_FROM_ENV_OR_BUILD_BOOL 
(evCfg.texture1DAs2D, MVK_CONFIG_TEXTURE_1D_AS_2D); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.preallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useCommandPooling, MVK_CONFIG_USE_COMMAND_POOLING); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useMTLHeap, MVK_CONFIG_USE_MTLHEAP); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.apiVersionToAdvertise, MVK_CONFIG_API_VERSION_TO_ADVERTISE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.advertiseExtensions, MVK_CONFIG_ADVERTISE_EXTENSIONS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.resumeLostDevice, MVK_CONFIG_RESUME_LOST_DEVICE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.useMetalArgumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.shaderSourceCompressionAlgorithm, MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shouldMaximizeConcurrentCompilation, MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION); +#define STR(name) #name - // Support legacy environment variable MVK_DEBUG, but only if it has been explicitly set as an environment variable. - bool legacyDebugWasFound = false; - bool legacyDebugEV = mvkGetEnvVarBool("MVK_DEBUG", &legacyDebugWasFound); - if (legacyDebugWasFound) { evCfg.debugMode = legacyDebugEV; } +#define MVK_CONFIG_MEMBER(member, mbrType, name) \ + evCfg.member = (mbrType)mvkGetEnvVarNumber(STR(MVK_CONFIG_##name), MVK_CONFIG_##name); + +#define MVK_CONFIG_MEMBER_STRING(member, mbrType, name) \ + evCfg.member = mvkGetEnvVarString(STR(MVK_CONFIG_##name), evGPUCapFileStrObj, MVK_CONFIG_##name); + +#include "MVKConfigMembers.def" + + // At this point, debugMode has been set by env var MVK_CONFIG_DEBUG. + // MVK_CONFIG_DEBUG replaced the deprecated MVK_DEBUG env var, so for + // legacy use, if the MVK_DEBUG env var is explicitly set, override debugMode. + double noEV = -3.1415; // An unlikely env var value.
+ double cvMVKDebug = mvkGetEnvVarNumber("MVK_DEBUG", noEV); + if (cvMVKDebug != noEV) { evCfg.debugMode = cvMVKDebug; } // Deprected legacy VkSemaphore MVK_ALLOW_METAL_FENCES and MVK_ALLOW_METAL_EVENTS config. // Legacy MVK_ALLOW_METAL_EVENTS is covered by MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE, @@ -76,9 +61,7 @@ static void mvkInitConfigFromEnvVars() { // disabled, disable semaphoreUseMTLEvent (aliased as semaphoreSupportStyle value // MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE), and let mvkSetConfig() // further process legacy behavior of MVK_ALLOW_METAL_FENCES. - bool sem4UseMTLEvent; - MVK_SET_FROM_ENV_OR_BUILD_BOOL(sem4UseMTLEvent, MVK_ALLOW_METAL_EVENTS); - if ( !sem4UseMTLEvent ) { + if ( !mvkGetEnvVarNumber("MVK_CONFIG_ALLOW_METAL_EVENTS", 1.0) ) { evCfg.semaphoreUseMTLEvent = (MVKVkSemaphoreSupportStyle)false; // Disabled. Also semaphoreSupportStyle MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE. } @@ -86,18 +69,16 @@ static void mvkInitConfigFromEnvVars() { // MVK_CONFIG_PERFORMANCE_LOGGING_INLINE env var was used, and activityPerformanceLoggingStyle // was not already set by MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE, set // activityPerformanceLoggingStyle to MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE. - bool logPerfInline; - MVK_SET_FROM_ENV_OR_BUILD_BOOL(logPerfInline, MVK_CONFIG_PERFORMANCE_LOGGING_INLINE); + bool logPerfInline = mvkGetEnvVarNumber("MVK_CONFIG_PERFORMANCE_LOGGING_INLINE", 0.0); if (logPerfInline && evCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) { evCfg.activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE; } - mvkSetConfig(evCfg); } static MVKConfiguration _mvkConfig; -static std::string _autoGPUCaptureOutputFile; +static std::string _autoGPUCaptureOutputFilepath; // Returns the MoltenVK config, lazily initializing it if necessary. 
// We initialize lazily instead of in a library constructor function to @@ -109,29 +90,41 @@ const MVKConfiguration& mvkConfig() { return _mvkConfig; } -// Sets config content, and updates any content that needs baking, including copying the contents -// of strings from the incoming MVKConfiguration member to a corresponding std::string, and then -// repointing the MVKConfiguration member to the contents of the std::string. void mvkSetConfig(const MVKConfiguration& mvkConfig) { - _mvkConfig = mvkConfig; + mvkSetConfig(_mvkConfig, mvkConfig, _autoGPUCaptureOutputFilepath); +} + +// Sets destination config content from the source content, validates content, +// and updates any content that needs baking, including copying the contents of +// strings from the incoming MVKConfiguration member to a corresponding std::string, +// and then repointing the MVKConfiguration member to the contents of the std::string. +void mvkSetConfig(MVKConfiguration& dstMVKConfig, + const MVKConfiguration& srcMVKConfig, + std::string& autoGPUCaptureOutputFilepath) { + + dstMVKConfig = srcMVKConfig; // Ensure the API version is supported, and add the VK_HEADER_VERSION. - _mvkConfig.apiVersionToAdvertise = std::min(_mvkConfig.apiVersionToAdvertise, MVK_VULKAN_API_VERSION); - _mvkConfig.apiVersionToAdvertise = VK_MAKE_VERSION(VK_VERSION_MAJOR(_mvkConfig.apiVersionToAdvertise), - VK_VERSION_MINOR(_mvkConfig.apiVersionToAdvertise), - VK_HEADER_VERSION); + dstMVKConfig.apiVersionToAdvertise = std::min(dstMVKConfig.apiVersionToAdvertise, MVK_VULKAN_API_VERSION); + dstMVKConfig.apiVersionToAdvertise = VK_MAKE_VERSION(VK_VERSION_MAJOR(dstMVKConfig.apiVersionToAdvertise), + VK_VERSION_MINOR(dstMVKConfig.apiVersionToAdvertise), + VK_HEADER_VERSION); // Deprecated legacy support for specific case where both legacy semaphoreUseMTLEvent // (now aliased to semaphoreSupportStyle) and legacy semaphoreUseMTLFence are explicitly // disabled by the app. 
In this case the app had been using CPU emulation, so use // MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK. - if ( !_mvkConfig.semaphoreUseMTLEvent && !_mvkConfig.semaphoreUseMTLFence ) { - _mvkConfig.semaphoreSupportStyle = MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK; + if ( !dstMVKConfig.semaphoreUseMTLEvent && !dstMVKConfig.semaphoreUseMTLFence ) { + dstMVKConfig.semaphoreSupportStyle = MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK; } // Set capture file path string - if (_mvkConfig.autoGPUCaptureOutputFilepath) { - _autoGPUCaptureOutputFile = _mvkConfig.autoGPUCaptureOutputFilepath; + if (dstMVKConfig.autoGPUCaptureOutputFilepath) { + autoGPUCaptureOutputFilepath = dstMVKConfig.autoGPUCaptureOutputFilepath; } - _mvkConfig.autoGPUCaptureOutputFilepath = (char*)_autoGPUCaptureOutputFile.c_str(); + dstMVKConfig.autoGPUCaptureOutputFilepath = autoGPUCaptureOutputFilepath.c_str(); + + // Clamp timestampPeriodLowPassAlpha between 0.0 and 1.0. + dstMVKConfig.timestampPeriodLowPassAlpha = mvkClamp(dstMVKConfig.timestampPeriodLowPassAlpha, 0.0f, 1.0f); } + diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h index 86215bf9..7da71657 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h @@ -23,6 +23,7 @@ #include "mvk_vulkan.h" #include "mvk_config.h" #include "MVKLogging.h" +#include // Expose MoltenVK Apple surface extension functionality @@ -69,7 +70,7 @@ #endif #if MVK_TVOS -# define MVK_SUPPORT_IOSURFACE_BOOL (__TV_OS_VERSION_MIN_REQUIRED >= __TVOS_11_0) +# define MVK_SUPPORT_IOSURFACE_BOOL (__TV_OS_VERSION_MIN_REQUIRED >= __TVOS_11_0) #endif @@ -79,9 +80,25 @@ /** Global function to access MoltenVK configuration info. */ const MVKConfiguration& mvkConfig(); -/** Global function to update MoltenVK configuration info. */ +/** Sets the MoltenVK global configuration content. 
*/ void mvkSetConfig(const MVKConfiguration& mvkConfig); +/** + * Sets the content from the source config into the destination + * config, while using the string object to retain string content. + */ +void mvkSetConfig(MVKConfiguration& dstMVKConfig, + const MVKConfiguration& srcMVKConfig, + std::string& autoGPUCaptureOutputFilepath); + +/** + * Enable debug mode. + * By default, disabled for Release builds and enabled for Debug builds. + */ +#ifndef MVK_CONFIG_DEBUG +# define MVK_CONFIG_DEBUG MVK_DEBUG +#endif + /** Flip the vertex coordinate in shaders. Enabled by default. */ #ifndef MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y # define MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y 1 @@ -244,11 +261,17 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig); #ifndef MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE # define MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE #endif -#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated -# define MVK_ALLOW_METAL_EVENTS 1 +#ifndef MVK_CONFIG_ALLOW_METAL_EVENTS +# define MVK_CONFIG_ALLOW_METAL_EVENTS 1 #endif -#ifndef MVK_ALLOW_METAL_FENCES // Deprecated -# define MVK_ALLOW_METAL_FENCES 1 +#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated +# define MVK_ALLOW_METAL_EVENTS MVK_CONFIG_ALLOW_METAL_EVENTS +#endif +#ifndef MVK_CONFIG_ALLOW_METAL_FENCES +# define MVK_CONFIG_ALLOW_METAL_FENCES 1 +#endif +#ifndef MVK_ALLOW_METAL_FENCES // Deprecated +# define MVK_ALLOW_METAL_FENCES MVK_CONFIG_ALLOW_METAL_FENCES #endif /** Substitute Metal 2D textures for Vulkan 1D images. Enabled by default. */ @@ -303,3 +326,11 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig); #ifndef MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION # define MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION 0 #endif + +/** + * The alpha value of a lowpass filter tracking VkPhysicalDeviceLimits::timestampPeriod. + * This can be set to a float between 0.0 and 1.0. 
+ */ +#ifndef MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA +# define MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA 1.0 +#endif diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp index 29ee115f..d00fb397 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp @@ -21,6 +21,44 @@ #define CASE_STRINGIFY(V) case V: return #V +const char* mvkVkCommandName(MVKCommandUse cmdUse) { + switch (cmdUse) { + case kMVKCommandUseBeginCommandBuffer: return "vkBeginCommandBuffer (prefilled VkCommandBuffer)"; + case kMVKCommandUseQueueSubmit: return "vkQueueSubmit"; + case kMVKCommandUseAcquireNextImage: return "vkAcquireNextImageKHR"; + case kMVKCommandUseQueuePresent: return "vkQueuePresentKHR"; + case kMVKCommandUseQueueWaitIdle: return "vkQueueWaitIdle"; + case kMVKCommandUseDeviceWaitIdle: return "vkDeviceWaitIdle"; + case kMVKCommandUseInvalidateMappedMemoryRanges: return "vkInvalidateMappedMemoryRanges"; + case kMVKCommandUseBeginRendering: return "vkCmdBeginRendering"; + case kMVKCommandUseBeginRenderPass: return "vkCmdBeginRenderPass"; + case kMVKCommandUseNextSubpass: return "vkCmdNextSubpass"; + case kMVKCommandUseRestartSubpass: return "Metal renderpass restart"; + case kMVKCommandUsePipelineBarrier: return "vkCmdPipelineBarrier"; + case kMVKCommandUseBlitImage: return "vkCmdBlitImage"; + case kMVKCommandUseCopyImage: return "vkCmdCopyImage"; + case kMVKCommandUseResolveImage: return "vkCmdResolveImage (resolve stage)"; + case kMVKCommandUseResolveExpandImage: return "vkCmdResolveImage (expand stage)"; + case kMVKCommandUseResolveCopyImage: return "vkCmdResolveImage (copy stage)"; + case kMVKCommandUseCopyBuffer: return "vkCmdCopyBuffer"; + case kMVKCommandUseCopyBufferToImage: return "vkCmdCopyBufferToImage"; + case kMVKCommandUseCopyImageToBuffer: return "vkCmdCopyImageToBuffer"; + case kMVKCommandUseFillBuffer: return "vkCmdFillBuffer"; + case kMVKCommandUseUpdateBuffer: 
return "vkCmdUpdateBuffer"; + case kMVKCommandUseClearAttachments: return "vkCmdClearAttachments"; + case kMVKCommandUseClearColorImage: return "vkCmdClearColorImage"; + case kMVKCommandUseClearDepthStencilImage: return "vkCmdClearDepthStencilImage"; + case kMVKCommandUseResetQueryPool: return "vkCmdResetQueryPool"; + case kMVKCommandUseDispatch: return "vkCmdDispatch"; + case kMVKCommandUseTessellationVertexTessCtl: return "vkCmdDraw (vertex and tess control stages)"; + case kMVKCommandUseDrawIndirectConvertBuffers: return "vkCmdDrawIndirect (convert indirect buffers)"; + case kMVKCommandUseCopyQueryPoolResults: return "vkCmdCopyQueryPoolResults"; + case kMVKCommandUseAccumOcclusionQuery: return "Post-render-pass occlusion query accumulation"; + case kMVKCommandUseRecordGPUCounterSample: return "Record GPU Counter Sample"; + default: return "Unknown Vulkan command"; + } +} + const char* mvkVkResultName(VkResult vkResult) { switch (vkResult) { diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index b8f10720..d3aa660a 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -60,10 +60,13 @@ typedef struct { #pragma mark - #pragma mark Vulkan support +/** A generic 32-bit color permitting float, int32, or uint32 values. */ +typedef VkClearColorValue MVKColor32; + /** Tracks the Vulkan command currently being used. */ typedef enum : uint8_t { kMVKCommandUseNone = 0, /**< No use defined. */ - kMVKCommandUseEndCommandBuffer, /**< vkEndCommandBuffer (prefilled VkCommandBuffer). */ + kMVKCommandUseBeginCommandBuffer, /**< vkBeginCommandBuffer (prefilled VkCommandBuffer). */ kMVKCommandUseQueueSubmit, /**< vkQueueSubmit. */ kMVKCommandUseAcquireNextImage, /**< vkAcquireNextImageKHR. */ kMVKCommandUseQueuePresent, /**< vkQueuePresentKHR. */ @@ -73,7 +76,7 @@ typedef enum : uint8_t { kMVKCommandUseBeginRendering, /**< vkCmdBeginRendering. 
*/ kMVKCommandUseBeginRenderPass, /**< vkCmdBeginRenderPass. */ kMVKCommandUseNextSubpass, /**< vkCmdNextSubpass. */ - kMVKCommandUseRestartSubpass, /**< Restart a subpass because of explicit or implicit barrier. */ + kMVKCommandUseRestartSubpass, /**< Create a new Metal renderpass due to Metal requirements. */ kMVKCommandUsePipelineBarrier, /**< vkCmdPipelineBarrier. */ kMVKCommandUseBlitImage, /**< vkCmdBlitImage. */ kMVKCommandUseCopyImage, /**< vkCmdCopyImage. */ @@ -99,11 +102,14 @@ typedef enum : uint8_t { /** Represents a given stage of a graphics pipeline. */ enum MVKGraphicsStage { - kMVKGraphicsStageVertex = 0, /**< The vertex shader stage. */ - kMVKGraphicsStageTessControl, /**< The tessellation control shader stage. */ + kMVKGraphicsStageVertex = 0, /**< The tessellation vertex compute shader stage. */ + kMVKGraphicsStageTessControl, /**< The tessellation control compute shader stage. */ kMVKGraphicsStageRasterization /**< The rest of the pipeline. */ }; +/** Returns the name of the command defined by the command use. */ +const char* mvkVkCommandName(MVKCommandUse cmdUse); + /** Returns the name of the result value. */ const char* mvkVkResultName(VkResult vkResult); @@ -139,7 +145,7 @@ static inline std::string mvkGetMoltenVKVersionString(uint32_t mvkVersion) { /** Returns whether the specified positive value is a power-of-two. */ template static constexpr bool mvkIsPowerOfTwo(T value) { - return value && ((value & (value - 1)) == 0); + return value > 0 && ((value & (value - 1)) == 0); } /** @@ -275,21 +281,21 @@ void mvkFlipVertically(void* rowMajorData, uint32_t rowCount, size_t bytesPerRow * They are ridiculously large numbers, but low enough to be safely used as both * uint and int values without risking overflowing between positive and negative values. 
*/ -static int32_t kMVKUndefinedLargePositiveInt32 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); -static int32_t kMVKUndefinedLargeNegativeInt32 = -kMVKUndefinedLargePositiveInt32; -static uint32_t kMVKUndefinedLargeUInt32 = kMVKUndefinedLargePositiveInt32; -static int64_t kMVKUndefinedLargePositiveInt64 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); -static int64_t kMVKUndefinedLargeNegativeInt64 = -kMVKUndefinedLargePositiveInt64; -static uint64_t kMVKUndefinedLargeUInt64 = kMVKUndefinedLargePositiveInt64; +static constexpr int32_t kMVKUndefinedLargePositiveInt32 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); +static constexpr int32_t kMVKUndefinedLargeNegativeInt32 = -kMVKUndefinedLargePositiveInt32; +static constexpr uint32_t kMVKUndefinedLargeUInt32 = kMVKUndefinedLargePositiveInt32; +static constexpr int64_t kMVKUndefinedLargePositiveInt64 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); +static constexpr int64_t kMVKUndefinedLargeNegativeInt64 = -kMVKUndefinedLargePositiveInt64; +static constexpr uint64_t kMVKUndefinedLargeUInt64 = kMVKUndefinedLargePositiveInt64; #pragma mark Vulkan structure support functions /** Returns a VkExtent2D created from the width and height of a VkExtent3D. */ -static inline VkExtent2D mvkVkExtent2DFromVkExtent3D(VkExtent3D e) { return {e.width, e.height }; } +static constexpr VkExtent2D mvkVkExtent2DFromVkExtent3D(VkExtent3D e) { return {e.width, e.height }; } /** Returns a VkExtent3D, created from a VkExtent2D, and with depth of 1. */ -static inline VkExtent3D mvkVkExtent3DFromVkExtent2D(VkExtent2D e) { return {e.width, e.height, 1U }; } +static constexpr VkExtent3D mvkVkExtent3DFromVkExtent2D(VkExtent2D e) { return {e.width, e.height, 1U }; } /** Returns whether the two Vulkan extents are equal by comparing their respective components. 
*/ static constexpr bool mvkVkExtent2DsAreEqual(VkExtent2D e1, VkExtent2D e2) { @@ -330,13 +336,13 @@ static constexpr uint32_t mvkPackSwizzle(VkComponentMapping components) { } /** Unpacks a single 32-bit word containing four swizzle components. */ -static inline VkComponentMapping mvkUnpackSwizzle(uint32_t packed) { - VkComponentMapping components; - components.r = (VkComponentSwizzle)((packed >> 0) & 0xFF); - components.g = (VkComponentSwizzle)((packed >> 8) & 0xFF); - components.b = (VkComponentSwizzle)((packed >> 16) & 0xFF); - components.a = (VkComponentSwizzle)((packed >> 24) & 0xFF); - return components; +static constexpr VkComponentMapping mvkUnpackSwizzle(uint32_t packed) { + return { + .r = (VkComponentSwizzle)((packed >> 0) & 0xFF), + .g = (VkComponentSwizzle)((packed >> 8) & 0xFF), + .b = (VkComponentSwizzle)((packed >> 16) & 0xFF), + .a = (VkComponentSwizzle)((packed >> 24) & 0xFF), + }; } /** @@ -350,8 +356,8 @@ static inline VkComponentMapping mvkUnpackSwizzle(uint32_t packed) { * and matches any value. */ static constexpr bool mvkVKComponentSwizzlesMatch(VkComponentSwizzle cs1, - VkComponentSwizzle cs2, - VkComponentSwizzle csPos) { + VkComponentSwizzle cs2, + VkComponentSwizzle csPos) { return ((cs1 == cs2) || ((cs1 == VK_COMPONENT_SWIZZLE_IDENTITY) && (cs2 == csPos)) || ((cs2 == VK_COMPONENT_SWIZZLE_IDENTITY) && (cs1 == csPos)) || @@ -381,24 +387,24 @@ static constexpr bool mvkVkComponentMappingsMatch(VkComponentMapping cm1, VkComp /** Rounds the value to nearest integer using half-to-even rounding. */ static inline double mvkRoundHalfToEven(const double val) { - return val - std::remainder(val, 1.0); // remainder() uses half-to-even rounding, and unfortunately isn't constexpr until C++23. + return val - std::remainder(val, 1.0); // remainder() uses half-to-even rounding, but unfortunately isn't constexpr until C++23. } /** Returns whether the value will fit inside the numeric type. 
*/ template -const bool mvkFits(const Tval& val) { +static constexpr bool mvkFits(const Tval& val) { return val <= std::numeric_limits::max(); } /** Clamps the value between the lower and upper bounds, inclusive. */ template -const T& mvkClamp(const T& val, const T& lower, const T& upper) { +static constexpr const T& mvkClamp(const T& val, const T& lower, const T& upper) { return std::min(std::max(val, lower), upper); } /** Returns the result of a division, rounded up. */ template -constexpr typename std::common_type::type mvkCeilingDivide(T numerator, U denominator) { +static constexpr typename std::common_type::type mvkCeilingDivide(T numerator, U denominator) { typedef typename std::common_type::type R; // Short circuit very common usecase of dividing by one. return (denominator == 1) ? numerator : (R(numerator) + denominator - 1) / denominator; @@ -424,18 +430,18 @@ struct MVKAbs { /** Returns the absolute value of the difference of two numbers. */ template -constexpr typename std::common_type::type mvkAbsDiff(T x, U y) { +static constexpr typename std::common_type::type mvkAbsDiff(T x, U y) { return x >= y ? x - y : y - x; } /** Returns the greatest common divisor of two numbers. */ template -constexpr T mvkGreatestCommonDivisorImpl(T a, T b) { +static constexpr T mvkGreatestCommonDivisorImpl(T a, T b) { return b == 0 ? a : mvkGreatestCommonDivisorImpl(b, a % b); } template -constexpr typename std::common_type::type mvkGreatestCommonDivisor(T a, U b) { +static constexpr typename std::common_type::type mvkGreatestCommonDivisor(T a, U b) { typedef typename std::common_type::type R; typedef typename std::make_unsigned::type UI; return static_cast(mvkGreatestCommonDivisorImpl(static_cast(MVKAbs::eval(a)), static_cast(MVKAbs::eval(b)))); @@ -443,7 +449,7 @@ constexpr typename std::common_type::type mvkGreatestCommonDivisor(T a, U /** Returns the least common multiple of two numbers. 
*/ template -constexpr typename std::common_type::type mvkLeastCommonMultiple(T a, U b) { +static constexpr typename std::common_type::type mvkLeastCommonMultiple(T a, U b) { typedef typename std::common_type::type R; return (a == 0 && b == 0) ? 0 : MVKAbs::eval(a) / mvkGreatestCommonDivisor(a, b) * MVKAbs::eval(b); } @@ -460,7 +466,7 @@ constexpr typename std::common_type::type mvkLeastCommonMultiple(T a, U b) * value returned by previous calls as the seed in subsequent calls. */ template -std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 5381) { +static constexpr std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 5381) { std::size_t hash = seed; for (std::size_t i = 0; i < count; i++) { hash = ((hash << 5) + hash) ^ pVals[i]; } return hash; @@ -475,25 +481,26 @@ std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 53 */ template struct MVKArrayRef { - Type* data; - const size_t size; +public: + constexpr Type* begin() const { return _data; } + constexpr Type* end() const { return &_data[_size]; } + constexpr Type* data() const { return _data; } + constexpr size_t size() const { return _size; } + constexpr size_t byteSize() const { return _size * sizeof(Type); } + constexpr Type& operator[]( const size_t i ) const { return _data[i]; } + constexpr MVKArrayRef() : MVKArrayRef(nullptr, 0) {} + constexpr MVKArrayRef(Type* d, size_t s) : _data(d), _size(s) {} + template , bool> = true> + constexpr MVKArrayRef(MVKArrayRef other) : _data(other.data()), _size(other.size()) {} - const Type* begin() const { return data; } - const Type* end() const { return &data[size]; } - const Type& operator[]( const size_t i ) const { return data[i]; } - Type& operator[]( const size_t i ) { return data[i]; } - MVKArrayRef& operator=(const MVKArrayRef& other) { - data = other.data; - *(size_t*)&size = other.size; - return *this; - } - MVKArrayRef() : MVKArrayRef(nullptr, 0) {} - MVKArrayRef(Type* d, size_t s) 
: data(d), size(s) {} +protected: + Type* _data; + size_t _size; }; /** Ensures the size of the specified container is at least the specified size. */ template -void mvkEnsureSize(C& container, S size) { +static void mvkEnsureSize(C& container, S size) { if (size > container.size()) { container.resize(size); } } @@ -502,7 +509,7 @@ void mvkEnsureSize(C& container, S size) { * each object, including freeing the object memory, and clearing the container. */ template -void mvkDestroyContainerContents(C& container) { +static void mvkDestroyContainerContents(C& container) { for (auto elem : container) { elem->destroy(); } container.clear(); } @@ -513,7 +520,7 @@ void mvkDestroyContainerContents(C& container) { */ #ifdef __OBJC__ template -void mvkReleaseContainerContents(C& container) { +static void mvkReleaseContainerContents(C& container) { for (auto elem : container) { [elem release]; } container.clear(); } @@ -521,14 +528,14 @@ void mvkReleaseContainerContents(C& container) { /** Returns whether the container contains an item equal to the value. */ template -bool mvkContains(C& container, const T& val) { +static constexpr bool mvkContains(C& container, const T& val) { for (const T& cVal : container) { if (cVal == val) { return true; } } return false; } /** Removes the first occurance of the specified value from the specified container. */ template -void mvkRemoveFirstOccurance(C& container, T val) { +static void mvkRemoveFirstOccurance(C& container, T val) { for (auto iter = container.begin(), end = container.end(); iter != end; iter++) { if( *iter == val ) { container.erase(iter); @@ -539,7 +546,7 @@ void mvkRemoveFirstOccurance(C& container, T val) { /** Removes all occurances of the specified value from the specified container. 
*/ template -void mvkRemoveAllOccurances(C& container, T val) { +static void mvkRemoveAllOccurances(C& container, T val) { container.erase(std::remove(container.begin(), container.end(), val), container.end()); } @@ -548,7 +555,7 @@ void mvkRemoveAllOccurances(C& container, T val) { /** Selects and returns one of the values, based on the platform OS. */ template -const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) { +static constexpr const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) { #if MVK_IOS_OR_TVOS return iOSVal; #endif @@ -562,22 +569,29 @@ const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) { * The optional count allows clearing multiple elements in an array. */ template -void mvkClear(T* pVal, size_t count = 1) { if (pVal) { memset(pVal, 0, sizeof(T) * count); } } +static void mvkClear(T* pDst, size_t count = 1) { + if ( !pDst ) { return; } // Bad pointer + if constexpr(std::is_arithmetic_v) { if (count == 1) { *pDst = static_cast(0); } } // Fast clear of a single primitive + memset(pDst, 0, sizeof(T) * count); // Memory clear of complex content or array +} /** * If pVal is not null, overrides the const declaration, and clears the memory occupied by *pVal * by writing zeros to all bytes. The optional count allows clearing multiple elements in an array. */ template -void mvkClear(const T* pVal, size_t count = 1) { mvkClear((T*)pVal, count); } +static void mvkClear(const T* pVal, size_t count = 1) { mvkClear((T*)pVal, count); } /** * If pSrc and pDst are both not null, copies the contents of the source value to the * destination value. The optional count allows copying of multiple elements in an array. 
*/ template -void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) { - if (pSrc && pDst) { memcpy(pDst, pSrc, sizeof(T) * count); } +static void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) { + if ( !pDst || !pSrc ) { return; } // Bad pointers + if (pDst == pSrc) { return; } // Same object + if constexpr(std::is_arithmetic_v) { if (count == 1) { *pDst = *pSrc; } } // Fast copy of a single primitive + memcpy(pDst, pSrc, sizeof(T) * count); // Memory copy of complex content or array } /** @@ -585,8 +599,11 @@ void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) { * otherwise returns false. The optional count allows comparing multiple elements in an array. */ template -bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) { - return (pV1 && pV2) ? (memcmp(pV1, pV2, sizeof(T) * count) == 0) : false; +static constexpr bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) { + if ( !pV1 || !pV2 ) { return false; } // Bad pointers + if (pV1 == pV2) { return true; } // Same object + if constexpr(std::is_arithmetic_v) { if (count == 1) { return *pV1 == *pV2; } } // Fast compare of a single primitive + return memcmp(pV1, pV2, sizeof(T) * count) == 0; // Memory compare of complex content or array } /** @@ -595,7 +612,7 @@ bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) { * which works on individual chars or char arrays, not strings. * Returns false if either string is null. */ -static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2, size_t count = 1) { +static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2) { return pV1 && pV2 && (pV1 == pV2 || strcmp(pV1, pV2) == 0); } @@ -628,10 +645,18 @@ static constexpr bool mvkSetOrClear(T* pDest, const T* pSrc) { template void mvkEnableFlags(Tv& value, const Tm bitMask) { value = (Tv)(value | bitMask); } +/** Enables all the flags (sets bits to 1) within the value parameter.
*/ +template +void mvkEnableAllFlags(Tv& value) { value = ~static_cast(0); } + /** Disables the flags (sets bits to 0) within the value parameter specified by the bitMask parameter. */ template void mvkDisableFlags(Tv& value, const Tm bitMask) { value = (Tv)(value & ~(Tv)bitMask); } +/** Disables all the flags (sets bits to 0) within the value parameter. */ +template +void mvkDisableAllFlags(Tv& value) { value = static_cast(0); } + /** Returns whether the specified value has ANY of the flags specified in bitMask enabled (set to 1). */ template static constexpr bool mvkIsAnyFlagEnabled(Tv value, const Tm bitMask) { return ((value & bitMask) != 0); } diff --git a/MoltenVK/MoltenVK/Utility/MVKLogging.h b/MoltenVK/MoltenVK/Utility/MVKLogging.h index c65127b0..09618500 100644 --- a/MoltenVK/MoltenVK/Utility/MVKLogging.h +++ b/MoltenVK/MoltenVK/Utility/MVKLogging.h @@ -57,9 +57,9 @@ extern "C" { * MVKLogErrorIf(cond, fmt, ...) - same as MVKLogError if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * - * MVKLogWarning(fmt, ...) - recommended for not immediately harmful errors + * MVKLogWarn(fmt, ...) - recommended for not immediately harmful errors * - will print if MVK_LOG_LEVEL_WARNING is set on. - * MVKLogWarningIf(cond, fmt, ...) - same as MVKLogWarning if boolean "cond" condition expression evaluates to YES, + * MVKLogWarnIf(cond, fmt, ...) - same as MVKLogWarn if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * * MVKLogInfo(fmt, ...) - recommended for general, infrequent, information messages @@ -67,7 +67,7 @@ extern "C" { * MVKLogInfoIf(cond, fmt, ...) - same as MVKLogInfo if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * - * MVKLogDebug(fmt, ...) - recommended for temporary use during debugging + * MVKLogDebug(fmt, ...) - recommended for temporary use during debugging * - will print if MVK_LOG_LEVEL_DEBUG is set on. * MVKLogDebugIf(cond, fmt, ...)
- same as MVKLogDebug if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. @@ -148,11 +148,11 @@ extern "C" { // Warning logging - for not immediately harmful errors #if MVK_LOG_LEVEL_WARNING -# define MVKLogWarning(fmt, ...) MVKLogWarningImpl(fmt, ##__VA_ARGS__) -# define MVKLogWarningIf(cond, fmt, ...) if(cond) { MVKLogWarningImpl(fmt, ##__VA_ARGS__); } +# define MVKLogWarn(fmt, ...) MVKLogWarnImpl(fmt, ##__VA_ARGS__) +# define MVKLogWarnIf(cond, fmt, ...) if(cond) { MVKLogWarnImpl(fmt, ##__VA_ARGS__); } #else -# define MVKLogWarning(...) -# define MVKLogWarningIf(cond, fmt, ...) +# define MVKLogWarn(...) +# define MVKLogWarnIf(cond, fmt, ...) #endif // Info logging - for general, non-performance affecting information messages @@ -182,11 +182,11 @@ extern "C" { # define MVKLogTraceIf(cond, fmt, ...) #endif -#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) -#define MVKLogWarningImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__) -#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) -#define MVKLogDebugImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) -#define MVKLogTraceImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) +#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) +#define MVKLogWarnImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__) +#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) +#define MVKLogDebugImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) +#define MVKLogTraceImpl(fmt, ...) 
reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) // Assertions #ifdef NS_BLOCK_ASSERTIONS diff --git a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h index 6294f913..b6e1277c 100755 --- a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h +++ b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h @@ -298,12 +298,12 @@ public: reverse_iterator rbegin() const { return reverse_iterator( end() ); } reverse_iterator rend() const { return reverse_iterator( begin() ); } - const MVKArrayRef contents() const { return MVKArrayRef(data(), size()); } - MVKArrayRef contents() { return MVKArrayRef(data(), size()); } + MVKArrayRef contents() const { return MVKArrayRef(data(), size()); } + MVKArrayRef< Type> contents() { return MVKArrayRef< Type>(data(), size()); } - const Type &operator[]( const size_t i ) const { return alc[i]; } + const Type &operator[]( const size_t i ) const { return alc[i]; } Type &operator[]( const size_t i ) { return alc[i]; } - const Type &at( const size_t i ) const { return alc[i]; } + const Type &at( const size_t i ) const { return alc[i]; } Type &at( const size_t i ) { return alc[i]; } const Type &front() const { return alc[0]; } Type &front() { return alc[0]; } diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp index 12afed6e..9984ef6e 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp @@ -47,6 +47,12 @@ class MVKPixelFormats; * of an MVKBaseObject subclass, which is true for all but static calling functions. 
*/ +MTLTextureType mvkMTLTextureTypeFromVkImageTypeObj(VkImageType vkImageType, uint32_t arraySize, bool isMultisample, MVKBaseObject* mvkObj); +#define mvkMTLTextureTypeFromVkImageType(vkImageType, arraySize, isMultisample) mvkMTLTextureTypeFromVkImageTypeObj(vkImageType, arraySize, isMultisample, this) + +MTLTextureType mvkMTLTextureTypeFromVkImageViewTypeObj(VkImageViewType vkImageViewType, bool isMultisample, MVKBaseObject* mvkObj); +#define mvkMTLTextureTypeFromVkImageViewType(vkImageViewType, isMultisample) mvkMTLTextureTypeFromVkImageViewTypeObj(vkImageViewType, isMultisample, this) + MTLPrimitiveType mvkMTLPrimitiveTypeFromVkPrimitiveTopologyInObj(VkPrimitiveTopology vkTopology, MVKBaseObject* mvkObj); #define mvkMTLPrimitiveTypeFromVkPrimitiveTopology(vkTopology) mvkMTLPrimitiveTypeFromVkPrimitiveTopologyInObj(vkTopology, this) diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm index caa77623..0f997f50 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm @@ -32,119 +32,133 @@ using namespace std; #pragma mark Pixel formats -static MVKPixelFormats _platformPixelFormats; +static std::unique_ptr _platformPixelFormats; + +static MVKPixelFormats* getPlatformPixelFormats() { + if ( !_platformPixelFormats ) { _platformPixelFormats.reset(new MVKPixelFormats()); } + return _platformPixelFormats.get(); +} MVK_PUBLIC_SYMBOL bool mvkVkFormatIsSupported(VkFormat vkFormat) { - return _platformPixelFormats.isSupported(vkFormat); + return getPlatformPixelFormats()->isSupported(vkFormat); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsSupported(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isSupported(mtlFormat); + return getPlatformPixelFormats()->isSupported(mtlFormat); } MVK_PUBLIC_SYMBOL MVKFormatType mvkFormatTypeFromVkFormat(VkFormat vkFormat) { - return _platformPixelFormats.getFormatType(vkFormat); + return getPlatformPixelFormats()->getFormatType(vkFormat); 
} MVK_PUBLIC_SYMBOL MVKFormatType mvkFormatTypeFromMTLPixelFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getFormatType(mtlFormat); + return getPlatformPixelFormats()->getFormatType(mtlFormat); } MVK_PUBLIC_SYMBOL MTLPixelFormat mvkMTLPixelFormatFromVkFormat(VkFormat vkFormat) { - return _platformPixelFormats.getMTLPixelFormat(vkFormat); + return getPlatformPixelFormats()->getMTLPixelFormat(vkFormat); } MVK_PUBLIC_SYMBOL VkFormat mvkVkFormatFromMTLPixelFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getVkFormat(mtlFormat); + return getPlatformPixelFormats()->getVkFormat(mtlFormat); } MVK_PUBLIC_SYMBOL uint32_t mvkVkFormatBytesPerBlock(VkFormat vkFormat) { - return _platformPixelFormats.getBytesPerBlock(vkFormat); + return getPlatformPixelFormats()->getBytesPerBlock(vkFormat); } MVK_PUBLIC_SYMBOL uint32_t mvkMTLPixelFormatBytesPerBlock(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getBytesPerBlock(mtlFormat); + return getPlatformPixelFormats()->getBytesPerBlock(mtlFormat); } MVK_PUBLIC_SYMBOL VkExtent2D mvkVkFormatBlockTexelSize(VkFormat vkFormat) { - return _platformPixelFormats.getBlockTexelSize(vkFormat); + return getPlatformPixelFormats()->getBlockTexelSize(vkFormat); } MVK_PUBLIC_SYMBOL VkExtent2D mvkMTLPixelFormatBlockTexelSize(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getBlockTexelSize(mtlFormat); + return getPlatformPixelFormats()->getBlockTexelSize(mtlFormat); } MVK_PUBLIC_SYMBOL float mvkVkFormatBytesPerTexel(VkFormat vkFormat) { - return _platformPixelFormats.getBytesPerTexel(vkFormat); + return getPlatformPixelFormats()->getBytesPerTexel(vkFormat); } MVK_PUBLIC_SYMBOL float mvkMTLPixelFormatBytesPerTexel(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getBytesPerTexel(mtlFormat); + return getPlatformPixelFormats()->getBytesPerTexel(mtlFormat); } MVK_PUBLIC_SYMBOL size_t mvkVkFormatBytesPerRow(VkFormat vkFormat, uint32_t texelsPerRow) { - return 
_platformPixelFormats.getBytesPerRow(vkFormat, texelsPerRow); + return getPlatformPixelFormats()->getBytesPerRow(vkFormat, texelsPerRow); } MVK_PUBLIC_SYMBOL size_t mvkMTLPixelFormatBytesPerRow(MTLPixelFormat mtlFormat, uint32_t texelsPerRow) { - return _platformPixelFormats.getBytesPerRow(mtlFormat, texelsPerRow); + return getPlatformPixelFormats()->getBytesPerRow(mtlFormat, texelsPerRow); } MVK_PUBLIC_SYMBOL size_t mvkVkFormatBytesPerLayer(VkFormat vkFormat, size_t bytesPerRow, uint32_t texelRowsPerLayer) { - return _platformPixelFormats.getBytesPerLayer(vkFormat, bytesPerRow, texelRowsPerLayer); + return getPlatformPixelFormats()->getBytesPerLayer(vkFormat, bytesPerRow, texelRowsPerLayer); } MVK_PUBLIC_SYMBOL size_t mvkMTLPixelFormatBytesPerLayer(MTLPixelFormat mtlFormat, size_t bytesPerRow, uint32_t texelRowsPerLayer) { - return _platformPixelFormats.getBytesPerLayer(mtlFormat, bytesPerRow, texelRowsPerLayer); + return getPlatformPixelFormats()->getBytesPerLayer(mtlFormat, bytesPerRow, texelRowsPerLayer); } MVK_PUBLIC_SYMBOL VkFormatProperties mvkVkFormatProperties(VkFormat vkFormat) { - return _platformPixelFormats.getVkFormatProperties(vkFormat); + return getPlatformPixelFormats()->getVkFormatProperties(vkFormat); } MVK_PUBLIC_SYMBOL const char* mvkVkFormatName(VkFormat vkFormat) { - return _platformPixelFormats.getName(vkFormat); + return getPlatformPixelFormats()->getName(vkFormat); } MVK_PUBLIC_SYMBOL const char* mvkMTLPixelFormatName(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getName(mtlFormat); + return getPlatformPixelFormats()->getName(mtlFormat); } MVK_PUBLIC_SYMBOL MTLVertexFormat mvkMTLVertexFormatFromVkFormat(VkFormat vkFormat) { - return _platformPixelFormats.getMTLVertexFormat(vkFormat); + return getPlatformPixelFormats()->getMTLVertexFormat(vkFormat); } MVK_PUBLIC_SYMBOL MTLClearColor mvkMTLClearColorFromVkClearValue(VkClearValue vkClearValue, VkFormat vkFormat) { - return _platformPixelFormats.getMTLClearColor(vkClearValue, 
vkFormat); + return getPlatformPixelFormats()->getMTLClearColor(vkClearValue, vkFormat); } MVK_PUBLIC_SYMBOL double mvkMTLClearDepthFromVkClearValue(VkClearValue vkClearValue) { - return _platformPixelFormats.getMTLClearDepthValue(vkClearValue); + return getPlatformPixelFormats()->getMTLClearDepthValue(vkClearValue); } MVK_PUBLIC_SYMBOL uint32_t mvkMTLClearStencilFromVkClearValue(VkClearValue vkClearValue) { - return _platformPixelFormats.getMTLClearStencilValue(vkClearValue); + return getPlatformPixelFormats()->getMTLClearStencilValue(vkClearValue); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsDepthFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isDepthFormat(mtlFormat); + return getPlatformPixelFormats()->isDepthFormat(mtlFormat); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsStencilFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isStencilFormat(mtlFormat); + return getPlatformPixelFormats()->isStencilFormat(mtlFormat); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsPVRTCFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isPVRTCFormat(mtlFormat); + return getPlatformPixelFormats()->isPVRTCFormat(mtlFormat); } + +#undef mvkMTLTextureTypeFromVkImageType MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageType(VkImageType vkImageType, uint32_t arraySize, bool isMultisample) { + return mvkMTLTextureTypeFromVkImageTypeObj(vkImageType, arraySize, isMultisample, nullptr); +} + +MTLTextureType mvkMTLTextureTypeFromVkImageTypeObj(VkImageType vkImageType, + uint32_t arraySize, + bool isMultisample, + MVKBaseObject* mvkObj) { switch (vkImageType) { case VK_IMAGE_TYPE_3D: return MTLTextureType3D; - case VK_IMAGE_TYPE_1D: return (mvkConfig().texture1DAs2D + case VK_IMAGE_TYPE_1D: return (mvkGetMVKConfig(mvkObj).texture1DAs2D ? mvkMTLTextureTypeFromVkImageType(VK_IMAGE_TYPE_2D, arraySize, isMultisample) : (arraySize > 1 ? 
MTLTextureType1DArray : MTLTextureType1D)); case VK_IMAGE_TYPE_2D: @@ -170,14 +184,22 @@ MVK_PUBLIC_SYMBOL VkImageType mvkVkImageTypeFromMTLTextureType(MTLTextureType mt return VK_IMAGE_TYPE_2D; } } + +#undef mvkMTLTextureTypeFromVkImageViewType MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageViewType(VkImageViewType vkImageViewType, bool isMultisample) { + return mvkMTLTextureTypeFromVkImageViewTypeObj(vkImageViewType, isMultisample, nullptr); +} + +MTLTextureType mvkMTLTextureTypeFromVkImageViewTypeObj(VkImageViewType vkImageViewType, + bool isMultisample, + MVKBaseObject* mvkObj) { switch (vkImageViewType) { case VK_IMAGE_VIEW_TYPE_3D: return MTLTextureType3D; case VK_IMAGE_VIEW_TYPE_CUBE: return MTLTextureTypeCube; case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: return MTLTextureTypeCubeArray; - case VK_IMAGE_VIEW_TYPE_1D: return mvkConfig().texture1DAs2D ? mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D, isMultisample) : MTLTextureType1D; - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: return mvkConfig().texture1DAs2D ? mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D_ARRAY, isMultisample) : MTLTextureType1DArray; + case VK_IMAGE_VIEW_TYPE_1D: return mvkGetMVKConfig(mvkObj).texture1DAs2D ? mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D, isMultisample) : MTLTextureType1D; + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: return mvkGetMVKConfig(mvkObj).texture1DAs2D ? 
mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D_ARRAY, isMultisample) : MTLTextureType1DArray; case VK_IMAGE_VIEW_TYPE_2D_ARRAY: #if MVK_MACOS @@ -192,11 +214,11 @@ MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageViewType(VkImageVie } MVK_PUBLIC_SYMBOL MTLTextureUsage mvkMTLTextureUsageFromVkImageUsageFlags(VkImageUsageFlags vkImageUsageFlags, MTLPixelFormat mtlPixFmt) { - return _platformPixelFormats.getMTLTextureUsage(vkImageUsageFlags, mtlPixFmt); + return getPlatformPixelFormats()->getMTLTextureUsage(vkImageUsageFlags, mtlPixFmt); } MVK_PUBLIC_SYMBOL VkImageUsageFlags mvkVkImageUsageFlagsFromMTLTextureUsage(MTLTextureUsage mtlUsage, MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getVkImageUsageFlags(mtlUsage, mtlFormat); + return getPlatformPixelFormats()->getVkImageUsageFlags(mtlUsage, mtlFormat); } MVK_PUBLIC_SYMBOL uint32_t mvkSampleCountFromVkSampleCountFlagBits(VkSampleCountFlagBits vkSampleCountFlag) { @@ -584,23 +606,32 @@ MTLMultisampleStencilResolveFilter mvkMTLMultisampleStencilResolveFilterFromVkRe #endif MVK_PUBLIC_SYMBOL MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport) { - MTLViewport mtlViewport; - mtlViewport.originX = vkViewport.x; - mtlViewport.originY = vkViewport.y; - mtlViewport.width = vkViewport.width; - mtlViewport.height = vkViewport.height; - mtlViewport.znear = vkViewport.minDepth; - mtlViewport.zfar = vkViewport.maxDepth; - return mtlViewport; + return { + .originX = vkViewport.x, + .originY = vkViewport.y, + .width = vkViewport.width, + .height = vkViewport.height, + .znear = vkViewport.minDepth, + .zfar = vkViewport.maxDepth + }; } MVK_PUBLIC_SYMBOL MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect) { - MTLScissorRect mtlScissor; - mtlScissor.x = vkRect.offset.x; - mtlScissor.y = vkRect.offset.y; - mtlScissor.width = vkRect.extent.width; - mtlScissor.height = vkRect.extent.height; - return mtlScissor; + return { + .x = (NSUInteger)max(vkRect.offset.x, 0), + .y = 
(NSUInteger)max(vkRect.offset.y, 0), + .width = vkRect.extent.width, + .height = vkRect.extent.height + }; +} + +MVK_PUBLIC_SYMBOL VkRect2D mvkVkRect2DFromMTLScissorRect(MTLScissorRect mtlScissorRect) { + return { + .offset = { .x = (int32_t)mtlScissorRect.x, + .y = (int32_t)mtlScissorRect.y }, + .extent = { .width = (uint32_t)mtlScissorRect.width, + .height = (uint32_t)mtlScissorRect.height } + }; } MVK_PUBLIC_SYMBOL MTLCompareFunction mvkMTLCompareFunctionFromVkCompareOp(VkCompareOp vkOp) { @@ -728,40 +759,50 @@ MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode } } -MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, +MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore) { // Although there are many combined render/compute/host stages in Vulkan, there are only two render // stages in Metal. If the Vulkan stage did not map ONLY to a specific Metal render stage, then if the // barrier is to be placed before the render stages, it should come before the vertex stage, otherwise // if the barrier is to be placed after the render stages, it should come after the fragment stage. if (placeBarrierBefore) { - bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); + bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)); return placeBeforeFragment ? 
MTLRenderStageFragment : MTLRenderStageVertex; } else { - bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT)); + bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT)); return placeAfterVertex ? MTLRenderStageVertex : MTLRenderStageFragment; } } -MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess) { +MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess) { MTLBarrierScope mtlScope = MTLBarrierScope(0); - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_2_INDEX_READ_BIT | + VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_2_UNIFORM_READ_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers; } - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_SHADER_READ_BIT | + VK_ACCESS_2_SHADER_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers | MTLBarrierScopeTextures; } #if MVK_MACOS - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeRenderTargets; } #endif diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index 44b0e5f6..293826a3 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -1466,7 +1466,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineWidth( float lineWidth) { MVKTraceVulkanCallStart(); - MVKAddCmd(SetLineWidth, commandBuffer, lineWidth); MVKTraceVulkanCallEnd(); } @@ -1496,7 +1495,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBounds( float maxDepthBounds) { MVKTraceVulkanCallStart(); - MVKAddCmd(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds); MVKTraceVulkanCallEnd(); } @@ -1564,13 +1562,14 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindIndexBuffer( MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindVertexBuffers( VkCommandBuffer commandBuffer, - uint32_t startBinding, + uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets) { MVKTraceVulkanCallStart(); - MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, startBinding, bindingCount, pBuffers, pOffsets); + MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, + firstBinding, bindingCount, pBuffers, pOffsets, nullptr, nullptr); MVKTraceVulkanCallEnd(); } @@ -1964,7 +1963,7 @@ static void mvkCmdBeginRenderPass( MVKAddCmdFrom5Thresholds(BeginRenderPass, pRenderPassBegin->clearValueCount, 1, 2, - attachments.size, 0, 1, 2, + attachments.size(), 0, 1, 2, commandBuffer, pRenderPassBegin, pSubpassBeginInfo, @@ -2517,8 +2516,8 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkWaitSemaphores( #pragma mark Vulkan 1.3 
calls MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering( - VkCommandBuffer commandBuffer, - const VkRenderingInfo* pRenderingInfo) { + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo) { MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BeginRendering, pRenderingInfo->colorAttachmentCount, @@ -2526,98 +2525,340 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering( - VkCommandBuffer commandBuffer) { +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindVertexBuffers2( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides) { MVKTraceVulkanCallStart(); - MVKAddCmd(EndRendering, commandBuffer); + MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, + firstBinding, bindingCount, pBuffers, pOffsets, pSizes, pStrides); MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdBindVertexBuffers2, void, VkCommandBuffer, uint32_t, uint32_t, const VkBuffer*, const VkDeviceSize*, const VkDeviceSize*, const VkDeviceSize*) - MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBlitImage2( - VkCommandBuffer commandBuffer, - const VkBlitImageInfo2* pBlitImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkBlitImageInfo2* pBlitImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(BlitImage, pBlitImageInfo->regionCount, 1, commandBuffer, pBlitImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBuffer2( - VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2* pCopyBufferInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(CopyBuffer, pCopyBufferInfo->regionCount, 1, commandBuffer, pCopyBufferInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void 
vkCmdCopyBufferToImage2( - VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyBufferToImageInfo->regionCount, 1, 4, 8, commandBuffer, pCopyBufferToImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImage2( - VkCommandBuffer commandBuffer, - const VkCopyImageInfo2* pCopyImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(CopyImage, pCopyImageInfo->regionCount, 1, commandBuffer, pCopyImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImageToBuffer2( - VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2* pCopyImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyImageInfo->regionCount, 1, 4, 8, commandBuffer, pCopyImageInfo); MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdPipelineBarrier2, void, VkCommandBuffer, const VkDependencyInfo*) -MVK_PUBLIC_VULKAN_STUB(vkCmdResetEvent2, void, VkCommandBuffer, VkEvent, VkPipelineStageFlags2 stageMask) +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering( + VkCommandBuffer commandBuffer) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(EndRendering, commandBuffer); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdPipelineBarrier2( + VkCommandBuffer commandBuffer, + const VkDependencyInfo* pDependencyInfo) { + + MVKTraceVulkanCallStart(); + uint32_t barrierCount = pDependencyInfo->memoryBarrierCount + pDependencyInfo->bufferMemoryBarrierCount + pDependencyInfo->imageMemoryBarrierCount; + MVKAddCmdFrom2Thresholds(PipelineBarrier, barrierCount, 
1, 4, commandBuffer, pDependencyInfo); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResetEvent2( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags2 stageMask) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(ResetEvent, commandBuffer, event, stageMask); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2( - VkCommandBuffer commandBuffer, - const VkResolveImageInfo2* pResolveImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkResolveImageInfo2* pResolveImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(ResolveImage, pResolveImageInfo->regionCount, 1, commandBuffer, pResolveImageInfo); MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdSetCullMode, void, VkCommandBuffer, VkCullModeFlags) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthBiasEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthBoundsTestEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthCompareOp, void, VkCommandBuffer, VkCompareOp) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthTestEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthWriteEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetEvent2, void, VkCommandBuffer, VkEvent, const VkDependencyInfo*) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetFrontFace, void, VkCommandBuffer, VkFrontFace) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveRestartEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveTopology, void, VkCommandBuffer, VkPrimitiveTopology) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetRasterizerDiscardEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetScissorWithCount, void, VkCommandBuffer, uint32_t, const VkRect2D*) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilOp, void, VkCommandBuffer, VkStencilFaceFlags, VkStencilOp, VkStencilOp, VkStencilOp, VkCompareOp) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilTestEnable, void, VkCommandBuffer, 
VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetViewportWithCount, void, VkCommandBuffer, uint32_t, const VkViewport*) -MVK_PUBLIC_VULKAN_STUB(vkCmdWaitEvents2, void, VkCommandBuffer, uint32_t, const VkEvent*, const VkDependencyInfo*) -MVK_PUBLIC_VULKAN_STUB(vkCmdWriteTimestamp2, void, VkCommandBuffer, VkPipelineStageFlags2, VkQueryPool, uint32_t) -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkCreatePrivateDataSlot, VkDevice, const VkPrivateDataSlotCreateInfo*, const VkAllocationCallbacks*, VkPrivateDataSlot*) -MVK_PUBLIC_VULKAN_STUB(vkDestroyPrivateDataSlot, void, VkDevice, VkPrivateDataSlot, const VkAllocationCallbacks*) +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetCullMode( + VkCommandBuffer commandBuffer, + VkCullModeFlags cullMode) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetCullMode, commandBuffer, cullMode); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBiasEnable( + VkCommandBuffer commandBuffer, + VkBool32 depthBiasEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthBiasEnable, commandBuffer, depthBiasEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBoundsTestEnable( + VkCommandBuffer commandBuffer, + VkBool32 depthBoundsTestEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthCompareOp( + VkCommandBuffer commandBuffer, + VkCompareOp depthCompareOp) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthCompareOp, commandBuffer, depthCompareOp); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthTestEnable( + VkCommandBuffer commandBuffer, + VkBool32 depthTestEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthTestEnable, commandBuffer, depthTestEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthWriteEnable( + VkCommandBuffer commandBuffer, + VkBool32 depthWriteEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthWriteEnable, commandBuffer, depthWriteEnable); + 
MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetEvent2( + VkCommandBuffer commandBuffer, + VkEvent event, + const VkDependencyInfo* pDependencyInfo) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetEvent, commandBuffer, event, pDependencyInfo); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetFrontFace( + VkCommandBuffer commandBuffer, + VkFrontFace frontFace) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetFrontFace, commandBuffer, frontFace); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveRestartEnable( + VkCommandBuffer commandBuffer, + VkBool32 primitiveRestartEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetPrimitiveRestartEnable, commandBuffer, primitiveRestartEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveTopology( + VkCommandBuffer commandBuffer, + VkPrimitiveTopology primitiveTopology) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetPrimitiveTopology, commandBuffer, primitiveTopology); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizerDiscardEnable( + VkCommandBuffer commandBuffer, + VkBool32 rasterizerDiscardEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetRasterizerDiscardEnable, commandBuffer, rasterizerDiscardEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetScissorWithCount( + VkCommandBuffer commandBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFromThreshold(SetScissor, scissorCount, 1, commandBuffer, 0, scissorCount, pScissors); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilOp( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetStencilOp, commandBuffer, faceMask, failOp, passOp, depthFailOp, compareOp); + 
MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilTestEnable( + VkCommandBuffer commandBuffer, + VkBool32 stencilTestEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetStencilTestEnable, commandBuffer, stencilTestEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetViewportWithCount( + VkCommandBuffer commandBuffer, + uint32_t viewportCount, + const VkViewport* pViewports) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFromThreshold(SetViewport, viewportCount, 1, commandBuffer, 0, viewportCount, pViewports); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWaitEvents2( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFromThreshold(WaitEvents, eventCount, 1, commandBuffer, eventCount, pEvents, pDependencyInfos); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWriteTimestamp2( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags2 stage, + VkQueryPool queryPool, + uint32_t query) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(WriteTimestamp, commandBuffer, stage, queryPool, query); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlot( + VkDevice device, + const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPrivateDataSlotEXT* pPrivateDataSlot) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlot( + VkDevice device, + VkPrivateDataSlotEXT privateDataSlot, + const VkAllocationCallbacks* pAllocator) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator); + 
MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_STUB(vkGetDeviceBufferMemoryRequirements, void, VkDevice, const VkDeviceBufferMemoryRequirements*, VkMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, VkMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageSparseMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, uint32_t*, VkSparseImageMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkGetPhysicalDeviceToolProperties, VkPhysicalDevice, uint32_t*, VkPhysicalDeviceToolProperties*) -MVK_PUBLIC_VULKAN_STUB(vkGetPrivateData, void, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t*) -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkQueueSubmit2, VkQueue, uint32_t, const VkSubmitInfo2*, VkFence) -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkSetPrivateData, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t) + +MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateData( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t* pData) { + + MVKTraceVulkanCallStart(); + MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; + *pData = mvkPDS->getData(objectType, objectHandle); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence) { + + MVKTraceVulkanCallStart(); + MVKQueue* mvkQ = MVKQueue::getMVKQueue(queue); + VkResult rslt = mvkQ->submit(submitCount, pSubmits, fence, kMVKCommandUseQueueSubmit); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateData( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t data) { + + MVKTraceVulkanCallStart(); + MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; + mvkPDS->setData(objectType, objectHandle, data); + 
MVKTraceVulkanCallEnd(); + return VK_SUCCESS; +} + #pragma mark - #pragma mark VK_KHR_bind_memory2 extension @@ -3102,6 +3343,17 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkGetPhysicalDeviceSurfaceFormats2KHR( } +#pragma mark - +#pragma mark VK_KHR_synchronization2 + +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdPipelineBarrier2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdResetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWaitEvents2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWriteTimestamp2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkQueueSubmit2, KHR); + + #pragma mark - #pragma mark VK_KHR_timeline_semaphore @@ -3355,6 +3607,233 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkSubmitDebugUtilsMessageEXT( } +#pragma mark - +#pragma mark VK_EXT_extended_dynamic_state + +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdBindVertexBuffers2, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetCullMode, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthBoundsTestEnable, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthCompareOp, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthTestEnable, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthWriteEnable, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetFrontFace, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetPrimitiveTopology, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetScissorWithCount, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetStencilOp, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetStencilTestEnable, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetViewportWithCount, EXT); + + +#pragma mark - +#pragma mark VK_EXT_extended_dynamic_state2 +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthBiasEnable, EXT); + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEXT( + VkCommandBuffer commandBuffer, + VkLogicOp logicOp) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPatchControlPointsEXT( + VkCommandBuffer commandBuffer, + uint32_t patchControlPoints) { + + MVKTraceVulkanCallStart(); + 
MVKAddCmd(SetPatchControlPoints, commandBuffer, patchControlPoints); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetPrimitiveRestartEnable, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetRasterizerDiscardEnable, EXT); + + +#pragma mark - +#pragma mark VK_EXT_extended_dynamic_state3 + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetAlphaToCoverageEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 alphaToCoverageEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetAlphaToOneEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 alphaToOneEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendAdvancedEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorBlendAdvancedEXT* pColorBlendAdvanced) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendEnableEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkBool32* pColorBlendEnables) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendEquationEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorBlendEquationEXT* pColorBlendEquations) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorWriteMaskEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorComponentFlags* pColorWriteMasks) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetConservativeRasterizationModeEXT( + VkCommandBuffer commandBuffer, + VkConservativeRasterizationModeEXT conservativeRasterizationMode) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + 
+MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClampEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthClampEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthClipEnable, commandBuffer, !depthClampEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClipEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthClipEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthClipEnable, commandBuffer, depthClipEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClipNegativeOneToOneEXT( + VkCommandBuffer commandBuffer, + VkBool32 negativeOneToOne) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetExtraPrimitiveOverestimationSizeEXT( + VkCommandBuffer commandBuffer, + float extraPrimitiveOverestimationSize) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineRasterizationModeEXT( + VkCommandBuffer commandBuffer, + VkLineRasterizationModeEXT lineRasterizationMode) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineStippleEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 stippledLineEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 logicOpEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPolygonModeEXT( + VkCommandBuffer commandBuffer, + VkPolygonMode polygonMode) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetPolygonMode, commandBuffer, polygonMode); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetProvokingVertexModeEXT( + VkCommandBuffer commandBuffer, + VkProvokingVertexModeEXT provokingVertexMode) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void 
vkCmdSetRasterizationSamplesEXT( + VkCommandBuffer commandBuffer, + VkSampleCountFlagBits rasterizationSamples) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizationStreamEXT( + VkCommandBuffer commandBuffer, + uint32_t rasterizationStream) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetSampleLocationsEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 sampleLocationsEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetSampleLocationsEnable, commandBuffer, sampleLocationsEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetSampleMaskEXT( + VkCommandBuffer commandBuffer, + VkSampleCountFlagBits samples, + const VkSampleMask* pSampleMask) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetTessellationDomainOriginEXT( + VkCommandBuffer commandBuffer, + VkTessellationDomainOrigin domainOrigin) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + + #pragma mark - #pragma mark VK_EXT_external_memory_host extension @@ -3390,6 +3869,26 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkSetHdrMetadataEXT( } +#pragma mark - +#pragma mark VK_EXT_headless_surface extension + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreateHeadlessSurfaceEXT( + VkInstance instance, + const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) { + + MVKTraceVulkanCallStart(); + MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance); + MVKSurface* mvkSrfc = mvkInst->createSurface(pCreateInfo, pAllocator); + *pSurface = (VkSurfaceKHR)mvkSrfc; + VkResult rslt = mvkSrfc->getConfigurationResult(); + if (rslt < 0) { *pSurface = VK_NULL_HANDLE; mvkInst->destroySurface(mvkSrfc, pAllocator); } + MVKTraceVulkanCallEnd(); + return rslt; +} + + #pragma mark - #pragma mark VK_EXT_host_query_reset extension @@ -3433,56 +3932,12 @@ 
MVK_PUBLIC_VULKAN_SYMBOL void vkExportMetalObjectsEXT( #pragma mark - #pragma mark VK_EXT_private_data extension -MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlotEXT( - VkDevice device, - const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPrivateDataSlotEXT* pPrivateDataSlot) { +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCreatePrivateDataSlot, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkDestroyPrivateDataSlot, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkGetPrivateData, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkSetPrivateData, EXT); - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot); - MVKTraceVulkanCallEnd(); - return rslt; -} -MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlotEXT( - VkDevice device, - VkPrivateDataSlotEXT privateDataSlot, - const VkAllocationCallbacks* pAllocator) { - - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateDataEXT( - VkDevice device, - VkObjectType objectType, - uint64_t objectHandle, - VkPrivateDataSlotEXT privateDataSlot, - uint64_t data) { - - MVKTraceVulkanCallStart(); - MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; - mvkPDS->setData(objectType, objectHandle, data); - MVKTraceVulkanCallEnd(); - return VK_SUCCESS; -} - -MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateDataEXT( - VkDevice device, - VkObjectType objectType, - uint64_t objectHandle, - VkPrivateDataSlotEXT privateDataSlot, - uint64_t* pData) { - - MVKTraceVulkanCallStart(); - MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; - *pData = mvkPDS->getData(objectType, objectHandle); - MVKTraceVulkanCallEnd(); -} #pragma mark - #pragma mark VK_EXT_sample_locations extension @@ -3507,6 +3962,7 @@ void 
vkCmdSetSampleLocationsEXT( MVKTraceVulkanCallEnd(); } + #pragma mark - #pragma mark VK_GOOGLE_display_timing extension @@ -3535,12 +3991,14 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkGetPastPresentationTimingGOOGLE( return rslt; } + #pragma mark - #pragma mark VK_AMD_draw_indirect_count MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdDrawIndexedIndirectCount, AMD); MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdDrawIndirectCount, AMD); + #pragma mark - #pragma mark iOS & macOS surface extensions diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme index eeee1a62..9f64b980 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme @@ -97,7 +97,7 @@ isEnabled = "NO"> & spv, mslContext.options.shouldFlipVertexY = _shouldFlipVertexY; mslContext.options.mslOptions.argument_buffers = _useMetalArgumentBuffers; mslContext.options.mslOptions.force_active_argument_buffer_resources = _useMetalArgumentBuffers; - mslContext.options.mslOptions.pad_argument_buffer_resources = _useMetalArgumentBuffers; + mslContext.options.mslOptions.pad_argument_buffer_resources = false; + mslContext.options.mslOptions.argument_buffers_tier = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::ArgumentBuffersTier::Tier2; + mslContext.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); SPIRVToMSLConverter spvConverter; spvConverter.setSPIRV(spv); @@ -424,7 +426,10 @@ MoltenVKShaderConverterTool::MoltenVKShaderConverterTool(int argc, const char* a _quietMode = false; _useMetalArgumentBuffers = false; - if (mvkOSVersionIsAtLeast(13.0)) { + if (mvkOSVersionIsAtLeast(14.0)) { + _mslVersionMajor = 3; + _mslVersionMinor = 1; + } else 
if (mvkOSVersionIsAtLeast(13.0)) { _mslVersionMajor = 3; _mslVersionMinor = 0; } else if (mvkOSVersionIsAtLeast(12.0)) { diff --git a/README.md b/README.md index e44e34a6..2fa36526 100644 --- a/README.md +++ b/README.md @@ -149,24 +149,17 @@ for which to build the external libraries. The platform choices include: --maccat --tvos --tvossim - --visionos - --visionossim - -The `visionos` and `visionossim` selections require Xcode 15+. You can specify multiple of these selections. The result is a single `XCFramework` for each external dependency library, with each `XCFramework` containing binaries for each of the requested platforms. -The `--all` selection is the same as entering all of the other platform choices, except -`--visionos` and `--visionossim`, and will result in a single `XCFramework` for each -external dependency library, with each `XCFramework` containing binaries for all supported -platforms and simulators. The `--visionos` and `--visionossim` selections must be invoked -with a separate invocation of `fetchDependencies`, because those selections require -Xcode 15+, and will cause a multi-platform build on older versions of Xcode to abort. +The `--all` selection is the same as entering all of the other platform choices, +and will result in a single `XCFramework` for each external dependency library, +with each `XCFramework` containing binaries for all supported platforms and simulators. -Running `fetchDependencies` repeatedly with different platforms will accumulate -targets in the `XCFramework`. +Running `fetchDependencies` repeatedly with different platforms will accumulate targets +in the `XCFramework`, if the `--keep-cache` option is used on each invocation. For more information about the external open-source libraries used by **MoltenVK**, see the [`ExternalRevisions/README.md`](ExternalRevisions/README.md) document. @@ -263,8 +256,6 @@ from the command line. 
The following `make` targets are provided: make maccat make tvos make tvossim - make visionos - make visionossim make all-debug make macos-debug @@ -273,15 +264,12 @@ from the command line. The following `make` targets are provided: make maccat-debug make tvos-debug make tvossim-debug - make visionos-debug - make visionossim-debug make clean make install - Running `make` repeatedly with different targets will accumulate binaries for these different targets. -- The `all` target executes all platform targets, except `visionos` and `visionossim`, as these require - Xcode 15+, and will abort a multi-platform build on older versions of Xcode. +- The `all` target executes all platform targets. - The `all` target is the default target. Running `make` with no arguments is the same as running `make all`. - The `*-debug` targets build the binaries using the **_Debug_** configuration. - The `install` target will copy the most recently built `MoltenVK.xcframework` into the diff --git a/Scripts/create_ext_lib_xcframeworks.sh b/Scripts/create_ext_lib_xcframeworks.sh index 33cd977d..9cacc5f0 100755 --- a/Scripts/create_ext_lib_xcframeworks.sh +++ b/Scripts/create_ext_lib_xcframeworks.sh @@ -1,5 +1,7 @@ #!/bin/bash +if [ "${SKIP_PACKAGING}" = "Y" ]; then exit 0; fi + . 
"${PROJECT_DIR}/Scripts/create_xcframework_func.sh" export MVK_EXT_DIR="${PROJECT_DIR}/External" diff --git a/Scripts/package_ext_libs_finish.sh b/Scripts/package_ext_libs_finish.sh index d44a2d30..ec4d4753 100755 --- a/Scripts/package_ext_libs_finish.sh +++ b/Scripts/package_ext_libs_finish.sh @@ -1,5 +1,7 @@ #!/bin/bash +if [ "${SKIP_PACKAGING}" = "Y" ]; then exit 0; fi + set -e export MVK_EXT_LIB_DST_PATH="${PROJECT_DIR}/External/build/" @@ -7,6 +9,12 @@ export MVK_EXT_LIB_DST_PATH="${PROJECT_DIR}/External/build/" # Assign symlink to Latest ln -sfn "${CONFIGURATION}" "${MVK_EXT_LIB_DST_PATH}/Latest" +# Remove the large Intermediates directory if no longer needed +if [ "${KEEP_CACHE}" != "Y" ]; then + echo Removing Intermediates library at "${MVK_EXT_LIB_DST_PATH}/Intermediates" + rm -rf "${MVK_EXT_LIB_DST_PATH}/Intermediates" +fi + # Clean MoltenVK to ensure the next MoltenVK build will use the latest external library versions. make --quiet clean diff --git a/Scripts/runcts b/Scripts/runcts index 24cd9baa..dfac7a13 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -113,7 +113,7 @@ export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(2 = VK_EXT_descriptor_ export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always) export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4) export MVK_CONFIG_PERFORMANCE_TRACKING=0 -export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=2 #(2 = Device lifetime) +export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=3 #(2 = Device lifetime, 3 = Process lifetime) # -------------- Operation -------------------- @@ -122,8 +122,12 @@ start_time=${SECONDS} "${cts_vk_dir}/deqp-vk" \ --deqp-archive-dir="${cts_vk_dir}/.." 
\ +--deqp-log-filename="/dev/null" \ --deqp-log-images=disable \ --deqp-log-shader-sources=disable \ +--deqp-shadercache=disable \ +--deqp-log-decompiled-spirv=disable \ +--deqp-log-flush=disable \ --deqp-caselist-file="${caselist_file}" \ &> "${results_file}" diff --git a/Templates/spirv-tools/build.zip b/Templates/spirv-tools/build.zip index 3879c9b8..d087d8e2 100644 Binary files a/Templates/spirv-tools/build.zip and b/Templates/spirv-tools/build.zip differ diff --git a/fetchDependencies b/fetchDependencies index f03f36c8..8b80bf90 100755 --- a/fetchDependencies +++ b/fetchDependencies @@ -67,6 +67,12 @@ # --no-parallel-build # Build the external libraries serially instead of in parallel. This is the default. # +# --keep-cache +# Do not remove the External/build/Intermediates cache directory after building. +# Removing the Intermediates directory returns significant disk space after the +# build, and is the default behaviour. Use this option if you intend to run this +# script repeatedly to incrementally build one platform at a time. +# # --glslang-root path # "path" specifies a directory path to a KhronosGroup/glslang repository. # This repository does need to be built and the build directory must be in the @@ -117,6 +123,7 @@ V_HEADERS_ROOT="" SPIRV_CROSS_ROOT="" GLSLANG_ROOT="" BLD_SPV_TLS="" +export KEEP_CACHE="" while (( "$#" )); do case "$1" in @@ -191,6 +198,10 @@ while (( "$#" )); do XC_USE_BCKGND="" shift 1 ;; + --keep-cache) + KEEP_CACHE="Y" + shift 1 + ;; -v) XC_BUILD_VERBOSITY="" shift 1 @@ -410,7 +421,6 @@ function execute_xcodebuild_command () { # 2 - Platform # 3 - Destination (Optional. Defaults to same as platform) function build_impl() { - BLD_SPECIFIED="Y" XC_OS=${1} XC_PLTFM=${2} if [ "${3}" != "" ]; then @@ -442,7 +452,9 @@ function build_impl() { # Select whether or not to run the build in parallel. # 1 - OS # 2 - platform +# 3 - Destination (Optional. 
Defaults to same as platform) function build() { + BLD_SPECIFIED="Y" if [ "$XC_USE_BCKGND" != "" ]; then build_impl "${1}" "${2}" "${3}" & else @@ -453,6 +465,7 @@ function build() { EXT_DEPS=ExternalDependencies XC_PROJ="${EXT_DEPS}.xcodeproj" XC_DD_PATH="${EXT_DIR}/build" +export SKIP_PACKAGING="Y" # Determine if xcpretty is present XCPRETTY_PATH=$(command -v xcpretty 2> /dev/null || true) # ignore failures @@ -512,9 +525,10 @@ if [ "$XC_USE_BCKGND" != "" ]; then fi if [ "$BLD_SPECIFIED" != "" ]; then - # Build XCFrameworks, update latest symlink, and clean MoltenVK for rebuild + # Build XCFrameworks, update latest symlink, remove intermediates, and clean MoltenVK for rebuild PROJECT_DIR="." CONFIGURATION=${XC_CONFIG} + SKIP_PACKAGING="" . "./Scripts/create_ext_lib_xcframeworks.sh" . "./Scripts/package_ext_libs_finish.sh" else