From 259039ed2c9d692a2222452d29b935a17f04e93c Mon Sep 17 00:00:00 2001 From: spnda Date: Tue, 24 May 2022 15:36:11 +0200 Subject: [PATCH 01/41] Add: Support for VK_EXT_extended_dynamic_state --- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h | 44 +++++++ .../MoltenVK/Commands/MVKCmdRenderPass.mm | 48 +++++++ .../MoltenVK/Commands/MVKCommandTypePools.def | 2 + MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/Vulkan/vulkan.mm | 117 ++++++++++++++++++ 6 files changed, 213 insertions(+) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 5c05fbd8..2131f429 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -25,6 +25,7 @@ Released TBD - `VK_KHR_dynamic_rendering` - `VK_KHR_separate_depth_stencil_layouts` - `VK_EXT_separate_stencil_usage` + - `VK_EXT_extended_dynamic_state` - Support attachment clearing when some clearing formats are not specified. - Fix error where previously bound push constants can override a descriptor buffer binding used by a subsequent pipeline that does not use push constants. diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h index 9b3c14d0..8af34247 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h @@ -442,3 +442,47 @@ protected: uint32_t _stencilReference; }; + +#pragma mark - +#pragma mark MVKCmdSetCullMode + +/** + * Vulkan command to dynamically set the cull mode. Originally from VK_EXT_extended_dynamic_state, + * but also part of Vulkan 1.3. + */ +class MVKCmdSetCullMode : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkCullModeFlags cullMode); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + MTLCullMode _cullMode; +}; + + +#pragma mark - +#pragma mark MVKCmdSetFrontFace + +/** + * Vulkan command to dynamically set the front facing winding order. Originally from + * VK_EXT_extended_dynamic_state, but also part of Vulkan 1.3. + */ +class MVKCmdSetFrontFace : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkFrontFace frontFace); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + MTLWinding _frontFace; +}; + diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm index 967c905a..82445e1a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm @@ -439,3 +439,51 @@ void MVKCmdSetStencilReference::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->_stencilReferenceValueState.setReferenceValues(_faceMask, _stencilReference); } + +#pragma mark - +#pragma mark MVKCmdSetCullMode + +VkResult MVKCmdSetCullMode::setContent(MVKCommandBuffer* cmdBuff, + VkCullModeFlags cullMode) { + switch (cullMode) { + case VK_CULL_MODE_NONE: { + _cullMode = MTLCullModeNone; + break; + } + case VK_CULL_MODE_FRONT_BIT: { + _cullMode = MTLCullModeFront; + break; + } + case VK_CULL_MODE_BACK_BIT: { + _cullMode = MTLCullModeBack; + break; + } + case VK_CULL_MODE_FRONT_AND_BACK: { + // Metal doesn't have a equivalent to this... 
+ } + } + + return VK_SUCCESS; +} + +void MVKCmdSetCullMode::encode(MVKCommandEncoder* cmdEncoder) { + [((id)cmdEncoder->getMTLEncoder()) setCullMode:_cullMode]; +} + + +#pragma mark - +#pragma mark MVKCmdSetFrontFace + +VkResult MVKCmdSetFrontFace::setContent(MVKCommandBuffer* cmdBuff, + VkFrontFace frontFace) { + _frontFace = frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE + ? MTLWindingClockwise + : MTLWindingCounterClockwise; + + return VK_SUCCESS; +} + +void MVKCmdSetFrontFace::encode(MVKCommandEncoder* cmdEncoder) { + [((id)cmdEncoder->getMTLEncoder()) setFrontFacingWinding:_frontFace]; +} + diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def index e8cbae6e..a17e57f4 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def +++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def @@ -93,6 +93,8 @@ MVK_CMD_TYPE_POOL(SetDepthBounds) MVK_CMD_TYPE_POOL(SetStencilCompareMask) MVK_CMD_TYPE_POOL(SetStencilWriteMask) MVK_CMD_TYPE_POOL(SetStencilReference) +MVK_CMD_TYPE_POOL(SetCullMode) +MVK_CMD_TYPE_POOL(SetFrontFace) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2) MVK_CMD_TYPE_POOL(BindIndexBuffer) MVK_CMD_TYPE_POOL(Draw) diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index 019b23b4..32cb393d 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -87,6 +87,7 @@ MVK_EXTENSION(EXT_debug_marker, EXT_DEBUG_MARKER, MVK_EXTENSION(EXT_debug_report, EXT_DEBUG_REPORT, INSTANCE, 10.11, 8.0) MVK_EXTENSION(EXT_debug_utils, EXT_DEBUG_UTILS, INSTANCE, 10.11, 8.0) MVK_EXTENSION(EXT_descriptor_indexing, EXT_DESCRIPTOR_INDEXING, DEVICE, 10.11, 8.0) +MVK_EXTENSION(EXT_extended_dynamic_state, EXT_extended_dynamic_state, DEVICE, 10.11, 8.0) MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, DEVICE, 10.13, 11.0) MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, DEVICE, 10.15, MVK_NA) MVK_EXTENSION(EXT_host_query_reset, EXT_HOST_QUERY_RESET, DEVICE, 10.11, 8.0) diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index c291e892..4231a525 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2985,6 +2985,123 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkSubmitDebugUtilsMessageEXT( } +#pragma mark - +#pragma mark VK_EXT_extended_dynamic_state + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindVertexBuffers2EXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides) { + + MVKTraceVulkanCallStart(); + + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetCullModeEXT( + VkCommandBuffer commandBuffer, + VkCullModeFlags cullMode) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetCullMode, commandBuffer, cullMode); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBoundsTestEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthBoundsTestEnable) { + + MVKTraceVulkanCallStart(); + + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthCompareOpEXT( + VkCommandBuffer commandBuffer, + VkCompareOp depthCompareOp) { + + MVKTraceVulkanCallStart(); + + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthTestEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthTestEnable) { + + MVKTraceVulkanCallStart(); + + 
MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthWriteEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthWriteEnable) { + + MVKTraceVulkanCallStart(); + + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetFrontFaceEXT( + VkCommandBuffer commandBuffer, + VkFrontFace frontFace) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetFrontFace, commandBuffer, frontFace); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveTopologyEXT( + VkCommandBuffer commandBuffer, + VkPrimitiveTopology primitiveTopology) { + + MVKTraceVulkanCallStart(); + + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetScissorWithCountEXT( + VkCommandBuffer commandBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFromThreshold(SetScissor, scissorCount, 1, commandBuffer, 0, scissorCount, pScissors); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilOpEXT( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilTestEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 stencilTestEnable) { + +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetViewportWithCountEXT( + VkCommandBuffer commandBuffer, + uint32_t viewportCount, + const VkViewport* pViewports) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFromThreshold(SetViewport, viewportCount, 1, commandBuffer, 0, viewportCount, pViewports); + MVKTraceVulkanCallEnd(); +} + + #pragma mark - #pragma mark VK_EXT_hdr_metadata extension From 6acdd9fb9157a8d8aa03d0160798bb4288ed9fb0 Mon Sep 17 00:00:00 2001 From: Dan Ginsburg Date: Thu, 31 Aug 2023 09:13:26 -0400 Subject: [PATCH 02/41] Fix crash in Dota 2 on macOS < 10.15 - closes #2006 --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 28 ++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0b11d4df..203adb81 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1572,19 +1572,21 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount, // On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes. void MVKPhysicalDevice::updateTimestampsAndPeriod() { if (_properties.vendorID == kAppleVendorId) { return; } - - MTLTimestamp earlierCPUTs = _prevCPUTimestamp; - MTLTimestamp earlierGPUTs = _prevGPUTimestamp; - [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; - double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; - double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; - if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero - float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; - - // Basic lowpass filter Y = (1 - a)Y + a*X. - // The lower a is, the slower Y will change over time. 
- static const float a = 0.05; - _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); + + if ([_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) { + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; + MTLTimestamp earlierGPUTs = _prevGPUTimestamp; + [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; + double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; + double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; + if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero + float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; + + // Basic lowpass filter Y = (1 - a)Y + a*X. + // The lower a is, the slower Y will change over time. + static const float a = 0.05; + _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); + } } } From 9f64faadbcf490e73e69db8bc3e10154e61f17e5 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Sat, 2 Sep 2023 08:51:36 -0400 Subject: [PATCH 03/41] Improve behavior of swapchain image presentation stalls caused by Metal regression. In a recent Metal regression, Metal sometimes does not trigger the [CAMetalDrawable addPresentedHandler:] callback on the final few (1-3) CAMetalDrawable presentations, and retains internal memory associated with these CAMetalDrawables. This does not occur for any CAMetalDrawable presentations prior to those final few. Most apps typically don't care much what happens after the last few CAMetalDrawables are presented, and typically end shortly after that. However, for some apps, such as Vulkan CTS WSI tests, which serially create potentially hundreds, or thousands, of CAMetalLayers and MTLDevices,these retained device memory allocations can pile up and cause the CTS WSI tests to stall, block, or crash. This issue has proven very difficult to debug, or replicate in incrementally controlled environments. It appears consistently in some scenarios, and never in other, almost identical scenarios. For example, the MoltenVK Cube demo consistently runs without encountering this issue, but CTS WSI test dEQP-VK.wsi.macos.swapchain.render.basic consistently triggers the issue. Both apps run almost identical Vulkan command paths, and identical swapchain image presentation paths, and result in GPU captures that have identical swapchain image presentations. We may ultimately have to wait for Apple to fix the core issue, but this update includes workarounds that helps in some cases. During vkQueueWaitIdle() and vkDeviceWaitIdle(), wait a short while for any in-flight swapchain image presentations to finish, and attempt to force completion by calling MVKPresentableSwapchainImage::forcePresentationCompletion(), which releases the current CAMetalDrawable, and attempts to retrieve a new one, to trigger the callback on the current CAMetalDrawable. In exploring possible work-arounds for this issue, this update adds significant structural improvements in the handling of swapchains, and quite a bit of new performance and logging functionality that is useful for debugging purposes. - Add several additional performance trackers, available via logging, or the mvk_private_api.h API. - Rename MVKPerformanceTracker members, and refactor performance result collection, to support tracking and logging memory use, or other measurements, in addition to just durations. 
- Redefine MVKQueuePerformance to track separate performance metrics for
  MTLCommandBuffer retrieval, encoding, and execution, plus swapchain presentation.
- Add MVKDevicePerformance as part of MVKPerformanceStatistics to track device
  information, including GPU device memory allocated, and update device memory
  results whenever performance content is requested.
- Add MVKConfigActivityPerformanceLoggingStyle::
  MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE
  to accumulate performance and memory results across multiple serial invocations
  of VkDevices during the lifetime of the app process. This is useful for
  accumulating performance results across multiple CTS tests.
- Log destruction of VkDevice, VkPhysicalDevice, and VkInstance, to bookend the
  corresponding logs performed upon their creation.
- Include consumed GPU memory in the log when VkPhysicalDevice is destroyed.
- Add mvkGetAvailableMTLDevicesArray() to support consistency when retrieving
  the MTLDevices available on the system.
- Add mvkVkCommandName() to generically map a command use to a command name.
- MVKDevice:
  - Support MTLDevice.recommendedMaxWorkingSetSize on iOS & tvOS.
  - Include available and consumed GPU memory in the log of each GPU device at
    VkInstance creation time.
- MVKQueue:
  - Add handleMTLCommandBufferError() to handle errors for all MTLCommandBuffer executions.
  - Track the time needed to retrieve a MTLCommandBuffer.
  - If a MTLCommandBuffer could not be retrieved during queue submission, report the
    error, signal queue submission completion, and return VK_ERROR_OUT_OF_POOL_MEMORY.
  - Simplify waitIdle() to use [MTLCommandBuffer waitUntilCompleted], plus also wait
    for in-flight presentations to complete, and attempt to force them to complete
    if they are stuck.
- MVKPresentableSwapchainImage:
  - Don't track the presenting MTLCommandBuffer.
  - Add a limit on the number of attempts to retrieve a drawable, and report
    VK_ERROR_OUT_OF_POOL_MEMORY if a drawable cannot be retrieved.
  - Return VkResult from acquireAndSignalWhenAvailable() to notify upstream if a
    MTLCommandBuffer could not be created.
  - Track presentation time.
  - Notify MVKQueue when presentation has completed.
  - Add forcePresentationCompletion(), which releases the current CAMetalDrawable
    and attempts to retrieve a new one, to trigger the callback on the current
    CAMetalDrawable. Called when a swapchain is destroyed, or by the queue if
    waiting for presentation to complete stalls; a sketch of the idea follows this list.
  - If destroyed while in flight, stop tracking the swapchain and don't notify when
    presentation completes.
- MVKSwapchain:
  - Track the active swapchain in MVKSurface, to check oldSwapchain.
  - Track the MVKSurface, to access the layer and detect a lost surface.
  - Don't track the layer and layer observer, since MVKSurface handles these.
  - On destruction, wait until all in-flight presentable images have returned.
  - Remove the empty and unused releaseUndisplayedSurfaces() function.
- MVKSurface:
  - Consolidate the constructors into an initLayer() function.
  - Update the logic that tests for a valid layer and sets up the layer observer.
- MVKSemaphoreImpl:
  - Add getReservationCount().
- MVKBaseObject:
  - Add reportResult() and reportWarning() functions to support logging and reporting
    Vulkan results that are not actual errors.
- Rename MVKCommandUse::kMVKCommandUseEndCommandBuffer to kMVKCommandUseBeginCommandBuffer,
  since that's where it is used.
- Update MVK_CONFIGURATION_API_VERSION and MVK_PRIVATE_API_VERSION to 38.
- Cube Demo: support running a maximum number of frames.
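The forcePresentationCompletion() workaround described above amounts to dropping the
retained CAMetalDrawable and requesting a fresh one from the CAMetalLayer, which nudges
Metal into firing the outstanding addPresentedHandler: callback. A minimal sketch of
that idea, using hypothetical names rather than the actual MoltenVK members, and
assuming manual reference counting as used in the MoltenVK sources:

#import <Metal/Metal.h>
#import <QuartzCore/CAMetalLayer.h>

// Hypothetical helper, not the MoltenVK implementation: release the drawable
// currently being held, then request (and immediately discard) a replacement
// drawable, giving Metal a chance to complete the presentation callback that
// is still pending on the released drawable.
static void forceDrawableCompletion(CAMetalLayer* mtlLayer, id<CAMetalDrawable>& heldDrawable) {
	[heldDrawable release];              // Drop the reference to the stuck drawable.
	heldDrawable = nil;
	@autoreleasepool {
		(void)[mtlLayer nextDrawable];   // Autoreleased; discarded immediately.
	}
}

Whether the replacement drawable is kept or discarded is an implementation detail; the
point is that requesting it gives Metal an opportunity to fire the pending callback.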
--- Demos/Cube/Cube.xcodeproj/project.pbxproj | 2 +- Demos/Cube/iOS/DemoViewController.m | 19 +- Demos/Cube/macOS/DemoViewController.m | 64 ++- Docs/Whats_New.md | 3 + MoltenVK/MoltenVK/API/mvk_config.h | 13 +- MoltenVK/MoltenVK/API/mvk_private_api.h | 34 +- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 3 - .../MoltenVK/Commands/MVKCommandBuffer.mm | 23 +- MoltenVK/MoltenVK/Commands/MVKCommandPool.h | 2 +- MoltenVK/MoltenVK/Commands/MVKCommandPool.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 39 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 305 +++++++++----- MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 23 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 111 +++-- MoltenVK/MoltenVK/GPUObjects/MVKInstance.h | 1 - MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 93 +--- .../MoltenVK/GPUObjects/MVKPixelFormats.mm | 21 +- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 67 +-- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 369 ++++++++++------ MoltenVK/MoltenVK/GPUObjects/MVKSurface.h | 19 +- MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm | 43 +- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h | 28 +- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 398 +++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKSync.h | 10 +- MoltenVK/MoltenVK/GPUObjects/MVKSync.mm | 5 + MoltenVK/MoltenVK/Utility/MVKBaseObject.h | 33 +- MoltenVK/MoltenVK/Utility/MVKBaseObject.mm | 92 ++-- MoltenVK/MoltenVK/Utility/MVKFoundation.cpp | 38 ++ MoltenVK/MoltenVK/Utility/MVKFoundation.h | 5 +- MoltenVK/MoltenVK/Utility/MVKLogging.h | 24 +- Scripts/runcts | 2 +- 31 files changed, 1081 insertions(+), 812 deletions(-) diff --git a/Demos/Cube/Cube.xcodeproj/project.pbxproj b/Demos/Cube/Cube.xcodeproj/project.pbxproj index 44654e51..66fb7d92 100644 --- a/Demos/Cube/Cube.xcodeproj/project.pbxproj +++ b/Demos/Cube/Cube.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 52; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ diff --git a/Demos/Cube/iOS/DemoViewController.m b/Demos/Cube/iOS/DemoViewController.m index 9256927a..30112a2c 100644 --- a/Demos/Cube/iOS/DemoViewController.m +++ b/Demos/Cube/iOS/DemoViewController.m @@ -30,15 +30,9 @@ struct demo demo; } --(void) dealloc { - demo_cleanup(&demo); - [_displayLink release]; - [super dealloc]; -} - -/** Since this is a single-view app, init Vulkan when the view is loaded. */ --(void) viewDidLoad { - [super viewDidLoad]; +/** Since this is a single-view app, initialize Vulkan as view is appearing. 
*/ +-(void) viewWillAppear: (BOOL) animated { + [super viewWillAppear: animated]; self.view.contentScaleFactor = UIScreen.mainScreen.nativeScale; @@ -68,6 +62,13 @@ demo_resize(&demo); } +-(void) viewDidDisappear: (BOOL) animated { + [_displayLink invalidate]; + [_displayLink release]; + demo_cleanup(&demo); + [super viewDidDisappear: animated]; +} + @end diff --git a/Demos/Cube/macOS/DemoViewController.m b/Demos/Cube/macOS/DemoViewController.m index d8468bdc..76dc4d92 100644 --- a/Demos/Cube/macOS/DemoViewController.m +++ b/Demos/Cube/macOS/DemoViewController.m @@ -18,6 +18,7 @@ #import "DemoViewController.h" #import +#import #include #include "../../Vulkan-Tools/cube/cube.c" @@ -27,27 +28,34 @@ #pragma mark DemoViewController @implementation DemoViewController { - CVDisplayLinkRef _displayLink; + CVDisplayLinkRef _displayLink; struct demo demo; + uint32_t _maxFrameCount; + uint64_t _frameCount; + BOOL _stop; + BOOL _useDisplayLink; } --(void) dealloc { - demo_cleanup(&demo); - CVDisplayLinkRelease(_displayLink); - [super dealloc]; -} - -/** Since this is a single-view app, initialize Vulkan during view loading. */ --(void) viewDidLoad { - [super viewDidLoad]; +/** Since this is a single-view app, initialize Vulkan as view is appearing. */ +-(void) viewWillAppear { + [super viewWillAppear]; self.view.wantsLayer = YES; // Back the view with a layer created by the makeBackingLayer method. - // Enabling this will sync the rendering loop with the natural display link (60 fps). - // Disabling this will allow the rendering loop to run flat out, limited only by the rendering speed. - bool useDisplayLink = true; + // Enabling this will sync the rendering loop with the natural display link + // (monitor refresh rate, typically 60 fps). Disabling this will allow the + // rendering loop to run flat out, limited only by the rendering speed. + _useDisplayLink = YES; - VkPresentModeKHR vkPresentMode = useDisplayLink ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR; + // If this value is set to zero, the demo will render frames until the window is closed. + // If this value is not zero, it establishes a maximum number of frames that will be + // rendered, and once this count has been reached, the demo will stop rendering. + // Once rendering is finished, if _useDisplayLink is false, the demo will immediately + // clean up the Vulkan objects, or if _useDisplayLink is true, the demo will delay + // cleaning up Vulkan objects until the window is closed. + _maxFrameCount = 0; + + VkPresentModeKHR vkPresentMode = _useDisplayLink ? 
VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR; char vkPresentModeStr[64]; sprintf(vkPresentModeStr, "%d", vkPresentMode); @@ -55,19 +63,33 @@ int argc = sizeof(argv)/sizeof(char*); demo_main(&demo, self.view.layer, argc, argv); - if (useDisplayLink) { + _stop = NO; + _frameCount = 0; + if (_useDisplayLink) { CVDisplayLinkCreateWithActiveCGDisplays(&_displayLink); - CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, &demo); + CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, self); CVDisplayLinkStart(_displayLink); } else { dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - while(true) { + do { demo_draw(&demo); - } + _stop = _stop || (_maxFrameCount && ++_frameCount >= _maxFrameCount); + } while( !_stop ); + demo_cleanup(&demo); }); } } +-(void) viewDidDisappear { + _stop = YES; + if (_useDisplayLink) { + CVDisplayLinkRelease(_displayLink); + demo_cleanup(&demo); + } + + [super viewDidDisappear]; +} + #pragma mark Display loop callback function @@ -78,7 +100,11 @@ static CVReturn DisplayLinkCallback(CVDisplayLinkRef displayLink, CVOptionFlags flagsIn, CVOptionFlags* flagsOut, void* target) { - demo_draw((struct demo*)target); + DemoViewController* demoVC =(DemoViewController*)target; + if ( !demoVC->_stop ) { + demo_draw(&demoVC->demo); + demoVC->_stop = (demoVC->_maxFrameCount && ++demoVC->_frameCount >= demoVC->_maxFrameCount); + } return kCVReturnSuccess; } diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 82e9b155..3e476dd3 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,6 +20,9 @@ Released TBD - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. +- Improve behavior of swapchain image presentation stalls caused by Metal regression. +- Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. +- Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index 360007e1..532d27db 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -51,7 +51,7 @@ extern "C" { #define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) -#define MVK_CONFIGURATION_API_VERSION 37 +#define MVK_CONFIGURATION_API_VERSION 38 /** Identifies the level of logging MoltenVK should be limited to outputting. */ typedef enum MVKConfigLogLevel { @@ -138,10 +138,11 @@ typedef enum MVKConfigCompressionAlgorithm { /** Identifies the style of activity performance logging to use. */ typedef enum MVKConfigActivityPerformanceLoggingStyle { - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF, + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. 
*/ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE = 3, /**< Log at the end of the VkDevice lifetime, but continue to accumulate across mulitiple VkDevices throughout the app process. This is useful for testing frameworks that create many VkDevices serially. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF, } MVKConfigActivityPerformanceLoggingStyle; /** @@ -786,6 +787,8 @@ typedef struct { /** * Controls when MoltenVK should log activity performance events. * + * The performanceTracking parameter must also be enabled. + * * The value of this parameter must be changed before creating a VkDevice, * for the change to take effect. * diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 87bc8ad9..e496fc5a 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier; */ -#define MVK_PRIVATE_API_VERSION 37 +#define MVK_PRIVATE_API_VERSION 38 /** Identifies the type of rounding Metal uses for float to integer conversions in particular calculatons. */ @@ -153,13 +153,16 @@ typedef struct { VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */ } MVKPhysicalDeviceMetalFeatures; -/** MoltenVK performance of a particular type of activity. */ +/** + * MoltenVK performance of a particular type of activity. + * Durations are recorded in milliseconds. Memory sizes are recorded in kilobytes. + */ typedef struct { - uint32_t count; /**< The number of activities of this type. */ - double latestDuration; /**< The latest (most recent) duration of the activity, in milliseconds. */ - double averageDuration; /**< The average duration of the activity, in milliseconds. */ - double minimumDuration; /**< The minimum duration of the activity, in milliseconds. */ - double maximumDuration; /**< The maximum duration of the activity, in milliseconds. */ + uint32_t count; /**< The number of activities of this type. */ + double latest; /**< The latest (most recent) value of the activity. */ + double average; /**< The average value of the activity. */ + double minimum; /**< The minimum value of the activity. */ + double maximum; /**< The maximum value of the activity. */ } MVKPerformanceTracker; /** MoltenVK performance of shader compilation activities. */ @@ -186,12 +189,20 @@ typedef struct { /** MoltenVK performance of queue activities. */ typedef struct { - MVKPerformanceTracker mtlQueueAccess; /** Create an MTLCommandQueue or access an existing cached instance. */ - MVKPerformanceTracker mtlCommandBufferCompletion; /** Completion of a MTLCommandBuffer on the GPU, from commit to completion callback. */ - MVKPerformanceTracker nextCAMetalDrawable; /** Retrieve next CAMetalDrawable from CAMetalLayer during presentation. */ - MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */ + MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue. */ + MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling). 
*/ + MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding). */ + MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback. */ + MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer. */ + MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback. */ + MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */ } MVKQueuePerformance; +/** MoltenVK performance of device activities. */ +typedef struct { + MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated (in KB). */ +} MVKDevicePerformance; + /** * MoltenVK performance. You can retrieve a copy of this structure using the vkGetPerformanceStatisticsMVK() function. * @@ -209,6 +220,7 @@ typedef struct { MVKShaderCompilationPerformance shaderCompilation; /** Shader compilations activities. */ MVKPipelineCachePerformance pipelineCache; /** Pipeline cache activities. */ MVKQueuePerformance queue; /** Queue activities. */ + MVKDevicePerformance device; /** Device activities. */ } MVKPerformanceStatistics; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 76274dad..59242aff 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -536,9 +536,6 @@ protected: #pragma mark - #pragma mark Support functions -/** Returns a name, suitable for use as a MTLCommandBuffer label, based on the MVKCommandUse. */ -NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse); - /** Returns a name, suitable for use as a MTLRenderCommandEncoder label, based on the MVKCommandUse. */ NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 5edc13aa..4e0af414 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -120,7 +120,7 @@ VkResult MVKCommandBuffer::begin(const VkCommandBufferBeginInfo* pBeginInfo) { if(_device->shouldPrefillMTLCommandBuffers() && !(_isSecondary || _supportsConcurrentExecution)) { @autoreleasepool { - _prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(0) retain]; // retained + _prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(kMVKCommandUseBeginCommandBuffer, 0) retain]; // retained auto prefillStyle = mvkConfig().prefillMetalCommandBuffers; if (prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING || prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE ) { @@ -335,11 +335,19 @@ void MVKCommandBuffer::recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline) { #pragma mark - #pragma mark MVKCommandEncoder +// Activity performance tracking is put here to deliberately exclude when +// MVKConfiguration::prefillMetalCommandBuffers is set to immediate prefilling, +// because that would include app time between command submissions. 
void MVKCommandEncoder::encode(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); + beginEncoding(mtlCmdBuff, pEncodingContext); encodeCommands(_cmdBuffer->_head); endEncoding(); + + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); } void MVKCommandEncoder::beginEncoding(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { @@ -1169,19 +1177,6 @@ MVKCommandEncoder::~MVKCommandEncoder() { #pragma mark - #pragma mark Support functions -NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse) { - switch (cmdUse) { - case kMVKCommandUseEndCommandBuffer: return @"vkEndCommandBuffer (Prefilled) CommandBuffer"; - case kMVKCommandUseQueueSubmit: return @"vkQueueSubmit CommandBuffer"; - case kMVKCommandUseQueuePresent: return @"vkQueuePresentKHR CommandBuffer"; - case kMVKCommandUseQueueWaitIdle: return @"vkQueueWaitIdle CommandBuffer"; - case kMVKCommandUseDeviceWaitIdle: return @"vkDeviceWaitIdle CommandBuffer"; - case kMVKCommandUseAcquireNextImage: return @"vkAcquireNextImageKHR CommandBuffer"; - case kMVKCommandUseInvalidateMappedMemoryRanges: return @"vkInvalidateMappedMemoryRanges CommandBuffer"; - default: return @"Unknown Use CommandBuffer"; - } -} - NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse) { switch (cmdUse) { case kMVKCommandUseBeginRendering: return @"vkCmdBeginRendering RenderEncoder"; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h index f2cf1e66..a6b1a38b 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h @@ -82,7 +82,7 @@ public: * Returns a retained MTLCommandBuffer created from the indexed queue * within the queue family for which this command pool was created. */ - id getMTLCommandBuffer(uint32_t queueIndex); + id getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex); /** Release any held but unused memory back to the system. */ void trim(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm index be4713f3..656740b0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm @@ -77,8 +77,8 @@ void MVKCommandPool::freeCommandBuffers(uint32_t commandBufferCount, } } -id MVKCommandPool::getMTLCommandBuffer(uint32_t queueIndex) { - return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(kMVKCommandUseEndCommandBuffer, true); +id MVKCommandPool::getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex) { + return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(cmdUse, true); } // Clear the command type pool member variables. 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 450fad66..5d4c328b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -53,7 +53,6 @@ class MVKSemaphore; class MVKTimelineSemaphore; class MVKDeferredOperation; class MVKEvent; -class MVKSemaphoreImpl; class MVKQueryPool; class MVKShaderModule; class MVKPipelineCache; @@ -440,6 +439,11 @@ protected: #pragma mark - #pragma mark MVKDevice +typedef enum { + MVKActivityPerformanceValueTypeDuration, + MVKActivityPerformanceValueTypeByteCount, +} MVKActivityPerformanceValueType; + typedef struct MVKMTLBlitEncoder { id mtlBlitEncoder = nil; id mtlCmdBuffer = nil; @@ -704,13 +708,17 @@ public: void addActivityPerformance(MVKPerformanceTracker& activityTracker, uint64_t startTime, uint64_t endTime = 0) { if (_isPerformanceTracking) { - updateActivityPerformance(activityTracker, startTime, endTime); + updateActivityPerformance(activityTracker, mvkGetElapsedMilliseconds(startTime, endTime)); + } + }; - // Log call not locked. Very minor chance that the tracker data will be updated during log call, - // resulting in an inconsistent report. Not worth taking lock perf hit for rare inline reporting. - if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { - logActivityPerformance(activityTracker, _performanceStatistics, true); - } + /** + * If performance is being tracked, adds the performance for an activity + * with a kilobyte count, to the given performance statistics. + */ + void addActivityByteCount(MVKPerformanceTracker& activityTracker, uint64_t byteCount) { + if (_isPerformanceTracking) { + updateActivityPerformance(activityTracker, double(byteCount / KIBI)); } }; @@ -885,8 +893,11 @@ protected: template void enableFeatures(S* pRequested, VkBool32* pEnabledBools, const VkBool32* pRequestedBools, const VkBool32* pAvailableBools, uint32_t count); void enableExtensions(const VkDeviceCreateInfo* pCreateInfo); const char* getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); - void logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); - void updateActivityPerformance(MVKPerformanceTracker& activity, uint64_t startTime, uint64_t endTime); + MVKActivityPerformanceValueType getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); + void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); + void logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); + void logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); + void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); void getDescriptorVariableDescriptorCountLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport, VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport); @@ -908,7 +919,6 @@ protected: id _defaultMTLSamplerState = nil; id _dummyBlitMTLBuffer = nil; uint32_t _globalVisibilityQueryCount = 0; - MVKConfigActivityPerformanceLoggingStyle _activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; bool _isPerformanceTracking = false; bool _isCurrentlyAutoGPUCapturing = false; bool _isUsingMetalArgumentBuffers 
= false; @@ -1056,6 +1066,15 @@ protected: #pragma mark - #pragma mark Support functions +/** + * Returns an autoreleased array containing the MTLDevices available on this system, + * sorted according to power, with higher power GPU's at the front of the array. + * This ensures that a lazy app that simply grabs the first GPU will get a high-power + * one by default. If MVKConfiguration::forceLowPowerGPU is enabled, the returned + * array will only include low-power devices. + */ +NSArray>* mvkGetAvailableMTLDevicesArray(); + /** Returns the registry ID of the specified device, or zero if the device does not have a registry ID. */ uint64_t mvkGetRegistryID(id mtlDevice); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0b11d4df..d44e6492 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -3056,32 +3056,23 @@ uint64_t MVKPhysicalDevice::getVRAMSize() { } } +// If possible, retrieve from the MTLDevice, otherwise from available memory size, or a fixed conservative estimate. uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() { -#if MVK_MACOS +#if MVK_XCODE_14 || MVK_MACOS if ( [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) { return _mtlDevice.recommendedMaxWorkingSetSize; } #endif -#if MVK_IOS_OR_TVOS - // GPU and CPU use shared memory. Estimate the current free memory in the system. uint64_t freeMem = mvkGetAvailableMemorySize(); - if (freeMem) { return freeMem; } -#endif - - return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory + return freeMem ? freeMem : 256 * MEBI; } +// If possible, retrieve from the MTLDevice, otherwise use the current memory used by this process. uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() { if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) { return _mtlDevice.currentAllocatedSize; } -#if MVK_IOS_OR_TVOS - // We can use the current memory used by this process as a reasonable approximation. 
return mvkGetUsedMemorySize(); -#endif -#if MVK_MACOS - return 0; -#endif } // When using argument buffers, Metal imposes a hard limit on the number of MTLSamplerState @@ -3249,31 +3240,14 @@ bool MVKPhysicalDevice::needsCounterSetRetained() { } void MVKPhysicalDevice::logGPUInfo() { - string devTypeStr; - switch (_properties.deviceType) { - case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: - devTypeStr = "Discrete"; - break; - case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: - devTypeStr = "Integrated"; - break; - case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: - devTypeStr = "Virtual"; - break; - case VK_PHYSICAL_DEVICE_TYPE_CPU: - devTypeStr = "CPU Emulation"; - break; - default: - devTypeStr = "Unknown"; - break; - } - string logMsg = "GPU device:"; logMsg += "\n\t\tmodel: %s"; logMsg += "\n\t\ttype: %s"; logMsg += "\n\t\tvendorID: %#06x"; logMsg += "\n\t\tdeviceID: %#06x"; logMsg += "\n\t\tpipelineCacheUUID: %s"; + logMsg += "\n\t\tGPU memory available: %llu MB"; + logMsg += "\n\t\tGPU memory used: %llu MB"; logMsg += "\n\tsupports the following Metal Versions, GPU's and Feature Sets:"; logMsg += "\n\t\tMetal Shading Language %s"; @@ -3356,9 +3330,29 @@ void MVKPhysicalDevice::logGPUInfo() { } #endif + string devTypeStr; + switch (_properties.deviceType) { + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: + devTypeStr = "Discrete"; + break; + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: + devTypeStr = "Integrated"; + break; + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: + devTypeStr = "Virtual"; + break; + case VK_PHYSICAL_DEVICE_TYPE_CPU: + devTypeStr = "CPU Emulation"; + break; + default: + devTypeStr = "Unknown"; + break; + } + NSUUID* nsUUID = [[NSUUID alloc] initWithUUIDBytes: _properties.pipelineCacheUUID]; // temp retain - MVKLogInfo(logMsg.c_str(), _properties.deviceName, devTypeStr.c_str(), + MVKLogInfo(logMsg.c_str(), getName(), devTypeStr.c_str(), _properties.vendorID, _properties.deviceID, nsUUID.UUIDString.UTF8String, + getRecommendedMaxWorkingSetSize() / MEBI, getCurrentAllocatedSize() / MEBI, SPIRVToMSLConversionOptions::printMSLVersion(_metalFeatures.mslVersion).c_str()); [nsUUID release]; // temp release } @@ -3366,7 +3360,11 @@ void MVKPhysicalDevice::logGPUInfo() { MVKPhysicalDevice::~MVKPhysicalDevice() { mvkDestroyContainerContents(_queueFamilies); [_timestampMTLCounterSet release]; + + uint64_t memUsed = getCurrentAllocatedSize(); // Retrieve before releasing MTLDevice [_mtlDevice release]; + + MVKLogInfo("Destroyed VkPhysicalDevice for GPU %s with %llu MB of GPU memory still allocated.", getName(), memUsed / MEBI); } @@ -4185,30 +4183,52 @@ void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, } } -void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, - uint64_t startTime, uint64_t endTime) { - - double currInterval = mvkGetElapsedMilliseconds(startTime, endTime); +void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue) { lock_guard lock(_perfLock); - activity.latestDuration = currInterval; - activity.minimumDuration = ((activity.minimumDuration == 0.0) - ? currInterval : - min(currInterval, activity.minimumDuration)); - activity.maximumDuration = max(currInterval, activity.maximumDuration); - double totalInterval = (activity.averageDuration * activity.count++) + currInterval; - activity.averageDuration = totalInterval / activity.count; + activity.latest = currentValue; + activity.minimum = ((activity.minimum == 0.0) + ? 
currentValue : + min(currentValue, activity.minimum)); + activity.maximum = max(currentValue, activity.maximum); + double total = (activity.average * activity.count++) + currentValue; + activity.average = total / activity.count; + + if (mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { + logActivityInline(activity, _performanceStatistics); + } } -void MVKDevice::logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { - MVKLogInfo("%s%s%s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d", - (isInline ? "" : " "), +void MVKDevice::logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { + if (getActivityPerformanceValueType(activity, _performanceStatistics) == MVKActivityPerformanceValueTypeByteCount) { + logActivityByteCount(activity, _performanceStatistics, true); + } else { + logActivityDuration(activity, _performanceStatistics, true); + } +} +void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { + const char* fmt = (isInline + ? "%s performance avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d" + : " %-45s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d"); + MVKLogInfo(fmt, getActivityPerformanceDescription(activity, perfStats), - (isInline ? " performance" : ""), - activity.averageDuration, - activity.latestDuration, - activity.minimumDuration, - activity.maximumDuration, + activity.average, + activity.latest, + activity.minimum, + activity.maximum, + activity.count); +} + +void MVKDevice::logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { + const char* fmt = (isInline + ? 
"%s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d" + : " %-45s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d"); + MVKLogInfo(fmt, + getActivityPerformanceDescription(activity, perfStats), + uint64_t(activity.average) / KIBI, + uint64_t(activity.latest) / KIBI, + uint64_t(activity.minimum) / KIBI, + uint64_t(activity.maximum) / KIBI, activity.count); } @@ -4218,49 +4238,71 @@ void MVKDevice::logPerformanceSummary() { MVKPerformanceStatistics perfStats; getPerformanceStatistics(&perfStats); - logActivityPerformance(perfStats.queue.frameInterval, perfStats); - logActivityPerformance(perfStats.queue.nextCAMetalDrawable, perfStats); - logActivityPerformance(perfStats.queue.mtlCommandBufferCompletion, perfStats); - logActivityPerformance(perfStats.queue.mtlQueueAccess, perfStats); - logActivityPerformance(perfStats.shaderCompilation.hashShaderCode, perfStats); - logActivityPerformance(perfStats.shaderCompilation.spirvToMSL, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslCompile, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslLoad, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslCompress, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslDecompress, perfStats); - logActivityPerformance(perfStats.shaderCompilation.shaderLibraryFromCache, perfStats); - logActivityPerformance(perfStats.shaderCompilation.functionRetrieval, perfStats); - logActivityPerformance(perfStats.shaderCompilation.functionSpecialization, perfStats); - logActivityPerformance(perfStats.shaderCompilation.pipelineCompile, perfStats); - logActivityPerformance(perfStats.pipelineCache.sizePipelineCache, perfStats); - logActivityPerformance(perfStats.pipelineCache.readPipelineCache, perfStats); - logActivityPerformance(perfStats.pipelineCache.writePipelineCache, perfStats); +#define logDuration(s) logActivityDuration(perfStats.s, perfStats) +#define logByteCount(s) logActivityByteCount(perfStats.s, perfStats) + + logDuration(queue.frameInterval); + logDuration(queue.retrieveMTLCommandBuffer); + logDuration(queue.commandBufferEncoding); + logDuration(queue.submitCommandBuffers); + logDuration(queue.mtlCommandBufferExecution); + logDuration(queue.retrieveCAMetalDrawable); + logDuration(queue.presentSwapchains); + logDuration(shaderCompilation.hashShaderCode); + logDuration(shaderCompilation.spirvToMSL); + logDuration(shaderCompilation.mslCompile); + logDuration(shaderCompilation.mslLoad); + logDuration(shaderCompilation.mslCompress); + logDuration(shaderCompilation.mslDecompress); + logDuration(shaderCompilation.shaderLibraryFromCache); + logDuration(shaderCompilation.functionRetrieval); + logDuration(shaderCompilation.functionSpecialization); + logDuration(shaderCompilation.pipelineCompile); + logDuration(pipelineCache.sizePipelineCache); + logDuration(pipelineCache.readPipelineCache); + logDuration(pipelineCache.writePipelineCache); + logByteCount(device.gpuMemoryAllocated); +#undef logDuration +#undef logByteCount } const char* MVKDevice::getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { - if (&activity == &perfStats.shaderCompilation.hashShaderCode) { return "Hash shader SPIR-V code"; } - if (&activity == &perfStats.shaderCompilation.spirvToMSL) { return "Convert SPIR-V to MSL source code"; } - if (&activity == &perfStats.shaderCompilation.mslCompile) { return "Compile MSL source code into a MTLLibrary"; } - if (&activity == 
&perfStats.shaderCompilation.mslLoad) { return "Load pre-compiled MSL code into a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.mslCompress) { return "Compress MSL source code after compiling a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.mslDecompress) { return "Decompress MSL source code during pipeline cache write"; } - if (&activity == &perfStats.shaderCompilation.shaderLibraryFromCache) { return "Retrieve shader library from the cache"; } - if (&activity == &perfStats.shaderCompilation.functionRetrieval) { return "Retrieve a MTLFunction from a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.functionSpecialization) { return "Specialize a retrieved MTLFunction"; } - if (&activity == &perfStats.shaderCompilation.pipelineCompile) { return "Compile MTLFunctions into a pipeline"; } - if (&activity == &perfStats.pipelineCache.sizePipelineCache) { return "Calculate cache size required to write MSL to pipeline cache"; } - if (&activity == &perfStats.pipelineCache.readPipelineCache) { return "Read MSL from pipeline cache"; } - if (&activity == &perfStats.pipelineCache.writePipelineCache) { return "Write MSL to pipeline cache"; } - if (&activity == &perfStats.queue.mtlQueueAccess) { return "Access MTLCommandQueue"; } - if (&activity == &perfStats.queue.mtlCommandBufferCompletion) { return "Complete MTLCommandBuffer"; } - if (&activity == &perfStats.queue.nextCAMetalDrawable) { return "Retrieve a CAMetalDrawable from CAMetalLayer"; } - if (&activity == &perfStats.queue.frameInterval) { return "Frame interval"; } - return "Unknown performance activity"; +#define ifActivityReturnName(s, n) if (&activity == &perfStats.s) return n + ifActivityReturnName(shaderCompilation.hashShaderCode, "Hash shader SPIR-V code"); + ifActivityReturnName(shaderCompilation.spirvToMSL, "Convert SPIR-V to MSL source code"); + ifActivityReturnName(shaderCompilation.mslCompile, "Compile MSL into a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslLoad, "Load pre-compiled MSL into a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslCompress, "Compress MSL after compiling a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslDecompress, "Decompress MSL for pipeline cache write"); + ifActivityReturnName(shaderCompilation.shaderLibraryFromCache, "Retrieve shader library from the cache"); + ifActivityReturnName(shaderCompilation.functionRetrieval, "Retrieve a MTLFunction from a MTLLibrary"); + ifActivityReturnName(shaderCompilation.functionSpecialization, "Specialize a retrieved MTLFunction"); + ifActivityReturnName(shaderCompilation.pipelineCompile, "Compile MTLFunctions into a pipeline"); + ifActivityReturnName(pipelineCache.sizePipelineCache, "Calculate pipeline cache size"); + ifActivityReturnName(pipelineCache.readPipelineCache, "Read MSL from pipeline cache"); + ifActivityReturnName(pipelineCache.writePipelineCache, "Write MSL to pipeline cache"); + ifActivityReturnName(queue.retrieveMTLCommandBuffer, "Retrieve a MTLCommandBuffer"); + ifActivityReturnName(queue.commandBufferEncoding, "Encode VkCommandBuffer to MTLCommandBuffer"); + ifActivityReturnName(queue.submitCommandBuffers, "vkQueueSubmit() encoding to MTLCommandBuffers"); + ifActivityReturnName(queue.mtlCommandBufferExecution, "Execute a MTLCommandBuffer on GPU"); + ifActivityReturnName(queue.retrieveCAMetalDrawable, "Retrieve a CAMetalDrawable"); + ifActivityReturnName(queue.presentSwapchains, "Present swapchains in on GPU"); + ifActivityReturnName(queue.frameInterval, "Frame interval"); + 
ifActivityReturnName(device.gpuMemoryAllocated, "GPU memory allocated"); + return "Unknown performance activity"; +#undef ifActivityReturnName +} + +MVKActivityPerformanceValueType MVKDevice::getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { + if (&activity == &perfStats.device.gpuMemoryAllocated) return MVKActivityPerformanceValueTypeByteCount; + return MVKActivityPerformanceValueTypeDuration; } void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) { - lock_guard lock(_perfLock); + addActivityByteCount(_performanceStatistics.device.gpuMemoryAllocated, + _physicalDevice->getCurrentAllocatedSize()); + lock_guard lock(_perfLock); if (pPerf) { *pPerf = _performanceStatistics; } } @@ -4597,33 +4639,15 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice()); MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s", - _pProperties->deviceName, - _enabledExtensions.getEnabledCount(), - _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); + getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); } +// Perf stats that last the duration of the app process. +static MVKPerformanceStatistics _processPerformanceStatistics = {}; + void MVKDevice::initPerformanceTracking() { - _isPerformanceTracking = mvkConfig().performanceTracking; - _activityPerformanceLoggingStyle = mvkConfig().activityPerformanceLoggingStyle; - - _performanceStatistics.shaderCompilation.hashShaderCode = {}; - _performanceStatistics.shaderCompilation.spirvToMSL = {}; - _performanceStatistics.shaderCompilation.mslCompile = {}; - _performanceStatistics.shaderCompilation.mslLoad = {}; - _performanceStatistics.shaderCompilation.mslCompress = {}; - _performanceStatistics.shaderCompilation.mslDecompress = {}; - _performanceStatistics.shaderCompilation.shaderLibraryFromCache = {}; - _performanceStatistics.shaderCompilation.functionRetrieval = {}; - _performanceStatistics.shaderCompilation.functionSpecialization = {}; - _performanceStatistics.shaderCompilation.pipelineCompile = {}; - _performanceStatistics.pipelineCache.sizePipelineCache = {}; - _performanceStatistics.pipelineCache.writePipelineCache = {}; - _performanceStatistics.pipelineCache.readPipelineCache = {}; - _performanceStatistics.queue.mtlQueueAccess = {}; - _performanceStatistics.queue.mtlCommandBufferCompletion = {}; - _performanceStatistics.queue.nextCAMetalDrawable = {}; - _performanceStatistics.queue.frameInterval = {}; + _performanceStatistics = _processPerformanceStatistics; } void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo) { @@ -4920,9 +4944,16 @@ void MVKDevice::reservePrivateData(const VkDeviceCreateInfo* pCreateInfo) { } MVKDevice::~MVKDevice() { - if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) { - MVKLogInfo("Device activity performance summary:"); - logPerformanceSummary(); + if (_isPerformanceTracking) { + auto perfLogStyle = mvkConfig().activityPerformanceLoggingStyle; + if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) { + MVKLogInfo("Device activity performance summary:"); + logPerformanceSummary(); + } else if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE) { + MVKLogInfo("Process activity 
performance summary:"); + logPerformanceSummary(); + _processPerformanceStatistics = _performanceStatistics; + } } for (auto& queues : _queuesByQueueFamilyIndex) { @@ -4938,12 +4969,58 @@ MVKDevice::~MVKDevice() { stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE); mvkDestroyContainerContents(_privateDataSlots); + + MVKLogInfo("Destroyed VkDevice on GPU %s with %d Vulkan extensions enabled.", + getName(), _enabledExtensions.getEnabledCount()); } #pragma mark - #pragma mark Support functions +NSArray>* mvkGetAvailableMTLDevicesArray() { + NSMutableArray* mtlDevs = [NSMutableArray array]; // autoreleased + +#if MVK_MACOS + NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease]; + bool forceLowPower = mvkConfig().forceLowPowerGPU; + + // Populate the array of appropriate MTLDevices + for (id md in rawMTLDevs) { + if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; } + } + + // Sort by power + [mtlDevs sortUsingComparator: ^(id md1, id md2) { + BOOL md1IsLP = md1.isLowPower; + BOOL md2IsLP = md2.isLowPower; + + if (md1IsLP == md2IsLP) { + // If one device is headless and the other one is not, select the + // one that is not headless first. + BOOL md1IsHeadless = md1.isHeadless; + BOOL md2IsHeadless = md2.isHeadless; + if (md1IsHeadless == md2IsHeadless ) { + return NSOrderedSame; + } + return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending; + } + + return md2IsLP ? NSOrderedAscending : NSOrderedDescending; + }]; + + // If the survey found at least one device, return the array. + if (mtlDevs.count) { return mtlDevs; } + +#endif // MVK_MACOS + + // For other OS's, or for macOS if the survey returned empty, use the default device. + id md = [MTLCreateSystemDefaultDevice() autorelease]; + if (md) { [mtlDevs addObject: md]; } + + return mtlDevs; // retained +} + uint64_t mvkGetRegistryID(id mtlDevice) { return [mtlDevice respondsToSelector: @selector(registryID)] ? mtlDevice.registryID : 0; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 572e8f06..fb7c3dfa 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -31,6 +31,7 @@ class MVKImage; class MVKImageView; class MVKSwapchain; +class MVKQueue; class MVKCommandEncoder; @@ -409,8 +410,8 @@ protected: virtual id getCAMetalDrawable() = 0; void detachSwapchain(); + std::mutex _detachmentLock; MVKSwapchain* _swapchain; - std::mutex _swapchainLock; uint32_t _swapchainIndex; }; @@ -429,6 +430,7 @@ typedef struct MVKSwapchainImageAvailability { /** Presentation info. */ typedef struct { MVKPresentableSwapchainImage* presentableImage; + MVKQueue* queue; // The queue on which the vkQueuePresentKHR() command was executed. MVKFence* fence; // VK_EXT_swapchain_maintenance1 fence signaled when resources can be destroyed uint64_t desiredPresentTime; // VK_GOOGLE_display_timing desired presentation time in nanoseconds uint32_t presentID; // VK_GOOGLE_display_timing presentID @@ -454,12 +456,22 @@ public: /** Presents the contained drawable to the OS. */ void presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); + /** Called when the presentation begins. */ + void beginPresentation(const MVKImagePresentInfo& presentInfo); + + /** Called via callback when the presentation completes. */ + void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + + /** If this image is stuck in-flight, attempt to force it to complete. 
*/ + void forcePresentationCompletion(); #pragma mark Construction MVKPresentableSwapchainImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo, MVKSwapchain* swapchain, uint32_t swapchainIndex); + void destroy() override; + ~MVKPresentableSwapchainImage() override; protected: @@ -471,15 +483,14 @@ protected: MVKSwapchainImageAvailability getAvailability(); void makeAvailable(const MVKSwapchainSignaler& signaler); void makeAvailable(); - void acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); - void renderWatermark(id mtlCmdBuff); + VkResult acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); - id _mtlDrawable; - id _presentingMTLCmdBuff; + id _mtlDrawable = nil; MVKSwapchainImageAvailability _availability; MVKSmallVector _availabilitySignalers; - MVKSwapchainSignaler _preSignaler; + MVKSwapchainSignaler _preSignaler = {}; std::mutex _availabilityLock; + uint64_t _presentationStartTime = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 14870221..fa87643a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -19,6 +19,7 @@ #include "MVKImage.h" #include "MVKQueue.h" #include "MVKSwapchain.h" +#include "MVKSurface.h" #include "MVKCommandBuffer.h" #include "MVKCmdDebug.h" #include "MVKFoundation.h" @@ -1192,8 +1193,9 @@ MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device, } void MVKSwapchainImage::detachSwapchain() { - lock_guard lock(_swapchainLock); + lock_guard lock(_detachmentLock); _swapchain = nullptr; + _device = nullptr; } void MVKSwapchainImage::destroy() { @@ -1245,7 +1247,7 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { unmarkAsTracked(signaler); } -void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { +VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { lock_guard lock(_availabilityLock); // Upon acquisition, update acquisition ID immediately, to move it to the back of the chain, @@ -1256,18 +1258,21 @@ void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* s // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. releaseMetalDrawable(); + VkResult rslt = VK_SUCCESS; auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? semaphore->deferSignal() : 0}; if (_availability.isAvailable) { _availability.isAvailable = false; - // If signalling through a MTLEvent, and there's no command buffer presenting me, use an ephemeral MTLCommandBuffer. + // If signalling through a MTLEvent, signal through an ephemeral MTLCommandBuffer. // Another option would be to use MTLSharedEvent in MVKSemaphore, but that might // impose unacceptable performance costs to handle this particular case. @autoreleasepool { MVKSemaphore* mvkSem = signaler.semaphore; - id mtlCmdBuff = (mvkSem && mvkSem->isUsingCommandEncoding() - ? 
_device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage) - : nil); + id mtlCmdBuff = nil; + if (mvkSem && mvkSem->isUsingCommandEncoding()) { + mtlCmdBuff = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage); + if ( !mtlCmdBuff ) { rslt = VK_ERROR_OUT_OF_POOL_MEMORY; } + } signal(signaler, mtlCmdBuff); [mtlCmdBuff commit]; } @@ -1277,17 +1282,20 @@ void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* s _availabilitySignalers.push_back(signaler); } markAsTracked(signaler); + + return rslt; } id MVKPresentableSwapchainImage::getCAMetalDrawable() { - while ( !_mtlDrawable ) { - @autoreleasepool { // Reclaim auto-released drawable object before end of loop - uint64_t startTime = _device->getPerformanceTimestamp(); - - _mtlDrawable = [_swapchain->_mtlLayer.nextDrawable retain]; - if ( !_mtlDrawable ) { MVKLogError("CAMetalDrawable could not be acquired."); } - - _device->addActivityPerformance(_device->_performanceStatistics.queue.nextCAMetalDrawable, startTime); + if ( !_mtlDrawable ) { + @autoreleasepool { + uint32_t attemptCnt = _swapchain->getImageCount() * 2; // Attempt a resonable number of times + for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { + uint64_t startTime = _device->getPerformanceTimestamp(); + _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained + _device->addActivityPerformance(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + } + if ( !_mtlDrawable ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt); } } } return _mtlDrawable; @@ -1299,22 +1307,20 @@ void MVKPresentableSwapchainImage::presentCAMetalDrawable(id m MVKImagePresentInfo presentInfo) { lock_guard lock(_availabilityLock); - _swapchain->willPresentSurface(getMTLTexture(0), mtlCmdBuff); + _swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff); // According to Apple, it is more performant to call MTLDrawable present from within a // MTLCommandBuffer scheduled-handler than it is to call MTLCommandBuffer presentDrawable:. // But get current drawable now, intead of in handler, because a new drawable might be acquired by then. // Attach present handler before presenting to avoid race condition. id mtlDrwbl = getCAMetalDrawable(); + addPresentedHandler(mtlDrwbl, presentInfo); [mtlCmdBuff addScheduledHandler: ^(id mcb) { // Try to do any present mode transitions as late as possible in an attempt // to avoid visual disruptions on any presents already on the queue. if (presentInfo.presentMode != VK_PRESENT_MODE_MAX_ENUM_KHR) { mtlDrwbl.layer.displaySyncEnabledMVK = (presentInfo.presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); } - if (presentInfo.hasPresentTime) { - addPresentedHandler(mtlDrwbl, presentInfo); - } if (presentInfo.desiredPresentTime) { [mtlDrwbl presentAtTime: (double)presentInfo.desiredPresentTime * 1.0e-9]; } else { @@ -1362,34 +1368,45 @@ void MVKPresentableSwapchainImage::presentCAMetalDrawable(id m // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. 
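// Illustrative sketch (plain C++, not MoltenVK code): the reworked getCAMetalDrawable()
// above bounds its nextDrawable retries to twice the swapchain image count instead of
// spinning indefinitely. The helper below models that bounded-retry pattern; the acquire
// callback, the 2x-image-count budget, and the error text are assumptions for illustration.

#include <cstdint>
#include <cstdio>
#include <functional>

// Returns the first non-null result of acquire(), or nullptr once the attempt budget is spent.
template <typename T>
T* acquireWithRetry(const std::function<T*()>& acquire, uint32_t imageCount) {
    const uint32_t attemptCnt = imageCount * 2;                     // bounded, not an infinite loop
    T* obj = nullptr;
    for (uint32_t attemptIdx = 0; !obj && attemptIdx < attemptCnt; attemptIdx++) { obj = acquire(); }
    if ( !obj ) { std::fprintf(stderr, "Acquisition failed after %u attempts.\n", attemptCnt); }
    return obj;
}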
void MVKPresentableSwapchainImage::addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo) { + beginPresentation(presentInfo); + #if !MVK_OS_SIMULATOR if ([mtlDrawable respondsToSelector: @selector(addPresentedHandler:)]) { - retain(); // Ensure this image is not destroyed while awaiting presentation - [mtlDrawable addPresentedHandler: ^(id drawable) { - // Since we're in a callback, it's possible that the swapchain has been released by now. - // Lock the swapchain, and test if it is present before doing anything with it. - lock_guard cblock(_swapchainLock); - if (_swapchain) { _swapchain->recordPresentTime(presentInfo, drawable.presentedTime * 1.0e9); } - release(); + [mtlDrawable addPresentedHandler: ^(id mtlDrwbl) { + endPresentation(presentInfo, mtlDrwbl.presentedTime * 1.0e9); }]; - return; - } + } else #endif - - // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, - // treat it as if the present happened when requested. - // Since this function may be called in a callback, it's possible that - // the swapchain has been released by the time this function runs. - // Lock the swapchain, and test if it is present before doing anything with it. - lock_guard lock(_swapchainLock); - if (_swapchain) {_swapchain->recordPresentTime(presentInfo); } + { + // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, + // treat it as if the present happened when requested. + endPresentation(presentInfo); + } } -// Resets the MTLTexture and CAMetalDrawable underlying this image. +// Ensure this image and the swapchain are not destroyed while awaiting presentation +void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) { + retain(); + _swapchain->beginPresentation(presentInfo); + presentInfo.queue->beginPresentation(presentInfo); + _presentationStartTime = getDevice()->getPerformanceTimestamp(); +} + +void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo, + uint64_t actualPresentTime) { + { // Scope to avoid deadlock if release() is run within detachment lock + // If I have become detached from the swapchain, it means the swapchain, and possibly the + // VkDevice, have been destroyed by the time of this callback, so do not reference them. + lock_guard lock(_detachmentLock); + if (_device) { _device->addActivityPerformance(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } + if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } + } + presentInfo.queue->endPresentation(presentInfo); + release(); +} + +// Releases the CAMetalDrawable underlying this image. void MVKPresentableSwapchainImage::releaseMetalDrawable() { - for (uint8_t planeIndex = 0; planeIndex < _planes.size(); ++planeIndex) { - _planes[planeIndex]->releaseMTLTexture(); - } [_mtlDrawable release]; _mtlDrawable = nil; } @@ -1417,6 +1434,13 @@ void MVKPresentableSwapchainImage::makeAvailable() { } } +// Clear the existing CAMetalDrawable and retrieve and release a new transient one, +// in an attempt to trigger the existing CAMetalDrawable to complete it's callback. 
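// Illustrative sketch (not the MoltenVK classes): the addPresentedHandler()/
// beginPresentation()/endPresentation() changes above retain the image for the lifetime
// of the asynchronous presented-callback, touch the swapchain only under the detachment
// lock, and release outside that lock to avoid re-entering it during destruction.
// A compact C++ model of that lifetime rule, with assumed names:

#include <atomic>
#include <cstdint>
#include <mutex>

struct Swapchain { void onPresentationEnded() {} };

class PresentedImage {
public:
    void beginPresentation() { _refCount.fetch_add(1); }        // keep alive for the callback
    void endPresentation() {
        {   // Scope the lock so release() below never runs while the lock is held.
            std::lock_guard<std::mutex> lock(_detachmentLock);
            if (_swapchain) { _swapchain->onPresentationEnded(); }
        }
        release();
    }
    void detachSwapchain() {                                    // swapchain is being destroyed
        std::lock_guard<std::mutex> lock(_detachmentLock);
        _swapchain = nullptr;
    }
private:
    void release() { if (_refCount.fetch_sub(1) == 1) { delete this; } }  // heap-allocated by design
    std::mutex            _detachmentLock;
    Swapchain*            _swapchain = nullptr;
    std::atomic<uint32_t> _refCount  {1};
};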
+void MVKPresentableSwapchainImage::forcePresentationCompletion() { + releaseMetalDrawable(); + if (_swapchain) { @autoreleasepool { [_swapchain->_surface->getCAMetalLayer() nextDrawable]; } } +} + #pragma mark Construction @@ -1426,11 +1450,14 @@ MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device, uint32_t swapchainIndex) : MVKSwapchainImage(device, pCreateInfo, swapchain, swapchainIndex) { - _mtlDrawable = nil; - _availability.acquisitionID = _swapchain->getNextAcquisitionID(); _availability.isAvailable = true; - _preSignaler = MVKSwapchainSignaler{nullptr, nullptr, 0}; +} + + +void MVKPresentableSwapchainImage::destroy() { + forcePresentationCompletion(); + MVKSwapchainImage::destroy(); } // Unsignaled signalers will exist if this image is acquired more than it is presented. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h index 9e41ac71..aa831845 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h @@ -179,7 +179,6 @@ protected: void propagateDebugName() override {} void initProcAddrs(); void initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo); - NSArray>* getAvailableMTLDevicesArray(); VkDebugReportFlagsEXT getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel); VkDebugUtilsMessageSeverityFlagBitsEXT getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel); VkDebugUtilsMessageTypeFlagsEXT getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index 5c6e6cb9..e3c94135 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -238,94 +238,37 @@ void MVKInstance::debugReportMessage(MVKVulkanAPIObject* mvkAPIObj, MVKConfigLog VkDebugReportFlagsEXT MVKInstance::getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return VK_DEBUG_REPORT_DEBUG_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_INFO: - return VK_DEBUG_REPORT_INFORMATION_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_REPORT_WARNING_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_REPORT_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_REPORT_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_REPORT_WARNING_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_REPORT_INFORMATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_REPORT_DEBUG_BIT_EXT; + default: return VK_DEBUG_REPORT_ERROR_BIT_EXT; } } VkDebugUtilsMessageSeverityFlagBitsEXT MVKInstance::getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_INFO: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return 
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; + default: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; } } VkDebugUtilsMessageTypeFlagsEXT MVKInstance::getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - case MVK_CONFIG_LOG_LEVEL_INFO: - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; + default: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; } } #pragma mark Object Creation -// Returns an autoreleased array containing the MTLDevices available on this system, sorted according -// to power, with higher power GPU's at the front of the array. This ensures that a lazy app that simply -// grabs the first GPU will get a high-power one by default. If MVKConfiguration::forceLowPowerGPU is set, -// the returned array will only include low-power devices. -NSArray>* MVKInstance::getAvailableMTLDevicesArray() { - NSMutableArray* mtlDevs = [NSMutableArray array]; - -#if MVK_MACOS - NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease]; - if (rawMTLDevs) { - bool forceLowPower = mvkConfig().forceLowPowerGPU; - - // Populate the array of appropriate MTLDevices - for (id md in rawMTLDevs) { - if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; } - } - - // Sort by power - [mtlDevs sortUsingComparator: ^(id md1, id md2) { - BOOL md1IsLP = md1.isLowPower; - BOOL md2IsLP = md2.isLowPower; - - if (md1IsLP == md2IsLP) { - // If one device is headless and the other one is not, select the - // one that is not headless first. - BOOL md1IsHeadless = md1.isHeadless; - BOOL md2IsHeadless = md2.isHeadless; - if (md1IsHeadless == md2IsHeadless ) { - return NSOrderedSame; - } - return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending; - } - - return md2IsLP ? NSOrderedAscending : NSOrderedDescending; - }]; - - } -#endif // MVK_MACOS - -#if MVK_IOS_OR_TVOS - id md = [MTLCreateSystemDefaultDevice() autorelease]; - if (md) { [mtlDevs addObject: md]; } -#endif // MVK_IOS_OR_TVOS - - return mtlDevs; // retained -} - MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExtensions(this) { initDebugCallbacks(pCreateInfo); // Do before any creation activities @@ -347,7 +290,7 @@ MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExte // This effort creates a number of autoreleased instances of Metal // and other Obj-C classes, so wrap it all in an autorelease pool. 
@autoreleasepool { - NSArray>* mtlDevices = getAvailableMTLDevicesArray(); + NSArray>* mtlDevices = mvkGetAvailableMTLDevicesArray(); _physicalDevices.reserve(mtlDevices.count); for (id mtlDev in mtlDevices) { _physicalDevices.push_back(new MVKPhysicalDevice(this, mtlDev)); @@ -782,5 +725,9 @@ MVKInstance::~MVKInstance() { lock_guard lock(_dcbLock); mvkDestroyContainerContents(_debugReportCallbacks); + + MVKLogInfo("Destroyed VkInstance for Vulkan version %s with %d Vulkan extensions enabled.", + mvkGetVulkanVersionString(_appInfo.apiVersion).c_str(), + _enabledExtensions.getEnabledCount()); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm index 3044f66d..7e1c1a19 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm @@ -1482,26 +1482,21 @@ void MVKPixelFormats::addMTLVertexFormatCapabilities(id mtlDevice, } } -// If supporting a physical device, retrieve the MTLDevice from it, -// otherwise create a temp copy of the system default MTLDevice. +// If supporting a physical device, retrieve the MTLDevice from it, otherwise +// retrieve the array of physical GPU devices, and use the first one. +// Retrieving the GPUs creates a number of autoreleased instances of Metal +// and other Obj-C classes, so wrap it all in an autorelease pool. void MVKPixelFormats::modifyMTLFormatCapabilities() { if (_physicalDevice) { modifyMTLFormatCapabilities(_physicalDevice->getMTLDevice()); } else { -#if MVK_IOS_OR_TVOS - id mtlDevice = MTLCreateSystemDefaultDevice(); // temp retained -#endif -#if MVK_MACOS - NSArray>* mtlDevices = MTLCopyAllDevices(); // temp retained - id mtlDevice = [mtlDevices count] > 0 ? [mtlDevices[0] retain] : MTLCreateSystemDefaultDevice(); // temp retained - [mtlDevices release]; // temp release -#endif - modifyMTLFormatCapabilities(mtlDevice); - [mtlDevice release]; // release temp instance + @autoreleasepool { + auto* mtlDevs = mvkGetAvailableMTLDevicesArray(); + if (mtlDevs.count) { modifyMTLFormatCapabilities(mtlDevs[0]); } + } } } - // Mac Catalyst does not support feature sets, so we redefine them to GPU families in MVKDevice.h. #if MVK_MACCAT #define addFeatSetMTLPixFmtCaps(FEAT_SET, MTL_FMT, CAPS) \ diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index bcefd2f3..0de3d2b8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -86,6 +86,9 @@ public: /** Returns a pointer to the Vulkan instance. */ MVKInstance* getInstance() override { return _device->getInstance(); } + /** Return the name of this queue. */ + const std::string& getName() { return _name; } + #pragma mark Queue submissions /** Submits the specified command buffers to the queue. */ @@ -97,8 +100,11 @@ public: /** Block the current thread until this queue is idle. */ VkResult waitIdle(MVKCommandUse cmdUse); - /** Return the name of this queue. */ - const std::string& getName() { return _name; } + /** Mark the beginning of a swapchain image presentation. */ + void beginPresentation(const MVKImagePresentInfo& presentInfo); + + /** Mark the end of a swapchain image presentation. 
*/ + void endPresentation(const MVKImagePresentInfo& presentInfo); #pragma mark Metal @@ -140,25 +146,29 @@ protected: void initName(); void initExecQueue(); void initMTLCommandQueue(); - void initGPUCaptureScopes(); void destroyExecQueue(); VkResult submit(MVKQueueSubmission* qSubmit); NSString* getMTLCommandBufferLabel(MVKCommandUse cmdUse); + void handleMTLCommandBufferError(id mtlCmdBuff); + void waitSwapchainPresentations(MVKCommandUse cmdUse); MVKQueueFamily* _queueFamily; - uint32_t _index; - float _priority; - dispatch_queue_t _execQueue; - id _mtlQueue; + MVKSemaphoreImpl _presentationCompletionBlocker; + std::unordered_map _presentedImages; std::string _name; - NSString* _mtlCmdBuffLabelEndCommandBuffer; - NSString* _mtlCmdBuffLabelQueueSubmit; - NSString* _mtlCmdBuffLabelQueuePresent; - NSString* _mtlCmdBuffLabelDeviceWaitIdle; - NSString* _mtlCmdBuffLabelQueueWaitIdle; - NSString* _mtlCmdBuffLabelAcquireNextImage; - NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges; - MVKGPUCaptureScope* _submissionCaptureScope; + dispatch_queue_t _execQueue; + id _mtlQueue = nil; + NSString* _mtlCmdBuffLabelBeginCommandBuffer = nil; + NSString* _mtlCmdBuffLabelQueueSubmit = nil; + NSString* _mtlCmdBuffLabelQueuePresent = nil; + NSString* _mtlCmdBuffLabelDeviceWaitIdle = nil; + NSString* _mtlCmdBuffLabelQueueWaitIdle = nil; + NSString* _mtlCmdBuffLabelAcquireNextImage = nil; + NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; + MVKGPUCaptureScope* _submissionCaptureScope = nil; + std::mutex _presentedImagesLock; + float _priority; + uint32_t _index; }; @@ -178,7 +188,7 @@ public: * * Upon completion of this function, no further calls should be made to this instance. */ - virtual void execute() = 0; + virtual VkResult execute() = 0; MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, @@ -190,6 +200,7 @@ protected: friend class MVKQueue; virtual void finish() = 0; + MVKDevice* getDevice() { return _queue->getDevice(); } MVKQueue* _queue; MVKSmallVector> _waitSemaphores; @@ -206,7 +217,7 @@ protected: class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { public: - void execute() override; + VkResult execute() override; MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); @@ -217,7 +228,7 @@ protected: id getActiveMTLCommandBuffer(); void setActiveMTLCommandBuffer(id mtlCmdBuff); - void commitActiveMTLCommandBuffer(bool signalCompletion = false); + VkResult commitActiveMTLCommandBuffer(bool signalCompletion = false); void finish() override; virtual void submitCommandBuffers() {} @@ -238,20 +249,10 @@ template class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission { public: - MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence) : - MVKQueueCommandBufferSubmission(queue, pSubmit, fence, kMVKCommandUseQueueSubmit) { - - // pSubmit can be null if just tracking the fence alone - if (pSubmit) { - uint32_t cbCnt = pSubmit->commandBufferCount; - _cmdBuffers.reserve(cbCnt); - for (uint32_t i = 0; i < cbCnt; i++) { - MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); - _cmdBuffers.push_back(cb); - setConfigurationResult(cb->getConfigurationResult()); - } - } - } + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); protected: void submitCommandBuffers() override; @@ -267,7 +268,7 @@ protected: class 
MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission { public: - void execute() override; + VkResult execute() override; MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 0ad14307..293f50ef 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -18,6 +18,7 @@ #include "MVKInstance.h" #include "MVKQueue.h" +#include "MVKSurface.h" #include "MVKSwapchain.h" #include "MVKSync.h" #include "MVKFoundation.h" @@ -68,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); } // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased. // This is critical for apps that don't use standard OS autoreleasing runloop threading. -static inline void execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { qSubmit->execute(); } } +static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } } // Executes the submmission, either immediately, or by dispatching to an execution queue. // Submissions to the execution queue are wrapped in a dedicated autoreleasepool. @@ -80,10 +81,12 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) { if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils VkResult rslt = qSubmit->getConfigurationResult(); // Extract result before submission to avoid race condition with early destruction - if (_execQueue) { - dispatch_async(_execQueue, ^{ execute(qSubmit); } ); - } else { - execute(qSubmit); + if (rslt == VK_SUCCESS) { + if (_execQueue) { + dispatch_async(_execQueue, ^{ execute(qSubmit); } ); + } else { + rslt = execute(qSubmit); + } } return rslt; } @@ -103,19 +106,19 @@ VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, Vk MVKQueueCommandBufferSubmission* mvkSub; uint32_t cbCnt = pVkSub->commandBufferCount; if (cbCnt <= 1) { - mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 16) { - mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 32) { - mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 64) { - mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 128) { - mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 256) { - mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil, cmdUse); } else { - mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil, cmdUse); } VkResult subRslt = submit(mvkSub); @@ -128,29 +131,62 @@ VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) { return submit(new 
MVKQueuePresentSurfaceSubmission(this, pPresentInfo)); } -// Create an empty submit struct and fence, submit to queue and wait on fence. VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + VkResult rslt = _device->getConfigurationResult(); + if (rslt != VK_SUCCESS) { return rslt; } - VkFenceCreateInfo vkFenceInfo = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - }; + auto* mtlCmdBuff = getMTLCommandBuffer(cmdUse); + [mtlCmdBuff commit]; + [mtlCmdBuff waitUntilCompleted]; - // The MVKFence is retained by the command submission, and may outlive this function while - // the command submission finishes, so we can't allocate MVKFence locally on the stack. - MVKFence* mvkFence = new MVKFence(_device, &vkFenceInfo); - VkFence vkFence = (VkFence)mvkFence; - submit(0, nullptr, vkFence, cmdUse); - VkResult rslt = mvkWaitForFences(_device, 1, &vkFence, false); - mvkFence->destroy(); - return rslt; + waitSwapchainPresentations(cmdUse); + + return VK_SUCCESS; +} + +// If there are any swapchain presentations in flight, wait a few frames for them to complete. +// If they don't complete within a few frames, attempt to force them to complete, and wait another +// few frames for that to happen. If there are still swapchain presentations that haven't completed, +// log a warning, and force them to end presentation, so the images and drawables will be released. +void MVKQueue::waitSwapchainPresentations(MVKCommandUse cmdUse) { + auto waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; + if (_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9)) { return; } + + auto imgCnt = _presentationCompletionBlocker.getReservationCount(); + MVKPresentableSwapchainImage* images[imgCnt]; + mvkClear(images, imgCnt); + + { + // Scope of image lock limited to creating array copy of uncompleted presentations + // Populate a working array of the unpresented images. + lock_guard lock(_presentedImagesLock); + size_t imgIdx = 0; + for (auto imgPair : _presentedImages) { images[imgIdx++] = imgPair.first; } + } + + // Attempt to force each image to complete presentation through the callback. + for (size_t imgIdx = 0; imgIdx < imgCnt && _presentationCompletionBlocker.getReservationCount(); imgIdx++) { + auto* img = images[imgIdx]; + if (img) { img->forcePresentationCompletion(); } + } + + // Wait for forced presentation completions. If we still have unfinished swapchain image + // presentations, log a warning, and force each image to end, so that it can be released. 
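// Illustrative sketch (an assumption about semantics; MVKSemaphoreImpl is not shown in
// this patch): waitSwapchainPresentations() above treats _presentationCompletionBlocker
// as a counting blocker, where each presentation reserves a slot, each completion releases
// one, and waiters block with a timeout until the count drains to zero. A minimal
// condition-variable version of that behaviour:

#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

class CompletionBlocker {
public:
    void reserve() { std::lock_guard<std::mutex> lock(_lock); ++_outstanding; }
    void release() {
        std::lock_guard<std::mutex> lock(_lock);
        if (_outstanding > 0 && --_outstanding == 0) { _cv.notify_all(); }
    }
    // Returns true if every reservation was released before the timeout expired.
    bool wait(std::chrono::nanoseconds timeout) {
        std::unique_lock<std::mutex> lock(_lock);
        return _cv.wait_for(lock, timeout, [this]{ return _outstanding == 0; });
    }
    uint32_t getReservationCount() { std::lock_guard<std::mutex> lock(_lock); return _outstanding; }
private:
    std::mutex              _lock;
    std::condition_variable _cv;
    uint32_t                _outstanding = 0;
};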
+ if ( !_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9) ) { + reportWarning(VK_TIMEOUT, "%s timed out after %d frames while awaiting %d swapchain image presentations to complete.", + mvkVkCommandName(cmdUse), waitFrames * 2, _presentationCompletionBlocker.getReservationCount()); + for (size_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) { + auto* img = images[imgIdx]; + if (_presentedImages.count(img)) { img->endPresentation({.queue = this, .presentableImage = img}); } + } + } } id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) { id mtlCmdBuff = nil; + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); #if MVK_XCODE_12 if ([_mtlQueue respondsToSelector: @selector(commandBufferWithDescriptor:)]) { MTLCommandBufferDescriptor* mtlCmdBuffDesc = [MTLCommandBufferDescriptor new]; // temp retain @@ -167,53 +203,145 @@ id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool re } else { mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences]; } - setLabelIfNotNil(mtlCmdBuff, getMTLCommandBufferLabel(cmdUse)); + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); + NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse); + setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel); + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { handleMTLCommandBufferError(mtlCB); }]; + + if ( !mtlCmdBuff ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "%s could not be acquired.", mtlCmdBuffLabel.UTF8String); } return mtlCmdBuff; } NSString* MVKQueue::getMTLCommandBufferLabel(MVKCommandUse cmdUse) { -#define CASE_GET_LABEL(cmdUse) \ - case kMVKCommandUse ##cmdUse: \ - if ( !_mtlCmdBuffLabel ##cmdUse ) { _mtlCmdBuffLabel ##cmdUse = [[NSString stringWithFormat: @"%@ on Queue %d-%d", mvkMTLCommandBufferLabel(kMVKCommandUse ##cmdUse), _queueFamily->getIndex(), _index] retain]; } \ - return _mtlCmdBuffLabel ##cmdUse +#define CASE_GET_LABEL(cu) \ + case kMVKCommandUse ##cu: \ + if ( !_mtlCmdBuffLabel ##cu ) { _mtlCmdBuffLabel ##cu = [[NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(kMVKCommandUse ##cu), _queueFamily->getIndex(), _index] retain]; } \ + return _mtlCmdBuffLabel ##cu switch (cmdUse) { - CASE_GET_LABEL(EndCommandBuffer); + CASE_GET_LABEL(BeginCommandBuffer); CASE_GET_LABEL(QueueSubmit); CASE_GET_LABEL(QueuePresent); CASE_GET_LABEL(QueueWaitIdle); CASE_GET_LABEL(DeviceWaitIdle); CASE_GET_LABEL(AcquireNextImage); CASE_GET_LABEL(InvalidateMappedMemoryRanges); - default: return mvkMTLCommandBufferLabel(cmdUse); + default: + MVKAssert(false, "Uncached MTLCommandBuffer label for command use %s.", mvkVkCommandName(cmdUse)); + return [NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(cmdUse), _queueFamily->getIndex(), _index]; } #undef CASE_GET_LABEL } +#if MVK_XCODE_12 +static const char* mvkStringFromMTLCommandEncoderErrorState(MTLCommandEncoderErrorState errState) { + switch (errState) { + case MTLCommandEncoderErrorStateUnknown: return "unknown"; + case MTLCommandEncoderErrorStateAffected: return "affected"; + case MTLCommandEncoderErrorStateCompleted: return "completed"; + case MTLCommandEncoderErrorStateFaulted: return "faulted"; + case MTLCommandEncoderErrorStatePending: return "pending"; + } + return "unknown"; +} +#endif + +void MVKQueue::handleMTLCommandBufferError(id mtlCmdBuff) { + if (mtlCmdBuff.status != MTLCommandBufferStatusError) { return; } + + // If a command buffer error has occurred, report the error. 
If the error affects + // the physical device, always mark both the device and physical device as lost. + // If the error is local to this command buffer, optionally mark the device (but not the + // physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice. + VkResult vkErr = VK_ERROR_UNKNOWN; + bool markDeviceLoss = !mvkConfig().resumeLostDevice; + bool markPhysicalDeviceLoss = false; + switch (mtlCmdBuff.error.code) { + case MTLCommandBufferErrorBlacklisted: + case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement. +#if MVK_MACOS && !MVK_MACCAT + case MTLCommandBufferErrorDeviceRemoved: +#endif + vkErr = VK_ERROR_DEVICE_LOST; + markDeviceLoss = true; + markPhysicalDeviceLoss = true; + break; + case MTLCommandBufferErrorTimeout: + vkErr = VK_TIMEOUT; + break; +#if MVK_XCODE_13 + case MTLCommandBufferErrorStackOverflow: +#endif + case MTLCommandBufferErrorPageFault: + case MTLCommandBufferErrorOutOfMemory: + default: + vkErr = VK_ERROR_OUT_OF_DEVICE_MEMORY; + break; + } + reportError(vkErr, "MTLCommandBuffer \"%s\" execution failed (code %li): %s", + mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : "", + mtlCmdBuff.error.code, mtlCmdBuff.error.localizedDescription.UTF8String); + if (markDeviceLoss) { getDevice()->markLost(markPhysicalDeviceLoss); } + +#if MVK_XCODE_12 + if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) { + if (NSArray>* mtlEncInfo = mtlCmdBuff.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) { + MVKLogInfo("Encoders for %p \"%s\":", mtlCmdBuff, mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : ""); + for (id enc in mtlEncInfo) { + MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromMTLCommandEncoderErrorState(enc.errorState)); + if (enc.debugSignposts.count > 0) { + MVKLogInfo(" Debug signposts:"); + for (NSString* signpost in enc.debugSignposts) { + MVKLogInfo(" - %s", signpost.UTF8String); + } + } + } + } + } + if ([mtlCmdBuff respondsToSelector: @selector(logs)]) { + bool isFirstMsg = true; + for (id log in mtlCmdBuff.logs) { + if (isFirstMsg) { + MVKLogInfo("Shader log messages:"); + isFirstMsg = false; + } + MVKLogInfo("%s", log.description.UTF8String); + } + } +#endif +} + +// _presentedImages counts presentations per swapchain image, because the presentation of an image can +// begin before the previous presentation of that image has indicated that it has completed via a callback. 
+void MVKQueue::beginPresentation(const MVKImagePresentInfo& presentInfo) { + lock_guard lock(_presentedImagesLock); + _presentationCompletionBlocker.reserve(); + _presentedImages[presentInfo.presentableImage]++; +} + +void MVKQueue::endPresentation(const MVKImagePresentInfo& presentInfo) { + lock_guard lock(_presentedImagesLock); + _presentationCompletionBlocker.release(); + if (_presentedImages[presentInfo.presentableImage]) { + _presentedImages[presentInfo.presentableImage]--; + } + if ( !_presentedImages[presentInfo.presentableImage] ) { + _presentedImages.erase(presentInfo.presentableImage); + } +} #pragma mark Construction #define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED -MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority) - : MVKDeviceTrackingMixin(device) { - +MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority) : MVKDeviceTrackingMixin(device) { _queueFamily = queueFamily; _index = index; _priority = priority; - _mtlCmdBuffLabelEndCommandBuffer = nil; - _mtlCmdBuffLabelQueueSubmit = nil; - _mtlCmdBuffLabelQueuePresent = nil; - _mtlCmdBuffLabelDeviceWaitIdle = nil; - _mtlCmdBuffLabelQueueWaitIdle = nil; - _mtlCmdBuffLabelAcquireNextImage = nil; - _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; - initName(); initExecQueue(); initMTLCommandQueue(); - initGPUCaptureScopes(); } void MVKQueue::initName() { @@ -236,23 +364,15 @@ void MVKQueue::initExecQueue() { } } -// Retrieves and initializes the Metal command queue. +// Retrieves and initializes the Metal command queue and Xcode GPU capture scopes void MVKQueue::initMTLCommandQueue() { - uint64_t startTime = _device->getPerformanceTimestamp(); _mtlQueue = _queueFamily->getMTLCommandQueue(_index); // not retained (cached in queue family) - _device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, startTime); -} -// Initializes Xcode GPU capture scopes -void MVKQueue::initGPUCaptureScopes() { _submissionCaptureScope = new MVKGPUCaptureScope(this); - if (_queueFamily->getIndex() == mvkConfig().defaultGPUCaptureScopeQueueFamilyIndex && _index == mvkConfig().defaultGPUCaptureScopeQueueIndex) { - getDevice()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME, _mtlQueue); _submissionCaptureScope->makeDefault(); - } _submissionCaptureScope->beginScope(); // Allow Xcode to capture the first frame if desired. } @@ -261,7 +381,7 @@ MVKQueue::~MVKQueue() { destroyExecQueue(); _submissionCaptureScope->destroy(); - [_mtlCmdBuffLabelEndCommandBuffer release]; + [_mtlCmdBuffLabelBeginCommandBuffer release]; [_mtlCmdBuffLabelQueueSubmit release]; [_mtlCmdBuffLabelQueuePresent release]; [_mtlCmdBuffLabelDeviceWaitIdle release]; @@ -306,7 +426,7 @@ MVKQueueSubmission::~MVKQueueSubmission() { #pragma mark - #pragma mark MVKQueueCommandBufferSubmission -void MVKQueueCommandBufferSubmission::execute() { +VkResult MVKQueueCommandBufferSubmission::execute() { _queue->_submissionCaptureScope->beginScope(); @@ -321,7 +441,7 @@ void MVKQueueCommandBufferSubmission::execute() { // Commit the last MTLCommandBuffer. // Nothing after this because callback might destroy this instance before this function ends. - commitActiveMTLCommandBuffer(true); + return commitActiveMTLCommandBuffer(true); } // Returns the active MTLCommandBuffer, lazily retrieving it from the queue if needed. @@ -341,24 +461,11 @@ void MVKQueueCommandBufferSubmission::setActiveMTLCommandBuffer(id mtlCmdBuff = signalCompletion ? 
getActiveMTLCommandBuffer() : _activeMTLCommandBuffer; _activeMTLCommandBuffer = nil; - MVKDevice* mvkDev = _queue->getDevice(); + MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { - if (mtlCB.status == MTLCommandBufferStatusError) { - // If a command buffer error has occurred, report the error. If the error affects - // the physical device, always mark both the device and physical device as lost. - // If the error is local to this command buffer, optionally mark the device (but not the - // physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice. - getVulkanAPIObject()->reportError(VK_ERROR_DEVICE_LOST, "MTLCommandBuffer \"%s\" execution failed (code %li): %s", mtlCB.label ? mtlCB.label.UTF8String : "", mtlCB.error.code, mtlCB.error.localizedDescription.UTF8String); - switch (mtlCB.error.code) { - case MTLCommandBufferErrorBlacklisted: - case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement. -#if MVK_MACOS && !MVK_MACCAT - case MTLCommandBufferErrorDeviceRemoved: -#endif - mvkDev->markLost(true); - break; - default: - if ( !mvkConfig().resumeLostDevice ) { mvkDev->markLost(); } - break; - } -#if MVK_XCODE_12 - if (mvkConfig().debugMode) { - if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) { - if (NSArray>* mtlEncInfo = mtlCB.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) { - MVKLogInfo("Encoders for %p \"%s\":", mtlCB, mtlCB.label ? mtlCB.label.UTF8String : ""); - for (id enc in mtlEncInfo) { - MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromErrorState(enc.errorState)); - if (enc.debugSignposts.count > 0) { - MVKLogInfo(" Debug signposts:"); - for (NSString* signpost in enc.debugSignposts) { - MVKLogInfo(" - %s", signpost.UTF8String); - } - } - } - } - } - } -#endif - } -#if MVK_XCODE_12 - if (mvkConfig().debugMode && [mtlCB respondsToSelector: @selector(logs)]) { - bool isFirstMsg = true; - for (id log in mtlCB.logs) { - if (isFirstMsg) { - MVKLogInfo("Shader log messages:"); - isFirstMsg = false; - } - MVKLogInfo("%s", log.description.UTF8String); - } - } -#endif - - // Ensure finish() is the last thing the completetion callback does. - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferCompletion, startTime); - if (signalCompletion) { this->finish(); } + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); + if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; [mtlCmdBuff commit]; [mtlCmdBuff release]; // retained + + // If we need to signal completion, but an error occurred and the MTLCommandBuffer + // was not created, call the finish() function directly. + if (signalCompletion && !mtlCmdBuff) { finish(); } + + return mtlCmdBuff ? VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; } // Be sure to retain() any API objects referenced in this function, and release() them in the @@ -474,10 +536,11 @@ void MVKQueueCommandBufferSubmission::finish() { MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, - MVKCommandUse cmdUse) : - MVKQueueSubmission(queue, - (pSubmit ? pSubmit->waitSemaphoreCount : 0), - (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), + MVKCommandUse cmdUse) + : MVKQueueSubmission(queue, + (pSubmit ? pSubmit->waitSemaphoreCount : 0), + (pSubmit ? 
pSubmit->pWaitSemaphores : nullptr)), + _commandUse(cmdUse), _emulatedWaitDone(false) { @@ -524,7 +587,31 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() { template void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); + for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } + + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); +} + +template +MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { + + // pSubmit can be null if just tracking the fence alone + if (pSubmit) { + uint32_t cbCnt = pSubmit->commandBufferCount; + _cmdBuffers.reserve(cbCnt); + for (uint32_t i = 0; i < cbCnt; i++) { + MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); + _cmdBuffers.push_back(cb); + setConfigurationResult(cb->getConfigurationResult()); + } + } } @@ -534,24 +621,31 @@ void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { // If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting. // If the semaphores are not encodable, wait on them inline after presenting. // The semaphores know what to do. -void MVKQueuePresentSurfaceSubmission::execute() { +VkResult MVKQueuePresentSurfaceSubmission::execute() { id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent); [mtlCmdBuff enqueue]; - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(mtlCmdBuff, 0); } // Add completion handler that will destroy this submission only once the MTLCommandBuffer // is finished with the resources retained here, including the wait semaphores. // Completion handlers are also added in presentCAMetalDrawable() to retain the swapchain images. - [mtlCmdBuff addCompletedHandler: ^(id mcb) { - this->finish(); - }]; + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { this->finish(); }]; + + for (auto& ws : _waitSemaphores) { + auto& sem4 = ws.first; + sem4->encodeWait(mtlCmdBuff, 0); // Encoded semaphore waits + sem4->encodeWait(nil, 0); // Inline semaphore waits + } for (int i = 0; i < _presentInfo.size(); i++ ) { _presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]); } - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, 0); } [mtlCmdBuff commit]; + + // If an error occurred and the MTLCommandBuffer was not created, call finish() directly. + if ( !mtlCmdBuff ) { finish(); } + + return mtlCmdBuff ? 
VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; } void MVKQueuePresentSurfaceSubmission::finish() { @@ -563,7 +657,7 @@ void MVKQueuePresentSurfaceSubmission::finish() { cs->beginScope(); if (_queue->_queueFamily->getIndex() == mvkConfig().defaultGPUCaptureScopeQueueFamilyIndex && _queue->_index == mvkConfig().defaultGPUCaptureScopeQueueIndex) { - _queue->getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME); + getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME); } this->destroy(); @@ -623,6 +717,7 @@ MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* que for (uint32_t scIdx = 0; scIdx < scCnt; scIdx++) { MVKSwapchain* mvkSC = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx]; MVKImagePresentInfo presentInfo = {}; // Start with everything zeroed + presentInfo.queue = _queue; presentInfo.presentableImage = mvkSC->getPresentableImage(pPresentInfo->pImageIndices[scIdx]); presentInfo.presentMode = pPresentModes ? pPresentModes[scIdx] : VK_PRESENT_MODE_MAX_ENUM_KHR; presentInfo.fence = pFences ? (MVKFence*)pFences[scIdx] : nullptr; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h index 0bcceb5d..5746bfbf 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h @@ -35,6 +35,7 @@ #endif class MVKInstance; +class MVKSwapchain; @class MVKBlockObserver; @@ -55,11 +56,8 @@ public: /** Returns a pointer to the Vulkan instance. */ MVKInstance* getInstance() override { return _mvkInstance; } - /** Returns the CAMetalLayer underlying this surface. */ - inline CAMetalLayer* getCAMetalLayer() { - std::lock_guard lock(_layerLock); - return _mtlCAMetalLayer; - } + /** Returns the CAMetalLayer underlying this surface. */ + CAMetalLayer* getCAMetalLayer(); #pragma mark Construction @@ -75,13 +73,16 @@ public: ~MVKSurface() override; protected: + friend class MVKSwapchain; + void propagateDebugName() override {} - void initLayerObserver(); + void initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName); void releaseLayer(); - MVKInstance* _mvkInstance; - CAMetalLayer* _mtlCAMetalLayer; - MVKBlockObserver* _layerObserver; std::mutex _layerLock; + MVKInstance* _mvkInstance = nullptr; + CAMetalLayer* _mtlCAMetalLayer = nil; + MVKBlockObserver* _layerObserver = nil; + MVKSwapchain* _activeSwapchain = nullptr; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm index 1309d73d..3899ab69 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm @@ -29,12 +29,15 @@ #pragma mark MVKSurface +CAMetalLayer* MVKSurface::getCAMetalLayer() { + std::lock_guard lock(_layerLock); + return _mtlCAMetalLayer; +} + MVKSurface::MVKSurface(MVKInstance* mvkInstance, const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { - - _mtlCAMetalLayer = (CAMetalLayer*)[pCreateInfo->pLayer retain]; - initLayerObserver(); + initLayer((CAMetalLayer*)pCreateInfo->pLayer, "vkCreateMetalSurfaceEXT"); } // pCreateInfo->pView can be either a CAMetalLayer or a view (NSView/UIView). @@ -47,36 +50,30 @@ MVKSurface::MVKSurface(MVKInstance* mvkInstance, // If it's a view (NSView/UIView), extract the layer, otherwise assume it's already a CAMetalLayer. 
if ([obj isKindOfClass: [PLATFORM_VIEW_CLASS class]]) { + obj = ((PLATFORM_VIEW_CLASS*)obj).layer; if ( !NSThread.isMainThread ) { - MVKLogInfo("%s(): You are not calling this function from the main thread. %s should only be accessed from the main thread. When using this function outside the main thread, consider passing the CAMetalLayer itself in %s::pView, instead of the %s.", + MVKLogWarn("%s(): You are not calling this function from the main thread. %s should only be accessed from the main thread. When using this function outside the main thread, consider passing the CAMetalLayer itself in %s::pView, instead of the %s.", STR(vkCreate_PLATFORM_SurfaceMVK), STR(PLATFORM_VIEW_CLASS), STR(Vk_PLATFORM_SurfaceCreateInfoMVK), STR(PLATFORM_VIEW_CLASS)); } - obj = ((PLATFORM_VIEW_CLASS*)obj).layer; } // Confirm that we were provided with a CAMetalLayer - if ([obj isKindOfClass: [CAMetalLayer class]]) { - _mtlCAMetalLayer = (CAMetalLayer*)[obj retain]; // retained - } else { - setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, - "%s(): On-screen rendering requires a layer of type CAMetalLayer.", - STR(vkCreate_PLATFORM_SurfaceMVK))); - _mtlCAMetalLayer = nil; - } - - initLayerObserver(); + initLayer([obj isKindOfClass: CAMetalLayer.class] ? (CAMetalLayer*)obj : nil, + STR(vkCreate_PLATFORM_SurfaceMVK)); } -// Sometimes, the owning view can replace its CAMetalLayer. In that case, the client needs to recreate the surface. -void MVKSurface::initLayerObserver() { +void MVKSurface::initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName) { - _layerObserver = nil; - if ( ![_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) { return; } + _mtlCAMetalLayer = [mtlLayer retain]; // retained + if ( !_mtlCAMetalLayer ) { setConfigurationResult(reportError(VK_ERROR_SURFACE_LOST_KHR, "%s(): On-screen rendering requires a layer of type CAMetalLayer.", vkFuncName)); } - _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { - if ( ![path isEqualToString: @"layer"] ) { return; } - this->releaseLayer(); - } forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"]; + // Sometimes, the owning view can replace its CAMetalLayer. + // When that happens, the app needs to recreate the surface. + if ([_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]]) { + _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { + if ([path isEqualToString: @"layer"]) { this->releaseLayer(); } + } forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"]; + } } void MVKSurface::releaseLayer() { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h index 523a5807..7e7cff8c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h @@ -28,8 +28,6 @@ class MVKWatermark; -@class MVKBlockObserver; - #pragma mark - #pragma mark MVKSwapchain @@ -76,19 +74,8 @@ public: /** Releases swapchain images. */ VkResult releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo); - /** Returns whether the parent surface is now lost and this swapchain must be recreated. */ - bool getIsSurfaceLost() { return _surfaceLost; } - - /** Returns whether this swapchain is optimally sized for the surface. */ - bool hasOptimalSurface(); - /** Returns the status of the surface. Surface loss takes precedence over sub-optimal errors. 
*/ - VkResult getSurfaceStatus() { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; } - if ( !hasOptimalSurface() ) { return VK_SUBOPTIMAL_KHR; } - return VK_SUCCESS; - } + VkResult getSurfaceStatus(); /** Adds HDR metadata to this swapchain. */ void setHDRMetadataEXT(const VkHdrMetadataEXT& metadata); @@ -118,31 +105,28 @@ protected: VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo, uint32_t imgCnt); void initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt); - void releaseLayer(); - void releaseUndisplayedSurfaces(); + bool getIsSurfaceLost(); + bool hasOptimalSurface(); uint64_t getNextAcquisitionID(); - void willPresentSurface(id mtlTexture, id mtlCmdBuff); void renderWatermark(id mtlTexture, id mtlCmdBuff); void markFrameInterval(); - void recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + void beginPresentation(const MVKImagePresentInfo& presentInfo); + void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); - CAMetalLayer* _mtlLayer = nil; + MVKSurface* _surface = nullptr; MVKWatermark* _licenseWatermark = nullptr; MVKSmallVector _presentableImages; MVKSmallVector _compatiblePresentModes; static const int kMaxPresentationHistory = 60; VkPastPresentationTimingGOOGLE _presentTimingHistory[kMaxPresentationHistory]; std::atomic _currentAcquisitionID = 0; - MVKBlockObserver* _layerObserver = nil; std::mutex _presentHistoryLock; - std::mutex _layerLock; uint64_t _lastFrameTime = 0; VkExtent2D _mtlLayerDrawableExtent = {0, 0}; uint32_t _currentPerfLogFrameCount = 0; uint32_t _presentHistoryCount = 0; uint32_t _presentHistoryIndex = 0; uint32_t _presentHistoryHeadIndex = 0; - std::atomic _surfaceLost = false; bool _isDeliberatelyScaled = false; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index 601fbc54..f326f82e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -95,9 +95,8 @@ VkResult MVKSwapchain::acquireNextImage(uint64_t timeout, // Return the index of the image with the shortest wait, // and signal the semaphore and fence when it's available *pImageIndex = minWaitImage->_swapchainIndex; - minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence); - - return getSurfaceStatus(); + VkResult rslt = minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence); + return rslt ? rslt : getSurfaceStatus(); } VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo) { @@ -110,10 +109,18 @@ VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pRel uint64_t MVKSwapchain::getNextAcquisitionID() { return ++_currentAcquisitionID; } -// Releases any surfaces that are not currently being displayed, -// so they can be used by a different swapchain. 
-void MVKSwapchain::releaseUndisplayedSurfaces() {} +bool MVKSwapchain::getIsSurfaceLost() { + VkResult surfRslt = _surface->getConfigurationResult(); + setConfigurationResult(surfRslt); + return surfRslt != VK_SUCCESS; +} +VkResult MVKSwapchain::getSurfaceStatus() { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; } + if ( !hasOptimalSurface() ) { return VK_SUBOPTIMAL_KHR; } + return VK_SUCCESS; +} // This swapchain is optimally sized for the surface if the app has specified deliberate // swapchain scaling, or the CAMetalLayer drawableSize has not changed since the swapchain @@ -121,22 +128,16 @@ void MVKSwapchain::releaseUndisplayedSurfaces() {} bool MVKSwapchain::hasOptimalSurface() { if (_isDeliberatelyScaled) { return true; } - VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(_mtlLayer.drawableSize); + auto* mtlLayer = _surface->getCAMetalLayer(); + VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(mtlLayer.drawableSize); return (mvkVkExtent2DsAreEqual(drawExtent, _mtlLayerDrawableExtent) && - mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(_mtlLayer))); + mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(mtlLayer))); } #pragma mark Rendering -// Called automatically when a swapchain image is about to be presented to the surface by the queue. -// Activities include marking the frame interval and rendering the watermark if needed. -void MVKSwapchain::willPresentSurface(id mtlTexture, id mtlCmdBuff) { - markFrameInterval(); - renderWatermark(mtlTexture, mtlCmdBuff); -} - -// If the product has not been fully licensed, renders the watermark image to the surface. +// Renders the watermark image to the surface. void MVKSwapchain::renderWatermark(id mtlTexture, id mtlCmdBuff) { if (mvkConfig().displayWatermark) { if ( !_licenseWatermark ) { @@ -159,21 +160,20 @@ void MVKSwapchain::renderWatermark(id mtlTexture, idgetPerformanceTimestamp(); if (prevFrameTime == 0) { return; } // First frame starts at first presentation _device->addActivityPerformance(_device->_performanceStatistics.queue.frameInterval, prevFrameTime, _lastFrameTime); - uint32_t perfLogCntLimit = mvkConfig().performanceLoggingFrameCount; - if ((perfLogCntLimit > 0) && (++_currentPerfLogFrameCount >= perfLogCntLimit)) { + auto& mvkCfg = mvkConfig(); + bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; + if (shouldLogOnFrames && (mvkCfg.performanceLoggingFrameCount > 0) && (++_currentPerfLogFrameCount >= mvkCfg.performanceLoggingFrameCount)) { _currentPerfLogFrameCount = 0; MVKLogInfo("Performance statistics reporting every: %d frames, avg FPS: %.2f, elapsed time: %.3f seconds:", - perfLogCntLimit, - (1000.0 / _device->_performanceStatistics.queue.frameInterval.averageDuration), + mvkCfg.performanceLoggingFrameCount, + (1000.0 / _device->_performanceStatistics.queue.frameInterval.average), mvkGetElapsedMilliseconds() / 1000.0); if (mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) { _device->logPerformanceSummary(); @@ -181,6 +181,119 @@ void MVKSwapchain::markFrameInterval() { } } +VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + + auto* mtlLayer = _surface->getCAMetalLayer(); +#if MVK_VISIONOS 
+ // TODO: See if this can be obtained from OS instead + NSInteger framesPerSecond = 90; +#elif MVK_IOS_OR_TVOS || MVK_MACCAT + NSInteger framesPerSecond = 60; + UIScreen* screen = mtlLayer.screenMVK; + if ([screen respondsToSelector: @selector(maximumFramesPerSecond)]) { + framesPerSecond = screen.maximumFramesPerSecond; + } +#elif MVK_MACOS && !MVK_MACCAT + NSScreen* screen = mtlLayer.screenMVK; + CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue]; + CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId); + double framesPerSecond = CGDisplayModeGetRefreshRate(mode); + CGDisplayModeRelease(mode); +#if MVK_XCODE_13 + if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)]) + framesPerSecond = [screen maximumFramesPerSecond]; +#endif + + // Builtin panels, e.g., on MacBook, report a zero refresh rate. + if (framesPerSecond == 0) + framesPerSecond = 60.0; +#endif + + pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond; + return VK_SUCCESS; +} + +VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + + VkResult res = VK_SUCCESS; + + std::lock_guard lock(_presentHistoryLock); + if (pPresentationTimings == nullptr) { + *pCount = _presentHistoryCount; + } else { + uint32_t countRemaining = std::min(_presentHistoryCount, *pCount); + uint32_t outIndex = 0; + + res = (*pCount >= _presentHistoryCount) ? VK_SUCCESS : VK_INCOMPLETE; + *pCount = countRemaining; + + while (countRemaining > 0) { + pPresentationTimings[outIndex] = _presentTimingHistory[_presentHistoryHeadIndex]; + countRemaining--; + _presentHistoryCount--; + _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; + outIndex++; + } + } + + return res; +} + +void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) {} + +void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { + + markFrameInterval(); + + std::lock_guard lock(_presentHistoryLock); + if (_presentHistoryCount < kMaxPresentationHistory) { + _presentHistoryCount++; + } else { + _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; + } + + // If actual present time is not available, use desired time instead, and if that + // hasn't been set, use the current time, which should be reasonably accurate (sub-ms), + // since we are here as part of the addPresentedHandler: callback. 
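The getPastPresentationTiming() hunk above follows Vulkan's usual two-call query pattern over a small fixed-size ring buffer: a null output pointer just reports the available count, otherwise the oldest entries are copied out and consumed, and VK_INCOMPLETE is returned when the caller's array is too small. A minimal standalone sketch of that drain logic, using illustrative stand-in types and names rather than the ones in this patch:

#include <algorithm>
#include <cstdint>
#include <cstdio>

enum Result { Success = 0, Incomplete = 5 };          // stand-ins for VkResult values

struct Timing { uint32_t presentID; uint64_t actualPresentTime; };

constexpr uint32_t kCapacity = 60;                    // fixed history size
Timing   gHistory[kCapacity];                         // ring buffer storage
uint32_t gCount = 0;                                  // valid entries
uint32_t gHead  = 0;                                  // index of the oldest entry

// Two-call query: with pTimings == nullptr, report how many entries exist;
// otherwise copy out up to *pCount of the oldest entries and remove them.
Result getPastTimings(uint32_t* pCount, Timing* pTimings) {
    if (!pTimings) { *pCount = gCount; return Success; }

    Result res = (*pCount >= gCount) ? Success : Incomplete;
    uint32_t n = std::min(gCount, *pCount);
    *pCount = n;
    for (uint32_t i = 0; i < n; i++) {
        pTimings[i] = gHistory[gHead];
        gHead = (gHead + 1) % kCapacity;              // consume the oldest entry
        gCount--;
    }
    return res;
}

int main() {
    // Record a couple of fake presentations, then drain them.
    gHistory[gHead] = {1, 100}; gCount++;
    gHistory[(gHead + 1) % kCapacity] = {2, 200}; gCount++;

    uint32_t cnt = 0;
    getPastTimings(&cnt, nullptr);                    // first call: query the count
    Timing out[4];
    getPastTimings(&cnt, out);                        // second call: fetch and consume entries
    std::printf("drained %u entries, first presentID=%u\n", cnt, out[0].presentID);
}

Consuming entries as they are reported keeps the history bounded without any allocation.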
+ if (actualPresentTime == 0) { actualPresentTime = presentInfo.desiredPresentTime; } + if (actualPresentTime == 0) { actualPresentTime = CACurrentMediaTime() * 1.0e9; } + + _presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID; + _presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime; + _presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime; + // These details are not available in Metal + _presentTimingHistory[_presentHistoryIndex].earliestPresentTime = actualPresentTime; + _presentTimingHistory[_presentHistoryIndex].presentMargin = 0; + _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; +} + +void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { + auto* mtlLayer = _surface->getCAMetalLayer(); + if (!pRegion || pRegion->rectangleCount == 0) { + [mtlLayer setNeedsDisplay]; + return; + } + + for (uint32_t i = 0; i < pRegion->rectangleCount; ++i) { + CGRect cgRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[i]); +#if MVK_MACOS + // VK_KHR_incremental_present specifies an upper-left origin, but macOS by default + // uses a lower-left origin. + cgRect.origin.y = mtlLayer.bounds.size.height - cgRect.origin.y; +#endif + // We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them + // in points, which is pixels / contentsScale. + CGFloat scaleFactor = mtlLayer.contentsScale; + cgRect.origin.x /= scaleFactor; + cgRect.origin.y /= scaleFactor; + cgRect.size.width /= scaleFactor; + cgRect.size.height /= scaleFactor; + [mtlLayer setNeedsDisplayInRect:cgRect]; + } +} + #if MVK_MACOS struct CIE1931XY { uint16_t x; @@ -237,19 +350,31 @@ void MVKSwapchain::setHDRMetadataEXT(const VkHdrMetadataEXT& metadata) { CAEDRMetadata* caMetadata = [CAEDRMetadata HDR10MetadataWithDisplayInfo: colorVolData contentInfo: lightLevelData opticalOutputScale: 1]; - _mtlLayer.EDRMetadata = caMetadata; + auto* mtlLayer = _surface->getCAMetalLayer(); + mtlLayer.EDRMetadata = caMetadata; + mtlLayer.wantsExtendedDynamicRangeContent = YES; [caMetadata release]; [colorVolData release]; [lightLevelData release]; - _mtlLayer.wantsExtendedDynamicRangeContent = YES; #endif } #pragma mark Construction -MVKSwapchain::MVKSwapchain(MVKDevice* device, - const VkSwapchainCreateInfoKHR* pCreateInfo) : MVKVulkanAPIDeviceObject(device) { +MVKSwapchain::MVKSwapchain(MVKDevice* device, const VkSwapchainCreateInfoKHR* pCreateInfo) + : MVKVulkanAPIDeviceObject(device), + _surface((MVKSurface*)pCreateInfo->surface) { + + // Check if oldSwapchain is properly set + auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; + if (oldSwapchain == _surface->_activeSwapchain) { + _surface->_activeSwapchain = this; + } else { + setConfigurationResult(reportError(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR, "vkCreateSwapchainKHR(): pCreateInfo->oldSwapchain does not match the VkSwapchain that is in use by the surface")); + return; + } + memset(_presentTimingHistory, 0, sizeof(_presentTimingHistory)); // Retrieve the scaling and present mode structs if they are supplied. @@ -280,10 +405,6 @@ MVKSwapchain::MVKSwapchain(MVKDevice* device, } } - // If applicable, release any surfaces (not currently being displayed) from the old swapchain. 
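The setLayerNeedsDisplay() hunk above turns each incremental-present rectangle, given in pixels with an upper-left origin, into the point coordinates that -[CALayer setNeedsDisplayInRect:] expects, flipping y on macOS and dividing by the layer's contentsScale. A simplified, dimensionally consistent sketch of that arithmetic; the Rect type, the 600-point layer height, and the 2x scale are made-up example values, not values from this patch:

#include <cstdio>

// Stand-in for CGRect; values start out in pixels with an upper-left origin.
struct Rect { double x, y, w, h; };

// Mirror the origin's y to a lower-left origin and convert pixels to points.
// layerHeightPts and contentsScale describe the target CALayer.
Rect toLayerPoints(Rect px, double layerHeightPts, double contentsScale) {
    px.y = layerHeightPts * contentsScale - px.y;   // mirror y about the layer height, still in pixels
    px.x /= contentsScale;                          // pixels -> points
    px.y /= contentsScale;
    px.w /= contentsScale;
    px.h /= contentsScale;
    return px;
}

int main() {
    Rect dirtyPx = {100, 50, 300, 200};             // damaged region in pixels, upper-left origin
    Rect pts = toLayerPoints(dirtyPx, 600.0, 2.0);  // 600-point-tall layer at 2x scale
    std::printf("x=%.1f y=%.1f w=%.1f h=%.1f\n", pts.x, pts.y, pts.w, pts.h);
}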
- MVKSwapchain* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; - if (oldSwapchain) { oldSwapchain->releaseUndisplayedSurfaces(); } - uint32_t imgCnt = mvkClamp(pCreateInfo->minImageCount, _device->_pMetalFeatures->minSwapchainImageCount, _device->_pMetalFeatures->maxSwapchainImageCount); @@ -333,85 +454,80 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo, uint32_t imgCnt) { - MVKSurface* mvkSrfc = (MVKSurface*)pCreateInfo->surface; - _mtlLayer = mvkSrfc->getCAMetalLayer(); - if ( !_mtlLayer ) { - setConfigurationResult(mvkSrfc->getConfigurationResult()); - _surfaceLost = true; - return; - } + if ( getIsSurfaceLost() ) { return; } + auto* mtlLayer = _surface->getCAMetalLayer(); auto minMagFilter = mvkConfig().swapchainMinMagFilterUseNearest ? kCAFilterNearest : kCAFilterLinear; - _mtlLayer.device = getMTLDevice(); - _mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); - _mtlLayer.maximumDrawableCountMVK = imgCnt; - _mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); - _mtlLayer.minificationFilter = minMagFilter; - _mtlLayer.magnificationFilter = minMagFilter; - _mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo); - _mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + mtlLayer.device = getMTLDevice(); + mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); + mtlLayer.maximumDrawableCountMVK = imgCnt; + mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); + mtlLayer.minificationFilter = minMagFilter; + mtlLayer.magnificationFilter = minMagFilter; + mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo); + mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)); // Remember the extent to later detect if it has changed under the covers, // and set the drawable size of the CAMetalLayer from the extent. 
_mtlLayerDrawableExtent = pCreateInfo->imageExtent; - _mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_mtlLayerDrawableExtent); + mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_mtlLayerDrawableExtent); if (pCreateInfo->compositeAlpha != VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) { - _mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; } switch (pCreateInfo->imageColorSpace) { case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_BT709_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_BT2020_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; #if MVK_XCODE_12 case VK_COLOR_SPACE_HDR10_ST2084_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_HDR10_HLG_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; #endif case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; 
case VK_COLOR_SPACE_PASS_THROUGH_EXT: - _mtlLayer.colorspace = nil; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspace = nil; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; default: setConfigurationResult(reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "vkCreateSwapchainKHR(): Metal does not support VkColorSpaceKHR value %d.", pCreateInfo->imageColorSpace)); @@ -421,22 +537,6 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, // TODO: set additional CAMetalLayer properties before extracting drawables: // - presentsWithTransaction // - drawsAsynchronously - - if ( [_mtlLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) { - // Sometimes, the owning view can replace its CAMetalLayer. In that case, the client - // needs to recreate the swapchain, or no content will be displayed. - _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { - if ( ![path isEqualToString: @"layer"] ) { return; } - this->releaseLayer(); - } forObject: _mtlLayer.delegate atKeyPath: @"layer"]; - } -} - -void MVKSwapchain::releaseLayer() { - std::lock_guard lock(_layerLock); - _surfaceLost = true; - [_layerObserver release]; - _layerObserver = nil; } // Initializes the array of images used for the surface of this swapchain. @@ -459,13 +559,13 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo } } + auto* mtlLayer = _surface->getCAMetalLayer(); VkExtent2D imgExtent = pCreateInfo->imageExtent; - VkImageCreateInfo imgInfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = VK_NULL_HANDLE, .imageType = VK_IMAGE_TYPE_2D, - .format = getPixelFormats()->getVkFormat(_mtlLayer.pixelFormat), + .format = getPixelFormats()->getVkFormat(mtlLayer.pixelFormat), .extent = { imgExtent.width, imgExtent.height, 1 }, .mipLevels = 1, .arrayLayers = 1, @@ -494,131 +594,21 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo NSString* screenName = @"Main Screen"; #if MVK_MACOS && !MVK_MACCAT - if ([_mtlLayer.screenMVK respondsToSelector:@selector(localizedName)]) { - screenName = _mtlLayer.screenMVK.localizedName; + if ([mtlLayer.screenMVK respondsToSelector:@selector(localizedName)]) { + screenName = mtlLayer.screenMVK.localizedName; } #endif MVKLogInfo("Created %d swapchain images with initial size (%d, %d) and contents scale %.1f for screen %s.", - imgCnt, imgExtent.width, imgExtent.height, _mtlLayer.contentsScale, screenName.UTF8String); + imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, screenName.UTF8String); } -VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - -#if MVK_VISIONOS - // TODO: See if this can be obtained from OS instead - NSInteger framesPerSecond = 90; -#elif MVK_IOS_OR_TVOS || MVK_MACCAT - NSInteger framesPerSecond = 60; - UIScreen* screen = _mtlLayer.screenMVK; - if ([screen respondsToSelector: @selector(maximumFramesPerSecond)]) { - framesPerSecond = screen.maximumFramesPerSecond; - } -#elif MVK_MACOS && !MVK_MACCAT - NSScreen* screen = _mtlLayer.screenMVK; - CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue]; - CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId); - double framesPerSecond = CGDisplayModeGetRefreshRate(mode); - CGDisplayModeRelease(mode); -#if MVK_XCODE_13 - if (framesPerSecond == 0 
&& [screen respondsToSelector: @selector(maximumFramesPerSecond)]) - framesPerSecond = [screen maximumFramesPerSecond]; -#endif - - // Builtin panels, e.g., on MacBook, report a zero refresh rate. - if (framesPerSecond == 0) - framesPerSecond = 60.0; -#endif - - pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond; - return VK_SUCCESS; -} - -VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - - VkResult res = VK_SUCCESS; - - std::lock_guard lock(_presentHistoryLock); - if (pPresentationTimings == nullptr) { - *pCount = _presentHistoryCount; - } else { - uint32_t countRemaining = std::min(_presentHistoryCount, *pCount); - uint32_t outIndex = 0; - - res = (*pCount >= _presentHistoryCount) ? VK_SUCCESS : VK_INCOMPLETE; - *pCount = countRemaining; - - while (countRemaining > 0) { - pPresentationTimings[outIndex] = _presentTimingHistory[_presentHistoryHeadIndex]; - countRemaining--; - _presentHistoryCount--; - _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; - outIndex++; - } - } - - return res; -} - -void MVKSwapchain::recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { - std::lock_guard lock(_presentHistoryLock); - if (_presentHistoryCount < kMaxPresentationHistory) { - _presentHistoryCount++; - } else { - _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; - } - - // If actual present time is not available, use desired time instead, and if that - // hasn't been set, use the current time, which should be reasonably accurate (sub-ms), - // since we are here as part of the addPresentedHandler: callback. - if (actualPresentTime == 0) { actualPresentTime = presentInfo.desiredPresentTime; } - if (actualPresentTime == 0) { actualPresentTime = CACurrentMediaTime() * 1.0e9; } - - _presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID; - _presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime; - _presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime; - // These details are not available in Metal - _presentTimingHistory[_presentHistoryIndex].earliestPresentTime = actualPresentTime; - _presentTimingHistory[_presentHistoryIndex].presentMargin = 0; - _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; -} - -void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { - if (!pRegion || pRegion->rectangleCount == 0) { - [_mtlLayer setNeedsDisplay]; - return; - } - - for (uint32_t i = 0; i < pRegion->rectangleCount; ++i) { - CGRect cgRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[i]); -#if MVK_MACOS - // VK_KHR_incremental_present specifies an upper-left origin, but macOS by default - // uses a lower-left origin. - cgRect.origin.y = _mtlLayer.bounds.size.height - cgRect.origin.y; -#endif - // We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them - // in points, which is pixels / contentsScale. - CGFloat scaleFactor = _mtlLayer.contentsScale; - cgRect.origin.x /= scaleFactor; - cgRect.origin.y /= scaleFactor; - cgRect.size.width /= scaleFactor; - cgRect.size.height /= scaleFactor; - [_mtlLayer setNeedsDisplayInRect:cgRect]; - } -} - -// A retention loop exists between the swapchain and its images. 
The swapchain images -// retain the swapchain because they can be in flight when the app destroys the swapchain. -// Release the images now, when the app destroys the swapchain, so they will be destroyed when -// no longer held by the presentation flow, and will in turn release the swapchain for destruction. void MVKSwapchain::destroy() { + if (_surface->_activeSwapchain == this) { _surface->_activeSwapchain = nullptr; } for (auto& img : _presentableImages) { _device->destroyPresentableSwapchainImage(img, NULL); } MVKVulkanAPIDeviceObject::destroy(); } MVKSwapchain::~MVKSwapchain() { if (_licenseWatermark) { _licenseWatermark->destroy(); } - releaseLayer(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h index ad87f715..4e3f5122 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h @@ -63,6 +63,9 @@ public: /** Returns whether this instance is in a reserved state. */ bool isReserved(); + /** Returns the number of outstanding reservations. */ + uint32_t getReservationCount(); + /** * Blocks processing on the current thread until any or all (depending on configuration) outstanding * reservations have been released, or until the specified timeout interval in nanoseconds expires. @@ -89,20 +92,19 @@ public: * require a separate call to the release() function to cause the semaphore to stop blocking. */ MVKSemaphoreImpl(bool waitAll = true, uint32_t reservationCount = 0) - : _shouldWaitAll(waitAll), _reservationCount(reservationCount) {} + : _reservationCount(reservationCount), _shouldWaitAll(waitAll) {} - /** Destructor. */ ~MVKSemaphoreImpl(); private: bool operator()(); - inline bool isClear() { return _reservationCount == 0; } // Not thread-safe + bool isClear() { return _reservationCount == 0; } // Not thread-safe std::mutex _lock; std::condition_variable _blocker; - bool _shouldWaitAll; uint32_t _reservationCount; + bool _shouldWaitAll; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm index efde21cb..fb1e0190 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm @@ -50,6 +50,11 @@ bool MVKSemaphoreImpl::isReserved() { return !isClear(); } +uint32_t MVKSemaphoreImpl::getReservationCount() { + lock_guard lock(_lock); + return _reservationCount; +} + bool MVKSemaphoreImpl::wait(uint64_t timeout, bool reserveAgain) { unique_lock lock(_lock); diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h index 7005e985..d2fcb9e8 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h @@ -50,7 +50,7 @@ public: void reportMessage(MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object, which may be nil. + * Report a message, on behalf of the object, which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. @@ -58,14 +58,19 @@ public: static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object, which may be nil. + * Report a Vulkan result message. 
This includes logging to a standard system logging stream, + * and some subclasses will also forward the message to their VkInstance for output to the + * Vulkan debug report messaging API. + */ + VkResult reportResult(VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5); + + /** + * Report a Vulkan result message, on behalf of the object. which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. - * - * This is the core reporting implementation. Other similar functions delegate here. */ - static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0); + static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5); /** * Report a Vulkan error message. This includes logging to a standard system logging stream, @@ -83,19 +88,29 @@ public: static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object. which may be nil. + * Report a Vulkan warning message. This includes logging to a standard system logging stream, + * and some subclasses will also forward the message to their VkInstance for output to the + * Vulkan debug report messaging API. + */ + VkResult reportWarning(VkResult vkRslt, const char* format, ...) __printflike(3, 4); + + /** + * Report a Vulkan warning message, on behalf of the object. which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. - * - * This is the core reporting implementation. Other similar functions delegate here. */ - static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) __printflike(3, 0); + static VkResult reportWarning(MVKBaseObject* mvkObj, VkResult vkRslt, const char* format, ...) __printflike(3, 4); /** Destroys this object. Default behaviour simply deletes it. Subclasses may override to delay deletion. 
*/ virtual void destroy() { delete this; } virtual ~MVKBaseObject() {} + +protected: + static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(4, 0); + static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0); + }; diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm index 427c3227..5a14888c 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm @@ -26,24 +26,19 @@ using namespace std; -static const char* getReportingLevelString(MVKConfigLogLevel logLevel) { - switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return "mvk-debug"; - case MVK_CONFIG_LOG_LEVEL_INFO: - return "mvk-info"; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return "mvk-warn"; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return "mvk-error"; - } -} - - #pragma mark - #pragma mark MVKBaseObject +static const char* getReportingLevelString(MVKConfigLogLevel logLevel) { + switch (logLevel) { + case MVK_CONFIG_LOG_LEVEL_ERROR: return "mvk-error"; + case MVK_CONFIG_LOG_LEVEL_WARNING: return "mvk-warn"; + case MVK_CONFIG_LOG_LEVEL_INFO: return "mvk-info"; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return "mvk-debug"; + default: return "mvk-unknown"; + } +} + string MVKBaseObject::getClassName() { return mvk::getTypeName(this); } void MVKBaseObject::reportMessage(MVKConfigLogLevel logLevel, const char* format, ...) { @@ -102,10 +97,43 @@ void MVKBaseObject::reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLe free(redoBuff); } +VkResult MVKBaseObject::reportResult(VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(this, vkErr, logLevel, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(mvkObj, vkErr, logLevel, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) { + + // Prepend the result code to the format string + const char* vkRsltName = mvkVkResultName(vkRslt); + size_t rsltLen = strlen(vkRsltName) + strlen(format) + 4; + char fmtStr[rsltLen]; + snprintf(fmtStr, rsltLen, "%s: %s", vkRsltName, format); + + // Report the message + va_list lclArgs; + va_copy(lclArgs, args); + reportMessage(mvkObj, logLevel, fmtStr, lclArgs); + va_end(lclArgs); + + return vkRslt; +} + VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) { va_list args; va_start(args, format); - VkResult rslt = reportError(this, vkErr, format, args); + VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args); va_end(args); return rslt; } @@ -113,25 +141,23 @@ VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) { VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) { va_list args; va_start(args, format); - VkResult rslt = reportError(mvkObj, vkErr, format, args); + VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args); va_end(args); return rslt; } -// This is the core reporting implementation. 
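The reportResult() helpers added above all funnel into one va_list implementation that prepends the VkResult name to the caller's printf-style format before logging, then returns the result so call sites can report and return in a single expression. A compressed sketch of that delegation pattern, where resultName() and logMessage() are made-up stand-ins for the MoltenVK helpers:

#include <cstdarg>
#include <cstdio>

// Minimal stand-in; the real code maps VkResult values through mvkVkResultName().
enum Result { ErrFormatNotSupported = -11 };

static const char* resultName(Result r) {
    return (r == ErrFormatNotSupported) ? "VK_ERROR_FORMAT_NOT_SUPPORTED" : "VK_UNKNOWN";
}

// Core sink: formats the message from a va_list and writes it to the log.
static void logMessage(const char* level, const char* format, va_list args) {
    char msg[512];
    std::vsnprintf(msg, sizeof(msg), format, args);
    std::printf("[%s] %s\n", level, msg);
}

// Variadic front end: prepends the result name to the format string,
// delegates to the va_list sink, and returns the result for chaining.
static Result reportResult(Result rslt, const char* level, const char* format, ...) {
    char fmt[256];
    std::snprintf(fmt, sizeof(fmt), "%s: %s", resultName(rslt), format);

    va_list args;
    va_start(args, format);
    logMessage(level, fmt, args);
    va_end(args);
    return rslt;
}

int main() {
    // Typical call site: report the failure and keep the result code in one expression.
    Result r = reportResult(ErrFormatNotSupported, "mvk-error",
                            "Metal does not support color space value %d.", 12345);
    return (r == ErrFormatNotSupported) ? 0 : 1;
}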
Other similar functions delegate here. -VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) { - - // Prepend the error code to the format string - const char* vkRsltName = mvkVkResultName(vkErr); - size_t rsltLen = strlen(vkRsltName) + strlen(format) + 4; - char fmtStr[rsltLen]; - snprintf(fmtStr, rsltLen, "%s: %s", vkRsltName, format); - - // Report the error - va_list lclArgs; - va_copy(lclArgs, args); - reportMessage(mvkObj, MVK_CONFIG_LOG_LEVEL_ERROR, fmtStr, lclArgs); - va_end(lclArgs); - - return vkErr; +VkResult MVKBaseObject::reportWarning(VkResult vkErr, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportWarning(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args); + va_end(args); + return rslt; } diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp index 29ee115f..85ad7d5b 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp @@ -21,6 +21,44 @@ #define CASE_STRINGIFY(V) case V: return #V +const char* mvkVkCommandName(MVKCommandUse cmdUse) { + switch (cmdUse) { + case kMVKCommandUseBeginCommandBuffer: return "vkBeginCommandBuffer (prefilled VkCommandBuffer)"; + case kMVKCommandUseQueueSubmit: return "vkQueueSubmit"; + case kMVKCommandUseAcquireNextImage: return "vkAcquireNextImageKHR"; + case kMVKCommandUseQueuePresent: return "vkQueuePresentKHR"; + case kMVKCommandUseQueueWaitIdle: return "vkQueueWaitIdle"; + case kMVKCommandUseDeviceWaitIdle: return "vkDeviceWaitIdle"; + case kMVKCommandUseInvalidateMappedMemoryRanges: return "vkInvalidateMappedMemoryRanges"; + case kMVKCommandUseBeginRendering: return "vkCmdBeginRendering"; + case kMVKCommandUseBeginRenderPass: return "vkCmdBeginRenderPass"; + case kMVKCommandUseNextSubpass: return "vkCmdNextSubpass"; + case kMVKCommandUseRestartSubpass: return "Metal renderpass restart on barrier"; + case kMVKCommandUsePipelineBarrier: return "vkCmdPipelineBarrier"; + case kMVKCommandUseBlitImage: return "vkCmdBlitImage"; + case kMVKCommandUseCopyImage: return "vkCmdCopyImage"; + case kMVKCommandUseResolveImage: return "vkCmdResolveImage (resolve stage)"; + case kMVKCommandUseResolveExpandImage: return "vkCmdResolveImage (expand stage)"; + case kMVKCommandUseResolveCopyImage: return "vkCmdResolveImage (copy stage)"; + case kMVKCommandUseCopyBuffer: return "vkCmdCopyBuffer"; + case kMVKCommandUseCopyBufferToImage: return "vkCmdCopyBufferToImage"; + case kMVKCommandUseCopyImageToBuffer: return "vkCmdCopyImageToBuffer"; + case kMVKCommandUseFillBuffer: return "vkCmdFillBuffer"; + case kMVKCommandUseUpdateBuffer: return "vkCmdUpdateBuffer"; + case kMVKCommandUseClearAttachments: return "vkCmdClearAttachments"; + case kMVKCommandUseClearColorImage: return "vkCmdClearColorImage"; + case kMVKCommandUseClearDepthStencilImage: return "vkCmdClearDepthStencilImage"; + case kMVKCommandUseResetQueryPool: return "vkCmdResetQueryPool"; + case kMVKCommandUseDispatch: return "vkCmdDispatch"; + case kMVKCommandUseTessellationVertexTessCtl: return "vkCmdDraw (vertex and tess control stages)"; + case kMVKCommandUseDrawIndirectConvertBuffers: return "vkCmdDrawIndirect (convert indirect 
buffers)"; + case kMVKCommandUseCopyQueryPoolResults: return "vkCmdCopyQueryPoolResults"; + case kMVKCommandUseAccumOcclusionQuery: return "Post-render-pass occlusion query accumulation"; + case kMVKCommandUseRecordGPUCounterSample: return "Record GPU Counter Sample"; + default: return "Unknown Vulkan command"; + } +} + const char* mvkVkResultName(VkResult vkResult) { switch (vkResult) { diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index b8f10720..1097afb5 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -63,7 +63,7 @@ typedef struct { /** Tracks the Vulkan command currently being used. */ typedef enum : uint8_t { kMVKCommandUseNone = 0, /**< No use defined. */ - kMVKCommandUseEndCommandBuffer, /**< vkEndCommandBuffer (prefilled VkCommandBuffer). */ + kMVKCommandUseBeginCommandBuffer, /**< vkBeginCommandBuffer (prefilled VkCommandBuffer). */ kMVKCommandUseQueueSubmit, /**< vkQueueSubmit. */ kMVKCommandUseAcquireNextImage, /**< vkAcquireNextImageKHR. */ kMVKCommandUseQueuePresent, /**< vkQueuePresentKHR. */ @@ -104,6 +104,9 @@ enum MVKGraphicsStage { kMVKGraphicsStageRasterization /**< The rest of the pipeline. */ }; +/** Returns the name of the command defined by the command use. */ +const char* mvkVkCommandName(MVKCommandUse cmdUse); + /** Returns the name of the result value. */ const char* mvkVkResultName(VkResult vkResult); diff --git a/MoltenVK/MoltenVK/Utility/MVKLogging.h b/MoltenVK/MoltenVK/Utility/MVKLogging.h index bea3a92f..840d3780 100644 --- a/MoltenVK/MoltenVK/Utility/MVKLogging.h +++ b/MoltenVK/MoltenVK/Utility/MVKLogging.h @@ -57,9 +57,9 @@ extern "C" { * MVKLogErrorIf(cond, fmt, ...) - same as MVKLogError if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * - * MVKLogWarning(fmt, ...) - recommended for not immediately harmful errors + * MVKLogWarn(fmt, ...) - recommended for not immediately harmful errors * - will print if MVK_LOG_LEVEL_WARNING is set on. - * MVKLogWarningIf(cond, fmt, ...) - same as MVKLogWarning if boolean "cond" condition expression evaluates to YES, + * MVKLogWarnIf(cond, fmt, ...) - same as MVKLogWarn if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * * MVKLogInfo(fmt, ...) - recommended for general, infrequent, information messages @@ -67,7 +67,7 @@ extern "C" { * MVKLogInfoIf(cond, fmt, ...) - same as MVKLogInfo if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * - * MVKLogDebug(fmt, ...) - recommended for temporary use during debugging + * MVKLogDebug(fmt, ...) - recommended for temporary use during debugging * - will print if MVK_LOG_LEVEL_DEBUG is set on. * MVKLogDebugIf(cond, fmt, ...) - same as MVKLogDebug if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. @@ -148,11 +148,11 @@ extern "C" { // Warning logging - for not immediately harmful errors #if MVK_LOG_LEVEL_WARNING -# define MVKLogWarning(fmt, ...) MVKLogWarningImpl(fmt, ##__VA_ARGS__) -# define MVKLogWarningIf(cond, fmt, ...) if(cond) { MVKLogWarningImpl(fmt, ##__VA_ARGS__); } +# define MVKLogWarn(fmt, ...) MVKLogWarnImpl(fmt, ##__VA_ARGS__) +# define MVKLogWarnIf(cond, fmt, ...) if(cond) { MVKLogWarnImpl(fmt, ##__VA_ARGS__); } #else -# define MVKLogWarning(...) -# define MVKLogWarningIf(cond, fmt, ...) +# define MVKLogWarn(...) +# define MVKLogWarnIf(cond, fmt, ...) 
#endif // Info logging - for general, non-performance affecting information messages @@ -182,11 +182,11 @@ extern "C" { # define MVKLogTraceIf(cond, fmt, ...) #endif -#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) -#define MVKLogWarningImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__) -#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) -#define MVKLogDebugImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) -#define MVKLogTraceImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) +#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) +#define MVKLogWarnImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__) +#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) +#define MVKLogDebugImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) +#define MVKLogTraceImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) // Assertions #ifdef NS_BLOCK_ASSERTIONS diff --git a/Scripts/runcts b/Scripts/runcts index 20ae1abe..bf65cc9e 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -113,7 +113,7 @@ export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(2 = VK_EXT_descriptor_ export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always) export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4) export MVK_CONFIG_PERFORMANCE_TRACKING=0 -export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=2 #(2 = Device lifetime) +export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=3 #(2 = Device lifetime, 3 = Process lifetime) # -------------- Operation -------------------- From a28437d8f21dff45563eaa550a8331698a32babb Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 5 Sep 2023 14:56:43 -0400 Subject: [PATCH 04/41] Updates to code review on swapchain image presentation improvement. - Fix failure building on Xcode 14. - Track frame interval statistics, regardless of whether performance tracking is enabled. - Determine wait time for swapchain presentations from frame intervals. - MVKSwapchain call markFrameInterval() from within mutex lock. - MVKDevice rename addActivityPerformance() to addPerformanceInterval() and addActivityByteCount() to addPerformanceByteCount(). - Add documentation about performance being measured in milliseconds. --- MoltenVK/MoltenVK/API/mvk_private_api.h | 44 +++++++++---------- .../MoltenVK/Commands/MVKCommandBuffer.mm | 2 +- .../Commands/MVKCommandResourceFactory.mm | 8 ++-- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 23 +++++----- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 8 ++-- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 6 +-- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 14 +++--- .../MoltenVK/GPUObjects/MVKShaderModule.mm | 22 +++++----- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 8 ++-- MoltenVK/MoltenVK/GPUObjects/MVKSync.mm | 2 +- 11 files changed, 70 insertions(+), 71 deletions(-) diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index e496fc5a..8ed5b754 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -167,40 +167,40 @@ typedef struct { /** MoltenVK performance of shader compilation activities. */ typedef struct { - MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code. 
*/ - MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code. */ - MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary. */ - MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary. */ - MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache. */ - MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache. */ - MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed. */ - MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary. */ - MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction. */ - MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline. */ - MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code. */ + MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code, in milliseconds. */ + MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code, in milliseconds. */ + MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache, in milliseconds. */ + MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache, in milliseconds. */ + MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed, in milliseconds. */ + MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction, in milliseconds. */ + MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline, in milliseconds. */ + MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code, in milliseconds. */ } MVKShaderCompilationPerformance; /** MoltenVK performance of pipeline cache activities. */ typedef struct { - MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream. */ - MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream. */ - MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream. */ + MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream, in milliseconds. */ + MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream, in milliseconds. */ + MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream, in milliseconds. */ } MVKPipelineCachePerformance; /** MoltenVK performance of queue activities. */ typedef struct { - MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue. */ - MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling). 
*/ - MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding). */ - MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback. */ - MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer. */ - MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback. */ - MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */ + MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */ + MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. */ + MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */ + MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. */ + MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */ + MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */ + MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS), in milliseconds. */ } MVKQueuePerformance; /** MoltenVK performance of device activities. */ typedef struct { - MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated (in KB). */ + MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated, in kilobytes. 
*/ } MVKDevicePerformance; /** diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 4e0af414..8ac91c26 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -347,7 +347,7 @@ void MVKCommandEncoder::encode(id mtlCmdBuff, encodeCommands(_cmdBuffer->_head); endEncoding(); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); } void MVKCommandEncoder::beginEncoding(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index b3003507..973db991 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -623,7 +623,7 @@ id MVKCommandResourceFactory::newFunctionNamed(const char* funcName NSString* nsFuncName = [[NSString alloc] initWithUTF8String: funcName]; // temp retained id mtlFunc = [_mtlLibrary newFunctionWithName: nsFuncName]; // retained [nsFuncName release]; // temp release - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); return mtlFunc; } @@ -636,7 +636,7 @@ id MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode, id mtlLib = [getMTLDevice() newLibraryWithSource: mslSrcCode options: getDevice()->getMTLCompileOptions() error: &err]; // temp retain - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); if (err) { reportError(VK_ERROR_INITIALIZATION_FAILED, @@ -645,7 +645,7 @@ id MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode, } else { startTime = _device->getPerformanceTimestamp(); mtlFunc = [mtlLib newFunctionWithName: funcName]; - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); } [mtlLib release]; // temp release @@ -689,7 +689,7 @@ void MVKCommandResourceFactory::initMTLLibrary() { options: getDevice()->getMTLCompileOptions() error: &err]; // retained MVKAssert( !err, "Could not compile command shaders (Error code %li):\n%s", (long)err.code, err.localizedDescription.UTF8String); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 5d4c328b..5ae7f5ec 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -689,26 +689,23 @@ public: /** * If performance is being tracked, returns a monotonic timestamp value for use performance timestamping. - * * The returned value corresponds to the number of CPU "ticks" since the app was initialized. 
* - * Calling this value twice, subtracting the first value from the second, and then multiplying - * the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the - * number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds() - * can be used to perform this calculation. + * Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds() + * to determine the number of nanoseconds or milliseconds between the two calls. */ uint64_t getPerformanceTimestamp() { return _isPerformanceTracking ? mvkGetTimestamp() : 0; } /** - * If performance is being tracked, adds the performance for an activity with a duration - * interval between the start and end times, to the given performance statistics. + * If performance is being tracked, adds the performance for an activity with a duration interval + * between the start and end times, measured in milliseconds, to the given performance statistics. * * If endTime is zero or not supplied, the current time is used. */ - void addActivityPerformance(MVKPerformanceTracker& activityTracker, + void addPerformanceInterval(MVKPerformanceTracker& perfTracker, uint64_t startTime, uint64_t endTime = 0) { if (_isPerformanceTracking) { - updateActivityPerformance(activityTracker, mvkGetElapsedMilliseconds(startTime, endTime)); + updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime)); } }; @@ -716,12 +713,15 @@ public: * If performance is being tracked, adds the performance for an activity * with a kilobyte count, to the given performance statistics. */ - void addActivityByteCount(MVKPerformanceTracker& activityTracker, uint64_t byteCount) { + void addPerformanceByteCount(MVKPerformanceTracker& perfTracker, uint64_t byteCount) { if (_isPerformanceTracking) { - updateActivityPerformance(activityTracker, double(byteCount / KIBI)); + updateActivityPerformance(perfTracker, double(byteCount / KIBI)); } }; + /** Updates the given performance statistic. */ + void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); + /** Populates the specified statistics structure from the current activity performance statistics. */ void getPerformanceStatistics(MVKPerformanceStatistics* pPerf); @@ -897,7 +897,6 @@ protected: void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); void logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); void logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); - void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); void getDescriptorVariableDescriptorCountLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport, VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index d44e6492..1930ceed 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -3058,7 +3058,7 @@ uint64_t MVKPhysicalDevice::getVRAMSize() { // If possible, retrieve from the MTLDevice, otherwise from available memory size, or a fixed conservative estimate. 
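The timestamp and interval helpers documented above reduce to a simple pattern: capture a monotonic timestamp, do the work, and fold the elapsed milliseconds into a tracker's running average. A self-contained approximation using std::chrono, where Tracker and addSample() are illustrative stand-ins rather than the MVKPerformanceTracker API:

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

using Clock = std::chrono::steady_clock;

// Stand-in for a performance tracker: keeps a running average in milliseconds.
struct Tracker {
    double   averageMs = 0.0;
    uint64_t count     = 0;
};

// Fold one new sample into the running average: avg' = (avg * n + x) / (n + 1).
void addSample(Tracker& t, double valueMs) {
    t.averageMs = (t.averageMs * t.count + valueMs) / (t.count + 1);
    t.count++;
}

// Usage pattern: take a timestamp, do the work, record the elapsed milliseconds.
int main() {
    Tracker commandEncoding;

    for (int i = 0; i < 3; i++) {
        auto start = Clock::now();
        std::this_thread::sleep_for(std::chrono::milliseconds(5));   // the tracked activity
        double elapsedMs = std::chrono::duration<double, std::milli>(Clock::now() - start).count();
        addSample(commandEncoding, elapsedMs);
    }
    std::printf("samples=%llu avg=%.2f ms\n",
                (unsigned long long)commandEncoding.count, commandEncoding.averageMs);
}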
uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() { -#if MVK_XCODE_14 || MVK_MACOS +#if MVK_XCODE_15 || MVK_MACOS if ( [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) { return _mtlDevice.recommendedMaxWorkingSetSize; } @@ -4194,7 +4194,7 @@ void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, doubl double total = (activity.average * activity.count++) + currentValue; activity.average = total / activity.count; - if (mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { + if (_isPerformanceTracking && mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { logActivityInline(activity, _performanceStatistics); } } @@ -4299,8 +4299,8 @@ MVKActivityPerformanceValueType MVKDevice::getActivityPerformanceValueType(MVKPe } void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) { - addActivityByteCount(_performanceStatistics.device.gpuMemoryAllocated, - _physicalDevice->getCurrentAllocatedSize()); + addPerformanceByteCount(_performanceStatistics.device.gpuMemoryAllocated, + _physicalDevice->getCurrentAllocatedSize()); lock_guard lock(_perfLock); if (pPerf) { *pPerf = _performanceStatistics; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index fa87643a..b632e78b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1293,7 +1293,7 @@ id MVKPresentableSwapchainImage::getCAMetalDrawable() { for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { uint64_t startTime = _device->getPerformanceTimestamp(); _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained - _device->addActivityPerformance(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); } if ( !_mtlDrawable ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt); } } @@ -1398,7 +1398,7 @@ void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& pr // If I have become detached from the swapchain, it means the swapchain, and possibly the // VkDevice, have been destroyed by the time of this callback, so do not reference them. lock_guard lock(_detachmentLock); - if (_device) { _device->addActivityPerformance(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } + if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } } presentInfo.queue->endPresentation(presentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 13f59b6b..e3da96b3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -2269,7 +2269,7 @@ VkResult MVKPipelineCache::writeDataImpl(size_t* pDataSize, void* pData) { // Serializes the data in this cache to a stream void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) { #if MVK_USE_CEREAL - MVKPerformanceTracker& activityTracker = isCounting + MVKPerformanceTracker& perfTracker = isCounting ? 
_device->_performanceStatistics.pipelineCache.sizePipelineCache : _device->_performanceStatistics.pipelineCache.writePipelineCache; @@ -2297,7 +2297,7 @@ void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) { writer(cacheIter.getShaderConversionConfig()); writer(cacheIter.getShaderConversionResultInfo()); writer(cacheIter.getCompressedMSL()); - _device->addActivityPerformance(activityTracker, startTime); + _device->addPerformanceInterval(perfTracker, startTime); } } @@ -2366,7 +2366,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) { // Add the shader library to the staging cache. MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey); - _device->addActivityPerformance(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime); slCache->addShaderLibrary(&shaderConversionConfig, resultInfo, compressedMSL); break; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 293f50ef..c104deed 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -150,8 +150,9 @@ VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) { // few frames for that to happen. If there are still swapchain presentations that haven't completed, // log a warning, and force them to end presentation, so the images and drawables will be released. void MVKQueue::waitSwapchainPresentations(MVKCommandUse cmdUse) { - auto waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; - if (_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9)) { return; } + uint32_t waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; + uint64_t waitNanos = waitFrames * _device->_performanceStatistics.queue.frameInterval.average * 1e6; + if (_presentationCompletionBlocker.wait(waitNanos)) { return; } auto imgCnt = _presentationCompletionBlocker.getReservationCount(); MVKPresentableSwapchainImage* images[imgCnt]; @@ -173,7 +174,7 @@ void MVKQueue::waitSwapchainPresentations(MVKCommandUse cmdUse) { // Wait for forced presentation completions. If we still have unfinished swapchain image // presentations, log a warning, and force each image to end, so that it can be released. 
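The waitSwapchainPresentations() hunk above now sizes its timeout from measured behaviour instead of assuming 60 FPS: it allows maxSwapchainImageCount plus two frames, each lasting the tracked average frame interval in milliseconds, converted to nanoseconds. A small worked sketch of that arithmetic; the triple-buffered 16.7 ms figures are example inputs, not values from this patch:

#include <cstdint>
#include <cstdio>

// Allow (maxSwapchainImageCount + 2) frames, each lasting the measured average
// frame interval in milliseconds, and convert the total to nanoseconds.
uint64_t presentWaitNanos(uint32_t maxSwapchainImageCount, double avgFrameIntervalMs) {
    uint32_t waitFrames = maxSwapchainImageCount + 2;
    return (uint64_t)(waitFrames * avgFrameIntervalMs * 1e6);
}

int main() {
    // Example: triple buffering at ~60 FPS (16.7 ms average frame interval) gives
    // 5 frames of slack, about 83.5 ms, before presentations are forced to end.
    uint64_t nanos = presentWaitNanos(3, 16.7);
    std::printf("wait = %llu ns (%.1f ms)\n", (unsigned long long)nanos, nanos / 1e6);
}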
- if ( !_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9) ) { + if ( !_presentationCompletionBlocker.wait(waitNanos) ) { reportWarning(VK_TIMEOUT, "%s timed out after %d frames while awaiting %d swapchain image presentations to complete.", mvkVkCommandName(cmdUse), waitFrames * 2, _presentationCompletionBlocker.getReservationCount()); for (size_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) { @@ -203,7 +204,7 @@ id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool re } else { mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences]; } - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse); setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { handleMTLCommandBufferError(mtlCB); }]; @@ -496,7 +497,7 @@ VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool sign MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; @@ -592,7 +593,7 @@ void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); } template @@ -602,7 +603,6 @@ MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQ MVKCommandUse cmdUse) : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { - // pSubmit can be null if just tracking the fence alone if (pSubmit) { uint32_t cbCnt = pSubmit->commandBufferCount; _cmdBuffers.reserve(cbCnt); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index a47a65b7..90831498 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -80,7 +80,7 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe uint64_t startTime = pShaderFeedback ? 
mvkGetTimestamp() : mvkDev->getPerformanceTimestamp(); id mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease]; - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); if (pShaderFeedback) { if (mtlFunc) { mvkEnableFlags(pShaderFeedback->flags, VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT); @@ -156,7 +156,7 @@ void MVKShaderLibrary::compressMSL(const string& msl) { MVKDevice* mvkDev = _owner->getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); _compressedMSL.compress(msl, mvkConfig().shaderSourceCompressionAlgorithm); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime); } // Decompresses the cached MSL into the string. @@ -164,7 +164,7 @@ void MVKShaderLibrary::decompressMSL(string& msl) { MVKDevice* mvkDev = _owner->getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); _compressedMSL.decompress(msl); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime); } MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, @@ -207,7 +207,7 @@ MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, handleCompilationError(err, "Compiled shader module creation"); [shdrData release]; } - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime); } MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) { @@ -283,7 +283,7 @@ MVKShaderLibrary* MVKShaderLibraryCache::findShaderLibrary(SPIRVToMSLConversionC if (slPair.first.matches(*pShaderConfig)) { pShaderConfig->alignWith(slPair.first); MVKDevice* mvkDev = _owner->getDevice(); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime); if (pShaderFeedback) { pShaderFeedback->duration += mvkGetElapsedNanoseconds(startTime); } @@ -363,7 +363,7 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig, GLSLToSPIRVConversionResult glslConversionResult; uint64_t startTime = _device->getPerformanceTimestamp(); bool wasConverted = _glslConverter.convert(getMVKGLSLConversionShaderStage(pShaderConfig), glslConversionResult, shouldLogCode, false); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime); if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", glslConversionResult.resultLog.c_str()); } @@ -376,7 +376,7 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig, uint64_t startTime = _device->getPerformanceTimestamp(); bool wasConverted = _spvConverter.convert(*pShaderConfig, conversionResult, shouldLogCode, shouldLogCode, shouldLogEstimatedGLSL); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime); + 
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime); if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", conversionResult.resultLog.c_str()); } @@ -436,7 +436,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(pCreateInfo->pCode, spvCount); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _spvConverter.setSPIRV(pCreateInfo->pCode, spvCount); @@ -450,7 +450,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); SPIRVToMSLConversionResult conversionResult; conversionResult.msl = pMSLCode; @@ -466,7 +466,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _directMSLLibrary = new MVKShaderLibrary(this, (void*)(pMSLCode), mslCodeLen); @@ -479,7 +479,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(pGLSL, codeSize); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _glslConverter.setGLSL(pGLSL, glslLen); } else { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index f326f82e..159c2edf 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -159,13 +159,14 @@ void MVKSwapchain::renderWatermark(id mtlTexture, idgetPerformanceTimestamp(); + _lastFrameTime = mvkGetTimestamp(); if (prevFrameTime == 0) { return; } // First frame starts at first presentation - _device->addActivityPerformance(_device->_performanceStatistics.queue.frameInterval, prevFrameTime, _lastFrameTime); + _device->updateActivityPerformance(_device->_performanceStatistics.queue.frameInterval, mvkGetElapsedMilliseconds(prevFrameTime, _lastFrameTime)); auto& mvkCfg = mvkConfig(); bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; @@ -244,10 +245,9 @@ VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresent void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) {} void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { + std::lock_guard lock(_presentHistoryLock); markFrameInterval(); - - std::lock_guard lock(_presentHistoryLock); if (_presentHistoryCount < kMaxPresentationHistory) { _presentHistoryCount++; } else { diff --git 
a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm index fb1e0190..b7a4a64d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm @@ -588,7 +588,7 @@ void MVKMetalCompiler::compile(unique_lock& lock, dispatch_block_t block) if (_compileError) { handleError(); } - mvkDev->addActivityPerformance(*_pPerformanceTracker, _startTime); + mvkDev->addPerformanceInterval(*_pPerformanceTracker, _startTime); } void MVKMetalCompiler::handleError() { From 7fe4963985d8ae44159243d8babff25cf830bca7 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 6 Sep 2023 16:16:11 -0400 Subject: [PATCH 05/41] Guard against CAMetalDrawable with invalid pixel format. - Calling nextDrawable may result in a nil drawable, or a drawable with no pixel format. Attempt several times to retrieve a drawable with a valid pixel format, and if unsuccessful, return an error from vkQueuePresentKHR() and vkAcquireNextImageKHR(), to force swapchain to be re-created. - Reorganize MVKQueuePresentSurfaceSubmission::execute() to detect drawable with invalid format, attach MTLCommandBuffer completion handler just before commit, and delay enqueuing MTLCommandBuffer until commit. - Refactor mvkOSVersionIsAtLeast() for clarity (unrelated). --- Common/MVKOSExtensions.h | 17 +++++++++------- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 23 ++++++++++++++++------ MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 25 ++++++++++++------------ 5 files changed, 42 insertions(+), 26 deletions(-) diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index 79d89216..e824ba06 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -39,27 +39,30 @@ static const MVKOSVersion kMVKOSVersionUnsupported = std::numeric_limits= minVer; } +static inline bool mvkOSVersionIsAtLeast(MVKOSVersion minVer) { return mvkOSVersion() >= minVer; } /** * Returns whether the operating system version is at least the appropriate min version. - * The constant kMVKOSVersionUnsupported can be used for either value to cause the test - * to always fail on that OS, which is useful for indidicating functionalty guarded by + * The constant kMVKOSVersionUnsupported can be used for any of the values to cause the test + * to always fail on that OS, which is useful for indidicating that functionalty guarded by * this test is not supported on that OS. */ -inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, MVKOSVersion iOSMinVer, MVKOSVersion visionOSMinVer) { +static inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, + MVKOSVersion iOSMinVer, + MVKOSVersion visionOSMinVer) { #if MVK_MACOS return mvkOSVersionIsAtLeast(macOSMinVer); #endif +#if MVK_IOS_OR_TVOS + return mvkOSVersionIsAtLeast(iOSMinVer); +#endif #if MVK_VISIONOS return mvkOSVersionIsAtLeast(visionOSMinVer); -#elif MVK_IOS_OR_TVOS - return mvkOSVersionIsAtLeast(iOSMinVer); #endif } diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 3e476dd3..836a60a9 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,6 +20,7 @@ Released TBD - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. +- Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. 
- Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index fb7c3dfa..1479f724 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -454,7 +454,7 @@ public: #pragma mark Metal /** Presents the contained drawable to the OS. */ - void presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); + VkResult presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); /** Called when the presentation begins. */ void beginPresentation(const MVKImagePresentInfo& presentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index b632e78b..1769df11 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1258,7 +1258,6 @@ VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphor // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. releaseMetalDrawable(); - VkResult rslt = VK_SUCCESS; auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? semaphore->deferSignal() : 0}; if (_availability.isAvailable) { _availability.isAvailable = false; @@ -1271,7 +1270,7 @@ VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphor id mtlCmdBuff = nil; if (mvkSem && mvkSem->isUsingCommandEncoding()) { mtlCmdBuff = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage); - if ( !mtlCmdBuff ) { rslt = VK_ERROR_OUT_OF_POOL_MEMORY; } + if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } } signal(signaler, mtlCmdBuff); [mtlCmdBuff commit]; @@ -1283,19 +1282,29 @@ VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphor } markAsTracked(signaler); - return rslt; + return getConfigurationResult(); } +// Calling nextDrawable may result in a nil drawable, or a drawable with no pixel format. +// Attempt several times to retrieve a good drawable, and set an error to trigger the +// swapchain to be re-established if one cannot be retrieved. 
id MVKPresentableSwapchainImage::getCAMetalDrawable() { if ( !_mtlDrawable ) { @autoreleasepool { + bool hasInvalidFormat = false; uint32_t attemptCnt = _swapchain->getImageCount() * 2; // Attempt a resonable number of times for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { uint64_t startTime = _device->getPerformanceTimestamp(); _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained _device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat; + if (hasInvalidFormat) { releaseMetalDrawable(); } + } + if (hasInvalidFormat) { + setConfigurationResult(reportError(VK_ERROR_OUT_OF_DATE_KHR, "CAMetalDrawable with valid format could not be acquired after %d attempts.", attemptCnt)); + } else if ( !_mtlDrawable ) { + setConfigurationResult(reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt)); } - if ( !_mtlDrawable ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt); } } } return _mtlDrawable; @@ -1303,8 +1312,8 @@ id MVKPresentableSwapchainImage::getCAMetalDrawable() { // Present the drawable and make myself available only once the command buffer has completed. // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. -void MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, - MVKImagePresentInfo presentInfo) { +VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, + MVKImagePresentInfo presentInfo) { lock_guard lock(_availabilityLock); _swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff); @@ -1363,6 +1372,8 @@ void MVKPresentableSwapchainImage::presentCAMetalDrawable(id m }]; signalPresentationSemaphore(signaler, mtlCmdBuff); + + return getConfigurationResult(); } // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index c104deed..f53cb71d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -623,12 +623,6 @@ MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQ // The semaphores know what to do. VkResult MVKQueuePresentSurfaceSubmission::execute() { id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent); - [mtlCmdBuff enqueue]; - - // Add completion handler that will destroy this submission only once the MTLCommandBuffer - // is finished with the resources retained here, including the wait semaphores. - // Completion handlers are also added in presentCAMetalDrawable() to retain the swapchain images. - [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { this->finish(); }]; for (auto& ws : _waitSemaphores) { auto& sem4 = ws.first; @@ -637,15 +631,22 @@ VkResult MVKQueuePresentSurfaceSubmission::execute() { } for (int i = 0; i < _presentInfo.size(); i++ ) { - _presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]); + setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i])); } - [mtlCmdBuff commit]; + if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } // Check after images may set error. - // If an error occurred and the MTLCommandBuffer was not created, call finish() directly. 
- if ( !mtlCmdBuff ) { finish(); } - - return mtlCmdBuff ? VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; + // Add completion callback to the MTLCommandBuffer to call finish(), + // or if the MTLCommandBuffer could not be created, call finish() directly. + // Retrieve the result first, because finish() will destroy this instance. + VkResult rslt = getConfigurationResult(); + if (mtlCmdBuff) { + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { this->finish(); }]; + [mtlCmdBuff commit]; + } else { + finish(); + } + return rslt; } void MVKQueuePresentSurfaceSubmission::finish() { From 6c6139ca929d2edd01930b3034c2ccd5adc55705 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Thu, 7 Sep 2023 09:33:40 -0400 Subject: [PATCH 06/41] Update Common/MVKOSExtensions.h Co-authored-by: Chip Davis --- Common/MVKOSExtensions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index e824ba06..13d864da 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -49,7 +49,7 @@ static inline bool mvkOSVersionIsAtLeast(MVKOSVersion minVer) { return mvkOSVers /** * Returns whether the operating system version is at least the appropriate min version. * The constant kMVKOSVersionUnsupported can be used for any of the values to cause the test - * to always fail on that OS, which is useful for indidicating that functionalty guarded by + * to always fail on that OS, which is useful for indicating that functionalty guarded by * this test is not supported on that OS. */ static inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, From 0ee15222c83157dc765a9fdeac614e1620f76d12 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Fri, 8 Sep 2023 20:46:28 -0400 Subject: [PATCH 07/41] Ensure objects retained for life of MTLCommandBuffer. - vkCmdBlitImage() ensure swizzle texture view is retained for life of MTLCommandBuffer. - vkQueuePresentKHR() use MTLCommandBuffer that retains references. - Update MoltenVK version to 1.2.6. --- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/API/mvk_config.h | 2 +- MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm | 19 ++++++++----------- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 5 ++++- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 836a60a9..605cda82 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,6 +20,7 @@ Released TBD - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. +- Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. - Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. 
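The retention fix above follows a standard Metal lifetime pattern: a transient object the GPU will read is created retained (no autorelease) and released only from the command buffer's completion handler, so it cannot be destroyed while the GPU is still using it. A minimal sketch of that pattern, with placeholder names (`encodeWithTransientView`, `srcTex`) rather than the MoltenVK symbols:

```objc
#import <Metal/Metal.h>

// Sketch (manual reference counting): keep a transient texture view alive
// until the MTLCommandBuffer that reads from it has completed on the GPU.
static void encodeWithTransientView(id<MTLCommandBuffer> mtlCmdBuff, id<MTLTexture> srcTex) {
	id<MTLTexture> srcView = [srcTex newTextureViewWithPixelFormat: srcTex.pixelFormat];	// retained (+1)

	// ... encode blit or render work that samples from srcView into mtlCmdBuff ...

	// Balance the retain only after the GPU has finished executing the command buffer.
	[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> cb) { [srcView release]; }];
}
```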
diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index 532d27db..f9702661 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -45,7 +45,7 @@ extern "C" { */ #define MVK_VERSION_MAJOR 1 #define MVK_VERSION_MINOR 2 -#define MVK_VERSION_PATCH 5 +#define MVK_VERSION_PATCH 6 #define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch)) #define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 2c0ef546..1c38d15a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -504,11 +504,12 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com if (cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle && _srcImage->needsSwizzle()) { // Use a view that has a swizzle on it. - srcMTLTex = [[srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat - textureType:srcMTLTex.textureType - levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount) - slices:NSMakeRange(0, srcMTLTex.arrayLength) - swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())] autorelease]; + srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat + textureType:srcMTLTex.textureType + levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount) + slices:NSMakeRange(0, srcMTLTex.arrayLength) + swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [srcMTLTex release]; }]; } cmdEncoder->endCurrentMetalEncoding(); @@ -551,9 +552,7 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com textureType: MTLTextureType2DArray levels: NSMakeRange(0, srcMTLTex.mipmapLevelCount) slices: NSMakeRange(0, srcMTLTex.arrayLength)]; - [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { - [srcMTLTex release]; - }]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [srcMTLTex release]; }]; } blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat(dstPlaneIndex); blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter); @@ -655,9 +654,7 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com #endif } id stencilMTLTex = [srcMTLTex newTextureViewWithPixelFormat: stencilFmt]; - [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { - [stencilMTLTex release]; - }]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [stencilMTLTex release]; }]; [mtlRendEnc setFragmentTexture: stencilMTLTex atIndex: 1]; } else { [mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 1]; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index f53cb71d..1c28f63f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -622,7 +622,10 @@ MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQ // If the semaphores are not encodable, wait on them inline after presenting. // The semaphores know what to do. VkResult MVKQueuePresentSurfaceSubmission::execute() { - id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent); + // MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early. 
+ // Although testing could not determine which objects were being lost, queue present MTLCommandBuffers + // are used only once per frame, and retain so few objects, that blanket retention is still performant. + id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true); for (auto& ws : _waitSemaphores) { auto& sem4 = ws.first; From 62e0368e21c067ee7bb12cbd87bfae04826c5636 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 11 Sep 2023 20:14:23 -0400 Subject: [PATCH 08/41] Add configurable lowpass filter for VkPhysicalDeviceLimits::timestampPeriod. - Add MVKConfiguration::timestampPeriodLowPassAlpha, along with matching MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA env var. - Add MVKConfigMembers.def file to describe MVKConfiguration members, to support consistent batch handling of members. - Add env var & build settings MVK_CONFIG_DEBUG, plus legacy MVK_CONFIG_ALLOW_METAL_EVENTS & MVK_CONFIG_ALLOW_METAL_FENCES. - Simplify environment variable retrieval functions and macros. - Rename MVKDevice::updateTimestampsAndPeriod() to updateTimestampPeriod(). --- Common/MVKOSExtensions.h | 62 +++---------- Common/MVKOSExtensions.mm | 17 ++-- Docs/MoltenVK_Runtime_UserGuide.md | 4 +- Docs/Whats_New.md | 2 + MoltenVK/MoltenVK.xcodeproj/project.pbxproj | 10 +++ MoltenVK/MoltenVK/API/mvk_config.h | 37 +++++++- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 16 ++-- .../MoltenVK/Utility/MVKConfigMembers.def | 86 +++++++++++++++++++ MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp | 66 +++++--------- MoltenVK/MoltenVK/Utility/MVKEnvironment.h | 30 ++++++- Scripts/runcts | 2 +- 12 files changed, 214 insertions(+), 120 deletions(-) create mode 100644 MoltenVK/MoltenVK/Utility/MVKConfigMembers.def diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index 13d864da..f9faba91 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -108,62 +108,22 @@ void mvkDispatchToMainAndWait(dispatch_block_t block); #pragma mark Process environment /** - * Returns the value of the environment variable at the given name, - * or an empty string if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. + * Sets the value of the environment variable at the given name, into the + * std::string, and returns whether the environment variable was found. */ -std::string mvkGetEnvVar(std::string varName, bool* pWasFound = nullptr); +bool mvkGetEnvVar(const char* evName, std::string& evStr); + +/** + * Returns a pointer to a string containing the value of the environment variable at + * the given name, or returns the default value if the environment variable was not set. + */ +const char* mvkGetEnvVarString(const char* evName, std::string& evStr, const char* defaultValue = ""); /** * Returns the value of the environment variable at the given name, - * or zero if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. + * or returns the default value if the environment variable was not set. */ -int64_t mvkGetEnvVarInt64(std::string varName, bool* pWasFound = nullptr); - -/** - * Returns the value of the environment variable at the given name, - * or false if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. 
- */ -bool mvkGetEnvVarBool(std::string varName, bool* pWasFound = nullptr); - -#define MVK_SET_FROM_ENV_OR_BUILD_BOOL(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - bool ev = mvkGetEnvVarBool(#EV, &wasFound); \ - cfgVal = wasFound ? ev : EV; \ - } while(false) - -#define MVK_SET_FROM_ENV_OR_BUILD_INT64(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \ - cfgVal = wasFound ? ev : EV; \ - } while(false) - -// Pointer cast permits cfgVal to be an enum var -#define MVK_SET_FROM_ENV_OR_BUILD_INT32(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \ - int64_t val = wasFound ? ev : EV; \ - *(int32_t*)&cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \ - } while(false) - -#define MVK_SET_FROM_ENV_OR_BUILD_STRING(cfgVal, EV, strObj) \ - do { \ - bool wasFound = false; \ - std::string ev = mvkGetEnvVar(#EV, &wasFound); \ - strObj = wasFound ? std::move(ev) : EV; \ - cfgVal = strObj.c_str(); \ - } while(false) +double mvkGetEnvVarNumber(const char* evName, double defaultValue = 0.0); #pragma mark - diff --git a/Common/MVKOSExtensions.mm b/Common/MVKOSExtensions.mm index 4e2c974c..93025f2b 100644 --- a/Common/MVKOSExtensions.mm +++ b/Common/MVKOSExtensions.mm @@ -81,21 +81,22 @@ void mvkDispatchToMainAndWait(dispatch_block_t block) { #pragma mark - #pragma mark Process environment -string mvkGetEnvVar(string varName, bool* pWasFound) { +bool mvkGetEnvVar(const char* varName, string& evStr) { @autoreleasepool { NSDictionary* nsEnv = [[NSProcessInfo processInfo] environment]; - NSString* envStr = nsEnv[@(varName.c_str())]; - if (pWasFound) { *pWasFound = envStr != nil; } - return envStr ? envStr.UTF8String : ""; + NSString* nsStr = nsEnv[@(varName)]; + if (nsStr) { evStr = nsStr.UTF8String; } + return nsStr != nil; } } -int64_t mvkGetEnvVarInt64(string varName, bool* pWasFound) { - return strtoll(mvkGetEnvVar(varName, pWasFound).c_str(), NULL, 0); +const char* mvkGetEnvVarString(const char* varName, string& evStr, const char* defaultValue) { + return mvkGetEnvVar(varName, evStr) ? evStr.c_str() : defaultValue; } -bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) { - return mvkGetEnvVarInt64(varName, pWasFound) != 0; +double mvkGetEnvVarNumber(const char* varName, double defaultValue) { + string evStr; + return mvkGetEnvVar(varName, evStr) ? strtod(evStr.c_str(), nullptr) : defaultValue; } diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index f236a7e9..6684bb34 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -517,8 +517,8 @@ you can address the issue as follows: - Errors encountered during **Runtime Shader Conversion** are logged to the console. - To help understand conversion issues during **Runtime Shader Conversion**, you can enable the - logging of the *SPIR-V* and *MSL* shader source code during shader conversion, by turning on - the `MVKConfiguration::debugMode` configuration parameter, or setting the value of the `MVK_DEBUG` + logging of the *SPIR-V* and *MSL* shader source code during shader conversion, by turning on the + `MVKConfiguration::debugMode` configuration parameter, or setting the value of the `MVK_CONFIG_DEBUG` runtime environment variable to `1`. See the [*MoltenVK Configuration*](#moltenvk_config) description above. 
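A rough usage sketch of the simplified environment-variable helpers above (the variable names and defaults are illustrative, mirroring values used elsewhere in this patch, and the wrapper functions are hypothetical):

```objc
#include "MVKOSExtensions.h"
#include <string>

// Numeric lookup: falls back to the supplied default when the variable is not set.
static float getTimestampLowPassAlpha() {
	return (float)mvkGetEnvVarNumber("MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA", 0.05);
}

// String lookup: the std::string receives the value; the returned pointer
// falls back to the default when the variable is not set.
static std::string getGPUCaptureOutputFile() {
	std::string evStr;
	return mvkGetEnvVarString("MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE", evStr, "");
}
```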
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 605cda82..357df0e4 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -24,6 +24,8 @@ Released TBD - Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. +- Add configurable lowpass filter for `VkPhysicalDeviceLimits::timestampPeriod`. +- Deprecate `MVK_DEBUG` env var, and add `MVK_CONFIG_DEBUG` env var to replace it. - Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj index 202efa91..1dffab36 100644 --- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj +++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj @@ -331,6 +331,10 @@ A9B51BD8225E986A00AC74D2 /* MVKOSExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */; }; A9B51BD9225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; A9B51BDA225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; + A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; A9C96DD01DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; }; A9C96DD11DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; }; A9C96DD21DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; }; @@ -670,6 +674,7 @@ A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKOSExtensions.mm; sourceTree = ""; }; A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKOSExtensions.h; sourceTree = ""; }; A9B8EE0A1A98D796009C5A02 /* libMoltenVK.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libMoltenVK.a; sourceTree = BUILT_PRODUCTS_DIR; }; + A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKConfigMembers.def; sourceTree = ""; }; A9C83DCD24533E22003E5261 /* MVKCommandTypePools.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKCommandTypePools.def; sourceTree = ""; }; A9C86CB61C55B8350096CAF2 /* MoltenVKShaderConverter.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; 
name = MoltenVKShaderConverter.xcodeproj; path = ../MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj; sourceTree = ""; }; A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKMTLBufferAllocation.h; sourceTree = ""; }; @@ -843,6 +848,7 @@ 4553AEF62251617100E8EBCD /* MVKBlockObserver.m */, 45557A5121C9EFF3008868BD /* MVKCodec.h */, 45557A4D21C9EFF3008868BD /* MVKCodec.mm */, + A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */, 45557A5721CD83C3008868BD /* MVKDXTnCodec.def */, A9A5E9C525C0822700E9085E /* MVKEnvironment.cpp */, A98149431FB6A3F7005F00B4 /* MVKEnvironment.h */, @@ -1008,6 +1014,7 @@ 2FEA0A7824902F9F00EEF3AD /* MVKDeviceMemory.h in Headers */, 2FEA0A7924902F9F00EEF3AD /* MVKMTLResourceBindings.h in Headers */, 2FEA0A7A24902F9F00EEF3AD /* MVKExtensions.def in Headers */, + A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */, 2FEA0A7B24902F9F00EEF3AD /* mvk_datatypes.hpp in Headers */, 2FEA0A7C24902F9F00EEF3AD /* MVKCommandEncodingPool.h in Headers */, 2FEA0A7D24902F9F00EEF3AD /* MVKResource.h in Headers */, @@ -1070,6 +1077,7 @@ A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */, A94FB7BC1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DC24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, + A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */, A94FB7F81C7DFB4800632CA3 /* MVKPipeline.h in Headers */, A94FB7F01C7DFB4800632CA3 /* MVKImage.h in Headers */, 4553AEFD2251617100E8EBCD /* MVKBlockObserver.h in Headers */, @@ -1147,6 +1155,7 @@ A94FB7BD1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DD24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, A94FB7F91C7DFB4800632CA3 /* MVKPipeline.h in Headers */, + A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */, A94FB7F11C7DFB4800632CA3 /* MVKImage.h in Headers */, 4553AEFE2251617100E8EBCD /* MVKBlockObserver.h in Headers */, A94FB7B91C7DFB4800632CA3 /* MVKCmdTransfer.h in Headers */, @@ -1204,6 +1213,7 @@ DCFD7EFD2A45BC6E007BBBF7 /* MVKSync.h in Headers */, DCFD7EFE2A45BC6E007BBBF7 /* MVKDevice.h in Headers */, DCFD7EFF2A45BC6E007BBBF7 /* MVKSmallVector.h in Headers */, + A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */, DCFD7F002A45BC6E007BBBF7 /* MVKCommandPool.h in Headers */, DCFD7F012A45BC6E007BBBF7 /* MVKShaderModule.h in Headers */, DCFD7F022A45BC6E007BBBF7 /* MVKVulkanAPIObject.h in Headers */, diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index f9702661..f72ef777 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -191,7 +191,7 @@ typedef struct { * and the changed value will immediately effect subsequent MoltenVK behaviour. * * The initial value or this parameter is set by the - * MVK_DEBUG + * MVK_CONFIG_DEBUG * runtime environment variable or MoltenVK compile-time build setting. * If neither is set, the value of this parameter is false if MoltenVK was * built in Release mode, and true if MoltenVK was built in Debug mode. @@ -919,6 +919,9 @@ typedef struct { /** * Maximize the concurrent executing compilation tasks. * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * * The initial value or this parameter is set by the * MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION * runtime environment variable or MoltenVK compile-time build setting. 
@@ -926,6 +929,38 @@ typedef struct { */ VkBool32 shouldMaximizeConcurrentCompilation; + /** + * This parameter is ignored on Apple Silicon devices. + * + * Non-Apple GPUs can have a dynamic timestamp period, which varies over time according to GPU + * workload. Depending on how often the app samples the VkPhysicalDeviceLimits::timestampPeriod + * value using vkGetPhysicalDeviceProperties(), the app may want up-to-date, but potentially + * volatile values, or it may find average values more useful. + * + * The value of this parameter sets the alpha (A) value of a simple lowpass filter + * on the timestampPeriod value, of the form: + * + * TPout = (1 - A)TPout + (A * TPin) + * + * The alpha value can be set to a float between 0.0 and 1.0. Values of alpha closer to + * 0.0 cause the value of timestampPeriod to vary slowly over time and be less volatile, + * and values of alpha closer to 1.0 cause the value of timestampPeriod to vary quickly + * and be more volatile. + * + * Apps that query the timestampPeriod value infrequently will prefer low volatility, whereas + * apps that query frequently may prefer higher volatility, to track more recent changes. + * + * The value of this parameter can be changed at any time, and will affect subsequent queries. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this parameter is set to 0.05 by default, + * indicating that the timestampPeriod will vary relatively slowly, + * with the expectation that the app is querying this value infrequently. + */ + float timestampPeriodLowPassAlpha; + } MVKConfiguration; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 5ae7f5ec..7a04e90e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -401,7 +401,7 @@ protected: void initExtensions(); void initCounterSets(); bool needsCounterSetRetained(); - void updateTimestampsAndPeriod(); + void updateTimestampPeriod(); MVKArrayRef getQueueFamilies(); void initPipelineCacheUUID(); uint32_t getHighestGPUCapability(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 04816011..422f1b43 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -451,7 +451,7 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { } void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) { - updateTimestampsAndPeriod(); + updateTimestampPeriod(); *properties = _properties; } @@ -1570,10 +1570,10 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount, // If needed, update the timestamp period for this device, using a crude lowpass filter to level out // wild temporary changes, particularly during initial queries before much GPU activity has occurred. // On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes. 
-void MVKPhysicalDevice::updateTimestampsAndPeriod() { - if (_properties.vendorID == kAppleVendorId) { return; } - - if ([_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) { +void MVKPhysicalDevice::updateTimestampPeriod() { + if (_properties.vendorID != kAppleVendorId && + [_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) { + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; MTLTimestamp earlierGPUTs = _prevGPUTimestamp; [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; @@ -1582,9 +1582,9 @@ void MVKPhysicalDevice::updateTimestampsAndPeriod() { if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; - // Basic lowpass filter Y = (1 - a)Y + a*X. - // The lower a is, the slower Y will change over time. - static const float a = 0.05; + // Basic lowpass filter TPout = (1 - A)TPout + (A * TPin). + // The lower A is, the slower TPout will change over time. + float a = mvkConfig().timestampPeriodLowPassAlpha; _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); } } diff --git a/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def new file mode 100644 index 00000000..aff0cf33 --- /dev/null +++ b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def @@ -0,0 +1,86 @@ +/* + * MVKConfigMembers.def + * + * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) + * + * Licensed under the Apache License, Version 2.0 (the "License", Int64) + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// The items in the list below describe the members of the MVKConfiguration struct. +// When a new member is added to the MVKConfiguration struct, a corresponding description +// must be added here. +// +// To use this file, define the macros: +// +// MVK_CONFIG_MEMBER(member, mbrType, name) +// MVK_CONFIG_MEMBER_STRING(member, strObj, name) +// +// then #include this file inline with your code. +// +// The name prameter is the name of the configuration parameter, which is used as the name +// of the environment variable, and build setting, that sets the config value, and is entered +// here without the "MVK_CONFIG_" prefix. +// +// Since string members are set from char pointers, the text must be copied to a std::string +// object, which is passed as a parameter to MVK_CONFIG_MEMBER_STRING. 
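The comment above describes a conventional X-macro list: a consumer defines the two macros to expand each listed member however it needs, then includes this file, which is exactly how MVKEnvironment.cpp consumes it further below. A condensed sketch of that consumption, simplified from the real code:

```objc
#include "MVKEnvironment.h"		// build-time MVK_CONFIG_* defaults
#include "MVKOSExtensions.h"	// mvkGetEnvVarNumber() / mvkGetEnvVarString()
#include <string>

// Populate a configuration struct from env vars, falling back to build-time defaults.
static void initConfigFromEnv(MVKConfiguration& evCfg, std::string& evGPUCapFileStrObj) {
#define MVK_CONFIG_MEMBER(member, mbrType, name) \
	evCfg.member = (mbrType)mvkGetEnvVarNumber("MVK_CONFIG_" #name, MVK_CONFIG_##name);
#define MVK_CONFIG_MEMBER_STRING(member, strObj, name) \
	evCfg.member = mvkGetEnvVarString("MVK_CONFIG_" #name, strObj, MVK_CONFIG_##name);
#include "MVKConfigMembers.def"	// expands to one assignment per MVKConfiguration member
}
```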
+ + +#ifndef MVK_CONFIG_MEMBER +#error MVK_CONFIG_MEMBER must be defined before including this file +#endif + +#ifndef MVK_CONFIG_MEMBER_STRING +#error MVK_CONFIG_MEMBER_STRING must be defined before including this file +#endif + +MVK_CONFIG_MEMBER(debugMode, VkBool32, DEBUG) +MVK_CONFIG_MEMBER(shaderConversionFlipVertexY, VkBool32, SHADER_CONVERSION_FLIP_VERTEX_Y) +MVK_CONFIG_MEMBER(synchronousQueueSubmits, VkBool32, SYNCHRONOUS_QUEUE_SUBMITS) +MVK_CONFIG_MEMBER(prefillMetalCommandBuffers, MVKPrefillMetalCommandBuffersStyle, PREFILL_METAL_COMMAND_BUFFERS) +MVK_CONFIG_MEMBER(maxActiveMetalCommandBuffersPerQueue, uint32_t, MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE) +MVK_CONFIG_MEMBER(supportLargeQueryPools, VkBool32, SUPPORT_LARGE_QUERY_POOLS) +MVK_CONFIG_MEMBER(presentWithCommandBuffer, VkBool32, PRESENT_WITH_COMMAND_BUFFER) +MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MAG_FILTER_USE_NEAREST) // Deprecated legacy renaming +MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST) +MVK_CONFIG_MEMBER(metalCompileTimeout, uint64_t, METAL_COMPILE_TIMEOUT) +MVK_CONFIG_MEMBER(performanceTracking, VkBool32, PERFORMANCE_TRACKING) +MVK_CONFIG_MEMBER(performanceLoggingFrameCount, uint32_t, PERFORMANCE_LOGGING_FRAME_COUNT) +MVK_CONFIG_MEMBER(activityPerformanceLoggingStyle, MVKConfigActivityPerformanceLoggingStyle, ACTIVITY_PERFORMANCE_LOGGING_STYLE) +MVK_CONFIG_MEMBER(displayWatermark, VkBool32, DISPLAY_WATERMARK) +MVK_CONFIG_MEMBER(specializedQueueFamilies, VkBool32, SPECIALIZED_QUEUE_FAMILIES) +MVK_CONFIG_MEMBER(switchSystemGPU, VkBool32, SWITCH_SYSTEM_GPU) +MVK_CONFIG_MEMBER(fullImageViewSwizzle, VkBool32, FULL_IMAGE_VIEW_SWIZZLE) +MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueFamilyIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX) +MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX) +MVK_CONFIG_MEMBER(fastMathEnabled, MVKConfigFastMath, FAST_MATH_ENABLED) +MVK_CONFIG_MEMBER(logLevel, MVKConfigLogLevel, LOG_LEVEL) +MVK_CONFIG_MEMBER(traceVulkanCalls, MVKConfigTraceVulkanCalls, TRACE_VULKAN_CALLS) +MVK_CONFIG_MEMBER(forceLowPowerGPU, VkBool32, FORCE_LOW_POWER_GPU) +MVK_CONFIG_MEMBER(semaphoreUseMTLFence, VkBool32, ALLOW_METAL_FENCES) // Deprecated legacy +MVK_CONFIG_MEMBER(semaphoreSupportStyle, MVKVkSemaphoreSupportStyle, VK_SEMAPHORE_SUPPORT_STYLE) +MVK_CONFIG_MEMBER(autoGPUCaptureScope, MVKConfigAutoGPUCaptureScope, AUTO_GPU_CAPTURE_SCOPE) +MVK_CONFIG_MEMBER_STRING(autoGPUCaptureOutputFilepath, evGPUCapFileStrObj, AUTO_GPU_CAPTURE_OUTPUT_FILE) +MVK_CONFIG_MEMBER(texture1DAs2D, VkBool32, TEXTURE_1D_AS_2D) +MVK_CONFIG_MEMBER(preallocateDescriptors, VkBool32, PREALLOCATE_DESCRIPTORS) +MVK_CONFIG_MEMBER(useCommandPooling, VkBool32, USE_COMMAND_POOLING) +MVK_CONFIG_MEMBER(useMTLHeap, VkBool32, USE_MTLHEAP) +MVK_CONFIG_MEMBER(apiVersionToAdvertise, uint32_t, API_VERSION_TO_ADVERTISE) +MVK_CONFIG_MEMBER(advertiseExtensions, uint32_t, ADVERTISE_EXTENSIONS) +MVK_CONFIG_MEMBER(resumeLostDevice, VkBool32, RESUME_LOST_DEVICE) +MVK_CONFIG_MEMBER(useMetalArgumentBuffers, MVKUseMetalArgumentBuffers, USE_METAL_ARGUMENT_BUFFERS) +MVK_CONFIG_MEMBER(shaderSourceCompressionAlgorithm, MVKConfigCompressionAlgorithm, SHADER_COMPRESSION_ALGORITHM) +MVK_CONFIG_MEMBER(shouldMaximizeConcurrentCompilation, VkBool32, SHOULD_MAXIMIZE_CONCURRENT_COMPILATION) +MVK_CONFIG_MEMBER(timestampPeriodLowPassAlpha, float, TIMESTAMP_PERIOD_LOWPASS_ALPHA) + +#undef MVK_CONFIG_MEMBER +#undef 
MVK_CONFIG_MEMBER_STRING diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp index 5aa6f7db..f2fa9e86 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp @@ -18,7 +18,7 @@ #include "MVKEnvironment.h" #include "MVKOSExtensions.h" - +#include "MVKFoundation.h" static bool _mvkConfigInitialized = false; static void mvkInitConfigFromEnvVars() { @@ -27,43 +27,22 @@ static void mvkInitConfigFromEnvVars() { MVKConfiguration evCfg; std::string evGPUCapFileStrObj; - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.debugMode, MVK_DEBUG); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shaderConversionFlipVertexY, MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.synchronousQueueSubmits, MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.prefillMetalCommandBuffers, MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.maxActiveMetalCommandBuffersPerQueue, MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.supportLargeQueryPools, MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.presentWithCommandBuffer, MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST); // Deprecated legacy env var - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST); - MVK_SET_FROM_ENV_OR_BUILD_INT64 (evCfg.metalCompileTimeout, MVK_CONFIG_METAL_COMPILE_TIMEOUT); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.performanceTracking, MVK_CONFIG_PERFORMANCE_TRACKING); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.performanceLoggingFrameCount, MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.activityPerformanceLoggingStyle, MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.displayWatermark, MVK_CONFIG_DISPLAY_WATERMARK); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.specializedQueueFamilies, MVK_CONFIG_SPECIALIZED_QUEUE_FAMILIES); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.switchSystemGPU, MVK_CONFIG_SWITCH_SYSTEM_GPU); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.fullImageViewSwizzle, MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueFamilyIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.fastMathEnabled, MVK_CONFIG_FAST_MATH_ENABLED); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.logLevel, MVK_CONFIG_LOG_LEVEL); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.traceVulkanCalls, MVK_CONFIG_TRACE_VULKAN_CALLS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.forceLowPowerGPU, MVK_CONFIG_FORCE_LOW_POWER_GPU); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLFence, MVK_ALLOW_METAL_FENCES); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.semaphoreSupportStyle, MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.autoGPUCaptureScope, MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE); - MVK_SET_FROM_ENV_OR_BUILD_STRING(evCfg.autoGPUCaptureOutputFilepath, MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE, evGPUCapFileStrObj); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.texture1DAs2D, MVK_CONFIG_TEXTURE_1D_AS_2D); - MVK_SET_FROM_ENV_OR_BUILD_BOOL 
(evCfg.preallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useCommandPooling, MVK_CONFIG_USE_COMMAND_POOLING); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useMTLHeap, MVK_CONFIG_USE_MTLHEAP); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.apiVersionToAdvertise, MVK_CONFIG_API_VERSION_TO_ADVERTISE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.advertiseExtensions, MVK_CONFIG_ADVERTISE_EXTENSIONS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.resumeLostDevice, MVK_CONFIG_RESUME_LOST_DEVICE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.useMetalArgumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.shaderSourceCompressionAlgorithm, MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shouldMaximizeConcurrentCompilation, MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION); +#define STR(name) #name + +#define MVK_CONFIG_MEMBER(member, mbrType, name) \ + evCfg.member = (mbrType)mvkGetEnvVarNumber(STR(MVK_CONFIG_##name), MVK_CONFIG_##name); + +#define MVK_CONFIG_MEMBER_STRING(member, strObj, name) \ + evCfg.member = mvkGetEnvVarString(STR(MVK_CONFIG_##name), strObj, MVK_CONFIG_##name); + +#include "MVKConfigMembers.def" + + // At this point, debugMode has been set by env var MVK_CONFIG_DEBUG. + // MVK_CONFIG_DEBUG replaced the deprecataed MVK_DEBUG env var, so for + // legacy use, if the MVK_DEBUG env var is explicitly set, override debugMode. + double noEV = -3.1415; // An unlikely env var value. + double cvMVKDebug = mvkGetEnvVarNumber("MVK_DEBUG", noEV); + if (cvMVKDebug != noEV) { evCfg.debugMode = cvMVKDebug; } // Deprected legacy VkSemaphore MVK_ALLOW_METAL_FENCES and MVK_ALLOW_METAL_EVENTS config. // Legacy MVK_ALLOW_METAL_EVENTS is covered by MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE, @@ -71,9 +50,7 @@ static void mvkInitConfigFromEnvVars() { // disabled, disable semaphoreUseMTLEvent (aliased as semaphoreSupportStyle value // MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE), and let mvkSetConfig() // further process legacy behavior of MVK_ALLOW_METAL_FENCES. - bool sem4UseMTLEvent; - MVK_SET_FROM_ENV_OR_BUILD_BOOL(sem4UseMTLEvent, MVK_ALLOW_METAL_EVENTS); - if ( !sem4UseMTLEvent ) { + if ( !mvkGetEnvVarNumber("MVK_CONFIG_ALLOW_METAL_EVENTS", 1.0) ) { evCfg.semaphoreUseMTLEvent = (MVKVkSemaphoreSupportStyle)false; // Disabled. Also semaphoreSupportStyle MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE. } @@ -81,13 +58,11 @@ static void mvkInitConfigFromEnvVars() { // MVK_CONFIG_PERFORMANCE_LOGGING_INLINE env var was used, and activityPerformanceLoggingStyle // was not already set by MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE, set // activityPerformanceLoggingStyle to MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE. - bool logPerfInline; - MVK_SET_FROM_ENV_OR_BUILD_BOOL(logPerfInline, MVK_CONFIG_PERFORMANCE_LOGGING_INLINE); + bool logPerfInline = mvkGetEnvVarNumber("MVK_CONFIG_PERFORMANCE_LOGGING_INLINE", 0.0); if (logPerfInline && evCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) { evCfg.activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE; } - mvkSetConfig(evCfg); } @@ -129,4 +104,7 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig) { _autoGPUCaptureOutputFile = _mvkConfig.autoGPUCaptureOutputFilepath; } _mvkConfig.autoGPUCaptureOutputFilepath = (char*)_autoGPUCaptureOutputFile.c_str(); + + // Clamp timestampPeriodLowPassAlpha between 0.0 and 1.0. 
+ _mvkConfig.timestampPeriodLowPassAlpha = mvkClamp(_mvkConfig.timestampPeriodLowPassAlpha, 0.0f, 1.0f); } diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h index 86215bf9..f6f1ae9e 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h @@ -82,6 +82,14 @@ const MVKConfiguration& mvkConfig(); /** Global function to update MoltenVK configuration info. */ void mvkSetConfig(const MVKConfiguration& mvkConfig); +/** + * Enable debug mode. + * By default, disabled for Release builds and enabled for Debug builds. + */ +#ifndef MVK_CONFIG_DEBUG +# define MVK_CONFIG_DEBUG MVK_DEBUG +#endif + /** Flip the vertex coordinate in shaders. Enabled by default. */ #ifndef MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y # define MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y 1 @@ -244,11 +252,17 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig); #ifndef MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE # define MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE #endif -#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated -# define MVK_ALLOW_METAL_EVENTS 1 +#ifndef MVK_CONFIG_ALLOW_METAL_EVENTS +# define MVK_CONFIG_ALLOW_METAL_EVENTS 1 #endif -#ifndef MVK_ALLOW_METAL_FENCES // Deprecated -# define MVK_ALLOW_METAL_FENCES 1 +#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated +# define MVK_ALLOW_METAL_EVENTS MVK_CONFIG_ALLOW_METAL_EVENTS +#endif +#ifndef MVK_CONFIG_ALLOW_METAL_FENCES +# define MVK_CONFIG_ALLOW_METAL_FENCES 1 +#endif +#ifndef MVK_ALLOW_METAL_FENCES // Deprecated +# define MVK_ALLOW_METAL_FENCES MVK_CONFIG_ALLOW_METAL_FENCES #endif /** Substitute Metal 2D textures for Vulkan 1D images. Enabled by default. */ @@ -303,3 +317,11 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig); #ifndef MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION # define MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION 0 #endif + +/** + * The alpha value of a lowpass filter tracking VkPhysicalDeviceLimits::timestampPeriod. + * This can be set to a float between 0.0 and 1.0. + */ +#ifndef MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA +# define MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA 0.05 +#endif diff --git a/Scripts/runcts b/Scripts/runcts index bf65cc9e..73aee7c6 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -103,7 +103,7 @@ export METAL_DEBUG_ERROR_MODE=3 # ----- MoltenVK config settings ------ export MVK_CONFIG_LOG_LEVEL=1 #(1 = Errors only, 3 = Info) -export MVK_DEBUG=0 +export MVK_CONFIG_DEBUG=0 # Additional MoltenVK configuration can be set here by editing below. export MVK_CONFIG_RESUME_LOST_DEVICE=1 From 9c206ecc79e707d81fb320b57a97e198a5aa743a Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 12 Sep 2023 16:44:33 -0400 Subject: [PATCH 09/41] Fix MSL code used in vkCmdBlitImage() on depth-stencil formats. --- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 357df0e4..14f6edfc 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -22,6 +22,7 @@ Released TBD when no other bindings change between pipelines. - Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. - Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. +- Fix MSL code used in `vkCmdBlitImage()` on depth-stencil formats. 
- Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. - Add configurable lowpass filter for `VkPhysicalDeviceLimits::timestampPeriod`. diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index 973db991..33ee4485 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -286,7 +286,7 @@ id MVKCommandResourceFactory::newBlitFragFunction(MVKRPSKeyBlitImg& [msl appendLineMVK: @" constant TexSubrez& subRez [[buffer(0)]]) {"]; [msl appendLineMVK: @" FragmentOutputs out;"]; if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_DEPTH_BIT))) { - [msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod)).%c;", coordArg, sliceArg, swizzleArg[0]]; + [msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod));", coordArg, sliceArg]; [msl appendLineMVK]; } if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT))) { From f0cb31a12b59f05177f07ab5a46bc9084ba5fbc9 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Fri, 15 Sep 2023 09:54:48 -0400 Subject: [PATCH 10/41] Rework workaround to force incomplete CAMetalDrawable presentations to complete. - To force any incomplete CAMetalDrawable presentations to complete, don't force the creation of another transient drawable, as this can stall the creation of future drawables. Instead, when a swapchain is destroyed, or replaced by a new swapchain, set the CAMetalLayer drawableSize, which will force presentation completion. - Add presentation completion handler in command buffer scheduling callback, move marking available to presentation completion handler, and minimize mutex locking. - MVKQueue::waitIdle() remove wait for swapchain presentations, and remove callbacks to MVKQueue from drawable completions. - MVKQueue::submit() don't bypass submitting a misconfigured submission, so that semaphores and fences will be signalled, and ensure misconfigured submissions are well behaved. - Add MVKSwapchain::getCAMetalLayer() to streamline layer access (unrelated). --- .../MoltenVK/Commands/MVKCommandBuffer.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 10 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 99 ++++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 11 --- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 81 +++------------ MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h | 9 +- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 53 +++++++--- 7 files changed, 118 insertions(+), 147 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 8ac91c26..72dde4f1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -260,7 +260,7 @@ bool MVKCommandBuffer::canExecute() { } _wasExecuted = true; - return true; + return wasConfigurationSuccessful(); } // Return the number of bits set in the view mask, with a minimum value of 1. 
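The swapchain workaround described in this commit message relies on the CAMetalLayer behavior the message notes: setting the layer's drawableSize forces outstanding drawable presentations to complete, without creating an extra transient drawable that could itself stall. A rough sketch of the idea, assuming re-assigning the current size is sufficient (the helper name is hypothetical, not MoltenVK API):

```objc
#import <QuartzCore/CAMetalLayer.h>

// Sketch: force any incomplete presentations on this layer to complete by
// touching drawableSize, rather than by drawing into a throwaway drawable.
static void forceLayerPresentationsToComplete(CAMetalLayer* mtlLayer) {
	mtlLayer.drawableSize = mtlLayer.drawableSize;	// re-setting the size flushes pending presentations
}
```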
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 1479f724..ef606b03 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -460,10 +460,9 @@ public: void beginPresentation(const MVKImagePresentInfo& presentInfo); /** Called via callback when the presentation completes. */ - void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); - - /** If this image is stuck in-flight, attempt to force it to complete. */ - void forcePresentationCompletion(); + void endPresentation(const MVKImagePresentInfo& presentInfo, + const MVKSwapchainSignaler& signaler, + uint64_t actualPresentTime = 0); #pragma mark Construction @@ -478,12 +477,13 @@ protected: friend MVKSwapchain; id getCAMetalDrawable() override; - void addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo); + void addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo, MVKSwapchainSignaler signaler); void releaseMetalDrawable(); MVKSwapchainImageAvailability getAvailability(); void makeAvailable(const MVKSwapchainSignaler& signaler); void makeAvailable(); VkResult acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); + MVKSwapchainSignaler getPresentationSignaler(); id _mtlDrawable = nil; MVKSwapchainImageAvailability _availability; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 1769df11..f09495c7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1248,16 +1248,18 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { } VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { + + // Now that this image is being acquired, release the existing drawable and its texture. + // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. + // This may trigger a delayed presentation callback, which uses the _availabilityLock, also used below. + releaseMetalDrawable(); + lock_guard lock(_availabilityLock); // Upon acquisition, update acquisition ID immediately, to move it to the back of the chain, // so other images will be preferred if either all images are available or no images are available. _availability.acquisitionID = _swapchain->getNextAcquisitionID(); - // Now that this image is being acquired, release the existing drawable and its texture. - // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. - releaseMetalDrawable(); - auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? 
semaphore->deferSignal() : 0}; if (_availability.isAvailable) { _availability.isAvailable = false; @@ -1292,10 +1294,10 @@ id MVKPresentableSwapchainImage::getCAMetalDrawable() { if ( !_mtlDrawable ) { @autoreleasepool { bool hasInvalidFormat = false; - uint32_t attemptCnt = _swapchain->getImageCount() * 2; // Attempt a resonable number of times + uint32_t attemptCnt = _swapchain->getImageCount(); // Attempt a resonable number of times for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { uint64_t startTime = _device->getPerformanceTimestamp(); - _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained + _mtlDrawable = [_swapchain->getCAMetalLayer().nextDrawable retain]; // retained _device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat; if (hasInvalidFormat) { releaseMetalDrawable(); } @@ -1314,8 +1316,6 @@ id MVKPresentableSwapchainImage::getCAMetalDrawable() { // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo) { - lock_guard lock(_availabilityLock); - _swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff); // According to Apple, it is more performant to call MTLDrawable present from within a @@ -1323,8 +1323,11 @@ VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlDrwbl = getCAMetalDrawable(); - addPresentedHandler(mtlDrwbl, presentInfo); + MVKSwapchainSignaler signaler = getPresentationSignaler(); [mtlCmdBuff addScheduledHandler: ^(id mcb) { + + addPresentedHandler(mtlDrwbl, presentInfo, signaler); + // Try to do any present mode transitions as late as possible in an attempt // to avoid visual disruptions on any presents already on the queue. if (presentInfo.presentMode != VK_PRESENT_MODE_MAX_ENUM_KHR) { @@ -1337,7 +1340,32 @@ VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(idretain(); } + [mtlCmdBuff addCompletedHandler: ^(id mcb) { + if (fence) { + fence->signal(); + fence->release(); + } + [mtlDrwbl release]; + release(); + }]; + + signalPresentationSemaphore(signaler, mtlCmdBuff); + + return getConfigurationResult(); +} + +MVKSwapchainSignaler MVKPresentableSwapchainImage::getPresentationSignaler() { + lock_guard lock(_availabilityLock); + // Mark this image as available if no semaphores or fences are waiting to be signaled. _availability.isAvailable = _availabilitySignalers.empty(); if (_availability.isAvailable) { @@ -1346,52 +1374,34 @@ VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(idretain(); } - [mtlCmdBuff addCompletedHandler: ^(id mcb) { - [mtlDrwbl release]; - makeAvailable(signaler); - release(); - if (fence) { - fence->signal(); - fence->release(); - } - }]; - - signalPresentationSemaphore(signaler, mtlCmdBuff); - - return getConfigurationResult(); } -// Pass MVKImagePresentInfo by value because it may not exist when the callback runs. +// Pass MVKImagePresentInfo & MVKSwapchainSignaler by value because they may not exist when the callback runs. 
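The comment above is the crux of the handler design: anything a deferred completion handler needs must be captured by value, because the stack frames (and possibly the objects) that produced it can be gone by the time the handler runs on a callback thread. A minimal C++ illustration, with an illustrative stand-in struct:

```cpp
#include <cstdint>
#include <functional>
#include <string>

struct PresentInfo { std::string label; uint64_t presentId; };   // illustrative stand-in

std::function<void()> makePresentedHandler(PresentInfo info) {   // take and capture by value
    return [info]() {
        // Safe: "info" is a private copy owned by the closure.
        // Capturing by reference would dangle once the caller returns,
        // because the handler runs later, on a callback thread.
        (void)info.presentId;
    };
}
```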
void MVKPresentableSwapchainImage::addPresentedHandler(id mtlDrawable, - MVKImagePresentInfo presentInfo) { + MVKImagePresentInfo presentInfo, + MVKSwapchainSignaler signaler) { beginPresentation(presentInfo); #if !MVK_OS_SIMULATOR if ([mtlDrawable respondsToSelector: @selector(addPresentedHandler:)]) { [mtlDrawable addPresentedHandler: ^(id mtlDrwbl) { - endPresentation(presentInfo, mtlDrwbl.presentedTime * 1.0e9); + endPresentation(presentInfo, signaler, mtlDrwbl.presentedTime * 1.0e9); }]; } else #endif { // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, // treat it as if the present happened when requested. - endPresentation(presentInfo); + endPresentation(presentInfo, signaler); } } @@ -1399,11 +1409,11 @@ void MVKPresentableSwapchainImage::addPresentedHandler(id mtlDr void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) { retain(); _swapchain->beginPresentation(presentInfo); - presentInfo.queue->beginPresentation(presentInfo); _presentationStartTime = getDevice()->getPerformanceTimestamp(); } void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo, + const MVKSwapchainSignaler& signaler, uint64_t actualPresentTime) { { // Scope to avoid deadlock if release() is run within detachment lock // If I have become detached from the swapchain, it means the swapchain, and possibly the @@ -1412,7 +1422,7 @@ void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& pr if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } } - presentInfo.queue->endPresentation(presentInfo); + makeAvailable(signaler); release(); } @@ -1432,7 +1442,9 @@ void MVKPresentableSwapchainImage::makeAvailable(const MVKSwapchainSignaler& sig } // Signal, untrack, and release any signalers that are tracking. +// Release the drawable before the lock, as it may trigger completion callback. void MVKPresentableSwapchainImage::makeAvailable() { + releaseMetalDrawable(); lock_guard lock(_availabilityLock); if ( !_availability.isAvailable ) { @@ -1445,14 +1457,6 @@ void MVKPresentableSwapchainImage::makeAvailable() { } } -// Clear the existing CAMetalDrawable and retrieve and release a new transient one, -// in an attempt to trigger the existing CAMetalDrawable to complete it's callback. -void MVKPresentableSwapchainImage::forcePresentationCompletion() { - releaseMetalDrawable(); - if (_swapchain) { @autoreleasepool { [_swapchain->_surface->getCAMetalLayer() nextDrawable]; } } -} - - #pragma mark Construction MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device, @@ -1467,14 +1471,13 @@ MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device, void MVKPresentableSwapchainImage::destroy() { - forcePresentationCompletion(); + releaseMetalDrawable(); MVKSwapchainImage::destroy(); } // Unsignaled signalers will exist if this image is acquired more than it is presented. // Ensure they are signaled and untracked so the fences and semaphores will be released. 
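Each swapchain image keeps a list of "signalers" (a fence, a semaphore, or both, registered at acquire time); presentation completion either hands off to the oldest signaler or marks the image available again, which is what `makeAvailable(signaler)` and the comments above describe. A simplified C++ model of that bookkeeping, in which closures stand in for the Vulkan fence and semaphore signals and which is not MoltenVK's actual implementation:

```cpp
#include <cstdint>
#include <deque>
#include <functional>
#include <mutex>

// Simplified model of swapchain-image availability tracking. Each pending
// acquire registers a closure that signals its semaphore/fence; when a
// presentation is wired up, completion either marks the image available
// again or hands it to the oldest waiter.
struct ImageAvailability {
    std::mutex lock;
    bool isAvailable = true;
    std::deque<std::function<void()>> waiters;

    std::function<void()> takePresentationSignaler() {
        std::lock_guard<std::mutex> guard(lock);
        isAvailable = waiters.empty();
        if (isAvailable) { return [] {}; }       // nobody waiting: image simply becomes available
        auto signaler = waiters.front();         // oldest acquire gets signalled on completion
        waiters.pop_front();
        return signaler;
    }
};
```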
MVKPresentableSwapchainImage::~MVKPresentableSwapchainImage() { - releaseMetalDrawable(); makeAvailable(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index 0de3d2b8..c3b1d242 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -100,13 +100,6 @@ public: /** Block the current thread until this queue is idle. */ VkResult waitIdle(MVKCommandUse cmdUse); - /** Mark the beginning of a swapchain image presentation. */ - void beginPresentation(const MVKImagePresentInfo& presentInfo); - - /** Mark the end of a swapchain image presentation. */ - void endPresentation(const MVKImagePresentInfo& presentInfo); - - #pragma mark Metal /** Returns the Metal queue underlying this queue. */ @@ -150,11 +143,8 @@ protected: VkResult submit(MVKQueueSubmission* qSubmit); NSString* getMTLCommandBufferLabel(MVKCommandUse cmdUse); void handleMTLCommandBufferError(id mtlCmdBuff); - void waitSwapchainPresentations(MVKCommandUse cmdUse); MVKQueueFamily* _queueFamily; - MVKSemaphoreImpl _presentationCompletionBlocker; - std::unordered_map _presentedImages; std::string _name; dispatch_queue_t _execQueue; id _mtlQueue = nil; @@ -166,7 +156,6 @@ protected: NSString* _mtlCmdBuffLabelAcquireNextImage = nil; NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; MVKGPUCaptureScope* _submissionCaptureScope = nil; - std::mutex _presentedImagesLock; float _priority; uint32_t _index; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 1c28f63f..401fa8b2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -80,13 +80,14 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) { if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils - VkResult rslt = qSubmit->getConfigurationResult(); // Extract result before submission to avoid race condition with early destruction - if (rslt == VK_SUCCESS) { - if (_execQueue) { - dispatch_async(_execQueue, ^{ execute(qSubmit); } ); - } else { - rslt = execute(qSubmit); - } + // Extract result before submission to avoid race condition with early destruction + // Submit regardless of config result, to ensure submission semaphores and fences are signalled. + // The submissions will ensure a misconfiguration will be safe to execute. + VkResult rslt = qSubmit->getConfigurationResult(); + if (_execQueue) { + dispatch_async(_execQueue, ^{ execute(qSubmit); } ); + } else { + rslt = execute(qSubmit); } return rslt; } @@ -140,50 +141,9 @@ VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) { [mtlCmdBuff commit]; [mtlCmdBuff waitUntilCompleted]; - waitSwapchainPresentations(cmdUse); - return VK_SUCCESS; } -// If there are any swapchain presentations in flight, wait a few frames for them to complete. -// If they don't complete within a few frames, attempt to force them to complete, and wait another -// few frames for that to happen. If there are still swapchain presentations that haven't completed, -// log a warning, and force them to end presentation, so the images and drawables will be released. 
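Submitting even a misconfigured submission, as the reworked `MVKQueue::submit()` above now does, matters to applications because the fence or semaphores handed to `vkQueueSubmit()` must still end up signalled; otherwise a host-side wait like the one below never returns. A typical app-side pattern (standard Vulkan; the handles are assumed to be created elsewhere):

```cpp
#include <vulkan/vulkan.h>

// App-side sketch: the fence must eventually signal even if the submission
// itself carried a configuration error, or this wait never returns.
void submitAndWait(VkDevice device, VkQueue queue, VkCommandBuffer cmdBuf, VkFence fence) {
    VkSubmitInfo submitInfo = {};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &cmdBuf;

    vkQueueSubmit(queue, 1, &submitInfo, fence);
    vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
}
```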
-void MVKQueue::waitSwapchainPresentations(MVKCommandUse cmdUse) { - uint32_t waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; - uint64_t waitNanos = waitFrames * _device->_performanceStatistics.queue.frameInterval.average * 1e6; - if (_presentationCompletionBlocker.wait(waitNanos)) { return; } - - auto imgCnt = _presentationCompletionBlocker.getReservationCount(); - MVKPresentableSwapchainImage* images[imgCnt]; - mvkClear(images, imgCnt); - - { - // Scope of image lock limited to creating array copy of uncompleted presentations - // Populate a working array of the unpresented images. - lock_guard lock(_presentedImagesLock); - size_t imgIdx = 0; - for (auto imgPair : _presentedImages) { images[imgIdx++] = imgPair.first; } - } - - // Attempt to force each image to complete presentation through the callback. - for (size_t imgIdx = 0; imgIdx < imgCnt && _presentationCompletionBlocker.getReservationCount(); imgIdx++) { - auto* img = images[imgIdx]; - if (img) { img->forcePresentationCompletion(); } - } - - // Wait for forced presentation completions. If we still have unfinished swapchain image - // presentations, log a warning, and force each image to end, so that it can be released. - if ( !_presentationCompletionBlocker.wait(waitNanos) ) { - reportWarning(VK_TIMEOUT, "%s timed out after %d frames while awaiting %d swapchain image presentations to complete.", - mvkVkCommandName(cmdUse), waitFrames * 2, _presentationCompletionBlocker.getReservationCount()); - for (size_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) { - auto* img = images[imgIdx]; - if (_presentedImages.count(img)) { img->endPresentation({.queue = this, .presentableImage = img}); } - } - } -} - id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) { id mtlCmdBuff = nil; MVKDevice* mvkDev = getDevice(); @@ -312,25 +272,6 @@ void MVKQueue::handleMTLCommandBufferError(id mtlCmdBuff) { #endif } -// _presentedImages counts presentations per swapchain image, because the presentation of an image can -// begin before the previous presentation of that image has indicated that it has completed via a callback. -void MVKQueue::beginPresentation(const MVKImagePresentInfo& presentInfo) { - lock_guard lock(_presentedImagesLock); - _presentationCompletionBlocker.reserve(); - _presentedImages[presentInfo.presentableImage]++; -} - -void MVKQueue::endPresentation(const MVKImagePresentInfo& presentInfo) { - lock_guard lock(_presentedImagesLock); - _presentationCompletionBlocker.release(); - if (_presentedImages[presentInfo.presentableImage]) { - _presentedImages[presentInfo.presentableImage]--; - } - if ( !_presentedImages[presentInfo.presentableImage] ) { - _presentedImages.erase(presentInfo.presentableImage); - } -} - #pragma mark Construction #define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED @@ -488,7 +429,7 @@ VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool sign // If we need to signal completion, use getActiveMTLCommandBuffer() to ensure at least // one MTLCommandBuffer is used, otherwise if this instance has no content, it will not - // finish(), signal the fence and semaphores ,and be destroyed. + // finish(), signal the fence and semaphores, and be destroyed. // Use temp var for MTLCommandBuffer commit and release because completion callback // may destroy this instance before this function ends. id mtlCmdBuff = signalCompletion ? 
getActiveMTLCommandBuffer() : _activeMTLCommandBuffer; @@ -501,6 +442,8 @@ VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool sign if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; + // Retrieve the result before committing MTLCommandBuffer, because finish() will destroy this instance. + VkResult rslt = mtlCmdBuff ? getConfigurationResult() : VK_ERROR_OUT_OF_POOL_MEMORY; [mtlCmdBuff commit]; [mtlCmdBuff release]; // retained @@ -508,7 +451,7 @@ VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool sign // was not created, call the finish() function directly. if (signalCompletion && !mtlCmdBuff) { finish(); } - return mtlCmdBuff ? VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; + return rslt; } // Be sure to retain() any API objects referenced in this function, and release() them in the diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h index 7e7cff8c..cd418bd1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h @@ -43,11 +43,14 @@ public: /** Returns the debug report object type of this object. */ VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT; } + /** Returns the CAMetalLayer underlying the surface used by this swapchain. */ + CAMetalLayer* getCAMetalLayer(); + /** Returns the number of images in this swapchain. */ - inline uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); } + uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); } /** Returns the image at the specified index. */ - inline MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; } + MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; } /** * Returns the array of presentable images associated with this swapchain. 
@@ -112,6 +115,7 @@ protected: void markFrameInterval(); void beginPresentation(const MVKImagePresentInfo& presentInfo); void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + void forceUnpresentedImageCompletion(); MVKSurface* _surface = nullptr; MVKWatermark* _licenseWatermark = nullptr; @@ -123,6 +127,7 @@ protected: std::mutex _presentHistoryLock; uint64_t _lastFrameTime = 0; VkExtent2D _mtlLayerDrawableExtent = {0, 0}; + std::atomic _unpresentedImageCount = 0; uint32_t _currentPerfLogFrameCount = 0; uint32_t _presentHistoryCount = 0; uint32_t _presentHistoryIndex = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index 159c2edf..63c3ac78 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -47,6 +47,8 @@ void MVKSwapchain::propagateDebugName() { } } +CAMetalLayer* MVKSwapchain::getCAMetalLayer() { return _surface->getCAMetalLayer(); } + VkResult MVKSwapchain::getImages(uint32_t* pCount, VkImage* pSwapchainImages) { // Get the number of surface images @@ -104,7 +106,7 @@ VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pRel getPresentableImage(pReleaseInfo->pImageIndices[imgIdxIdx])->makeAvailable(); } - return VK_SUCCESS; + return _surface->getConfigurationResult(); } uint64_t MVKSwapchain::getNextAcquisitionID() { return ++_currentAcquisitionID; } @@ -128,7 +130,7 @@ VkResult MVKSwapchain::getSurfaceStatus() { bool MVKSwapchain::hasOptimalSurface() { if (_isDeliberatelyScaled) { return true; } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(mtlLayer.drawableSize); return (mvkVkExtent2DsAreEqual(drawExtent, _mtlLayerDrawableExtent) && mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(mtlLayer))); @@ -185,7 +187,7 @@ void MVKSwapchain::markFrameInterval() { VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); #if MVK_VISIONOS // TODO: See if this can be obtained from OS instead NSInteger framesPerSecond = 90; @@ -242,9 +244,13 @@ VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresent return res; } -void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) {} +void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) { + _unpresentedImageCount++; +} void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { + _unpresentedImageCount--; + std::lock_guard lock(_presentHistoryLock); markFrameInterval(); @@ -269,8 +275,18 @@ void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint6 _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; } +// Because of a regression in Metal, the most recent one or two presentations may not complete +// and call back. To work around this, if there are any uncompleted presentations, change the +// drawableSize of the CAMetalLayer, which will trigger presentation completion and callbacks. +// The drawableSize will be set to a correct size by the next swapchain created on the same surface. 
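The `_unpresentedImageCount` bookkeeping above is an atomic in-flight counter: it is incremented when a presentation is scheduled and decremented when the presented callback fires, and the force-completion path only pokes the layer when the count is non-zero. A reduced C++ model, with illustrative names (the real poke changes the CAMetalLayer drawableSize):

```cpp
#include <atomic>
#include <cstdint>

// Simplified model of tracking presentations that have been scheduled but
// whose "presented" callbacks have not yet run.
struct PresentationTracker {
    std::atomic<uint32_t> unpresentedCount{0};

    void beginPresentation() { unpresentedCount++; }
    void endPresentation()   { unpresentedCount--; }

    // Only nudge the presentation layer when callbacks are actually
    // outstanding; otherwise there is nothing to force.
    void forceUnpresentedCompletion() {
        if (unpresentedCount.load() != 0) {
            // ...poke the presentation layer here...
        }
    }
};
```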
+void MVKSwapchain::forceUnpresentedImageCompletion() { + if (_unpresentedImageCount) { + getCAMetalLayer().drawableSize = { 1,1 }; + } +} + void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); if (!pRegion || pRegion->rectangleCount == 0) { [mtlLayer setNeedsDisplay]; return; @@ -350,7 +366,7 @@ void MVKSwapchain::setHDRMetadataEXT(const VkHdrMetadataEXT& metadata) { CAEDRMetadata* caMetadata = [CAEDRMetadata HDR10MetadataWithDisplayInfo: colorVolData contentInfo: lightLevelData opticalOutputScale: 1]; - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); mtlLayer.EDRMetadata = caMetadata; mtlLayer.wantsExtendedDynamicRangeContent = YES; [caMetadata release]; @@ -456,7 +472,7 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, if ( getIsSurfaceLost() ) { return; } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); auto minMagFilter = mvkConfig().swapchainMinMagFilterUseNearest ? kCAFilterNearest : kCAFilterLinear; mtlLayer.device = getMTLDevice(); mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); @@ -469,6 +485,16 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)); + + // Because of a regression in Metal, the most recent one or two presentations may not + // complete and call back. Changing the CAMetalLayer drawableSize will force any incomplete + // presentations on the oldSwapchain to complete and call back, but if the drawableSize + // is not changing from the previous, we force those completions first. + auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; + if (oldSwapchain && mvkVkExtent2DsAreEqual(pCreateInfo->imageExtent, mvkVkExtent2DFromCGSize(mtlLayer.drawableSize))) { + oldSwapchain->forceUnpresentedImageCompletion(); + } + // Remember the extent to later detect if it has changed under the covers, // and set the drawable size of the CAMetalLayer from the extent. _mtlLayerDrawableExtent = pCreateInfo->imageExtent; @@ -559,7 +585,7 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo } } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); VkExtent2D imgExtent = pCreateInfo->imageExtent; VkImageCreateInfo imgInfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -598,12 +624,17 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo screenName = mtlLayer.screenMVK.localizedName; } #endif - MVKLogInfo("Created %d swapchain images with initial size (%d, %d) and contents scale %.1f for screen %s.", - imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, screenName.UTF8String); + MVKLogInfo("Created %d swapchain images with size (%d, %d) and contents scale %.1f in layer %s (%p) on screen %s.", + imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, mtlLayer.name.UTF8String, mtlLayer, screenName.UTF8String); } void MVKSwapchain::destroy() { - if (_surface->_activeSwapchain == this) { _surface->_activeSwapchain = nullptr; } + // If this swapchain was not replaced by a new swapchain, remove this swapchain + // from the surface, and force any outstanding presentations to complete. 
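The `oldSwapchain` handling above hooks into the standard swapchain-recreation pattern on the application side: when the new swapchain targets the same surface with an unchanged extent, the old swapchain's unpresented images are forced to complete first. A typical recreation call looks like this (standard Vulkan; the format, image count, and other parameters are illustrative):

```cpp
#include <vulkan/vulkan.h>

VkSwapchainKHR recreateSwapchain(VkDevice device, VkSurfaceKHR surface,
                                 VkExtent2D extent, VkSwapchainKHR oldSwapchain) {
    VkSwapchainCreateInfoKHR createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
    createInfo.surface = surface;
    createInfo.minImageCount = 3;
    createInfo.imageFormat = VK_FORMAT_B8G8R8A8_UNORM;
    createInfo.imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
    createInfo.imageExtent = extent;
    createInfo.imageArrayLayers = 1;
    createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
    createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
    createInfo.presentMode = VK_PRESENT_MODE_FIFO_KHR;
    createInfo.clipped = VK_TRUE;
    createInfo.oldSwapchain = oldSwapchain;   // lets the driver retire the old swapchain cleanly

    VkSwapchainKHR newSwapchain = VK_NULL_HANDLE;
    vkCreateSwapchainKHR(device, &createInfo, nullptr, &newSwapchain);
    vkDestroySwapchainKHR(device, oldSwapchain, nullptr);   // safe once the new one exists
    return newSwapchain;
}
```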
+ if (_surface->_activeSwapchain == this) { + _surface->_activeSwapchain = nullptr; + forceUnpresentedImageCompletion(); + } for (auto& img : _presentableImages) { _device->destroyPresentableSwapchainImage(img, NULL); } MVKVulkanAPIDeviceObject::destroy(); } From 6127918a32fa7df07b741c6547b4777029a38876 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 18 Sep 2023 22:55:01 -0400 Subject: [PATCH 11/41] Add support for extension VK_KHR_synchronization2. - MVKPhysicalDevice add support for VkPhysicalDeviceSynchronization2Features. - Pass sync2 structs to MVKPipelineBarrier, MVKCmdPipelineBarrier, MVKCmdSetEvent, MVKCmdResetEvent, MVKCmdWaitEvents, MVKRenderPass, MVKQueue & MVKQueueSubmission. - Replace use of VkPipelineStageFlags & VkAccessFlags with VkPipelineStageFlags2 & VkAccessFlags2. - Add stage masks to MVKPipelineBarrier, and redefine apply*MemoryBarrier() functions to remove separately passing stage masks. - Add MVKSemaphoreSubmitInfo to track semaphores in MVKQueueSubmission. - Add MVKCommandBufferSubmitInfo to track command buffers in MVKQueueCommandBufferSubmission. - Add MVKSubpassDependency to combine VkSubpassDependency & VkMemoryBarrier2 in MVKRenderPass. - Remove abstract MVKCmdSetResetEvent superclass. - Streamline code in MVKMTLFunction::operator= (unrelated). --- Docs/MoltenVK_Runtime_UserGuide.md | 1 + Docs/Whats_New.md | 2 + MoltenVK/MoltenVK/API/mvk_datatypes.h | 8 +- MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h | 41 ++-- MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm | 116 +++++++--- MoltenVK/MoltenVK/Commands/MVKCmdQueries.h | 4 +- MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm | 4 +- .../Commands/MVKMTLResourceBindings.h | 66 +++++- MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h | 12 +- MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm | 18 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 13 +- .../GPUObjects/MVKDeviceFeatureStructs.def | 1 + MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 16 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 22 +- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 68 +++++- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 203 ++++++++++++++---- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h | 18 +- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm | 48 +++-- MoltenVK/MoltenVK/GPUObjects/MVKResource.h | 4 +- .../MoltenVK/GPUObjects/MVKShaderModule.h | 2 +- .../MoltenVK/GPUObjects/MVKShaderModule.mm | 9 +- MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm | 42 ++-- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 132 +++++++++--- 25 files changed, 608 insertions(+), 247 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 6684bb34..57773c27 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -350,6 +350,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_surface` - `VK_KHR_swapchain` - `VK_KHR_swapchain_mutable_format` +- `VK_KHR_synchronization2` - `VK_KHR_timeline_semaphore` - `VK_KHR_uniform_buffer_standard_layout` - `VK_KHR_variable_pointers` diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 14f6edfc..d08bca1f 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -18,6 +18,8 @@ MoltenVK 1.2.6 Released TBD +- Add support for extensions: + - `VK_KHR_synchronization2` - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. 
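With `VK_KHR_synchronization2`, the source and destination stage masks move out of the `vkCmdPipelineBarrier()` call and into each individual barrier structure, which is why this patch threads `VkPipelineStageFlags2`/`VkAccessFlags2` through the barrier plumbing. An app-side example of the new entry point (standard Vulkan, KHR-suffixed names; when used via the extension rather than Vulkan 1.3, `vkCmdPipelineBarrier2KHR` is normally retrieved with `vkGetDeviceProcAddr`):

```cpp
#include <vulkan/vulkan.h>

// Make transfer writes visible to fragment-shader reads using synchronization2.
void transferToFragmentBarrier(VkCommandBuffer cmdBuf) {
    VkMemoryBarrier2KHR barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR;
    barrier.srcStageMask  = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR;        // stages live per barrier,
    barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;          // not per command
    barrier.dstStageMask  = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR;
    barrier.dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR;

    VkDependencyInfoKHR depInfo = {};
    depInfo.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR;
    depInfo.memoryBarrierCount = 1;
    depInfo.pMemoryBarriers = &barrier;

    vkCmdPipelineBarrier2KHR(cmdBuf, &depInfo);
}
```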
- Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h index b0e2dac7..8e5670c9 100644 --- a/MoltenVK/MoltenVK/API/mvk_datatypes.h +++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h @@ -414,13 +414,13 @@ MTLWinding mvkMTLWindingFromSpvExecutionMode(uint32_t spvMode); MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode(uint32_t spvMode); /** - * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPiplineStageFlags, + * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPipelineStageFlags2, * taking into consideration whether the barrier is to be placed before or after the specified pipeline stages. */ -MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, bool placeBarrierBefore); +MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore); -/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags. */ -MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess); +/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags2. */ +MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess); #pragma mark - #pragma mark Geometry conversions diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h index 8e177256..aec8800c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h @@ -41,6 +41,9 @@ template class MVKCmdPipelineBarrier : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkDependencyInfo* pDependencyInfo); + VkResult setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -59,8 +62,6 @@ protected: bool coversTextures(); MVKSmallVector _barriers; - VkPipelineStageFlags _srcStageMask; - VkPipelineStageFlags _dstStageMask; VkDependencyFlags _dependencyFlags; }; @@ -281,34 +282,26 @@ protected: #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -/** Abstract Vulkan command to set or reset an event. */ -class MVKCmdSetResetEvent : public MVKCommand { +/** Vulkan command to set an event. */ +class MVKCmdSetEvent : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + const VkDependencyInfo* pDependencyInfo); + VkResult setContent(MVKCommandBuffer* cmdBuff, VkEvent event, VkPipelineStageFlags stageMask); -protected: - MVKEvent* _mvkEvent; - -}; - - -#pragma mark - -#pragma mark MVKCmdSetEvent - -/** Vulkan command to set an event. */ -class MVKCmdSetEvent : public MVKCmdSetResetEvent { - -public: void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -316,14 +309,19 @@ protected: #pragma mark MVKCmdResetEvent /** Vulkan command to reset an event. 
*/ -class MVKCmdResetEvent : public MVKCmdSetResetEvent { +class MVKCmdResetEvent : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask); + void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -339,6 +337,11 @@ template class MVKCmdWaitEvents : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos); + VkResult setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, const VkEvent* pEvents, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm index 3efcab53..05e578f6 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm @@ -29,6 +29,29 @@ #pragma mark - #pragma mark MVKCmdPipelineBarrier +template +VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, + const VkDependencyInfo* pDependencyInfo) { + _dependencyFlags = pDependencyInfo->dependencyFlags; + + _barriers.clear(); // Clear for reuse + _barriers.reserve(pDependencyInfo->memoryBarrierCount + + pDependencyInfo->bufferMemoryBarrierCount + + pDependencyInfo->imageMemoryBarrierCount); + + for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pBufferMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pImageMemoryBarriers[i]); + } + + return VK_SUCCESS; +} + template VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, @@ -40,21 +63,19 @@ VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers) { - _srcStageMask = srcStageMask; - _dstStageMask = dstStageMask; _dependencyFlags = dependencyFlags; _barriers.clear(); // Clear for reuse _barriers.reserve(memoryBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount); for (uint32_t i = 0; i < memoryBarrierCount; i++) { - _barriers.emplace_back(pMemoryBarriers[i]); + _barriers.emplace_back(pMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - _barriers.emplace_back(pBufferMemoryBarriers[i]); + _barriers.emplace_back(pBufferMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - _barriers.emplace_back(pImageMemoryBarriers[i]); + _barriers.emplace_back(pImageMemoryBarriers[i], srcStageMask, dstStageMask); } return VK_SUCCESS; @@ -67,13 +88,9 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { // Calls below invoke MTLBlitCommandEncoder so must apply this first. // Check if pipeline barriers are available and we are in a renderpass. 
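The new `VkDependencyInfo`-based `setContent()` overloads above back the synchronization2 event commands, where the dependency information travels with both the set and the wait. A minimal app-side pairing (standard Vulkan; the barrier contents are illustrative, and the spec requires the dependency info passed to the wait to match the one used to set the event):

```cpp
#include <vulkan/vulkan.h>

void setAndWaitEvent(VkCommandBuffer cmdBuf, VkEvent event) {
    VkMemoryBarrier2KHR barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR;
    barrier.srcStageMask  = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR;
    barrier.srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR;
    barrier.dstStageMask  = VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR;
    barrier.dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR;

    VkDependencyInfoKHR depInfo = {};
    depInfo.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR;
    depInfo.memoryBarrierCount = 1;
    depInfo.pMemoryBarriers = &barrier;

    vkCmdSetEvent2KHR(cmdBuf, event, &depInfo);        // dependency recorded at the signal
    // ... other work ...
    vkCmdWaitEvents2KHR(cmdBuf, 1, &event, &depInfo);  // same dependency supplied at the wait
}
```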
if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) { - MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_srcStageMask, false); - MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_dstStageMask, true); - - id resources[_barriers.size()]; - uint32_t rezCnt = 0; - for (auto& b : _barriers) { + MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.srcStageMask, false); + MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.dstStageMask, true); switch (b.type) { case MVKPipelineBarrier::Memory: { MTLBarrierScope scope = (mvkMTLBarrierScopeFromVkAccessFlags(b.srcAccessMask) | @@ -84,27 +101,30 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { break; } - case MVKPipelineBarrier::Buffer: - resources[rezCnt++] = b.mvkBuffer->getMTLBuffer(); + case MVKPipelineBarrier::Buffer: { + id mtlRez = b.mvkBuffer->getMTLBuffer(); + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: &mtlRez + count: 1 + afterStages: srcStages + beforeStages: dstStages]; break; - - case MVKPipelineBarrier::Image: - for (uint8_t planeIndex = 0; planeIndex < b.mvkImage->getPlaneCount(); planeIndex++) { - resources[rezCnt++] = b.mvkImage->getMTLTexture(planeIndex); - } + } + case MVKPipelineBarrier::Image: { + uint32_t plnCnt = b.mvkImage->getPlaneCount(); + id mtlRezs[plnCnt]; + for (uint8_t plnIdx = 0; plnIdx < plnCnt; plnIdx++) { + mtlRezs[plnIdx] = b.mvkImage->getMTLTexture(plnIdx); + } + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: mtlRezs + count: plnCnt + afterStages: srcStages + beforeStages: dstStages]; break; - + } default: break; } } - - if (rezCnt) { - [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: resources - count: rezCnt - afterStages: srcStages - beforeStages: dstStages]; - } } else if (cmdEncoder->getDevice()->_pMetalFeatures->textureBarriers) { #if !MVK_MACCAT if (coversTextures()) { [cmdEncoder->_mtlRenderEncoder textureBarrier]; } @@ -138,15 +158,15 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { for (auto& b : _barriers) { switch (b.type) { case MVKPipelineBarrier::Memory: - mvkDvc->applyMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + mvkDvc->applyMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Buffer: - b.mvkBuffer->applyBufferMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkBuffer->applyBufferMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Image: - b.mvkImage->applyImageMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkImage->applyImageMemoryBarrier(b, cmdEncoder, cmdUse); break; default: @@ -493,19 +513,23 @@ MVKCmdPushDescriptorSetWithTemplate::~MVKCmdPushDescriptorSetWithTemplate() { #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -VkResult MVKCmdSetResetEvent::setContent(MVKCommandBuffer* cmdBuff, - VkEvent event, - VkPipelineStageFlags stageMask) { +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags stageMask) { _mvkEvent = (MVKEvent*)event; return VK_SUCCESS; } +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + const VkDependencyInfo* pDependencyInfo) { + _mvkEvent = (MVKEvent*)event; -#pragma mark - -#pragma mark MVKCmdSetEvent + return VK_SUCCESS; +} void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, true); @@ -515,6 +539,14 @@ 
void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) { #pragma mark - #pragma mark MVKCmdResetEvent +VkResult MVKCmdResetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask) { + _mvkEvent = (MVKEvent*)event; + + return VK_SUCCESS; +} + void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, false); } @@ -523,6 +555,20 @@ void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) { #pragma mark - #pragma mark MVKCmdWaitEvents +template +VkResult MVKCmdWaitEvents::setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos) { + _mvkEvents.clear(); // Clear for reuse + _mvkEvents.reserve(eventCount); + for (uint32_t i = 0; i < eventCount; i++) { + _mvkEvents.push_back((MVKEvent*)pEvents[i]); + } + + return VK_SUCCESS; +} + template VkResult MVKCmdWaitEvents::setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h index baa58863..6b3686e8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h @@ -85,7 +85,7 @@ class MVKCmdWriteTimestamp : public MVKCmdQuery { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query); @@ -94,7 +94,7 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - VkPipelineStageFlagBits _pipelineStage; + VkPipelineStageFlags2 _stage; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm index bc5ba9c6..aac431fb 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm @@ -77,13 +77,13 @@ void MVKCmdEndQuery::encode(MVKCommandEncoder* cmdEncoder) { #pragma mark MVKCmdWriteTimestamp VkResult MVKCmdWriteTimestamp::setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query) { VkResult rslt = MVKCmdQuery::setContent(cmdBuff, queryPool, query); - _pipelineStage = pipelineStage; + _stage = stage; cmdBuff->recordTimestampCommand(); diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h index de72f06d..3eeb7d42 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h @@ -112,8 +112,10 @@ typedef struct MVKPipelineBarrier { } MVKPipelineBarrierType; MVKPipelineBarrierType type = None; - VkAccessFlags srcAccessMask = 0; - VkAccessFlags dstAccessMask = 0; + VkPipelineStageFlags2 srcStageMask = 0; + VkAccessFlags2 srcAccessMask = 0; + VkPipelineStageFlags2 dstStageMask = 0; + VkAccessFlags2 dstAccessMask = 0; uint8_t srcQueueFamilyIndex = 0; uint8_t dstQueueFamilyIndex = 0; union { MVKBuffer* mvkBuffer = nullptr; MVKImage* mvkImage; MVKResource* mvkResource; }; @@ -136,15 +138,29 @@ typedef struct MVKPipelineBarrier { bool isBufferBarrier() { return type == Buffer; } bool isImageBarrier() { return type == Image; } - MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkMemoryBarrier2& vkBarrier) : type(Memory), + srcStageMask(vkBarrier.srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), dstAccessMask(vkBarrier.dstAccessMask) {} - 
MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier) : - type(Buffer), + MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Memory), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask) + {} + + MVKPipelineBarrier(const VkBufferMemoryBarrier2& vkBarrier) : + type(Buffer), + srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), @@ -153,9 +169,45 @@ typedef struct MVKPipelineBarrier { size(vkBarrier.size) {} - MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier) : - type(Image), + MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Buffer), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkBuffer((MVKBuffer*)vkBarrier.buffer), + offset(vkBarrier.offset), + size(vkBarrier.size) + {} + + MVKPipelineBarrier(const VkImageMemoryBarrier2& vkBarrier) : + type(Image), + srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkImage((MVKImage*)vkBarrier.image), + newLayout(vkBarrier.newLayout), + aspectMask(vkBarrier.subresourceRange.aspectMask), + baseArrayLayer(vkBarrier.subresourceRange.baseArrayLayer), + layerCount(vkBarrier.subresourceRange.layerCount), + baseMipLevel(vkBarrier.subresourceRange.baseMipLevel), + levelCount(vkBarrier.subresourceRange.levelCount) + {} + + MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Image), + srcStageMask(srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h index 2e338ce7..95fdf681 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h @@ -52,16 +52,12 @@ public: VkResult bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo); /** Applies the specified global memory barrier. */ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; /** Applies the specified buffer memory barrier. 
*/ - void applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); @@ -95,9 +91,7 @@ protected: friend class MVKDeviceMemory; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &overlapOffset, VkDeviceSize &overlapSize); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm index a99f4f0f..41ee4cef 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm @@ -94,25 +94,21 @@ VkResult MVKBuffer::bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo) { return bindDeviceMemory((MVKDeviceMemory*)pBindInfo->memory, pBindInfo->memoryOffset); } -void MVKBuffer::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKBuffer::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()]; } #endif } -void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()]; } #endif @@ -120,11 +116,9 @@ void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, // Returns whether the specified buffer memory barrier requires a sync between this // buffer and host memory for the purpose of the host reading texture memory. -bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS - return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && + return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) && isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer)); #endif diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 7a04e90e..b6b462ad 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -681,9 +681,7 @@ public: void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); /** Applies the specified global memory barrier to all resource issued by this device. 
*/ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 422f1b43..acd50514 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -320,6 +320,11 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: { + auto* synch2Features = (VkPhysicalDeviceSynchronization2Features*)next; + synch2Features->synchronization2 = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: { auto* astcHDRFeatures = (VkPhysicalDeviceTextureCompressionASTCHDRFeatures*)next; astcHDRFeatures->textureCompressionASTC_HDR = _metalFeatures.astcHDRTextures; @@ -4172,16 +4177,14 @@ void MVKDevice::removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t val mvkRemoveFirstOccurance(_awaitingTimelineSem4s, make_pair(sem4, value)); } -void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKDevice::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { - if (!mvkIsAnyFlagEnabled(dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || + if (!mvkIsAnyFlagEnabled(barrier.dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || !mvkIsAnyFlagEnabled(barrier.dstAccessMask, VK_ACCESS_HOST_READ_BIT) ) { return; } lock_guard lock(_rezLock); for (auto& rez : _resources) { - rez->applyMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + rez->applyMemoryBarrier(barrier, cmdEncoder, cmdUse); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index c0bbb481..88a3a33f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -55,6 +55,7 @@ MVK_DEVICE_FEATURE(ShaderAtomicInt64, SHADER_ATOMIC_INT64, MVK_DEVICE_FEATURE(ShaderFloat16Int8, SHADER_FLOAT16_INT8, 2) MVK_DEVICE_FEATURE(ShaderSubgroupExtendedTypes, SHADER_SUBGROUP_EXTENDED_TYPES, 1) MVK_DEVICE_FEATURE(SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, 2) +MVK_DEVICE_FEATURE(Synchronization2, SYNCHRONIZATION_2, 1) MVK_DEVICE_FEATURE(TextureCompressionASTCHDR, TEXTURE_COMPRESSION_ASTC_HDR, 1) MVK_DEVICE_FEATURE(TimelineSemaphore, TIMELINE_SEMAPHORE, 1) MVK_DEVICE_FEATURE(UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, 1) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index ef606b03..900b10ff 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -74,9 +74,7 @@ protected: bool overlaps(VkSubresourceLayout& imgLayout, VkDeviceSize offset, VkDeviceSize size); void propagateDebugName(); MVKImageMemoryBinding* getMemoryBinding() const; - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); void 
pullFromDeviceOnCompletion(MVKCommandEncoder* cmdEncoder, @@ -119,9 +117,7 @@ public: VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset) override; /** Applies the specified global memory barrier. */ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; @@ -133,9 +129,7 @@ protected: friend MVKImage; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size); @@ -251,9 +245,7 @@ public: virtual VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo); /** Applies the specified image memory barrier. */ - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index f09495c7..c605f45a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -304,9 +304,7 @@ MVKImageMemoryBinding* MVKImagePlane::getMemoryBinding() const { return (_image->_memoryBindings.size() > 1) ? _image->_memoryBindings[_planeIndex] : _image->_memoryBindings[0]; } -void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImagePlane::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { @@ -323,7 +321,7 @@ void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, : (layerStart + barrier.layerCount)); MVKImageMemoryBinding* memBind = getMemoryBinding(); - bool needsSync = memBind->needsHostReadSync(srcStageMask, dstStageMask, barrier); + bool needsSync = memBind->needsHostReadSync(barrier); bool needsPull = ((!memBind->_mtlTexelBuffer || memBind->_ownsTexelBuffer) && memBind->isMemoryHostCoherent() && barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL && @@ -444,13 +442,11 @@ VkResult MVKImageMemoryBinding::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDevi return _deviceMemory ? _deviceMemory->addImageMemoryBinding(this) : VK_SUCCESS; } -void MVKImageMemoryBinding::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImageMemoryBinding::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if (needsHostReadSync(barrier)) { for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: _image->_planes[planeIndex]->_mtlTexture]; } @@ -469,9 +465,7 @@ void MVKImageMemoryBinding::propagateDebugName() { // Returns whether the specified image memory barrier requires a sync between this // texture and host memory for the purpose of the host reading texture memory. 
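The host-read check described above corresponds to the barrier an application records before mapping and reading device-written image memory on the CPU; on macOS this is what triggers a managed-resource synchronization. In synchronization2 form such a barrier looks like this (standard Vulkan; the image and the transfer-write source are illustrative):

```cpp
#include <vulkan/vulkan.h>

void barrierForHostRead(VkCommandBuffer cmdBuf, VkImage image) {
    VkImageMemoryBarrier2KHR barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR;
    barrier.srcStageMask  = VK_PIPELINE_STAGE_2_COPY_BIT_KHR;
    barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;
    barrier.dstStageMask  = VK_PIPELINE_STAGE_2_HOST_BIT_KHR;     // the host reads next
    barrier.dstAccessMask = VK_ACCESS_2_HOST_READ_BIT_KHR;
    barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;                  // host-readable layout
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = image;
    barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    barrier.subresourceRange.levelCount = 1;
    barrier.subresourceRange.layerCount = 1;

    VkDependencyInfoKHR depInfo = {};
    depInfo.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR;
    depInfo.imageMemoryBarrierCount = 1;
    depInfo.pImageMemoryBarriers = &barrier;

    vkCmdPipelineBarrier2KHR(cmdBuf, &depInfo);
}
```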
-bool MVKImageMemoryBinding::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) && @@ -625,15 +619,13 @@ bool MVKImage::getIsValidViewFormat(VkFormat viewFormat) { #pragma mark Resource memory -void MVKImage::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImage::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { for (uint8_t planeIndex = 0; planeIndex < _planes.size(); planeIndex++) { if ( !_hasChromaSubsampling || mvkIsAnyFlagEnabled(barrier.aspectMask, (VK_IMAGE_ASPECT_PLANE_0_BIT << planeIndex)) ) { - _planes[planeIndex]->applyImageMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + _planes[planeIndex]->applyImageMemoryBarrier(barrier, cmdEncoder, cmdUse); } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index c3b1d242..b4509f0b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -92,7 +92,8 @@ public: #pragma mark Queue submissions /** Submits the specified command buffers to the queue. */ - VkResult submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + template + VkResult submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse); /** Submits the specified presentation command to the queue. */ VkResult submit(const VkPresentInfoKHR* pPresentInfo); @@ -164,6 +165,24 @@ protected: #pragma mark - #pragma mark MVKQueueSubmission +typedef struct MVKSemaphoreSubmitInfo { +private: + MVKSemaphore* _semaphore; +public: + uint64_t value; + VkPipelineStageFlags2 stageMask; + uint32_t deviceIndex; + + void encodeWait(id mtlCmdBuff); + void encodeSignal(id mtlCmdBuff); + MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo); + MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, VkPipelineStageFlags stageMask); + MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other); + MVKSemaphoreSubmitInfo& operator=(const MVKSemaphoreSubmitInfo& other); + ~MVKSemaphoreSubmitInfo(); + +} MVKSemaphoreSubmitInfo; + /** This is an abstract class for an operation that can be submitted to an MVKQueue. 
*/ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { @@ -179,9 +198,14 @@ public: */ virtual VkResult execute() = 0; + MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos); + MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores); + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask); ~MVKQueueSubmission() override; @@ -192,13 +216,22 @@ protected: MVKDevice* getDevice() { return _queue->getDevice(); } MVKQueue* _queue; - MVKSmallVector> _waitSemaphores; + MVKSmallVector _waitSemaphores; }; #pragma mark - #pragma mark MVKQueueCommandBufferSubmission +typedef struct MVKCommandBufferSubmitInfo { + MVKCommandBuffer* commandBuffer; + uint32_t deviceMask; + + MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo); + MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer); + +} MVKCommandBufferSubmitInfo; + /** * Submits an empty set of command buffers to the queue. * Used for fence-only command submissions. @@ -208,7 +241,15 @@ class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { public: VkResult execute() override; - MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); + + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); ~MVKQueueCommandBufferSubmission() override; @@ -222,11 +263,11 @@ protected: virtual void submitCommandBuffers() {} MVKCommandEncodingContext _encodingContext; - MVKSmallVector> _signalSemaphores; - MVKFence* _fence; - id _activeMTLCommandBuffer; - MVKCommandUse _commandUse; - bool _emulatedWaitDone; //Used to track if we've already waited for emulated semaphores. + MVKSmallVector _signalSemaphores; + MVKFence* _fence = nullptr; + id _activeMTLCommandBuffer = nil; + MVKCommandUse _commandUse = kMVKCommandUseNone; + bool _emulatedWaitDone = false; //Used to track if we've already waited for emulated semaphores. 
}; @@ -238,7 +279,12 @@ template class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission { public: - MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); + + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); @@ -246,7 +292,7 @@ public: protected: void submitCommandBuffers() override; - MVKSmallVector _cmdBuffers; + MVKSmallVector _cmdBuffers; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 401fa8b2..9b4afdf8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -92,20 +92,24 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) { return rslt; } -VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse) { +static inline uint32_t getCommandBufferCount(const VkSubmitInfo2* pSubmitInfo) { return pSubmitInfo->commandBufferInfoCount; } +static inline uint32_t getCommandBufferCount(const VkSubmitInfo* pSubmitInfo) { return pSubmitInfo->commandBufferCount; } + +template +VkResult MVKQueue::submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse) { // Fence-only submission if (submitCount == 0 && fence) { - return submit(new MVKQueueCommandBufferSubmission(this, nullptr, fence, cmdUse)); + return submit(new MVKQueueCommandBufferSubmission(this, (S*)nullptr, fence, cmdUse)); } VkResult rslt = VK_SUCCESS; for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) { VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? fence : VK_NULL_HANDLE; // last one gets the fence - const VkSubmitInfo* pVkSub = &pSubmits[sIdx]; + const S* pVkSub = &pSubmits[sIdx]; MVKQueueCommandBufferSubmission* mvkSub; - uint32_t cbCnt = pVkSub->commandBufferCount; + uint32_t cbCnt = getCommandBufferCount(pVkSub); if (cbCnt <= 1) { mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 16) { @@ -128,6 +132,10 @@ VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, Vk return rslt; } +// Concrete implementations of templated MVKQueue::submit(). 
+template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence, MVKCommandUse cmdUse); +template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) { return submit(new MVKQueuePresentSurfaceSubmission(this, pPresentInfo)); } @@ -344,23 +352,89 @@ void MVKQueue::destroyExecQueue() { #pragma mark - #pragma mark MVKQueueSubmission +void MVKSemaphoreSubmitInfo::encodeWait(id mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeWait(mtlCmdBuff, value); } +} + +void MVKSemaphoreSubmitInfo::encodeSignal(id mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeSignal(mtlCmdBuff, value); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo) : + _semaphore((MVKSemaphore*)semaphoreSubmitInfo.semaphore), + value(semaphoreSubmitInfo.value), + stageMask(semaphoreSubmitInfo.stageMask), + deviceIndex(semaphoreSubmitInfo.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, + VkPipelineStageFlags stageMask) : + _semaphore((MVKSemaphore*)semaphore), + value(0), + stageMask(stageMask), + deviceIndex(0) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other) : + _semaphore(other._semaphore), + value(other.value), + stageMask(other.stageMask), + deviceIndex(other.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo& MVKSemaphoreSubmitInfo::operator=(const MVKSemaphoreSubmitInfo& other) { + // Retain new object first in case it's the same object + if (other._semaphore) {other._semaphore->retain(); } + if (_semaphore) { _semaphore->release(); } + _semaphore = other._semaphore; + + value = other.value; + stageMask = other.stageMask; + deviceIndex = other.deviceIndex; + return *this; +} + +MVKSemaphoreSubmitInfo::~MVKSemaphoreSubmitInfo() { + if (_semaphore) { _semaphore->release(); } +} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBufferInfo.commandBuffer)), + deviceMask(commandBufferInfo.deviceMask) {} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBuffer)), + deviceMask(0) {} + +MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) { + _queue = queue; + _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + + _waitSemaphores.reserve(waitSemaphoreInfoCount); + for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) { + _waitSemaphores.emplace_back(pWaitSemaphoreSubmitInfos[i]); + } +} + MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores) { + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask) { _queue = queue; _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). 
_waitSemaphores.reserve(waitSemaphoreCount); for (uint32_t i = 0; i < waitSemaphoreCount; i++) { - auto* sem4 = (MVKSemaphore*)pWaitSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = 0; - _waitSemaphores.emplace_back(sem4, sem4Val); + _waitSemaphores.emplace_back(pWaitSemaphores[i], pWaitDstStageMask ? pWaitDstStageMask[i] : 0); } } MVKQueueSubmission::~MVKQueueSubmission() { - for (auto s : _waitSemaphores) { s.first->release(); } _queue->release(); } @@ -373,13 +447,13 @@ VkResult MVKQueueCommandBufferSubmission::execute() { _queue->_submissionCaptureScope->beginScope(); // If using encoded semaphore waiting, do so now. - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(getActiveMTLCommandBuffer(), ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); } // Submit each command buffer. submitCommandBuffers(); // If using encoded semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(getActiveMTLCommandBuffer(), ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(getActiveMTLCommandBuffer()); } // Commit the last MTLCommandBuffer. // Nothing after this because callback might destroy this instance before this function ends. @@ -417,7 +491,7 @@ VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool sign // should be more performant when prefilled command buffers aren't used, because we spend time encoding commands // first, thus giving the command buffer signalling these semaphores more time to complete. if ( !_emulatedWaitDone ) { - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(nil); } _emulatedWaitDone = true; } @@ -466,7 +540,7 @@ void MVKQueueCommandBufferSubmission::finish() { _queue->_submissionCaptureScope->endScope(); // If using inline semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(nil, ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(nil); } // If a fence exists, signal it. if (_fence) { _fence->signal(); } @@ -474,6 +548,31 @@ void MVKQueueCommandBufferSubmission::finish() { this->destroy(); } +// On device loss, the fence and signal semaphores may be signalled early, and they might then +// be destroyed on the waiting thread before this submission is done with them. We therefore +// retain() each here to ensure they live long enough for this submission to finish using them. +MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) : + MVKQueueSubmission(queue, + pSubmit ? pSubmit->waitSemaphoreInfoCount : 0, + pSubmit ? pSubmit->pWaitSemaphoreInfos : nullptr), + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } + + // pSubmit can be null if just tracking the fence alone + if (pSubmit) { + uint32_t ssCnt = pSubmit->signalSemaphoreInfoCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphoreInfos[i]); + } + } +} + // On device loss, the fence and signal semaphores may be signalled early, and they might then // be destroyed on the waiting thread before this submission is done with them. We therefore // retain() each here to ensure they live long enough for this submission to finish using them. 
@@ -482,15 +581,24 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue VkFence fence, MVKCommandUse cmdUse) : MVKQueueSubmission(queue, - (pSubmit ? pSubmit->waitSemaphoreCount : 0), - (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), + pSubmit ? pSubmit->waitSemaphoreCount : 0, + pSubmit ? pSubmit->pWaitSemaphores : nullptr, + pSubmit ? pSubmit->pWaitDstStageMask : nullptr), - _commandUse(cmdUse), - _emulatedWaitDone(false) { + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } // pSubmit can be null if just tracking the fence alone if (pSubmit) { - VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; + uint32_t ssCnt = pSubmit->signalSemaphoreCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphores[i], 0); + } + + VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; for (const auto* next = (const VkBaseInStructure*)pSubmit->pNext; next; next = next->pNext) { switch (next->sType) { case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: @@ -501,31 +609,21 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue } } if (pTimelineSubmit) { - // Presentation doesn't support timeline semaphores, so handle wait values here. - uint32_t wsCnt = pTimelineSubmit->waitSemaphoreValueCount; - for (uint32_t i = 0; i < wsCnt; i++) { - _waitSemaphores[i].second = pTimelineSubmit->pWaitSemaphoreValues[i]; + uint32_t wsvCnt = pTimelineSubmit->waitSemaphoreValueCount; + for (uint32_t i = 0; i < wsvCnt; i++) { + _waitSemaphores[i].value = pTimelineSubmit->pWaitSemaphoreValues[i]; } + + uint32_t ssvCnt = pTimelineSubmit->signalSemaphoreValueCount; + for (uint32_t i = 0; i < ssvCnt; i++) { + _signalSemaphores[i].value = pTimelineSubmit->pSignalSemaphoreValues[i]; + } } - uint32_t ssCnt = pSubmit->signalSemaphoreCount; - _signalSemaphores.reserve(ssCnt); - for (uint32_t i = 0; i < ssCnt; i++) { - auto* sem4 = (MVKSemaphore*)pSubmit->pSignalSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = pTimelineSubmit ? 
pTimelineSubmit->pSignalSemaphoreValues[i] : 0; - _signalSemaphores.emplace_back(sem4, sem4Val); - } } - - _fence = (MVKFence*)fence; - if (_fence) { _fence->retain(); } - - _activeMTLCommandBuffer = nil; } MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() { if (_fence) { _fence->release(); } - for (auto s : _signalSemaphores) { s.first->release(); } } @@ -534,11 +632,28 @@ void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); - for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } + for (auto& cbInfo : _cmdBuffers) { cbInfo.commandBuffer->submit(this, &_encodingContext); } mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); } +template +MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { + + if (pSubmit) { + uint32_t cbCnt = pSubmit->commandBufferInfoCount; + _cmdBuffers.reserve(cbCnt); + for (uint32_t i = 0; i < cbCnt; i++) { + _cmdBuffers.emplace_back(pSubmit->pCommandBufferInfos[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); + } + } +} + template MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, @@ -550,9 +665,8 @@ MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQ uint32_t cbCnt = pSubmit->commandBufferCount; _cmdBuffers.reserve(cbCnt); for (uint32_t i = 0; i < cbCnt; i++) { - MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); - _cmdBuffers.push_back(cb); - setConfigurationResult(cb->getConfigurationResult()); + _cmdBuffers.emplace_back(pSubmit->pCommandBuffers[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); } } } @@ -571,9 +685,8 @@ VkResult MVKQueuePresentSurfaceSubmission::execute() { id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true); for (auto& ws : _waitSemaphores) { - auto& sem4 = ws.first; - sem4->encodeWait(mtlCmdBuff, 0); // Encoded semaphore waits - sem4->encodeWait(nil, 0); // Inline semaphore waits + ws.encodeWait(mtlCmdBuff); // Encoded semaphore waits + ws.encodeWait(nil); // Inline semaphore waits } for (int i = 0; i < _presentInfo.size(); i++ ) { @@ -612,7 +725,7 @@ void MVKQueuePresentSurfaceSubmission::finish() { MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo) - : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) { + : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores, nullptr) { const VkPresentTimesInfoGOOGLE* pPresentTimesInfo = nullptr; const VkSwapchainPresentFenceInfoEXT* pPresentFenceInfo = nullptr; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index 534ec018..6cbe2e4e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -265,6 +265,22 @@ protected: #pragma mark - #pragma mark MVKRenderPass +/** Collects together VkSubpassDependency and VkMemoryBarrier2. 
*/ +typedef struct MVKSubpassDependency { + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags2 srcStageMask; + VkPipelineStageFlags2 dstStageMask; + VkAccessFlags2 srcAccessMask; + VkAccessFlags2 dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; + + MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset); + MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar); + +} MVKSubpassDependency; + /** Represents a Vulkan render pass. */ class MVKRenderPass : public MVKVulkanAPIDeviceObject { @@ -308,7 +324,7 @@ protected: MVKSmallVector _attachments; MVKSmallVector _subpasses; - MVKSmallVector _subpassDependencies; + MVKSmallVector _subpassDependencies; VkRenderingFlags _renderingFlags = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 762d72d9..3bf8a188 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -904,6 +904,26 @@ MVKAttachmentDescription::MVKAttachmentDescription(MVKRenderPass* renderPass, #pragma mark - #pragma mark MVKRenderPass +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(spDep.srcStageMask), + dstStageMask(spDep.dstStageMask), + srcAccessMask(spDep.srcAccessMask), + dstAccessMask(spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(viewOffset) {} + +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(pMemBar ? pMemBar->srcStageMask : spDep.srcStageMask), + dstStageMask(pMemBar ? pMemBar->dstStageMask : spDep.dstStageMask), + srcAccessMask(pMemBar ? pMemBar->srcAccessMask : spDep.srcAccessMask), + dstAccessMask(pMemBar ? pMemBar->dstAccessMask : spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(spDep.viewOffset) {} + VkExtent2D MVKRenderPass::getRenderAreaGranularity() { if (_device->_pMetalFeatures->tileBasedDeferredRendering) { // This is the tile area. @@ -954,19 +974,7 @@ MVKRenderPass::MVKRenderPass(MVKDevice* device, } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - VkSubpassDependency2 dependency = { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, - .pNext = nullptr, - .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass, - .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass, - .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask, - .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask, - .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask, - .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask, - .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags, - .viewOffset = viewOffsets ? viewOffsets[i] : 0, - }; - _subpassDependencies.push_back(dependency); + _subpassDependencies.emplace_back(pCreateInfo->pDependencies[i], viewOffsets ? 
viewOffsets[i] : 0); } // Link attachments to subpasses @@ -991,7 +999,19 @@ MVKRenderPass::MVKRenderPass(MVKDevice* device, } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - _subpassDependencies.push_back(pCreateInfo->pDependencies[i]); + auto& spDep = pCreateInfo->pDependencies[i]; + + const VkMemoryBarrier2* pMemoryBarrier2 = nullptr; + for (auto* next = (const VkBaseInStructure*)spDep.pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER_2: + pMemoryBarrier2 = (const VkMemoryBarrier2*)next; + break; + default: + break; + } + } + _subpassDependencies.emplace_back(spDep, pMemoryBarrier2); } // Link attachments to subpasses diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h index a1c3da6b..5b9c47fd 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h @@ -60,9 +60,7 @@ public: } /** Applies the specified global memory barrier. */ - virtual void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + virtual void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h index 87418edd..be4f2545 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h @@ -43,7 +43,7 @@ using namespace mvk; typedef struct MVKMTLFunction { SPIRVToMSLConversionResultInfo shaderConversionResults; MTLSize threadGroupSize; - inline id getMTLFunction() { return _mtlFunction; } + id getMTLFunction() { return _mtlFunction; } MVKMTLFunction(id mtlFunc, const SPIRVToMSLConversionResultInfo scRslts, MTLSize tgSize); MVKMTLFunction(const MVKMTLFunction& other); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index 90831498..8619a0da 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -36,10 +36,11 @@ MVKMTLFunction::MVKMTLFunction(const MVKMTLFunction& other) { } MVKMTLFunction& MVKMTLFunction::operator=(const MVKMTLFunction& other) { - if (_mtlFunction != other._mtlFunction) { - [_mtlFunction release]; - _mtlFunction = [other._mtlFunction retain]; // retained - } + // Retain new object first in case it's the same object + [other._mtlFunction retain]; + [_mtlFunction release]; + _mtlFunction = other._mtlFunction; + shaderConversionResults = other.shaderConversionResults; threadGroupSize = other.threadGroupSize; return *this; diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index 74a00629..f6ad3447 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -91,6 +91,7 @@ MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain_mutable_format, KHR_SWAPCHAIN_MUTABLE_FORMAT, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(KHR_synchronization2, KHR_SYNCHRONIZATION_2, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_timeline_semaphore, KHR_TIMELINE_SEMAPHORE, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_uniform_buffer_standard_layout, 
KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_variable_pointers, KHR_VARIABLE_POINTERS, DEVICE, 10.11, 8.0, 1.0) diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm index caa77623..90cb72e0 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm @@ -728,40 +728,50 @@ MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode } } -MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, +MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore) { // Although there are many combined render/compute/host stages in Vulkan, there are only two render // stages in Metal. If the Vulkan stage did not map ONLY to a specific Metal render stage, then if the // barrier is to be placed before the render stages, it should come before the vertex stage, otherwise // if the barrier is to be placed after the render stages, it should come after the fragment stage. if (placeBarrierBefore) { - bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); + bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)); return placeBeforeFragment ? MTLRenderStageFragment : MTLRenderStageVertex; } else { - bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT)); + bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT)); return placeAfterVertex ? 
MTLRenderStageVertex : MTLRenderStageFragment; } } -MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess) { +MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess) { MTLBarrierScope mtlScope = MTLBarrierScope(0); - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_2_INDEX_READ_BIT | + VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_2_UNIFORM_READ_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers; } - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_SHADER_READ_BIT | + VK_ACCESS_2_SHADER_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers | MTLBarrierScopeTextures; } #if MVK_MACOS - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeRenderTargets; } #endif diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index 44b0e5f6..d3dcbca9 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2517,8 +2517,8 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkWaitSemaphores( #pragma mark Vulkan 1.3 calls MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering( - VkCommandBuffer commandBuffer, - const VkRenderingInfo* pRenderingInfo) { + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo) { MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BeginRendering, pRenderingInfo->colorAttachmentCount, @@ -2527,7 +2527,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering( } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering( - VkCommandBuffer commandBuffer) { + VkCommandBuffer commandBuffer) { MVKTraceVulkanCallStart(); MVKAddCmd(EndRendering, commandBuffer); @@ -2537,56 +2537,79 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering( MVK_PUBLIC_VULKAN_STUB(vkCmdBindVertexBuffers2, void, VkCommandBuffer, uint32_t, uint32_t, const VkBuffer*, const VkDeviceSize*, const VkDeviceSize*, const VkDeviceSize*) MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBlitImage2( - VkCommandBuffer commandBuffer, - const VkBlitImageInfo2* pBlitImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkBlitImageInfo2* pBlitImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(BlitImage, pBlitImageInfo->regionCount, 1, commandBuffer, pBlitImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBuffer2( - VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2* pCopyBufferInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(CopyBuffer, pCopyBufferInfo->regionCount, 1, commandBuffer, pCopyBufferInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void 
vkCmdCopyBufferToImage2( - VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyBufferToImageInfo->regionCount, 1, 4, 8, commandBuffer, pCopyBufferToImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImage2( - VkCommandBuffer commandBuffer, - const VkCopyImageInfo2* pCopyImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(CopyImage, pCopyImageInfo->regionCount, 1, commandBuffer, pCopyImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImageToBuffer2( - VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2* pCopyImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyImageInfo->regionCount, 1, 4, 8, commandBuffer, pCopyImageInfo); MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdPipelineBarrier2, void, VkCommandBuffer, const VkDependencyInfo*) -MVK_PUBLIC_VULKAN_STUB(vkCmdResetEvent2, void, VkCommandBuffer, VkEvent, VkPipelineStageFlags2 stageMask) +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdPipelineBarrier2( + VkCommandBuffer commandBuffer, + const VkDependencyInfo* pDependencyInfo) { + + MVKTraceVulkanCallStart(); + uint32_t barrierCount = pDependencyInfo->memoryBarrierCount + pDependencyInfo->bufferMemoryBarrierCount + pDependencyInfo->imageMemoryBarrierCount; + MVKAddCmdFrom2Thresholds(PipelineBarrier, barrierCount, 1, 4, commandBuffer, pDependencyInfo); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResetEvent2( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags2 stageMask) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(ResetEvent, commandBuffer, event, stageMask); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2( - VkCommandBuffer commandBuffer, - const VkResolveImageInfo2* pResolveImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkResolveImageInfo2* pResolveImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(ResolveImage, pResolveImageInfo->regionCount, 1, commandBuffer, pResolveImageInfo); MVKTraceVulkanCallEnd(); @@ -2598,7 +2621,17 @@ MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthBoundsTestEnable, void, VkCommandBuffer, VkB MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthCompareOp, void, VkCommandBuffer, VkCompareOp) MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthTestEnable, void, VkCommandBuffer, VkBool32) MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthWriteEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetEvent2, void, VkCommandBuffer, VkEvent, const VkDependencyInfo*) + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetEvent2( + VkCommandBuffer commandBuffer, + VkEvent event, + const VkDependencyInfo* pDependencyInfo) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetEvent, commandBuffer, event, pDependencyInfo); + MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_STUB(vkCmdSetFrontFace, void, VkCommandBuffer, VkFrontFace) MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveRestartEnable, void, VkCommandBuffer, VkBool32) MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveTopology, void, VkCommandBuffer, VkPrimitiveTopology) @@ -2607,8 +2640,29 @@ 
MVK_PUBLIC_VULKAN_STUB(vkCmdSetScissorWithCount, void, VkCommandBuffer, uint32_t MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilOp, void, VkCommandBuffer, VkStencilFaceFlags, VkStencilOp, VkStencilOp, VkStencilOp, VkCompareOp) MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilTestEnable, void, VkCommandBuffer, VkBool32) MVK_PUBLIC_VULKAN_STUB(vkCmdSetViewportWithCount, void, VkCommandBuffer, uint32_t, const VkViewport*) -MVK_PUBLIC_VULKAN_STUB(vkCmdWaitEvents2, void, VkCommandBuffer, uint32_t, const VkEvent*, const VkDependencyInfo*) -MVK_PUBLIC_VULKAN_STUB(vkCmdWriteTimestamp2, void, VkCommandBuffer, VkPipelineStageFlags2, VkQueryPool, uint32_t) + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWaitEvents2( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFromThreshold(WaitEvents, eventCount, 1, commandBuffer, eventCount, pEvents, pDependencyInfos); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWriteTimestamp2( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags2 stage, + VkQueryPool queryPool, + uint32_t query) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(WriteTimestamp, commandBuffer, stage, queryPool, query); + MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkCreatePrivateDataSlot, VkDevice, const VkPrivateDataSlotCreateInfo*, const VkAllocationCallbacks*, VkPrivateDataSlot*) MVK_PUBLIC_VULKAN_STUB(vkDestroyPrivateDataSlot, void, VkDevice, VkPrivateDataSlot, const VkAllocationCallbacks*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceBufferMemoryRequirements, void, VkDevice, const VkDeviceBufferMemoryRequirements*, VkMemoryRequirements2*) @@ -2616,7 +2670,20 @@ MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageMemoryRequirements, void, VkDevice, const MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageSparseMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, uint32_t*, VkSparseImageMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkGetPhysicalDeviceToolProperties, VkPhysicalDevice, uint32_t*, VkPhysicalDeviceToolProperties*) MVK_PUBLIC_VULKAN_STUB(vkGetPrivateData, void, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t*) -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkQueueSubmit2, VkQueue, uint32_t, const VkSubmitInfo2*, VkFence) + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence) { + + MVKTraceVulkanCallStart(); + MVKQueue* mvkQ = MVKQueue::getMVKQueue(queue); + VkResult rslt = mvkQ->submit(submitCount, pSubmits, fence, kMVKCommandUseQueueSubmit); + MVKTraceVulkanCallEnd(); + return rslt; +} + MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkSetPrivateData, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t) #pragma mark - @@ -3102,6 +3169,17 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkGetPhysicalDeviceSurfaceFormats2KHR( } +#pragma mark - +#pragma mark VK_KHR_synchronization2 + +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdPipelineBarrier2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdResetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWaitEvents2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWriteTimestamp2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkQueueSubmit2, KHR); + + #pragma mark - #pragma mark VK_KHR_timeline_semaphore From 2c3dc6415a57900b33953798259e0552f45ef964 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 19 Sep 2023 16:29:30 -0400 Subject: [PATCH 12/41] Promote VK_EXT_private_data functions to Vulkan 1.3, and remove stubs. 
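The core Vulkan 1.3 entry points below now carry the implementation, and the VK_EXT_private_data names are re-exposed as aliases of those core symbols. A minimal sketch of how an application exercises this API through the core names follows; it assumes `<vulkan/vulkan.h>` is included and that `device` and `image` are valid VkDevice/VkImage handles created elsewhere (illustrative only, error handling omitted):

    // Attach a 64-bit payload to a Vulkan object through a private data slot,
    // using the core 1.3 names promoted here.
    VkPrivateDataSlotCreateInfo slotInfo = { VK_STRUCTURE_TYPE_PRIVATE_DATA_SLOT_CREATE_INFO };
    VkPrivateDataSlot slot;
    vkCreatePrivateDataSlot(device, &slotInfo, nullptr, &slot);

    vkSetPrivateData(device, VK_OBJECT_TYPE_IMAGE, (uint64_t)image, slot, 42);

    uint64_t value = 0;
    vkGetPrivateData(device, VK_OBJECT_TYPE_IMAGE, (uint64_t)image, slot, &value);   // value == 42

    vkDestroyPrivateDataSlot(device, slot, nullptr);
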
--- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 109 +++++++++++++++-------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index d3dcbca9..cfa133fb 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2663,13 +2663,47 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWriteTimestamp2( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkCreatePrivateDataSlot, VkDevice, const VkPrivateDataSlotCreateInfo*, const VkAllocationCallbacks*, VkPrivateDataSlot*) -MVK_PUBLIC_VULKAN_STUB(vkDestroyPrivateDataSlot, void, VkDevice, VkPrivateDataSlot, const VkAllocationCallbacks*) +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlot( + VkDevice device, + const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPrivateDataSlotEXT* pPrivateDataSlot) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlot( + VkDevice device, + VkPrivateDataSlotEXT privateDataSlot, + const VkAllocationCallbacks* pAllocator) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator); + MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_STUB(vkGetDeviceBufferMemoryRequirements, void, VkDevice, const VkDeviceBufferMemoryRequirements*, VkMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, VkMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageSparseMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, uint32_t*, VkSparseImageMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkGetPhysicalDeviceToolProperties, VkPhysicalDevice, uint32_t*, VkPhysicalDeviceToolProperties*) -MVK_PUBLIC_VULKAN_STUB(vkGetPrivateData, void, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t*) + +MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateData( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t* pData) { + + MVKTraceVulkanCallStart(); + MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; + *pData = mvkPDS->getData(objectType, objectHandle); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( VkQueue queue, @@ -2684,7 +2718,21 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( return rslt; } -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkSetPrivateData, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t) +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateData( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t data) { + + MVKTraceVulkanCallStart(); + MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; + mvkPDS->setData(objectType, objectHandle, data); + MVKTraceVulkanCallEnd(); + return VK_SUCCESS; +} + + #pragma mark - #pragma mark VK_KHR_bind_memory2 extension @@ -3511,56 +3559,11 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkExportMetalObjectsEXT( #pragma mark - #pragma mark VK_EXT_private_data extension -MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlotEXT( - VkDevice device, - const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, - const VkAllocationCallbacks* 
pAllocator, - VkPrivateDataSlotEXT* pPrivateDataSlot) { +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCreatePrivateDataSlot, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkDestroyPrivateDataSlot, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkGetPrivateData, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkSetPrivateData, EXT); - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot); - MVKTraceVulkanCallEnd(); - return rslt; -} - -MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlotEXT( - VkDevice device, - VkPrivateDataSlotEXT privateDataSlot, - const VkAllocationCallbacks* pAllocator) { - - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateDataEXT( - VkDevice device, - VkObjectType objectType, - uint64_t objectHandle, - VkPrivateDataSlotEXT privateDataSlot, - uint64_t data) { - - MVKTraceVulkanCallStart(); - MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; - mvkPDS->setData(objectType, objectHandle, data); - MVKTraceVulkanCallEnd(); - return VK_SUCCESS; -} - -MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateDataEXT( - VkDevice device, - VkObjectType objectType, - uint64_t objectHandle, - VkPrivateDataSlotEXT privateDataSlot, - uint64_t* pData) { - - MVKTraceVulkanCallStart(); - MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; - *pData = mvkPDS->getData(objectType, objectHandle); - MVKTraceVulkanCallEnd(); -} #pragma mark - #pragma mark VK_EXT_sample_locations extension From 5a216ab1f86122339a8d3b0e4bf3ed3f8802245b Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 11:41:36 -0500 Subject: [PATCH 13/41] Refcounting cleanup - Use relaxed atomics where possible - Calling operator= on a refcounted object should not reinitialize the refcount --- MoltenVK/MoltenVK/Utility/MVKBaseObject.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h index d2fcb9e8..dd156b3e 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h @@ -143,7 +143,7 @@ public: * Called when this instance has been retained as a reference by another object, * indicating that this instance will not be deleted until that reference is released. */ - void retain() { _refCount++; } + void retain() { _refCount.fetch_add(1, std::memory_order_relaxed); } /** * Called when this instance has been released as a reference from another object. @@ -154,7 +154,7 @@ public: * Note that the destroy() function is called on the BaseClass. * Releasing will not call any overridden destroy() function in a descendant class. */ - void release() { if (--_refCount == 0) { BaseClass::destroy(); } } + void release() { if (_refCount.fetch_sub(1, std::memory_order_acq_rel) == 1) { BaseClass::destroy(); } } /** * Marks this instance as destroyed. If all previous references to this instance @@ -166,15 +166,10 @@ public: MVKReferenceCountingMixin() : _refCount(1) {} /** Copy starts with fresh reference counts. */ - MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) { - _refCount = 1; - } + MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) : _refCount(1) {} - /** Copy starts with fresh reference counts. 
*/ - MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) { - _refCount = 1; - return *this; - } + /** Don't overwrite refcounted objects. */ + MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) = delete; protected: std::atomic _refCount; From aeae18d48b362e133718c2a1a3f4099d4b17d230 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 12:00:31 -0500 Subject: [PATCH 14/41] Remove getBaseObject Nothing used it, and you should always be able to `static_cast` for any object without a crazy inheritance tree --- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 1 - .../MoltenVK/Commands/MVKMTLBufferAllocation.h | 1 - MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 14 +------------- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 1 - MoltenVK/MoltenVK/Utility/MVKBaseObject.h | 2 +- 5 files changed, 2 insertions(+), 17 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 59242aff..8f8b2c0b 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -182,7 +182,6 @@ protected: friend class MVKCommandEncoder; friend class MVKCommandPool; - MVKBaseObject* getBaseObject() override { return this; }; void propagateDebugName() override {} void init(const VkCommandBufferAllocateInfo* pAllocateInfo); bool canExecute(); diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h index 474a0a16..2be98144 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h @@ -99,7 +99,6 @@ public: protected: friend class MVKMTLBufferAllocation; - MVKBaseObject* getBaseObject() override { return this; }; MVKMTLBufferAllocation* newObject() override; void returnAllocationUnlocked(MVKMTLBufferAllocation* ba); void returnAllocation(MVKMTLBufferAllocation* ba); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index b6b462ad..125bf9aa 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -959,13 +959,9 @@ public: bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; }; /** Constructs an instance for the specified device. */ - MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } - - virtual ~MVKDeviceTrackingMixin() {} + MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } protected: - virtual MVKBaseObject* getBaseObject() = 0; - MVKDevice* _device; }; @@ -980,9 +976,6 @@ public: /** Constructs an instance for the specified device. */ MVKBaseDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {} - -protected: - MVKBaseObject* getBaseObject() override { return this; }; }; @@ -999,10 +992,6 @@ public: /** Constructs an instance for the specified device. 
*/ MVKVulkanAPIDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {} - -protected: - MVKBaseObject* getBaseObject() override { return this; }; - }; @@ -1055,7 +1044,6 @@ public: protected: T* newObject() override { return new T(_device); } - MVKBaseObject* getBaseObject() override { return this; }; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index b4509f0b..086410e8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -135,7 +135,6 @@ protected: friend class MVKQueueCommandBufferSubmission; friend class MVKQueuePresentSurfaceSubmission; - MVKBaseObject* getBaseObject() override { return this; }; void propagateDebugName() override; void initName(); void initExecQueue(); diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h index dd156b3e..41942802 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h @@ -105,7 +105,7 @@ public: /** Destroys this object. Default behaviour simply deletes it. Subclasses may override to delay deletion. */ virtual void destroy() { delete this; } - virtual ~MVKBaseObject() {} + virtual ~MVKBaseObject() {} protected: static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(4, 0); From 89195dc7254c56c50c7cc64a17b6a0a12ff075d6 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 12:13:21 -0500 Subject: [PATCH 15/41] Remove count from mvkStringsAreEqual It doesn't do anything, and we don't want anyone to think it does something --- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 1097afb5..16c7fd2e 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -598,7 +598,7 @@ bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) { * which works on individual chars or char arrays, not strings. * Returns false if either string is null. 
*/ -static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2, size_t count = 1) { +static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2) { return pV1 && pV2 && (pV1 == pV2 || strcmp(pV1, pV2) == 0); } From 4ba3f335b4f73627e94645361bc35efce222dad2 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 15:22:44 -0500 Subject: [PATCH 16/41] MVKArrayRef cleanup Make everything constexpr, remove direct access to members --- .../MoltenVK/Commands/MVKCommandBuffer.mm | 6 ++-- .../Commands/MVKCommandEncoderState.mm | 28 +++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 2 +- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 29 ++++++++++--------- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 2 +- 7 files changed, 37 insertions(+), 34 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 72dde4f1..e92a57c1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -472,8 +472,8 @@ void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd, _attachments.assign(attachments.begin(), attachments.end()); // Copy the sample positions array of arrays, one array of sample positions for each subpass index. - _subpassSamplePositions.resize(subpassSamplePositions.size); - for (uint32_t spSPIdx = 0; spSPIdx < subpassSamplePositions.size; spSPIdx++) { + _subpassSamplePositions.resize(subpassSamplePositions.size()); + for (uint32_t spSPIdx = 0; spSPIdx < subpassSamplePositions.size(); spSPIdx++) { _subpassSamplePositions[spSPIdx].assign(subpassSamplePositions[spSPIdx].begin(), subpassSamplePositions[spSPIdx].end()); } @@ -593,7 +593,7 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { // and Metal will default to using default sample postions. if (_pDeviceMetalFeatures->programmableSamplePositions) { auto cstmSampPosns = getCustomSamplePositions(); - [mtlRPDesc setSamplePositions: cstmSampPosns.data count: cstmSampPosns.size]; + [mtlRPDesc setSamplePositions: cstmSampPosns.data() count: cstmSampPosns.size()]; } _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 044dd96e..2817343d 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -58,7 +58,7 @@ void MVKViewportCommandEncoderState::setViewports(const MVKArrayRef uint32_t firstViewport, bool isSettingDynamically) { - size_t vpCnt = viewports.size; + size_t vpCnt = viewports.size(); uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; if ((firstViewport + vpCnt > maxViewports) || (firstViewport >= maxViewports) || @@ -111,7 +111,7 @@ void MVKScissorCommandEncoderState::setScissors(const MVKArrayRef scis uint32_t firstScissor, bool isSettingDynamically) { - size_t sCnt = scissors.size; + size_t sCnt = scissors.size(); uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; if ((firstScissor + sCnt > maxScissors) || (firstScissor >= maxScissors) || @@ -165,7 +165,7 @@ void MVKPushConstantsCommandEncoderState:: setPushConstants(uint32_t offset, MVK // Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size. 
// Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader. size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment; - size_t pcSize = pushConstants.size; + size_t pcSize = pushConstants.size(); size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign); mvkEnsureSize(_pushConstants, pcBuffSize); copy(pushConstants.begin(), pushConstants.end(), _pushConstants.begin() + offset); @@ -488,7 +488,7 @@ void MVKResourcesCommandEncoderState::bindDescriptorSet(uint32_t descSetIndex, // Update dynamic buffer offsets uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexes().dynamicOffsetBufferIndex; uint32_t doCnt = descSet->getDynamicOffsetDescriptorCount(); - for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size; doIdx++) { + for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size(); doIdx++) { updateImplicitBuffer(_dynamicOffsets, baseDynOfstIdx + doIdx, dynamicOffsets[dynamicOffsetIndex++]); } @@ -797,8 +797,8 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -848,8 +848,8 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -881,8 +881,8 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -914,8 +914,8 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -947,8 +947,8 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setFragmentBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index a3f02ea8..ac83d697 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -729,7 +729,7 @@ void MVKBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder, MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) { MVKMTLBufferBinding bb; - NSUInteger bufferDynamicOffset = 
(usesDynamicBufferOffsets() && dynamicOffsets.size > dynamicOffsetIndex + NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size() > dynamicOffsetIndex ? dynamicOffsets[dynamicOffsetIndex++] : 0); if (_mvkBuffer) { bb.mtlBuffer = _mvkBuffer->getMTLBuffer(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index acd50514..20bad33e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1530,7 +1530,7 @@ MVKArrayRef MVKPhysicalDevice::getQueueFamilies() { VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties) { auto qFams = getQueueFamilies(); - uint32_t qfCnt = uint32_t(qFams.size); + uint32_t qfCnt = uint32_t(qFams.size()); // If properties aren't actually being requested yet, simply update the returned count if ( !pQueueFamilyProperties ) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index e3da96b3..779eb75a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -49,7 +49,7 @@ void MVKPipelineLayout::bindDescriptorSets(MVKCommandEncoder* cmdEncoder, MVKArrayRef dynamicOffsets) { if (!cmdEncoder) { clearConfigurationResult(); } uint32_t dynamicOffsetIndex = 0; - size_t dsCnt = descriptorSets.size; + size_t dsCnt = descriptorSets.size(); for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) { MVKDescriptorSet* descSet = descriptorSets[dsIdx]; uint32_t dslIdx = firstSet + dsIdx; diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 16c7fd2e..8ea9f658 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -478,20 +478,23 @@ std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 53 */ template struct MVKArrayRef { - Type* data; - const size_t size; +public: + constexpr const Type* begin() const { return _data; } + constexpr const Type* end() const { return &_data[_size]; } + constexpr const Type* data() const { return _data; } + constexpr Type* begin() { return _data; } + constexpr Type* end() { return &_data[_size]; } + constexpr Type* data() { return _data; } + constexpr const size_t size() const { return _size; } + constexpr const size_t byteSize() const { return _size * sizeof(Type); } + constexpr const Type& operator[]( const size_t i ) const { return _data[i]; } + constexpr Type& operator[]( const size_t i ) { return _data[i]; } + constexpr MVKArrayRef() : MVKArrayRef(nullptr, 0) {} + constexpr MVKArrayRef(Type* d, size_t s) : _data(d), _size(s) {} - const Type* begin() const { return data; } - const Type* end() const { return &data[size]; } - const Type& operator[]( const size_t i ) const { return data[i]; } - Type& operator[]( const size_t i ) { return data[i]; } - MVKArrayRef& operator=(const MVKArrayRef& other) { - data = other.data; - *(size_t*)&size = other.size; - return *this; - } - MVKArrayRef() : MVKArrayRef(nullptr, 0) {} - MVKArrayRef(Type* d, size_t s) : data(d), size(s) {} +protected: + Type* _data; + size_t _size; }; /** Ensures the size of the specified container is at least the specified size. 
*/ diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index cfa133fb..c44dd7d1 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -1964,7 +1964,7 @@ static void mvkCmdBeginRenderPass( MVKAddCmdFrom5Thresholds(BeginRenderPass, pRenderPassBegin->clearValueCount, 1, 2, - attachments.size, 0, 1, 2, + attachments.size(), 0, 1, 2, commandBuffer, pRenderPassBegin, pSubpassBeginInfo, From 27f4f6a6a017cb7e9ddad72f7fb7b87f85707788 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Thu, 15 Jun 2023 13:24:58 -0500 Subject: [PATCH 17/41] Use MVKArrayRef, not const MVKArrayRef It's very easy to accidentally un-const a `const MVKArrayRef`, since ArrayRefs are meant to be passed by value --- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 2 +- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm | 2 +- .../MoltenVK/Commands/MVKCommandEncoderState.h | 8 ++++---- .../Commands/MVKCommandEncoderState.mm | 18 +++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h | 4 ++-- MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm | 4 ++-- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h | 10 +++++----- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm | 10 +++++----- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 18 ++++++++---------- MoltenVK/MoltenVK/Utility/MVKSmallVector.h | 8 ++++---- 10 files changed, 41 insertions(+), 43 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 8f8b2c0b..07b4c202 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -143,7 +143,7 @@ public: bool _needsVisibilityResultMTLBuffer; /** Called when a MVKCmdExecuteCommands is added to this command buffer. */ - void recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers); + void recordExecuteCommands(MVKArrayRef secondaryCommandBuffers); /** Called when a timestamp command is added. */ void recordTimestampCommand(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index e92a57c1..5f32996e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -310,7 +310,7 @@ MVKCommandBuffer::~MVKCommandBuffer() { } // Promote the initial visibility buffer and indication of timestamp use from the secondary buffers. -void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers) { +void MVKCommandBuffer::recordExecuteCommands(MVKArrayRef secondaryCommandBuffers) { for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) { if (cmdBuff->_needsVisibilityResultMTLBuffer) { _needsVisibilityResultMTLBuffer = true; } if (cmdBuff->_hasStageCounterTimestampCommand) { _hasStageCounterTimestampCommand = true; } diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 06152dd7..6dbeb647 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -143,7 +143,7 @@ public: * The isSettingDynamically indicates that the scissor is being changed dynamically, * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. 
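The rationale above, that a `const MVKArrayRef` is easy to accidentally un-const because array refs are passed by value, comes down to top-level `const` only freezing the caller's copy, not the referenced elements. A small sketch with a simplified, hypothetical `ArrayRef` makes the distinction concrete; the converting constructor added to `MVKFoundation.h` in this same patch presumably exists so a mutable `MVKArrayRef<T>` can still be passed where `MVKArrayRef<const T>` is expected.

```cpp
// Hypothetical, simplified ArrayRef used only to illustrate the const semantics.
#include <cstddef>

template <typename T>
struct ArrayRef {
	T*     data;
	size_t size;
};

// Top-level const on a by-value parameter only protects the local copy of the
// ref; the elements it points at stay writable.
void looksReadOnlyButIsNot(const ArrayRef<float> a) {
	// a.data = nullptr;          // error: 'a' itself is const
	if (a.size) a.data[0] = 1.0f; // compiles: elements are not const
}

// Const-ness that actually matters lives in the element type.
float readOnly(ArrayRef<const float> a) {
	// a.data[0] = 1.0f;          // error: elements are const
	return a.size ? a.data[0] : 0.0f;
}
```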
*/ - void setViewports(const MVKArrayRef viewports, + void setViewports(MVKArrayRef viewports, uint32_t firstViewport, bool isSettingDynamically); @@ -171,7 +171,7 @@ public: * The isSettingDynamically indicates that the scissor is being changed dynamically, * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. */ - void setScissors(const MVKArrayRef scissors, + void setScissors(MVKArrayRef scissors, uint32_t firstScissor, bool isSettingDynamically); @@ -457,7 +457,7 @@ protected: contents[index] = value; } - void assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef texBindings); + void assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef texBindings); void encodeMetalArgumentBuffer(MVKShaderStage stage); virtual void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) = 0; @@ -547,7 +547,7 @@ public: const char* pStageName, bool fullImageViewSwizzle, std::function bindBuffer, - std::function)> bindImplicitBuffer, + std::function)> bindImplicitBuffer, std::function bindTexture, std::function bindSampler); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 2817343d..37f0194f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -54,7 +54,7 @@ void MVKPipelineCommandEncoderState::encodeImpl(uint32_t stage) { #pragma mark - #pragma mark MVKViewportCommandEncoderState -void MVKViewportCommandEncoderState::setViewports(const MVKArrayRef viewports, +void MVKViewportCommandEncoderState::setViewports(MVKArrayRef viewports, uint32_t firstViewport, bool isSettingDynamically) { @@ -107,7 +107,7 @@ void MVKViewportCommandEncoderState::encodeImpl(uint32_t stage) { #pragma mark - #pragma mark MVKScissorCommandEncoderState -void MVKScissorCommandEncoderState::setScissors(const MVKArrayRef scissors, +void MVKScissorCommandEncoderState::setScissors(MVKArrayRef scissors, uint32_t firstScissor, bool isSettingDynamically) { @@ -594,7 +594,7 @@ void MVKResourcesCommandEncoderState::markDirty() { } // If a swizzle is needed for this stage, iterates all the bindings and logs errors for those that need texture swizzling. 
-void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef texBindings) { +void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef texBindings) { if (needsSwizzle) { for (auto& tb : texBindings) { VkComponentMapping vkcm = mvkUnpackSwizzle(tb.swizzle); @@ -684,7 +684,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag const char* pStageName, bool fullImageViewSwizzle, std::function bindBuffer, - std::function)> bindImplicitBuffer, + std::function)> bindImplicitBuffer, std::function bindTexture, std::function bindSampler) { @@ -795,7 +795,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), s.data(), s.byteSize(), @@ -846,7 +846,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { b.isDirty = true; // We haven't written it out, so leave dirty until next time. } }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, s.data(), s.byteSize(), @@ -879,7 +879,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), s.data(), s.byteSize(), @@ -912,7 +912,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, s.data(), s.byteSize(), @@ -945,7 +945,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setFragmentBytes(cmdEncoder->_mtlRenderEncoder, s.data(), s.byteSize(), diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h index 80c3a357..5f44a95f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h @@ -56,7 +56,7 @@ public: virtual void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder); /** Finishes the specified queries and marks them as available. */ - virtual void finishQueries(const MVKArrayRef queries); + virtual void finishQueries(MVKArrayRef queries); /** Resets the results and availability status of the specified queries. 
*/ virtual void resetResults(uint32_t firstQuery, uint32_t queryCount, MVKCommandEncoder* cmdEncoder); @@ -212,7 +212,7 @@ class MVKTimestampQueryPool : public MVKGPUCounterQueryPool { public: void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override; - void finishQueries(const MVKArrayRef queries) override; + void finishQueries(MVKArrayRef queries) override; #pragma mark Construction diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm index 2e0e1368..1bd0a6d0 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm @@ -52,7 +52,7 @@ void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) { } // Mark queries as available -void MVKQueryPool::finishQueries(const MVKArrayRef queries) { +void MVKQueryPool::finishQueries(MVKArrayRef queries) { lock_guard lock(_availabilityLock); for (uint32_t qry : queries) { if (_availability[qry] == DeviceAvailable) { @@ -379,7 +379,7 @@ void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncod } // If not using MTLCounterSampleBuffer, update timestamp values, then mark queries as available -void MVKTimestampQueryPool::finishQueries(const MVKArrayRef queries) { +void MVKTimestampQueryPool::finishQueries(MVKArrayRef queries) { if ( !_mtlCounterBuffer ) { uint64_t ts = mvkGetTimestamp(); for (uint32_t qry : queries) { _timestamps[qry] = ts; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index 6cbe2e4e..cb9c8e44 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -116,8 +116,8 @@ public: void populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, uint32_t passIdx, MVKFramebuffer* framebuffer, - const MVKArrayRef attachments, - const MVKArrayRef clearValues, + MVKArrayRef attachments, + MVKArrayRef clearValues, bool isRenderingEntireAttachment, bool loadOverride = false); @@ -126,7 +126,7 @@ public: * when the render area is smaller than the full framebuffer size. */ void populateClearAttachments(MVKClearAttachments& clearAtts, - const MVKArrayRef clearValues); + MVKArrayRef clearValues); /** * Populates the specified vector with VkClearRects for clearing views of a specified multiview @@ -140,11 +140,11 @@ public: /** If a render encoder is active, sets the store actions for all attachments to it. */ void encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, - const MVKArrayRef attachments, + MVKArrayRef attachments, bool storeOverride = false); /** Resolves any resolve attachments that cannot be handled by native Metal subpass resolve behavior. 
*/ - void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef attachments); + void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef attachments); MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo, const VkRenderPassInputAttachmentAspectCreateInfo* pInputAspects, diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 3bf8a188..a742690d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -138,8 +138,8 @@ uint32_t MVKRenderSubpass::getViewCountUpToMetalPass(uint32_t passIdx) const { void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, uint32_t passIdx, MVKFramebuffer* framebuffer, - const MVKArrayRef attachments, - const MVKArrayRef clearValues, + MVKArrayRef attachments, + MVKArrayRef clearValues, bool isRenderingEntireAttachment, bool loadOverride) { MVKPixelFormats* pixFmts = _renderPass->getPixelFormats(); @@ -279,7 +279,7 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, - const MVKArrayRef attachments, + MVKArrayRef attachments, bool storeOverride) { if (!cmdEncoder->_mtlRenderEncoder) { return; } if (!_renderPass->getDevice()->_pMetalFeatures->deferredStoreActions) { return; } @@ -308,7 +308,7 @@ void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder, } void MVKRenderSubpass::populateClearAttachments(MVKClearAttachments& clearAtts, - const MVKArrayRef clearValues) { + MVKArrayRef clearValues) { uint32_t caCnt = getColorAttachmentCount(); for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { uint32_t attIdx = _colorAttachments[caIdx].attachment; @@ -394,7 +394,7 @@ MVKMTLFmtCaps MVKRenderSubpass::getRequiredFormatCapabilitiesForAttachmentAt(uin return caps; } -void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef attachments) { +void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef attachments) { MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); size_t raCnt = _resolveAttachments.size(); for (uint32_t raIdx = 0; raIdx < raCnt; raIdx++) { diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 8ea9f658..d26b53a4 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -479,18 +479,16 @@ std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 53 template struct MVKArrayRef { public: - constexpr const Type* begin() const { return _data; } - constexpr const Type* end() const { return &_data[_size]; } - constexpr const Type* data() const { return _data; } - constexpr Type* begin() { return _data; } - constexpr Type* end() { return &_data[_size]; } - constexpr Type* data() { return _data; } - constexpr const size_t size() const { return _size; } - constexpr const size_t byteSize() const { return _size * sizeof(Type); } - constexpr const Type& operator[]( const size_t i ) const { return _data[i]; } - constexpr Type& operator[]( const size_t i ) { return _data[i]; } + constexpr Type* begin() const { return _data; } + constexpr Type* end() const { return &_data[_size]; } + constexpr Type* data() const { return _data; } + constexpr size_t size() const { return _size; } + constexpr size_t 
byteSize() const { return _size * sizeof(Type); } + constexpr Type& operator[]( const size_t i ) const { return _data[i]; } constexpr MVKArrayRef() : MVKArrayRef(nullptr, 0) {} constexpr MVKArrayRef(Type* d, size_t s) : _data(d), _size(s) {} + template , bool> = true> + constexpr MVKArrayRef(MVKArrayRef other) : _data(other.data()), _size(other.size()) {} protected: Type* _data; diff --git a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h index 6294f913..b6e1277c 100755 --- a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h +++ b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h @@ -298,12 +298,12 @@ public: reverse_iterator rbegin() const { return reverse_iterator( end() ); } reverse_iterator rend() const { return reverse_iterator( begin() ); } - const MVKArrayRef contents() const { return MVKArrayRef(data(), size()); } - MVKArrayRef contents() { return MVKArrayRef(data(), size()); } + MVKArrayRef contents() const { return MVKArrayRef(data(), size()); } + MVKArrayRef< Type> contents() { return MVKArrayRef< Type>(data(), size()); } - const Type &operator[]( const size_t i ) const { return alc[i]; } + const Type &operator[]( const size_t i ) const { return alc[i]; } Type &operator[]( const size_t i ) { return alc[i]; } - const Type &at( const size_t i ) const { return alc[i]; } + const Type &at( const size_t i ) const { return alc[i]; } Type &at( const size_t i ) { return alc[i]; } const Type &front() const { return alc[0]; } Type &front() { return alc[0]; } From 3c75e114dd04636628cc212e947e420ed5415fca Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Thu, 5 Oct 2023 17:33:01 -0400 Subject: [PATCH 18/41] Add support for VK_EXT_extended_dynamic_state extension. - Add MVKRasterizingCommandEncoderState to consolidate handling of static and dynamic rasterizing states in a consistent manner. - Rework MVKDepthStencilCommandEncoderState to consolidate handling of static and dynamic depth states in a consistent manner. - MVKMTLDepthStencilDescriptorData clean up content setting, and struct layout. - Add MVKRenderStateType to enumerate render state types. - Add MVKRenderStateFlags to track binary info about states (enabled, dirty, etc). - Add MVKMTLBufferBinding::stride. - Add MVKPhysicalDeviceMetalFeatures::dynamicVertexStride. - Set MVKPhysicalDeviceMetalFeatures::vertexStrideAlignment to 1 for Apple5+ GPUs (unrelated). - Set VkPhysicalDeviceLimits::maxVertexInputBindingStride to unlimited for Apple2+ GPUs (unrelated). - Add mvkVkRect2DFromMTLScissorRect() and simplify mvkMTLViewportFromVkViewport() and mvkMTLScissorRectFromVkRect2D(). - MVKFoundation: - Add mvkEnableAllFlags() and mvkDisableAllFlags(). - Improve performance of mvkClear(), mvkCopy() & mvkAreEqual() when content is a single simple primitive type (unrelated). - Declare more functions as static constexpr (unrelated). 
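Among the changes listed above, `MVKRenderStateType` and `MVKRenderStateFlags` give the encoder a compact record of which render states have been set or dirtied. The MoltenVK implementation is not shown in this hunk; the following is a hedged sketch of the general bitflag pattern, using an invented subset of state names:

```cpp
#include <cstdint>

// Invented subset of render-state identifiers, standing in for MVKRenderStateType.
enum RenderState : uint32_t {
	CullMode = 0, FrontFace, DepthBias, BlendConstants, Viewports, Scissors,
	RenderStateCount
};

// Bitflag tracker in the spirit of MVKRenderStateFlags and the new
// mvkEnableAllFlags()/mvkDisableAllFlags() helpers; details may differ.
class RenderStateFlags {
public:
	void enable(RenderState s)          { _bits |=  bit(s); }
	void disable(RenderState s)         { _bits &= ~bit(s); }
	bool isEnabled(RenderState s) const { return (_bits & bit(s)) != 0; }
	void enableAll()                    { _bits = (1u << RenderStateCount) - 1u; }
	void disableAll()                   { _bits = 0u; }
private:
	static constexpr uint32_t bit(RenderState s) { return 1u << uint32_t(s); }
	uint32_t _bits = 0u;
};
```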
--- Docs/MoltenVK_Runtime_UserGuide.md | 1 + MoltenVK/MoltenVK/API/mvk_datatypes.h | 7 +- MoltenVK/MoltenVK/API/mvk_private_api.h | 1 + MoltenVK/MoltenVK/Commands/MVKCmdDraw.h | 6 +- MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm | 23 +- MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h | 160 ++++- .../MoltenVK/Commands/MVKCmdRenderPass.mm | 162 +++-- MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm | 5 +- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 38 +- .../MoltenVK/Commands/MVKCommandBuffer.mm | 115 ++-- .../Commands/MVKCommandEncoderState.h | 249 ++++--- .../Commands/MVKCommandEncoderState.mm | 626 ++++++++++-------- .../Commands/MVKCommandResourceFactory.h | 51 +- .../Commands/MVKCommandResourceFactory.mm | 15 +- .../MoltenVK/Commands/MVKCommandTypePools.def | 7 + .../Commands/MVKMTLResourceBindings.h | 7 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 12 +- .../GPUObjects/MVKDeviceFeatureStructs.def | 1 + MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 6 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 54 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 161 +++-- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 101 +-- MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm | 41 +- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 18 +- 24 files changed, 1095 insertions(+), 772 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 57773c27..27ed0c2e 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -363,6 +363,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_EXT_descriptor_indexing` *(initial release limited to Metal Tier 1: 96/128 textures, 16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using an Intel GPU, and if Metal argument buffers enabled in config)* +- `VK_EXT_extended_dynamic_state` *(requires Metal 3.1)* - `VK_EXT_external_memory_host` - `VK_EXT_fragment_shader_interlock` *(requires Metal 2.0 and Raster Order Groups)* - `VK_EXT_host_query_reset` diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h index 8e5670c9..4bbed323 100644 --- a/MoltenVK/MoltenVK/API/mvk_datatypes.h +++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h @@ -378,10 +378,13 @@ MTLMultisampleStencilResolveFilter mvkMTLMultisampleStencilResolveFilterFromVkRe #endif /** Returns the Metal MTLViewport corresponding to the specified Vulkan VkViewport. */ -MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport); +MTLViewport mvkMTLViewportFromVkViewport(const VkViewport vkViewport); /** Returns the Metal MTLScissorRect corresponding to the specified Vulkan VkRect2D. */ -MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect); +MTLScissorRect mvkMTLScissorRectFromVkRect2D(const VkRect2D vkRect); + +/** Returns the Vulkan VkRect2D corresponding to the specified Metal MTLScissorRect. */ +VkRect2D mvkVkRect2DFromMTLScissorRect(const MTLScissorRect mtlScissorRect); /** Returns the Metal MTLCompareFunction corresponding to the specified Vulkan VkCompareOp, */ MTLCompareFunction mvkMTLCompareFunctionFromVkCompareOp(VkCompareOp vkOp); diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 8ed5b754..af6c3ffc 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -151,6 +151,7 @@ typedef struct { MTLArgumentBuffersTier argumentBuffersTier; /**< The argument buffer tier available on this device, as a Metal enumeration. 
*/ VkBool32 needsSampleDrefLodArrayWorkaround; /**< If true, sampling from arrayed depth images with explicit LoD is broken and needs a workaround. */ VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */ + VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */ } MVKPhysicalDeviceMetalFeatures; /** diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h index c1e4a693..2b9696ab 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h @@ -37,10 +37,12 @@ class MVKCmdBindVertexBuffers : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - uint32_t startBinding, + uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets); + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides); void encode(MVKCommandEncoder* cmdEncoder) override; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index 20d27815..a1b71512 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -30,20 +30,23 @@ template VkResult MVKCmdBindVertexBuffers::setContent(MVKCommandBuffer* cmdBuff, - uint32_t startBinding, + uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) { - + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides) { MVKDevice* mvkDvc = cmdBuff->getDevice(); _bindings.clear(); // Clear for reuse _bindings.reserve(bindingCount); MVKMTLBufferBinding b; for (uint32_t bindIdx = 0; bindIdx < bindingCount; bindIdx++) { MVKBuffer* mvkBuffer = (MVKBuffer*)pBuffers[bindIdx]; - b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(startBinding + bindIdx); + b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(firstBinding + bindIdx); b.mtlBuffer = mvkBuffer->getMTLBuffer(); b.offset = mvkBuffer->getMTLBufferOffset() + pOffsets[bindIdx]; + b.size = pSizes ? (uint32_t)pSizes[bindIdx] : 0; + b.stride = pStrides ? 
(uint32_t)pStrides[bindIdx] : 0; _bindings.push_back(b); } @@ -296,13 +299,13 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() vertexStart: _firstVertex vertexCount: _vertexCount instanceCount: instanceCount baseInstance: _firstInstance]; } else { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() vertexStart: _firstVertex vertexCount: _vertexCount instanceCount: instanceCount]; @@ -530,7 +533,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer @@ -539,7 +542,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { baseVertex: _vertexOffset baseInstance: _firstInstance]; } else { - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer @@ -925,7 +928,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->_graphicsResourcesState.beginMetalRenderPass(); cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass(); } else { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() indirectBuffer: mtlIndBuff indirectBufferOffset: mtlIndBuffOfst]; mtlIndBuffOfst += needsInstanceAdjustment ? 
sizeof(MTLDrawPrimitivesIndirectArguments) : _mtlIndirectBufferStride; @@ -1312,7 +1315,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass(); } else { cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _directCmdFirstInstance); - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer indexBufferOffset: ibb.offset diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h index a74657b2..1e3bae5c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h @@ -352,10 +352,64 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - float _red; - float _green; - float _blue; - float _alpha; + float _blendConstants[4] = {}; +}; + + +#pragma mark - +#pragma mark MVKCmdSetDepthTestEnable + +/** Vulkan command to dynamically enable depth testing. */ +class MVKCmdSetDepthTestEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthTestEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _depthTestEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetDepthWriteEnable + +/** Vulkan command to dynamically enable depth writing. */ +class MVKCmdSetDepthWriteEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthWriteEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _depthWriteEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetDepthCompareOp + +/** Vulkan command to dynamically set the depth compare operation. */ +class MVKCmdSetDepthCompareOp : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkCompareOp depthCompareOp); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkCompareOp _depthCompareOp; }; @@ -380,6 +434,71 @@ protected: }; +#pragma mark - +#pragma mark MVKCmdSetDepthBoundsTestEnable + +/** Vulkan command to enable depth bounds testing. */ +class MVKCmdSetDepthBoundsTestEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthBoundsTestEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + bool _depthBoundsTestEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetStencilTestEnable + +/** Vulkan command to dynamically enable stencil testing. */ +class MVKCmdSetStencilTestEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 stencilTestEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _stencilTestEnable; +}; + + +#pragma mark - +#pragma mark MVKCmdSetStencilOp + +/** Vulkan command to dynamically set the stencil operations. 
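These command classes correspond one-to-one with the `vkCmdSet*EXT` entry points of `VK_EXT_extended_dynamic_state`. For orientation, a sketch of how an application records them; it assumes the extension is enabled and that the bound pipeline lists the matching `VK_DYNAMIC_STATE_*_EXT` values in its `VkPipelineDynamicStateCreateInfo` (that setup is elided):

```cpp
#include <vulkan/vulkan.h>

void recordExtendedDynamicState(VkCommandBuffer cmdBuf) {
	vkCmdSetCullModeEXT(cmdBuf, VK_CULL_MODE_BACK_BIT);
	vkCmdSetFrontFaceEXT(cmdBuf, VK_FRONT_FACE_COUNTER_CLOCKWISE);
	vkCmdSetPrimitiveTopologyEXT(cmdBuf, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);

	vkCmdSetDepthTestEnableEXT(cmdBuf, VK_TRUE);
	vkCmdSetDepthWriteEnableEXT(cmdBuf, VK_TRUE);
	vkCmdSetDepthCompareOpEXT(cmdBuf, VK_COMPARE_OP_LESS_OR_EQUAL);

	vkCmdSetStencilTestEnableEXT(cmdBuf, VK_FALSE);
	vkCmdSetStencilOpEXT(cmdBuf, VK_STENCIL_FACE_FRONT_AND_BACK,
	                     VK_STENCIL_OP_KEEP, VK_STENCIL_OP_REPLACE,
	                     VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS);
}
```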
*/ +class MVKCmdSetStencilOp : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkStencilFaceFlags _faceMask; + VkStencilOp _failOp; + VkStencilOp _passOp; + VkStencilOp _depthFailOp; + VkCompareOp _compareOp; +}; + + #pragma mark - #pragma mark MVKCmdSetStencilCompareMask @@ -446,10 +565,7 @@ protected: #pragma mark - #pragma mark MVKCmdSetCullMode -/** - * Vulkan command to dynamically set the cull mode. Originally from VK_EXT_extended_dynamic_state, - * but also part of Vulkan 1.3. - */ +/** Vulkan command to dynamically set the cull mode. */ class MVKCmdSetCullMode : public MVKCommand { public: @@ -461,17 +577,14 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - MTLCullMode _cullMode; + VkCullModeFlags _cullMode; }; #pragma mark - #pragma mark MVKCmdSetFrontFace -/** - * Vulkan command to dynamically set the front facing winding order. Originally from - * VK_EXT_extended_dynamic_state, but also part of Vulkan 1.3. - */ +/** Vulkan command to dynamically set the front facing winding order. */ class MVKCmdSetFrontFace : public MVKCommand { public: @@ -483,6 +596,25 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - MTLWinding _frontFace; + VkFrontFace _frontFace; +}; + + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveTopology + +/** Vulkan command to dynamically set the primitive topology. */ +class MVKCmdSetPrimitiveTopology : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkPrimitiveTopology primitiveTopology); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkPrimitiveTopology _primitiveTopology; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm index fbb3292c..b5befc3a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm @@ -278,7 +278,7 @@ VkResult MVKCmdSetViewport::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdSetViewport::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_viewportState.setViewports(_viewports.contents(), _firstViewport, true); + cmdEncoder->_rasterizingState.setViewports(_viewports.contents(), _firstViewport, true); } template class MVKCmdSetViewport<1>; @@ -305,7 +305,7 @@ VkResult MVKCmdSetScissor::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdSetScissor::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_scissorState.setScissors(_scissors.contents(), _firstScissor, true); + cmdEncoder->_rasterizingState.setScissors(_scissors.contents(), _firstScissor, true); } template class MVKCmdSetScissor<1>; @@ -345,9 +345,9 @@ VkResult MVKCmdSetDepthBias::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetDepthBias::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_depthBiasState.setDepthBias(_depthBiasConstantFactor, - _depthBiasSlopeFactor, - _depthBiasClamp); + cmdEncoder->_rasterizingState.setDepthBias(_depthBiasConstantFactor, + _depthBiasSlopeFactor, + _depthBiasClamp); } @@ -356,16 +356,54 @@ void MVKCmdSetDepthBias::encode(MVKCommandEncoder* cmdEncoder) { VkResult 
MVKCmdSetBlendConstants::setContent(MVKCommandBuffer* cmdBuff, const float blendConst[4]) { - _red = blendConst[0]; - _green = blendConst[1]; - _blue = blendConst[2]; - _alpha = blendConst[3]; - + mvkCopy(_blendConstants, blendConst, 4); return VK_SUCCESS; } void MVKCmdSetBlendConstants::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_blendColorState.setBlendColor(_red, _green, _blue, _alpha, true); + cmdEncoder->_rasterizingState.setBlendConstants(_blendConstants, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetDepthTestEnable + +VkResult MVKCmdSetDepthTestEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthTestEnable) { + _depthTestEnable = depthTestEnable; + return VK_SUCCESS; +} + +void MVKCmdSetDepthTestEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setDepthTestEnable(_depthTestEnable); +} + + +#pragma mark - +#pragma mark MVKCmdSetDepthWriteEnable + +VkResult MVKCmdSetDepthWriteEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthWriteEnable) { + _depthWriteEnable = depthWriteEnable; + return VK_SUCCESS; +} + +void MVKCmdSetDepthWriteEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setDepthWriteEnable(_depthWriteEnable); +} + + +#pragma mark - +#pragma mark MVKCmdSetDepthCompareOp + +VkResult MVKCmdSetDepthCompareOp::setContent(MVKCommandBuffer* cmdBuff, + VkCompareOp depthCompareOp) { + _depthCompareOp = depthCompareOp; + return VK_SUCCESS; +} + +void MVKCmdSetDepthCompareOp::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setDepthCompareOp(_depthCompareOp); } @@ -389,6 +427,60 @@ VkResult MVKCmdSetDepthBounds::setContent(MVKCommandBuffer* cmdBuff, void MVKCmdSetDepthBounds::encode(MVKCommandEncoder* cmdEncoder) {} +#pragma mark - +#pragma mark MVKCmdSetDepthBoundsTestEnable + +VkResult MVKCmdSetDepthBoundsTestEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthBoundsTestEnable) { + _depthBoundsTestEnable = static_cast(depthBoundsTestEnable); + + // Validate + if (cmdBuff->getDevice()->_enabledFeatures.depthBounds) { + return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetDepthBoundsTestEnable(): The current device does not support testing depth bounds."); + } + + return VK_SUCCESS; +} + +void MVKCmdSetDepthBoundsTestEnable::encode(MVKCommandEncoder* cmdEncoder) {} + + +#pragma mark - +#pragma mark MVKCmdSetStencilTestEnable + +VkResult MVKCmdSetStencilTestEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 stencilTestEnable) { + _stencilTestEnable = stencilTestEnable; + return VK_SUCCESS; +} + +void MVKCmdSetStencilTestEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setStencilTestEnable(_stencilTestEnable); +} + + +#pragma mark - +#pragma mark MVKCmdSetStencilOp + +VkResult MVKCmdSetStencilOp::setContent(MVKCommandBuffer* cmdBuff, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + _faceMask = faceMask; + _failOp = failOp; + _passOp = passOp; + _depthFailOp = depthFailOp; + _compareOp = compareOp; + return VK_SUCCESS; +} + +void MVKCmdSetStencilOp::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_depthStencilState.setStencilOp(_faceMask, _failOp, _passOp, _depthFailOp, _compareOp); +} + + #pragma mark - #pragma mark MVKCmdSetStencilCompareMask @@ -436,7 +528,7 @@ VkResult MVKCmdSetStencilReference::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetStencilReference::encode(MVKCommandEncoder* cmdEncoder) { - 
cmdEncoder->_stencilReferenceValueState.setReferenceValues(_faceMask, _stencilReference); + cmdEncoder->_rasterizingState.setStencilReferenceValues(_faceMask, _stencilReference); } @@ -445,29 +537,12 @@ void MVKCmdSetStencilReference::encode(MVKCommandEncoder* cmdEncoder) { VkResult MVKCmdSetCullMode::setContent(MVKCommandBuffer* cmdBuff, VkCullModeFlags cullMode) { - switch (cullMode) { - case VK_CULL_MODE_NONE: { - _cullMode = MTLCullModeNone; - break; - } - case VK_CULL_MODE_FRONT_BIT: { - _cullMode = MTLCullModeFront; - break; - } - case VK_CULL_MODE_BACK_BIT: { - _cullMode = MTLCullModeBack; - break; - } - case VK_CULL_MODE_FRONT_AND_BACK: { - // Metal doesn't have a equivalent to this... - } - } - - return VK_SUCCESS; + _cullMode = cullMode; + return VK_SUCCESS; } void MVKCmdSetCullMode::encode(MVKCommandEncoder* cmdEncoder) { - [((id)cmdEncoder->getMTLEncoder()) setCullMode:_cullMode]; + cmdEncoder->_rasterizingState.setCullMode(_cullMode, true); } @@ -476,14 +551,25 @@ void MVKCmdSetCullMode::encode(MVKCommandEncoder* cmdEncoder) { VkResult MVKCmdSetFrontFace::setContent(MVKCommandBuffer* cmdBuff, VkFrontFace frontFace) { - _frontFace = frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE - ? MTLWindingClockwise - : MTLWindingCounterClockwise; - - return VK_SUCCESS; + _frontFace = frontFace; + return VK_SUCCESS; } void MVKCmdSetFrontFace::encode(MVKCommandEncoder* cmdEncoder) { - [((id)cmdEncoder->getMTLEncoder()) setFrontFacingWinding:_frontFace]; + cmdEncoder->_rasterizingState.setFrontFace(_frontFace, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveTopology + +VkResult MVKCmdSetPrimitiveTopology::setContent(MVKCommandBuffer* cmdBuff, + VkPrimitiveTopology primitiveTopology) { + _primitiveTopology = primitiveTopology; + return VK_SUCCESS; +} + +void MVKCmdSetPrimitiveTopology::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_rasterizingState.setPrimitiveTopology(_primitiveTopology, true); } diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 1c38d15a..5ae4fee3 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -1507,10 +1507,7 @@ void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { // Return to the previous rendering state on the next render activity cmdEncoder->_graphicsPipelineState.markDirty(); cmdEncoder->_depthStencilState.markDirty(); - cmdEncoder->_stencilReferenceValueState.markDirty(); - cmdEncoder->_depthBiasState.markDirty(); - cmdEncoder->_viewportState.markDirty(); - cmdEncoder->_scissorState.markDirty(); + cmdEncoder->_rasterizingState.markDirty(); } template diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 07b4c202..cc1d7539 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -300,14 +300,11 @@ public: /** Encodes an operation to signal an event to a status. */ void signalEvent(MVKEvent* mvkEvent, bool status); - /** - * If a pipeline is currently bound, returns whether the current pipeline permits dynamic - * setting of the specified state. If no pipeline is currently bound, returns true. - */ - bool supportsDynamicState(VkDynamicState state); + /** Clips the rect to ensure it fits inside the render area. */ + VkRect2D clipToRenderArea(VkRect2D rect); /** Clips the scissor to ensure it fits inside the render area. 
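With the switch/ternary translation removed from the commands above, the Vulkan enums are now stored verbatim and the Metal translation presumably happens inside `MVKRasterizingCommandEncoderState` at encode time. A hedged sketch of that mapping, with invented helper names, mirroring the mapping the removed code used:

```cpp
#include <Metal/Metal.h>
#include <vulkan/vulkan.h>

// Metal has no MTLCullMode for VK_CULL_MODE_FRONT_AND_BACK, so that case is
// flagged separately (compare the _cullBothFaces member added to
// MVKRasterizingCommandEncoderState in this patch) and handled elsewhere.
static MTLCullMode mtlCullModeFromVk(VkCullModeFlags vkCull, bool& cullsBothFaces) {
	cullsBothFaces = (vkCull == VK_CULL_MODE_FRONT_AND_BACK);
	switch (vkCull) {
		case VK_CULL_MODE_FRONT_BIT: return MTLCullModeFront;
		case VK_CULL_MODE_BACK_BIT:  return MTLCullModeBack;
		default:                     return MTLCullModeNone;
	}
}

// Mirrors the winding flip performed by the MVKCmdSetFrontFace code removed above.
static MTLWinding mtlWindingFromVk(VkFrontFace vkFrontFace) {
	return (vkFrontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? MTLWindingClockwise
	                                                        : MTLWindingCounterClockwise;
}
```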
*/ - VkRect2D clipToRenderArea(VkRect2D scissor); + MTLScissorRect clipToRenderArea(MTLScissorRect scissor); /** Called by each graphics draw command to establish any outstanding state just prior to performing the draw. */ void finalizeDrawState(MVKGraphicsStage stage); @@ -437,35 +434,20 @@ public: /** Tracks the current graphics pipeline bound to the encoder. */ MVKPipelineCommandEncoderState _graphicsPipelineState; + /** Tracks the current graphics resources state of the encoder. */ + MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState; + /** Tracks the current compute pipeline bound to the encoder. */ MVKPipelineCommandEncoderState _computePipelineState; - /** Tracks the current viewport state of the encoder. */ - MVKViewportCommandEncoderState _viewportState; - - /** Tracks the current scissor state of the encoder. */ - MVKScissorCommandEncoderState _scissorState; - - /** Tracks the current depth bias state of the encoder. */ - MVKDepthBiasCommandEncoderState _depthBiasState; - - /** Tracks the current blend color state of the encoder. */ - MVKBlendColorCommandEncoderState _blendColorState; + /** Tracks the current compute resources state of the encoder. */ + MVKComputeResourcesCommandEncoderState _computeResourcesState; /** Tracks the current depth stencil state of the encoder. */ MVKDepthStencilCommandEncoderState _depthStencilState; - /** Tracks the current stencil reference value state of the encoder. */ - MVKStencilReferenceValueCommandEncoderState _stencilReferenceValueState; - - /** Tracks the current graphics resources state of the encoder. */ - MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState; - - /** Tracks the current compute resources state of the encoder. */ - MVKComputeResourcesCommandEncoderState _computeResourcesState; - - /** The type of primitive that will be rendered. */ - MTLPrimitiveType _mtlPrimitiveType; + /** Tracks the current rasterizing states of the encoder. */ + MVKRasterizingCommandEncoderState _rasterizingState; /** The size of the threadgroup for the compute shader. 
*/ MTLSize _mtlThreadgroupSize; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 5f32996e..d640730c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -607,16 +607,12 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { _graphicsPipelineState.beginMetalRenderPass(); _graphicsResourcesState.beginMetalRenderPass(); - _viewportState.beginMetalRenderPass(); - _scissorState.beginMetalRenderPass(); - _depthBiasState.beginMetalRenderPass(); - _blendColorState.beginMetalRenderPass(); + _depthStencilState.beginMetalRenderPass(); + _rasterizingState.beginMetalRenderPass(); _vertexPushConstants.beginMetalRenderPass(); _tessCtlPushConstants.beginMetalRenderPass(); _tessEvalPushConstants.beginMetalRenderPass(); _fragmentPushConstants.beginMetalRenderPass(); - _depthStencilState.beginMetalRenderPass(); - _stencilReferenceValueState.beginMetalRenderPass(); _occlusionQueryState.beginMetalRenderPass(); } @@ -706,24 +702,23 @@ void MVKCommandEncoder::signalEvent(MVKEvent* mvkEvent, bool status) { mvkEvent->encodeSignal(_mtlCmdBuffer, status); } -bool MVKCommandEncoder::supportsDynamicState(VkDynamicState state) { - MVKGraphicsPipeline* gpl = (MVKGraphicsPipeline*)_graphicsPipelineState.getPipeline(); - return !gpl || gpl->supportsDynamicState(state); +VkRect2D MVKCommandEncoder::clipToRenderArea(VkRect2D rect) { + + uint32_t raLeft = max(_renderArea.offset.x, 0); + uint32_t raRight = raLeft + _renderArea.extent.width; + uint32_t raBottom = max(_renderArea.offset.y, 0); + uint32_t raTop = raBottom + _renderArea.extent.height; + + rect.offset.x = mvkClamp(rect.offset.x, raLeft, max(raRight - 1, raLeft)); + rect.offset.y = mvkClamp(rect.offset.y, raBottom, max(raTop - 1, raBottom)); + rect.extent.width = min(rect.extent.width, raRight - rect.offset.x); + rect.extent.height = min(rect.extent.height, raTop - rect.offset.y); + + return rect; } -VkRect2D MVKCommandEncoder::clipToRenderArea(VkRect2D scissor) { - - int32_t raLeft = _renderArea.offset.x; - int32_t raRight = raLeft + _renderArea.extent.width; - int32_t raBottom = _renderArea.offset.y; - int32_t raTop = raBottom + _renderArea.extent.height; - - scissor.offset.x = mvkClamp(scissor.offset.x, raLeft, max(raRight - 1, raLeft)); - scissor.offset.y = mvkClamp(scissor.offset.y, raBottom, max(raTop - 1, raBottom)); - scissor.extent.width = min(scissor.extent.width, raRight - scissor.offset.x); - scissor.extent.height = min(scissor.extent.height, raTop - scissor.offset.y); - - return scissor; +MTLScissorRect MVKCommandEncoder::clipToRenderArea(MTLScissorRect scissor) { + return mvkMTLScissorRectFromVkRect2D(clipToRenderArea(mvkVkRect2DFromMTLScissorRect(scissor))); } void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) { @@ -733,16 +728,12 @@ void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) { } _graphicsPipelineState.encode(stage); // Must do first..it sets others _graphicsResourcesState.encode(stage); // Before push constants, to allow them to override. 
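The new `clipToRenderArea(MTLScissorRect)` above round-trips through `mvkVkRect2DFromMTLScissorRect()` and `mvkMTLScissorRectFromVkRect2D()` rather than duplicating the clamping math. Their implementations are not part of this hunk; a hedged sketch of what such conversions plausibly look like:

```cpp
#include <algorithm>
#include <cstdint>
#include <Metal/Metal.h>
#include <vulkan/vulkan.h>

// Plausible shape of the conversions; the shipped mvk_datatypes.mm versions may differ.
static MTLScissorRect scissorRectFromVkRect2D(const VkRect2D rect) {
	MTLScissorRect sr;
	sr.x      = (NSUInteger)std::max(rect.offset.x, (int32_t)0);   // MTLScissorRect is unsigned
	sr.y      = (NSUInteger)std::max(rect.offset.y, (int32_t)0);
	sr.width  = rect.extent.width;
	sr.height = rect.extent.height;
	return sr;
}

static VkRect2D vkRect2DFromScissorRect(const MTLScissorRect sr) {
	return { { (int32_t)sr.x, (int32_t)sr.y },
	         { (uint32_t)sr.width, (uint32_t)sr.height } };
}
```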
- _viewportState.encode(stage); - _scissorState.encode(stage); - _depthBiasState.encode(stage); - _blendColorState.encode(stage); + _depthStencilState.encode(stage); + _rasterizingState.encode(stage); _vertexPushConstants.encode(stage); _tessCtlPushConstants.encode(stage); _tessEvalPushConstants.encode(stage); _fragmentPushConstants.encode(stage); - _depthStencilState.encode(stage); - _stencilReferenceValueState.encode(stage); _occlusionQueryState.encode(stage); } @@ -831,16 +822,12 @@ void MVKCommandEncoder::endMetalRenderEncoding() { _graphicsPipelineState.endMetalRenderPass(); _graphicsResourcesState.endMetalRenderPass(); - _viewportState.endMetalRenderPass(); - _scissorState.endMetalRenderPass(); - _depthBiasState.endMetalRenderPass(); - _blendColorState.endMetalRenderPass(); + _depthStencilState.endMetalRenderPass(); + _rasterizingState.endMetalRenderPass(); _vertexPushConstants.endMetalRenderPass(); _tessCtlPushConstants.endMetalRenderPass(); _tessEvalPushConstants.endMetalRenderPass(); _fragmentPushConstants.endMetalRenderPass(); - _depthStencilState.endMetalRenderPass(); - _stencilReferenceValueState.endMetalRenderPass(); _occlusionQueryState.endMetalRenderPass(); } @@ -1131,39 +1118,35 @@ void MVKCommandEncoder::finishQueries() { MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer, MVKPrefillMetalCommandBuffersStyle prefillStyle) : MVKBaseDeviceObject(cmdBuffer->getDevice()), - _cmdBuffer(cmdBuffer), - _graphicsPipelineState(this), - _computePipelineState(this), - _viewportState(this), - _scissorState(this), - _depthBiasState(this), - _blendColorState(this), - _depthStencilState(this), - _stencilReferenceValueState(this), - _graphicsResourcesState(this), - _computeResourcesState(this), - _vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT), - _tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT), - _tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT), - _fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT), - _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT), - _occlusionQueryState(this), - _prefillStyle(prefillStyle){ + _cmdBuffer(cmdBuffer), + _graphicsPipelineState(this), + _graphicsResourcesState(this), + _computePipelineState(this), + _computeResourcesState(this), + _depthStencilState(this), + _rasterizingState(this), + _vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT), + _tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT), + _tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT), + _fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT), + _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT), + _occlusionQueryState(this), + _prefillStyle(prefillStyle){ - _pDeviceFeatures = &_device->_enabledFeatures; - _pDeviceMetalFeatures = _device->_pMetalFeatures; - _pDeviceProperties = _device->_pProperties; - _pDeviceMemoryProperties = _device->_pMemoryProperties; - _pActivatedQueries = nullptr; - _mtlCmdBuffer = nil; - _mtlRenderEncoder = nil; - _mtlComputeEncoder = nil; - _mtlComputeEncoderUse = kMVKCommandUseNone; - _mtlBlitEncoder = nil; - _mtlBlitEncoderUse = kMVKCommandUseNone; - _pEncodingContext = nullptr; - _stageCountersMTLFence = nil; - _flushCount = 0; + _pDeviceFeatures = &_device->_enabledFeatures; + _pDeviceMetalFeatures = _device->_pMetalFeatures; + _pDeviceProperties = _device->_pProperties; + _pDeviceMemoryProperties = _device->_pMemoryProperties; + _pActivatedQueries = nullptr; + _mtlCmdBuffer = nil; + _mtlRenderEncoder = nil; + _mtlComputeEncoder = nil; + 
_mtlComputeEncoderUse = kMVKCommandUseNone; + _mtlBlitEncoder = nil; + _mtlBlitEncoderUse = kMVKCommandUseNone; + _pEncodingContext = nullptr; + _stageCountersMTLFence = nil; + _flushCount = 0; } MVKCommandEncoder::~MVKCommandEncoder() { diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 6dbeb647..c8919538 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -21,6 +21,7 @@ #include "MVKMTLResourceBindings.h" #include "MVKCommandResourceFactory.h" #include "MVKDevice.h" +#include "MVKPipeline.h" #include "MVKDescriptor.h" #include "MVKSmallVector.h" #include "MVKBitArray.h" @@ -81,7 +82,7 @@ public: /** * If the content of this instance is dirty, marks this instance as no longer dirty * and calls the encodeImpl() function to encode the content onto the Metal encoder. - * Marking dirty is done in advance so that subclass encodeImpl() implementations + * Marking clean is done in advance so that subclass encodeImpl() implementations * can override to leave this instance in a dirty state. * Subclasses must override the encodeImpl() function to do the actual work. */ @@ -96,8 +97,18 @@ public: MVKCommandEncoderState(MVKCommandEncoder* cmdEncoder) : _cmdEncoder(cmdEncoder) {} protected: - virtual void encodeImpl(uint32_t stage) = 0; + enum StateScope { + Static = 0, + Dynamic, + Count + }; + + virtual void encodeImpl(uint32_t stage) = 0; MVKDevice* getDevice(); + bool isDynamicState(MVKRenderStateType state); + template T& getContent(T* iVarAry, MVKRenderStateType state) { + return iVarAry[isDynamicState(state) ? StateScope::Dynamic : StateScope::Static]; + } MVKCommandEncoder* _cmdEncoder; bool _isDirty = false; @@ -130,62 +141,6 @@ protected: }; -#pragma mark - -#pragma mark MVKViewportCommandEncoderState - -/** Holds encoder state established by viewport commands. */ -class MVKViewportCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** - * Sets one or more of the viewports, starting at the first index. - * The isSettingDynamically indicates that the scissor is being changed dynamically, - * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. - */ - void setViewports(MVKArrayRef viewports, - uint32_t firstViewport, - bool isSettingDynamically); - - /** Constructs this instance for the specified command encoder. */ - MVKViewportCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - MVKSmallVector _viewports, _dynamicViewports; -}; - - -#pragma mark - -#pragma mark MVKScissorCommandEncoderState - -/** Holds encoder state established by viewport commands. */ -class MVKScissorCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** - * Sets one or more of the scissors, starting at the first index. - * The isSettingDynamically indicates that the scissor is being changed dynamically, - * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. - */ - void setScissors(MVKArrayRef scissors, - uint32_t firstScissor, - bool isSettingDynamically); - - /** Constructs this instance for the specified command encoder. 
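The new `StateScope` enum and `getContent()` template above let one encoder-state object keep two copies of each piece of content, the value baked into the pipeline (Static) and the value most recently set by a `vkCmdSet*` command (Dynamic), choosing between them per `MVKRenderStateType`. A stripped-down sketch of that pattern, with simplified names:

```cpp
enum StateScope { Static = 0, Dynamic, ScopeCount };

// Stripped-down stand-in; the real encoder state consults the bound pipeline's
// dynamic-state list (via isDynamicState()) instead of a boolean parameter.
template <typename T>
struct ScopedContent {
	T values[ScopeCount] = {};

	// The pipeline's value lands in Static; vkCmdSet* values land in Dynamic.
	void set(const T& value, bool isDynamic) {
		values[isDynamic ? Dynamic : Static] = value;
	}
	// At encode time, the Dynamic copy wins only for states the pipeline declared dynamic.
	const T& get(bool stateIsDynamic) const {
		return values[stateIsDynamic ? Dynamic : Static];
	}
};
```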
*/ - MVKScissorCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - MVKSmallVector _scissors, _dynamicScissors; -}; - - #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -226,16 +181,29 @@ public: /** Sets the depth stencil state during pipeline binding. */ void setDepthStencilState(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo); - /** - * Sets the stencil compare mask value of the indicated faces - * to the specified value, from explicit dynamic command. - */ + /** Enables or disables depth testing, from explicit dynamic command. */ + void setDepthTestEnable(VkBool32 depthTestEnable); + + /** Enables or disables depth writing, from explicit dynamic command. */ + void setDepthWriteEnable(VkBool32 depthWriteEnable); + + /** Sets the depth compare operation, from explicit dynamic command. */ + void setDepthCompareOp(VkCompareOp depthCompareOp); + + /** Enables or disables stencil testing, from explicit dynamic command. */ + void setStencilTestEnable(VkBool32 stencilTestEnable); + + /** Sets the stencil operations of the indicated faces from explicit dynamic command. */ + void setStencilOp(VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp); + + /** Sets the stencil compare mask value of the indicated faces from explicit dynamic command. */ void setStencilCompareMask(VkStencilFaceFlags faceMask, uint32_t stencilCompareMask); - /** - * Sets the stencil write mask value of the indicated faces - * to the specified value, from explicit dynamic command. - */ + /** Sets the stencil write mask value of the indicated faces from explicit dynamic command. */ void setStencilWriteMask(VkStencilFaceFlags faceMask, uint32_t stencilWriteMask); void beginMetalRenderPass() override; @@ -246,96 +214,105 @@ public: protected: void encodeImpl(uint32_t stage) override; - void setStencilState(MVKMTLStencilDescriptorData& stencilInfo, - const VkStencilOpState& vkStencil, - bool enabled); + MVKMTLDepthStencilDescriptorData& getData(MVKRenderStateType state) { return getContent(_depthStencilData, state); } + template void setContent(T& content, T value) { + if (content != value) { + content = value; + markDirty(); + } + } + void setStencilState(MVKMTLStencilDescriptorData& sData, const VkStencilOpState& vkStencil); + void setStencilOp(MVKMTLStencilDescriptorData& sData, VkStencilOp failOp, + VkStencilOp passOp, VkStencilOp depthFailOp, VkCompareOp compareOp); - MVKMTLDepthStencilDescriptorData _depthStencilData = kMVKMTLDepthStencilDescriptorDataDefault; + MVKMTLDepthStencilDescriptorData _depthStencilData[StateScope::Count]; + bool _depthTestEnabled[StateScope::Count]; bool _hasDepthAttachment = false; bool _hasStencilAttachment = false; }; #pragma mark - -#pragma mark MVKStencilReferenceValueCommandEncoderState +#pragma mark MVKRasterizingCommandEncoderState -/** Holds encoder state established by stencil reference values commands. */ -class MVKStencilReferenceValueCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** Sets the stencil references during pipeline binding. */ - void setReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo); - - /** Sets the stencil state from explicit dynamic command. */ - void setReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference); - - /** Constructs this instance for the specified command encoder. 
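The depth/stencil state above now takes explicit dynamic setters (`setDepthTestEnable()`, `setDepthWriteEnable()`, `setDepthCompareOp()`, and so on), even though Metal's `MTLDepthStencilDescriptor` has no separate test-enable switch. A hedged sketch of the standard Metal idiom for a disabled depth test, not a copy of MoltenVK's encoder code:

```cpp
#import <Metal/Metal.h>

// A disabled depth test maps to an always-pass compare with writes forced off;
// Vulkan likewise ignores depth writes when depth testing is disabled.
static id<MTLDepthStencilState> makeDepthStencilState(id<MTLDevice> device,
                                                      bool depthTestEnable,
                                                      bool depthWriteEnable,
                                                      MTLCompareFunction depthCompareOp) {
	MTLDepthStencilDescriptor* desc = [MTLDepthStencilDescriptor new];
	desc.depthCompareFunction = depthTestEnable ? depthCompareOp : MTLCompareFunctionAlways;
	desc.depthWriteEnabled    = depthTestEnable && depthWriteEnable;
	return [device newDepthStencilStateWithDescriptor: desc];
}
```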
*/ - MVKStencilReferenceValueCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - uint32_t _frontFaceValue = 0; - uint32_t _backFaceValue = 0; +struct MVKDepthBias { + float depthBiasConstantFactor; + float depthBiasSlopeFactor; + float depthBiasClamp; }; - -#pragma mark - -#pragma mark MVKDepthBiasCommandEncoderState - -/** Holds encoder state established by depth bias commands. */ -class MVKDepthBiasCommandEncoderState : public MVKCommandEncoderState { - -public: - - /** Sets the depth bias during pipeline binding. */ - void setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo); - - /** Sets the depth bias dynamically. */ - void setDepthBias(float depthBiasConstantFactor, - float depthBiasSlopeFactor, - float depthBiasClamp); - - /** Constructs this instance for the specified command encoder. */ - MVKDepthBiasCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} - -protected: - void encodeImpl(uint32_t stage) override; - - float _depthBiasConstantFactor = 0; - float _depthBiasClamp = 0; - float _depthBiasSlopeFactor = 0; - bool _isEnabled = false; +struct MVKStencilReference { + uint32_t frontFaceValue; + uint32_t backFaceValue; }; +struct MVKMTLViewports { + MTLViewport viewports[kMVKMaxViewportScissorCount]; + uint32_t viewportCount; +}; -#pragma mark - -#pragma mark MVKBlendColorCommandEncoderState - -/** Holds encoder state established by blend color commands. */ -class MVKBlendColorCommandEncoderState : public MVKCommandEncoderState { +struct MVKMTLScissors { + MTLScissorRect scissors[kMVKMaxViewportScissorCount]; + uint32_t scissorCount; +}; +/** Holds encoder state established by various state commands. */ +class MVKRasterizingCommandEncoderState : public MVKCommandEncoderState { public: + void setCullMode(VkCullModeFlags cullMode, bool isDynamic); - /** Sets the blend color, either as part of pipeline binding, or dynamically. */ - void setBlendColor(float red, float green, - float blue, float alpha, - bool isDynamic); + void setFrontFace(VkFrontFace frontFace, bool isDynamic); - /** Constructs this instance for the specified command encoder. 
*/ - MVKBlendColorCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} + void setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic); + MTLPrimitiveType getPrimitiveType(); + + void setPolygonMode(VkPolygonMode polygonMode, bool isDynamic); + + void setBlendConstants(float blendConstants[4], bool isDynamic); + + void setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo); + void setDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor, float depthBiasClamp); + void setDepthBiasEnable(VkBool32 depthBiasEnable); + void setDepthClipEnable(bool depthClip, bool isDynamic); + + void setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo); + void setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference); + + void setViewports(const MVKArrayRef viewports, uint32_t firstViewport, bool isDynamic); + void setScissors(const MVKArrayRef scissors, uint32_t firstScissor, bool isDynamic); + + void beginMetalRenderPass() override; + + MVKRasterizingCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} protected: - void encodeImpl(uint32_t stage) override; + void encodeImpl(uint32_t stage) override; + bool isDirty(MVKRenderStateType state); + bool isDrawingTriangles(); + template void setContent(T* iVarAry, T* pVal, MVKRenderStateType state, bool isDynamic) { + auto* pIVar = &iVarAry[isDynamic ? StateScope::Dynamic : StateScope::Static]; + if( !mvkAreEqual(pVal, pIVar) ) { + *pIVar = *pVal; + _dirtyStates.enable(state); + _modifiedStates.enable(state); + markDirty(); + } + } - float _red = 0; - float _green = 0; - float _blue = 0; - float _alpha = 0; + MVKMTLViewports _mtlViewports[StateScope::Count] = {}; + MVKMTLScissors _mtlScissors[StateScope::Count] = {}; + MVKColor32 _mtlBlendConstants[StateScope::Count] = {}; + MVKDepthBias _mtlDepthBias[StateScope::Count] = {}; + MVKStencilReference _mtlStencilReference[StateScope::Count] = {}; + MTLCullMode _mtlCullMode[StateScope::Count] = { MTLCullModeNone, MTLCullModeNone }; + MTLWinding _mtlFrontFace[StateScope::Count] = { MTLWindingClockwise, MTLWindingClockwise }; + MTLPrimitiveType _mtlPrimitiveTopology[StateScope::Count] = { MTLPrimitiveTypePoint, MTLPrimitiveTypePoint }; + MTLDepthClipMode _mtlDepthClipEnable[StateScope::Count] = { MTLDepthClipModeClip, MTLDepthClipModeClip }; + MTLTriangleFillMode _mtlPolygonMode[StateScope::Count] = { MTLTriangleFillModeFill, MTLTriangleFillModeFill }; + MVKRenderStateFlags _dirtyStates; + MVKRenderStateFlags _modifiedStates; + bool _mtlDepthBiasEnable[StateScope::Count] = {}; + bool _cullBothFaces[StateScope::Count] = {}; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 37f0194f..828d7c09 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -25,13 +25,21 @@ using namespace std; +#define shouldUpdateFace(face) mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_##face##_BIT) + #pragma mark - #pragma mark MVKCommandEncoderState MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEncoder->getVulkanAPIObject(); }; + MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); } +bool MVKCommandEncoderState::isDynamicState(MVKRenderStateType state) { + auto* gpl = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline(); + return !gpl || 
gpl->isDynamicState(state); +} + #pragma mark - #pragma mark MVKPipelineCommandEncoderState @@ -51,112 +59,6 @@ void MVKPipelineCommandEncoderState::encodeImpl(uint32_t stage) { } -#pragma mark - -#pragma mark MVKViewportCommandEncoderState - -void MVKViewportCommandEncoderState::setViewports(MVKArrayRef viewports, - uint32_t firstViewport, - bool isSettingDynamically) { - - size_t vpCnt = viewports.size(); - uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; - if ((firstViewport + vpCnt > maxViewports) || - (firstViewport >= maxViewports) || - (isSettingDynamically && vpCnt == 0)) - return; - - auto& usingViewports = isSettingDynamically ? _dynamicViewports : _viewports; - - if (firstViewport + vpCnt > usingViewports.size()) { - usingViewports.resize(firstViewport + vpCnt); - } - - bool dirty; - bool mustSetDynamically = _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_VIEWPORT); - if (isSettingDynamically || (!mustSetDynamically && vpCnt > 0)) { - dirty = memcmp(&usingViewports[firstViewport], &viewports[0], vpCnt * sizeof(VkViewport)) != 0; - std::copy(viewports.begin(), viewports.end(), usingViewports.begin() + firstViewport); - } else { - dirty = !usingViewports.empty(); - usingViewports.clear(); - } - - if (dirty) markDirty(); -} - -void MVKViewportCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - auto& usingViewports = _viewports.size() > 0 ? _viewports : _dynamicViewports; - if (usingViewports.empty()) { return; } - - if (_cmdEncoder->_pDeviceFeatures->multiViewport) { - size_t vpCnt = usingViewports.size(); - MTLViewport mtlViewports[vpCnt]; - for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) { - mtlViewports[vpIdx] = mvkMTLViewportFromVkViewport(usingViewports[vpIdx]); - } -#if MVK_MACOS_OR_IOS - [_cmdEncoder->_mtlRenderEncoder setViewports: mtlViewports count: vpCnt]; -#endif - } else { - [_cmdEncoder->_mtlRenderEncoder setViewport: mvkMTLViewportFromVkViewport(usingViewports[0])]; - } -} - - -#pragma mark - -#pragma mark MVKScissorCommandEncoderState - -void MVKScissorCommandEncoderState::setScissors(MVKArrayRef scissors, - uint32_t firstScissor, - bool isSettingDynamically) { - - size_t sCnt = scissors.size(); - uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; - if ((firstScissor + sCnt > maxScissors) || - (firstScissor >= maxScissors) || - (isSettingDynamically && sCnt == 0)) - return; - - auto& usingScissors = isSettingDynamically ? _dynamicScissors : _scissors; - - if (firstScissor + sCnt > usingScissors.size()) { - usingScissors.resize(firstScissor + sCnt); - } - - bool dirty; - bool mustSetDynamically = _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_SCISSOR); - if (isSettingDynamically || (!mustSetDynamically && sCnt > 0)) { - dirty = memcmp(&usingScissors[firstScissor], &scissors[0], sCnt * sizeof(VkRect2D)) != 0; - std::copy(scissors.begin(), scissors.end(), usingScissors.begin() + firstScissor); - } else { - dirty = !usingScissors.empty(); - usingScissors.clear(); - } - - if (dirty) markDirty(); -} - -void MVKScissorCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - auto& usingScissors = _scissors.size() > 0 ? 
_scissors : _dynamicScissors; - if (usingScissors.empty()) { return; } - - if (_cmdEncoder->_pDeviceFeatures->multiViewport) { - size_t sCnt = usingScissors.size(); - MTLScissorRect mtlScissors[sCnt]; - for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) { - mtlScissors[sIdx] = mvkMTLScissorRectFromVkRect2D(_cmdEncoder->clipToRenderArea(usingScissors[sIdx])); - } -#if MVK_MACOS_OR_IOS - [_cmdEncoder->_mtlRenderEncoder setScissorRects: mtlScissors count: sCnt]; -#endif - } else { - [_cmdEncoder->_mtlRenderEncoder setScissorRect: mvkMTLScissorRectFromVkRect2D(_cmdEncoder->clipToRenderArea(usingScissors[0]))]; - } -} - - #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -254,74 +156,84 @@ bool MVKPushConstantsCommandEncoderState::isTessellating() { #pragma mark MVKDepthStencilCommandEncoderState void MVKDepthStencilCommandEncoderState:: setDepthStencilState(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { - auto oldData = _depthStencilData; + auto& depthEnabled = _depthTestEnabled[StateScope::Static]; + auto oldDepthEnabled = depthEnabled; + depthEnabled = static_cast(vkDepthStencilInfo.depthTestEnable); - if (vkDepthStencilInfo.depthTestEnable) { - _depthStencilData.depthCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkDepthStencilInfo.depthCompareOp); - _depthStencilData.depthWriteEnabled = vkDepthStencilInfo.depthWriteEnable; - } else { - _depthStencilData.depthCompareFunction = kMVKMTLDepthStencilDescriptorDataDefault.depthCompareFunction; - _depthStencilData.depthWriteEnabled = kMVKMTLDepthStencilDescriptorDataDefault.depthWriteEnabled; - } + auto& dsData = _depthStencilData[StateScope::Static]; + auto oldData = dsData; + dsData.depthCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkDepthStencilInfo.depthCompareOp); + dsData.depthWriteEnabled = vkDepthStencilInfo.depthWriteEnable; - setStencilState(_depthStencilData.frontFaceStencilData, vkDepthStencilInfo.front, vkDepthStencilInfo.stencilTestEnable); - setStencilState(_depthStencilData.backFaceStencilData, vkDepthStencilInfo.back, vkDepthStencilInfo.stencilTestEnable); + dsData.stencilTestEnabled = static_cast(vkDepthStencilInfo.stencilTestEnable); + setStencilState(dsData.frontFaceStencilData, vkDepthStencilInfo.front); + setStencilState(dsData.backFaceStencilData, vkDepthStencilInfo.back); - if (!(oldData == _depthStencilData)) markDirty(); + if (depthEnabled != oldDepthEnabled || dsData != oldData) { markDirty(); } } -void MVKDepthStencilCommandEncoderState::setStencilState(MVKMTLStencilDescriptorData& stencilInfo, - const VkStencilOpState& vkStencil, - bool enabled) { - if ( !enabled ) { - stencilInfo = kMVKMTLStencilDescriptorDataDefault; - return; - } - - stencilInfo.enabled = true; - stencilInfo.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkStencil.compareOp); - stencilInfo.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.failOp); - stencilInfo.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.depthFailOp); - stencilInfo.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.passOp); - - if ( !_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) ) { - stencilInfo.readMask = vkStencil.compareMask; - } - if ( !_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) ) { - stencilInfo.writeMask = vkStencil.writeMask; - } +void MVKDepthStencilCommandEncoderState::setStencilState(MVKMTLStencilDescriptorData& sData, + const VkStencilOpState& vkStencil) { + sData.readMask = 
vkStencil.compareMask; + sData.writeMask = vkStencil.writeMask; + sData.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkStencil.compareOp); + sData.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.failOp); + sData.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.depthFailOp); + sData.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.passOp); +} + +void MVKDepthStencilCommandEncoderState::setDepthTestEnable(VkBool32 depthTestEnable) { + setContent(_depthTestEnabled[StateScope::Dynamic], static_cast(depthTestEnable)); +} + +void MVKDepthStencilCommandEncoderState::setDepthWriteEnable(VkBool32 depthWriteEnable) { + setContent(_depthStencilData[StateScope::Dynamic].depthWriteEnabled, static_cast(depthWriteEnable)); +} + +void MVKDepthStencilCommandEncoderState::setDepthCompareOp(VkCompareOp depthCompareOp) { + setContent(_depthStencilData[StateScope::Dynamic].depthCompareFunction, + (uint8_t)mvkMTLCompareFunctionFromVkCompareOp(depthCompareOp)); +} + +void MVKDepthStencilCommandEncoderState::setStencilTestEnable(VkBool32 stencilTestEnable) { + setContent(_depthStencilData[StateScope::Dynamic].stencilTestEnabled, static_cast(stencilTestEnable)); +} + +void MVKDepthStencilCommandEncoderState::setStencilOp(MVKMTLStencilDescriptorData& sData, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + auto oldData = sData; + sData.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(compareOp); + sData.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(failOp); + sData.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(depthFailOp); + sData.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(passOp); + if (sData != oldData) { markDirty(); } +} + +void MVKDepthStencilCommandEncoderState::setStencilOp(VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp) { + auto& dsData = _depthStencilData[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { setStencilOp(dsData.frontFaceStencilData, failOp, passOp, depthFailOp, compareOp); } + if (shouldUpdateFace(BACK)) { setStencilOp(dsData.backFaceStencilData, failOp, passOp, depthFailOp, compareOp); } } -// We don't check for dynamic state here, because if this is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. void MVKDepthStencilCommandEncoderState::setStencilCompareMask(VkStencilFaceFlags faceMask, - uint32_t stencilCompareMask) { - auto oldData = _depthStencilData; - - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) { - _depthStencilData.frontFaceStencilData.readMask = stencilCompareMask; - } - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) { - _depthStencilData.backFaceStencilData.readMask = stencilCompareMask; - } - - if (!(oldData == _depthStencilData)) markDirty(); + uint32_t stencilCompareMask) { + auto& dsData = _depthStencilData[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { setContent(dsData.frontFaceStencilData.readMask, stencilCompareMask); } + if (shouldUpdateFace(BACK)) { setContent(dsData.backFaceStencilData.readMask, stencilCompareMask); } } -// We don't check for dynamic state here, because if this is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. 
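The dynamic depth/stencil setters above write only the Dynamic scope and rely on the setContent() template to mark the encoder dirty only when a value actually changes. A minimal standalone sketch of that pattern, using simplified stand-in types rather than the MoltenVK classes:

#include <cstdint>

enum StateScope { Static = 0, Dynamic = 1, Count = 2 };

struct StencilData {
    uint32_t readMask  = ~0u;   // default to all bits, as the MVK defaults do
    uint32_t writeMask = ~0u;
};

struct DepthStencilSketch {
    StencilData front[Count];
    StencilData back[Count];
    bool dirty = false;

    // Mirror of the setContent() template above: store the value and mark
    // the encoder dirty only when it actually changed.
    template <typename T>
    void setContent(T& content, T value) {
        if (content != value) { content = value; dirty = true; }
    }

    // Mirror of setStencilCompareMask(): an explicit dynamic command updates
    // the Dynamic slot of whichever faces the face mask selects.
    void setStencilCompareMask(bool updateFront, bool updateBack, uint32_t mask) {
        if (updateFront) { setContent(front[Dynamic].readMask, mask); }
        if (updateBack)  { setContent(back[Dynamic].readMask, mask); }
    }
};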
void MVKDepthStencilCommandEncoderState::setStencilWriteMask(VkStencilFaceFlags faceMask, - uint32_t stencilWriteMask) { - auto oldData = _depthStencilData; - - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) { - _depthStencilData.frontFaceStencilData.writeMask = stencilWriteMask; - } - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) { - _depthStencilData.backFaceStencilData.writeMask = stencilWriteMask; - } - - if (!(oldData == _depthStencilData)) markDirty(); + uint32_t stencilWriteMask) { + auto& dsData = _depthStencilData[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { setContent(dsData.frontFaceStencilData.writeMask, stencilWriteMask); } + if (shouldUpdateFace(BACK)) { setContent(dsData.backFaceStencilData.writeMask, stencilWriteMask); } } void MVKDepthStencilCommandEncoderState::beginMetalRenderPass() { @@ -337,130 +249,247 @@ void MVKDepthStencilCommandEncoderState::beginMetalRenderPass() { if (_hasStencilAttachment != prevHasStencilAttachment) { markDirty(); } } +// Combine static and dynamic depth/stencil data void MVKDepthStencilCommandEncoderState::encodeImpl(uint32_t stage) { - auto cmdEncPool = _cmdEncoder->getCommandEncodingPool(); - switch (stage) { - case kMVKGraphicsStageRasterization: { - // If renderpass does not have a depth or a stencil attachment, disable corresponding test - MVKMTLDepthStencilDescriptorData adjustedDSData = _depthStencilData; - adjustedDSData.disable(!_hasDepthAttachment, !_hasStencilAttachment); - [_cmdEncoder->_mtlRenderEncoder setDepthStencilState: cmdEncPool->getMTLDepthStencilState(adjustedDSData)]; - break; - } - default: // Do nothing on other stages - break; + if (stage != kMVKGraphicsStageRasterization) { return; } + + MVKMTLDepthStencilDescriptorData dsData; + + if (_hasDepthAttachment && getContent(_depthTestEnabled, DepthTestEnable)) { + dsData.depthCompareFunction = getData(DepthCompareOp).depthCompareFunction; + dsData.depthWriteEnabled = getData(DepthWriteEnable).depthWriteEnabled; + } + + if (_hasStencilAttachment && getData(StencilTestEnable).stencilTestEnabled) { + dsData.stencilTestEnabled = true; + + auto& frontFace = dsData.frontFaceStencilData; + auto& backFace = dsData.backFaceStencilData; + + const auto& srcRM = getData(StencilCompareMask); + frontFace.readMask = srcRM.frontFaceStencilData.readMask; + backFace.readMask = srcRM.backFaceStencilData.readMask; + + const auto& srcWM = getData(StencilWriteMask); + frontFace.writeMask = srcWM.frontFaceStencilData.writeMask; + backFace.writeMask = srcWM.backFaceStencilData.writeMask; + + const auto& srcSOp = getData(StencilOp); + frontFace.stencilCompareFunction = srcSOp.frontFaceStencilData.stencilCompareFunction; + frontFace.stencilFailureOperation = srcSOp.frontFaceStencilData.stencilFailureOperation; + frontFace.depthFailureOperation = srcSOp.frontFaceStencilData.depthFailureOperation; + frontFace.depthStencilPassOperation = srcSOp.frontFaceStencilData.depthStencilPassOperation; + + backFace.stencilCompareFunction = srcSOp.backFaceStencilData.stencilCompareFunction; + backFace.stencilFailureOperation = srcSOp.backFaceStencilData.stencilFailureOperation; + backFace.depthFailureOperation = srcSOp.backFaceStencilData.depthFailureOperation; + backFace.depthStencilPassOperation = srcSOp.backFaceStencilData.depthStencilPassOperation; + } + + [_cmdEncoder->_mtlRenderEncoder setDepthStencilState: _cmdEncoder->getCommandEncodingPool()->getMTLDepthStencilState(dsData)]; +} + + +#pragma mark - +#pragma mark MVKRasterizingCommandEncoderState + +#define 
getContent(state) getContent(_mtl##state, state) +#define setContent(state) setContent(_mtl##state, &mtl##state, state, isDynamic) + +void MVKRasterizingCommandEncoderState::setCullMode(VkCullModeFlags cullMode, bool isDynamic) { + auto mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(cullMode); + setContent(CullMode); + _cullBothFaces[isDynamic ? StateScope::Dynamic : StateScope::Static] = (cullMode == VK_CULL_MODE_FRONT_AND_BACK); +} + +void MVKRasterizingCommandEncoderState::setFrontFace(VkFrontFace frontFace, bool isDynamic) { + auto mtlFrontFace = mvkMTLWindingFromVkFrontFace(frontFace); + setContent(FrontFace); +} + +void MVKRasterizingCommandEncoderState::setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic) { + auto mtlPrimitiveTopology = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(topology); + setContent(PrimitiveTopology); +} + +MTLPrimitiveType MVKRasterizingCommandEncoderState::getPrimitiveType() { + return getContent(PrimitiveTopology); +} + +bool MVKRasterizingCommandEncoderState::isDrawingTriangles() { + switch (getPrimitiveType()) { + case MTLPrimitiveTypeTriangle: return true; + case MTLPrimitiveTypeTriangleStrip: return true; + default: return false; } } - -#pragma mark - -#pragma mark MVKStencilReferenceValueCommandEncoderState - -void MVKStencilReferenceValueCommandEncoderState:: setReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { - - // If ref values are to be set dynamically, don't set them here. - if (_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { return; } - - if (_frontFaceValue != vkDepthStencilInfo.front.reference || _backFaceValue != vkDepthStencilInfo.back.reference) - markDirty(); - - _frontFaceValue = vkDepthStencilInfo.front.reference; - _backFaceValue = vkDepthStencilInfo.back.reference; +void MVKRasterizingCommandEncoderState::setPolygonMode(VkPolygonMode polygonMode, bool isDynamic) { + auto mtlPolygonMode = mvkMTLTriangleFillModeFromVkPolygonMode(polygonMode); + setContent(PolygonMode); } -// We don't check for dynamic state here, because if this is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. 
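The getContent(state)/setContent(state) macros above pair each _mtl<State> array with its MVKRenderStateType, and the underlying helpers presumably resolve between the Static and Dynamic slots depending on whether the bound pipeline declares that state dynamic (isDynamicState()). A rough standalone model of that scope selection, with illustrative stand-in types:

#include <cstdint>

enum StateScope { Static = 0, Dynamic = 1, Count = 2 };

// Stand-in for the per-state Static/Dynamic storage: writes land in the slot
// named by the caller; reads use the Dynamic slot only when the pipeline
// declares that state dynamic, falling back to the Static slot otherwise.
template <typename T>
struct ScopedState {
    T slot[Count] = {};

    void set(const T& value, bool isDynamic) {
        slot[isDynamic ? Dynamic : Static] = value;
    }
    const T& get(bool stateIsDynamic) const {
        return slot[stateIsDynamic ? Dynamic : Static];
    }
};

// e.g. a cull-mode read behaves like cullMode.get(isDynamicState(CullMode)),
// so vkCmdSetCullMode() wins when the state is dynamic and the pipeline's
// static value wins when it is not.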
-void MVKStencilReferenceValueCommandEncoderState::setReferenceValues(VkStencilFaceFlags faceMask, - uint32_t stencilReference) { - bool dirty = false; - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) { - dirty |= (_frontFaceValue != stencilReference); - _frontFaceValue = stencilReference; - } - if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) { - dirty |= (_backFaceValue != stencilReference); - _backFaceValue = stencilReference; - } - if (dirty) markDirty(); +void MVKRasterizingCommandEncoderState::setBlendConstants(float blendConstants[4], bool isDynamic) { + MVKColor32 mtlBlendConstants; + mvkCopy(mtlBlendConstants.float32, blendConstants, 4); + setContent(BlendConstants); } -void MVKStencilReferenceValueCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - [_cmdEncoder->_mtlRenderEncoder setStencilFrontReferenceValue: _frontFaceValue - backReferenceValue: _backFaceValue]; +void MVKRasterizingCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) { + bool isDynamic = false; + + bool mtlDepthBiasEnable = static_cast(vkRasterInfo.depthBiasEnable); + setContent(DepthBiasEnable); + + MVKDepthBias mtlDepthBias = { + .depthBiasConstantFactor = vkRasterInfo.depthBiasConstantFactor, + .depthBiasSlopeFactor = vkRasterInfo.depthBiasSlopeFactor, + .depthBiasClamp = vkRasterInfo.depthBiasClamp + }; + setContent(DepthBias); } - -#pragma mark - -#pragma mark MVKDepthBiasCommandEncoderState - -void MVKDepthBiasCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) { - - auto wasEnabled = _isEnabled; - _isEnabled = vkRasterInfo.depthBiasEnable; - - // If ref values are to be set dynamically, don't set them here. - if (_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_DEPTH_BIAS)) { return; } - - if (_isEnabled != wasEnabled || _depthBiasConstantFactor != vkRasterInfo.depthBiasConstantFactor - || _depthBiasSlopeFactor != vkRasterInfo.depthBiasSlopeFactor || _depthBiasClamp != vkRasterInfo.depthBiasClamp) { - - markDirty(); - _depthBiasConstantFactor = vkRasterInfo.depthBiasConstantFactor; - _depthBiasSlopeFactor = vkRasterInfo.depthBiasSlopeFactor; - _depthBiasClamp = vkRasterInfo.depthBiasClamp; - } +void MVKRasterizingCommandEncoderState::setDepthBias(float depthBiasConstantFactor, + float depthBiasSlopeFactor, + float depthBiasClamp) { + bool isDynamic = true; + MVKDepthBias mtlDepthBias = { + .depthBiasConstantFactor = depthBiasConstantFactor, + .depthBiasSlopeFactor = depthBiasSlopeFactor, + .depthBiasClamp = depthBiasClamp + }; + setContent(DepthBias); } -// We don't check for dynamic state here, because if this is called before pipeline is set, -// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway. 
-void MVKDepthBiasCommandEncoderState::setDepthBias(float depthBiasConstantFactor, - float depthBiasSlopeFactor, - float depthBiasClamp) { - - if (_depthBiasConstantFactor != depthBiasConstantFactor || _depthBiasSlopeFactor != depthBiasSlopeFactor - || _depthBiasClamp != depthBiasClamp) { - - markDirty(); - _depthBiasConstantFactor = depthBiasConstantFactor; - _depthBiasSlopeFactor = depthBiasSlopeFactor; - _depthBiasClamp = depthBiasClamp; - } +void MVKRasterizingCommandEncoderState::setDepthBiasEnable(VkBool32 depthBiasEnable) { + bool isDynamic = true; + bool mtlDepthBiasEnable = static_cast(depthBiasEnable); + setContent(DepthBiasEnable); } -void MVKDepthBiasCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - if (_isEnabled) { - [_cmdEncoder->_mtlRenderEncoder setDepthBias: _depthBiasConstantFactor - slopeScale: _depthBiasSlopeFactor - clamp: _depthBiasClamp]; - } else { - [_cmdEncoder->_mtlRenderEncoder setDepthBias: 0 slopeScale: 0 clamp: 0]; - } +void MVKRasterizingCommandEncoderState::setDepthClipEnable(bool depthClip, bool isDynamic) { + auto mtlDepthClipEnable = depthClip ? MTLDepthClipModeClip : MTLDepthClipModeClamp; + setContent(DepthClipEnable); } - -#pragma mark - -#pragma mark MVKBlendColorCommandEncoderState - -void MVKBlendColorCommandEncoderState::setBlendColor(float red, float green, - float blue, float alpha, - bool isDynamic) { - // Abort if we are using dynamic, but call is not dynamic. - if ( !isDynamic && _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS) ) { return; } - - if (_red != red || _green != green || _blue != blue || _alpha != alpha) { - markDirty(); - _red = red; - _green = green; - _blue = blue; - _alpha = alpha; - } +void MVKRasterizingCommandEncoderState::setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { + bool isDynamic = false; + MVKStencilReference mtlStencilReference = { + .frontFaceValue = vkDepthStencilInfo.front.reference, + .backFaceValue = vkDepthStencilInfo.back.reference + }; + setContent(StencilReference); } -void MVKBlendColorCommandEncoderState::encodeImpl(uint32_t stage) { - if (stage != kMVKGraphicsStageRasterization) { return; } - [_cmdEncoder->_mtlRenderEncoder setBlendColorRed: _red green: _green blue: _blue alpha: _alpha]; +void MVKRasterizingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference) { + bool isDynamic = true; + MVKStencilReference mtlStencilReference = _mtlStencilReference[StateScope::Dynamic]; + if (shouldUpdateFace(FRONT)) { mtlStencilReference.frontFaceValue = stencilReference; } + if (shouldUpdateFace(BACK)) { mtlStencilReference.backFaceValue = stencilReference; } + setContent(StencilReference); +} + +void MVKRasterizingCommandEncoderState::setViewports(const MVKArrayRef viewports, + uint32_t firstViewport, + bool isDynamic) { + uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; + if (firstViewport >= maxViewports) { return; } + + MVKMTLViewports mtlViewports = isDynamic ? 
_mtlViewports[StateScope::Dynamic] : _mtlViewports[StateScope::Static]; + size_t vpCnt = min((uint32_t)viewports.size(), maxViewports - firstViewport); + for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) { + mtlViewports.viewports[firstViewport + vpIdx] = mvkMTLViewportFromVkViewport(viewports[vpIdx]); + mtlViewports.viewportCount = max(mtlViewports.viewportCount, vpIdx + 1); + } + setContent(Viewports); +} + +void MVKRasterizingCommandEncoderState::setScissors(const MVKArrayRef scissors, + uint32_t firstScissor, + bool isDynamic) { + uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; + if (firstScissor >= maxScissors) { return; } + + MVKMTLScissors mtlScissors = isDynamic ? _mtlScissors[StateScope::Dynamic] : _mtlScissors[StateScope::Static]; + size_t sCnt = min((uint32_t)scissors.size(), maxScissors - firstScissor); + for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) { + mtlScissors.scissors[firstScissor + sIdx] = mvkMTLScissorRectFromVkRect2D(scissors[sIdx]); + mtlScissors.scissorCount = max(mtlScissors.scissorCount, sIdx + 1); + } + setContent(Scissors); +} + +void MVKRasterizingCommandEncoderState::encodeImpl(uint32_t stage) { + if (stage != kMVKGraphicsStageRasterization) { return; } + + auto& rendEnc = _cmdEncoder->_mtlRenderEncoder; + + if (isDirty(CullMode)) { [rendEnc setCullMode: getContent(CullMode)]; } + if (isDirty(FrontFace)) { [rendEnc setFrontFacingWinding: getContent(FrontFace)]; } + if (isDirty(BlendConstants)) { + auto& bcFlt = getContent(BlendConstants).float32; + [rendEnc setBlendColorRed: bcFlt[0] green: bcFlt[1] blue: bcFlt[2] alpha: bcFlt[3]]; + } + if (isDirty(DepthBiasEnable) || isDirty(DepthBias)) { + if (getContent(DepthBiasEnable)) { + auto& db = getContent(DepthBias); + [rendEnc setDepthBias: db.depthBiasConstantFactor + slopeScale: db.depthBiasSlopeFactor + clamp: db.depthBiasClamp]; + } else { + [rendEnc setDepthBias: 0 slopeScale: 0 clamp: 0]; + } + } + if (isDirty(DepthClipEnable) && getDevice()->_enabledFeatures.depthClamp) { + [rendEnc setDepthClipMode: getContent(DepthClipEnable)]; + } + + if (isDirty(StencilReference)) { + auto& sr = getContent(StencilReference); + [rendEnc setStencilFrontReferenceValue: sr.frontFaceValue backReferenceValue: sr.backFaceValue]; + } + + if (isDirty(Viewports)) { + auto& mtlViewports = getContent(Viewports); + if (_cmdEncoder->_pDeviceFeatures->multiViewport) { +#if MVK_MACOS_OR_IOS + [rendEnc setViewports: mtlViewports.viewports count: mtlViewports.viewportCount]; +#endif + } else { + [rendEnc setViewport: mtlViewports.viewports[0]]; + } + } + + if (isDirty(Scissors)) { + auto mtlScissors = getContent(Scissors); + + // If culling has been dynamically set to front-and-back, emulate this by using zeroed scissor rectangles. + static MTLScissorRect zeroRect = {}; + bool cullBothFaces = isDrawingTriangles() && _cullBothFaces[StateScope::Dynamic] && isDynamicState(CullMode); + for (uint32_t sIdx = 0; sIdx < mtlScissors.scissorCount; sIdx++) { + mtlScissors.scissors[sIdx] = cullBothFaces ? 
zeroRect : _cmdEncoder->clipToRenderArea(mtlScissors.scissors[sIdx]); + } + + if (_cmdEncoder->_pDeviceFeatures->multiViewport) { +#if MVK_MACOS_OR_IOS + [rendEnc setScissorRects: mtlScissors.scissors count: mtlScissors.scissorCount]; +#endif + } else { + [rendEnc setScissorRect: mtlScissors.scissors[0]]; + } + } +} + +// Return whether state is dirty, and mark it not dirty +bool MVKRasterizingCommandEncoderState::isDirty(MVKRenderStateType state) { + bool rslt = _dirtyStates.isEnabled(state); + _dirtyStates.disable(state); + return rslt; +} + +void MVKRasterizingCommandEncoderState::beginMetalRenderPass() { + MVKCommandEncoderState::beginMetalRenderPass(); + _dirtyStates = _modifiedStates; } @@ -777,6 +806,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)getPipeline(); bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle; bool forTessellation = pipeline->isTessellationPipeline(); + bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride); if (stage == kMVKGraphicsStageVertex) { encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle, @@ -812,23 +842,32 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { } else if (!forTessellation && stage == kMVKGraphicsStageRasterization) { encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle, - [pipeline](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { + [pipeline, isDynamicVertexStride](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { // The app may have bound more vertex attribute buffers than used by the pipeline. // We must not bind those extra buffers to the shader because they might overwrite // any implicit buffers used by the pipeline. if (pipeline->isValidVertexBufferIndex(kMVKShaderStageVertex, b.index)) { - if (b.isInline) { - cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - b.mtlBytes, - b.size, - b.index); + NSUInteger mtlStride = isDynamicVertexStride ? 
b.stride : MTLAttributeStrideStatic; + if (b.isInline) { + [cmdEncoder->_mtlRenderEncoder setVertexBytes: b.mtlBytes + length: b.size +#if MVK_XCODE_15 + attributeStride: mtlStride +#endif + atIndex: b.index]; } else { - if (b.justOffset) { - [cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset - atIndex: b.index]; - } else { + if (b.justOffset) { + [cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset +#if MVK_XCODE_15 + attributeStride: mtlStride +#endif + atIndex: b.index]; + } else { [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer offset: b.offset +#if MVK_XCODE_15 + attributeStride: mtlStride +#endif atIndex: b.index]; } @@ -838,6 +877,9 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { if (b.index == pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.binding)) { [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer offset: b.offset + xltdBind.translationOffset +#if MVK_XCODE_15 + attributeStride: mtlStride +#endif atIndex: pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)]; } } @@ -898,19 +940,29 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { if (forTessellation && stage == kMVKGraphicsStageRasterization) { encodeBindings(kMVKShaderStageTessEval, "tessellation evaluation", fullImageViewSwizzle, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { - if (b.isInline) - cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - b.mtlBytes, - b.size, - b.index); - else if (b.justOffset) + [isDynamicVertexStride](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { + NSUInteger mtlStride = isDynamicVertexStride ? b.stride : MTLAttributeStrideStatic; + if (b.isInline) { + [cmdEncoder->_mtlRenderEncoder setVertexBytes: b.mtlBytes + length: b.size +#if MVK_XCODE_15 + attributeStride: mtlStride +#endif + atIndex: b.index]; + } else if (b.justOffset) { [cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset +#if MVK_XCODE_15 + attributeStride: mtlStride +#endif atIndex: b.index]; - else + } else { [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer offset: b.offset +#if MVK_XCODE_15 + attributeStride: mtlStride +#endif atIndex: b.index]; + } }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h index 84fa37b6..99fcb384 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h @@ -210,27 +210,24 @@ namespace std { * change as early as possible. */ typedef struct MVKMTLStencilDescriptorData { - bool enabled; /**< Indicates whether stencil testing for this face is enabled. */ + uint32_t readMask; /**< The bit-mask to apply when comparing the stencil buffer value to the reference value. */ + uint32_t writeMask; /**< The bit-mask to apply when writing values to the stencil buffer. */ uint8_t stencilCompareFunction; /**< The stencil compare function (interpreted as MTLCompareFunction). */ uint8_t stencilFailureOperation; /**< The operation to take when the stencil test fails (interpreted as MTLStencilOperation). */ uint8_t depthFailureOperation; /**< The operation to take when the stencil test passes, but the depth test fails (interpreted as MTLStencilOperation). 
*/ uint8_t depthStencilPassOperation; /**< The operation to take when both the stencil and depth tests pass (interpreted as MTLStencilOperation). */ - uint32_t readMask; /**< The bit-mask to apply when comparing the stencil buffer value to the reference value. */ - uint32_t writeMask; /**< The bit-mask to apply when writing values to the stencil buffer. */ + + bool operator==(const MVKMTLStencilDescriptorData& rhs) const { return mvkAreEqual(this, &rhs); } + bool operator!=(const MVKMTLStencilDescriptorData& rhs) const { return !(*this == rhs); } MVKMTLStencilDescriptorData() { - - // Start with all zeros to ensure memory comparisons will work, - // even if the structure contains alignment gaps. - mvkClear(this); - - enabled = false; + mvkClear(this); // Clear all memory to ensure memory comparisons will work. + mvkEnableAllFlags(readMask); + mvkEnableAllFlags(writeMask); stencilCompareFunction = MTLCompareFunctionAlways; stencilFailureOperation = MTLStencilOperationKeep; depthFailureOperation = MTLStencilOperationKeep; depthStencilPassOperation = MTLStencilOperationKeep; - readMask = static_cast(~0); - writeMask = static_cast(~0); } } MVKMTLStencilDescriptorData; @@ -247,34 +244,32 @@ const MVKMTLStencilDescriptorData kMVKMTLStencilDescriptorDataDefault; * change as early as possible. */ typedef struct MVKMTLDepthStencilDescriptorData { - uint8_t depthCompareFunction; /**< The depth compare function (interpreted as MTLCompareFunction). */ - bool depthWriteEnabled; /**< Indicates whether depth writing is enabled. */ MVKMTLStencilDescriptorData frontFaceStencilData; MVKMTLStencilDescriptorData backFaceStencilData; + uint8_t depthCompareFunction; /**< The depth compare function (interpreted as MTLCompareFunction). */ + bool depthWriteEnabled; /**< Indicates whether depth writing is enabled. */ + bool stencilTestEnabled; /**< Indicates whether stencil testing is enabled. */ bool operator==(const MVKMTLDepthStencilDescriptorData& rhs) const { return mvkAreEqual(this, &rhs); } + bool operator!=(const MVKMTLDepthStencilDescriptorData& rhs) const { return !(*this == rhs); } std::size_t hash() const { return mvkHash((uint64_t*)this, sizeof(*this) / sizeof(uint64_t)); } - - /** Disable depth and/or stencil testing. */ - void disable(bool disableDepth, bool disableStencil) { - if (disableDepth) { - depthCompareFunction = MTLCompareFunctionAlways; - depthWriteEnabled = false; - } - if (disableStencil) { - frontFaceStencilData = kMVKMTLStencilDescriptorDataDefault; - backFaceStencilData = kMVKMTLStencilDescriptorDataDefault; - } + void disableDepth() { + depthCompareFunction = MTLCompareFunctionAlways; + depthWriteEnabled = false; + } + void disableStencil() { + stencilTestEnabled = false; + frontFaceStencilData = kMVKMTLStencilDescriptorDataDefault; + backFaceStencilData = kMVKMTLStencilDescriptorDataDefault; } MVKMTLDepthStencilDescriptorData() { - // Start with all zeros to ensure memory comparisons will work, - // even if the structure contains alignment gaps. - mvkClear(this); - disable(true, true); + mvkClear(this); // Clear all memory to ensure memory comparisons will work. 
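Zero-filling the whole struct before assigning members matters because operator==() and hash() above work on raw bytes, and padding left indeterminate would make logically equal values compare unequal. A small generic illustration of the same idea (not the MVK helpers themselves):

#include <cstdint>
#include <cstring>

struct PackedState {
    uint8_t  compareOp;   // 3 padding bytes follow before readMask
    uint32_t readMask;

    PackedState() {
        std::memset(this, 0, sizeof(*this));   // zeroes the padding bytes too
        compareOp = 1;
        readMask  = ~0u;
    }
    bool operator==(const PackedState& rhs) const {
        // Byte-wise equality is only meaningful because construction zeroed
        // the padding; otherwise two equal values could still differ here.
        return std::memcmp(this, &rhs, sizeof(*this)) == 0;
    }
};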
+ disableDepth(); + disableStencil(); } } __attribute__((aligned(sizeof(uint64_t)))) MVKMTLDepthStencilDescriptorData; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index 33ee4485..1e301734 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -433,9 +433,10 @@ id MVKCommandResourceFactory::newMTLDepthStencilState(bool } id MVKCommandResourceFactory::newMTLDepthStencilState(MVKMTLDepthStencilDescriptorData& dsData) { - MTLStencilDescriptor* fsDesc = newMTLStencilDescriptor(dsData.frontFaceStencilData); // temp retain - MTLStencilDescriptor* bsDesc = newMTLStencilDescriptor(dsData.backFaceStencilData); // temp retain - MTLDepthStencilDescriptor* dsDesc = [MTLDepthStencilDescriptor new]; // temp retain + bool testStencil = dsData.stencilTestEnabled; + auto* fsDesc = testStencil ? newMTLStencilDescriptor(dsData.frontFaceStencilData) : nil; // temp retain + auto* bsDesc = testStencil ? newMTLStencilDescriptor(dsData.backFaceStencilData) : nil; // temp retain + auto* dsDesc = [MTLDepthStencilDescriptor new]; // temp retain dsDesc.depthCompareFunction = (MTLCompareFunction)dsData.depthCompareFunction; dsDesc.depthWriteEnabled = dsData.depthWriteEnabled; dsDesc.frontFaceStencil = fsDesc; @@ -443,16 +444,14 @@ id MVKCommandResourceFactory::newMTLDepthStencilState(MVKM id dss = [getMTLDevice() newDepthStencilStateWithDescriptor: dsDesc]; - [fsDesc release]; // temp release - [bsDesc release]; // temp release - [dsDesc release]; // temp release + [fsDesc release]; // temp release + [bsDesc release]; // temp release + [dsDesc release]; // temp release return dss; } MTLStencilDescriptor* MVKCommandResourceFactory::newMTLStencilDescriptor(MVKMTLStencilDescriptorData& sData) { - if ( !sData.enabled ) { return nil; } - MTLStencilDescriptor* sDesc = [MTLStencilDescriptor new]; // retained sDesc.stencilCompareFunction = (MTLCompareFunction)sData.stencilCompareFunction; sDesc.stencilFailureOperation = (MTLStencilOperation)sData.stencilFailureOperation; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def index 0ab0ffab..3035677f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def +++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def @@ -89,12 +89,19 @@ MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetScissor, 1) MVK_CMD_TYPE_POOL(SetLineWidth) MVK_CMD_TYPE_POOL(SetDepthBias) MVK_CMD_TYPE_POOL(SetBlendConstants) +MVK_CMD_TYPE_POOL(SetDepthTestEnable) +MVK_CMD_TYPE_POOL(SetDepthWriteEnable) +MVK_CMD_TYPE_POOL(SetDepthCompareOp) MVK_CMD_TYPE_POOL(SetDepthBounds) +MVK_CMD_TYPE_POOL(SetDepthBoundsTestEnable) +MVK_CMD_TYPE_POOL(SetStencilTestEnable) +MVK_CMD_TYPE_POOL(SetStencilOp) MVK_CMD_TYPE_POOL(SetStencilCompareMask) MVK_CMD_TYPE_POOL(SetStencilWriteMask) MVK_CMD_TYPE_POOL(SetStencilReference) MVK_CMD_TYPE_POOL(SetCullMode) MVK_CMD_TYPE_POOL(SetFrontFace) +MVK_CMD_TYPE_POOL(SetPrimitiveTopology) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2) MVK_CMD_TYPE_POOL(BindIndexBuffer) MVK_CMD_TYPE_POOL(Draw) diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h index 3eeb7d42..e0637011 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h @@ -67,6 +67,7 @@ typedef struct MVKMTLBufferBinding { union { id mtlBuffer = nil; id mtlResource; const void* 
mtlBytes; }; // aliases VkDeviceSize offset = 0; uint32_t size = 0; + uint32_t stride = 0; uint16_t index = 0; bool justOffset = false; bool isDirty = true; @@ -78,14 +79,16 @@ typedef struct MVKMTLBufferBinding { void update(const MVKMTLBufferBinding &other) { if (mtlBuffer != other.mtlBuffer || size != other.size || other.isInline) { mtlBuffer = other.mtlBuffer; + offset = other.offset; size = other.size; + stride = other.stride; isInline = other.isInline; - offset = other.offset; justOffset = false; isOverridden = false; isDirty = true; - } else if (offset != other.offset) { + } else if (offset != other.offset || stride != other.stride) { offset = other.offset; + stride = other.stride; justOffset = !isOverridden && (!isDirty || justOffset); isOverridden = false; isDirty = true; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 20bad33e..c2fe1e86 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -387,6 +387,11 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { formatFeatures->formatA4B4G4R4 = canSupport4444; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: { + auto* extDynState = (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT*)next; + extDynState->extendedDynamicState = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: { auto* interlockFeatures = (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT*)next; interlockFeatures->fragmentShaderSampleInterlock = _metalFeatures.rasterOrderGroups; @@ -1696,10 +1701,11 @@ void MVKPhysicalDevice::initMetalFeatures() { _metalFeatures.minSwapchainImageCount = kMVKMinSwapchainImageCount; _metalFeatures.maxSwapchainImageCount = kMVKMaxSwapchainImageCount; - _metalFeatures.vertexStrideAlignment = 4; - _metalFeatures.maxPerStageStorageTextureCount = 8; + _metalFeatures.vertexStrideAlignment = supportsMTLGPUFamily(Apple5) ? 1 : 4; + _metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2)); + // GPU-specific features switch (_properties.vendorID) { case kAMDVendorId: @@ -2411,7 +2417,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.maxVertexInputAttributes = 31; _properties.limits.maxVertexInputBindings = 31; - _properties.limits.maxVertexInputBindingStride = (2 * KIBI); + _properties.limits.maxVertexInputBindingStride = supportsMTLGPUFamily(Apple2) ? 
kMVKUndefinedLargeUInt32 : (4 * KIBI); _properties.limits.maxVertexInputAttributeOffset = _properties.limits.maxVertexInputBindingStride - 1; _properties.limits.maxPerStageDescriptorSamplers = _metalFeatures.maxPerStageSamplerCount; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index 88a3a33f..4e9f3bed 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -64,6 +64,7 @@ MVK_DEVICE_FEATURE(VulkanMemoryModel, VULKAN_MEMORY_MODEL, MVK_DEVICE_FEATURE_EXTN(FragmentShaderBarycentric, FRAGMENT_SHADER_BARYCENTRIC, KHR, 1) MVK_DEVICE_FEATURE_EXTN(PortabilitySubset, PORTABILITY_SUBSET, KHR, 15) MVK_DEVICE_FEATURE_EXTN(4444Formats, 4444_FORMATS, EXT, 2) +MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, EXT, 1) MVK_DEVICE_FEATURE_EXTN(FragmentShaderInterlock, FRAGMENT_SHADER_INTERLOCK, EXT, 3) MVK_DEVICE_FEATURE_EXTN(PipelineCreationCacheControl, PIPELINE_CREATION_CACHE_CONTROL, EXT, 1) MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, EXT, 3) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index e3c94135..acf6670f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -635,16 +635,16 @@ void MVKInstance::initProcAddrs() { ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdResetEvent2, KHR, KHR_SYNCHRONIZATION_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdResolveImage2, KHR, KHR_COPY_COMMANDS_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetCullMode, EXT, EXT_EXTENDED_DYNAMIC_STATE); - ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBiasEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); + ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBiasEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBoundsTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthCompareOp, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthWriteEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetEvent2, KHR, KHR_SYNCHRONIZATION_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetFrontFace, EXT, EXT_EXTENDED_DYNAMIC_STATE); - ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveRestartEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); + ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveRestartEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveTopology, EXT, EXT_EXTENDED_DYNAMIC_STATE); - ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetRasterizerDiscardEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); + ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetRasterizerDiscardEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetScissorWithCount, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetStencilOp, EXT, EXT_EXTENDED_DYNAMIC_STATE); ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetStencilTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 505e894c..c7f67db1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -218,6 +218,44 @@ struct MVKStagedDescriptorBindingUse { MVKBitArray stages[4] = {}; }; +/** Enumeration identifying different state content 
types. */ +enum MVKRenderStateType { + Unknown = 0, + BlendConstants, + CullMode, + DepthBias, + DepthBiasEnable, + DepthBounds, + DepthBoundsTestEnable, + DepthClipEnable, + DepthCompareOp, + DepthTestEnable, + DepthWriteEnable, + FrontFace, + PolygonMode, + PrimitiveTopology, + SampleLocations, + Scissors, + StencilCompareMask, + StencilOp, + StencilReference, + StencilTestEnable, + StencilWriteMask, + VertexStride, + Viewports, +}; + +/** Boolean tracking of rendering state. */ +struct MVKRenderStateFlags { + void enable(MVKRenderStateType rs) { if (rs) { mvkEnableFlags(_stateFlags, getFlagMask(rs)); } } + void disable(MVKRenderStateType rs) { if (rs) { mvkDisableFlags(_stateFlags, getFlagMask(rs)); } } + bool isEnabled(MVKRenderStateType rs) { return mvkIsAnyFlagEnabled(_stateFlags, getFlagMask(rs)); } +protected: + uint32_t getFlagMask(MVKRenderStateType rs) { return rs ? (1u << (rs - 1u)) : 0; } // Ignore Unknown type + + uint32_t _stateFlags = 0; +}; + /** Represents an Vulkan graphics pipeline. */ class MVKGraphicsPipeline : public MVKPipeline { @@ -229,8 +267,8 @@ public: /** Binds this pipeline to the specified command encoder. */ void encode(MVKCommandEncoder* cmdEncoder, uint32_t stage = 0) override; - /** Returns whether this pipeline permits dynamic setting of the specifie state. */ - bool supportsDynamicState(VkDynamicState state); + /** Returns whether this pipeline permits dynamic setting of the state. */ + bool isDynamicState(MVKRenderStateType state) { return _dynamicState.isEnabled(state); } /** Returns whether this pipeline has tessellation shaders. */ bool isTessellationPipeline() { return _tessInfo.patchControlPoints > 0; } @@ -320,6 +358,7 @@ protected: id getOrCompilePipeline(MTLComputePipelineDescriptor* plDesc, id& plState, const char* compilerType); bool compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, MVKMTLFunction* pVtxFunctions, VkPipelineCreationFeedback* pVertexFB); bool compileTessControlStageState(MTLComputePipelineDescriptor* tcPLDesc, VkPipelineCreationFeedback* pTessCtlFB); + void initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo); void initCustomSamplePositions(const VkGraphicsPipelineCreateInfo* pCreateInfo); void initMTLRenderPipelineState(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, VkPipelineCreationFeedback* pPipelineFB, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo* pTessCtlSS, VkPipelineCreationFeedback* pTessCtlFB, const VkPipelineShaderStageCreateInfo* pTessEvalSS, VkPipelineCreationFeedback* pTessEvalFB, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB); void initShaderConversionConfig(SPIRVToMSLConversionConfiguration& shaderConfig, const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData); @@ -356,10 +395,10 @@ protected: VkPipelineTessellationStateCreateInfo _tessInfo; VkPipelineRasterizationStateCreateInfo _rasterInfo; VkPipelineDepthStencilStateCreateInfo _depthStencilInfo; + MVKRenderStateFlags _dynamicState; MVKSmallVector _viewports; MVKSmallVector _scissors; - MVKSmallVector _dynamicState; MVKSmallVector _customSamplePositions; MVKSmallVector _translatedVertexBindings; MVKSmallVector _zeroDivisorVertexBindings; @@ -374,19 +413,16 @@ protected: id _mtlTessControlStageState = nil; id _mtlPipelineState = nil; - float _blendConstants[4] = { 0.0, 0.0, 0.0, 1.0 }; - MTLCullMode _mtlCullMode; - 
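MVKRenderStateFlags above packs one bit per MVKRenderStateType (Unknown maps to no bit), which is what lets the rasterizing encoder state keep separate dirty and modified sets and re-dirty only the modified states when a new Metal render encoder begins. A compact sketch of that flag arithmetic and the usage pattern it supports, with a stand-in type name:

#include <cstdint>

struct StateFlagsSketch {
    uint32_t bits = 0;

    // A state value of 0 (Unknown) maps to no bit, matching getFlagMask() above.
    static uint32_t mask(uint32_t state) { return state ? (1u << (state - 1u)) : 0u; }

    void enable(uint32_t state)          { bits |=  mask(state); }
    void disable(uint32_t state)         { bits &= ~mask(state); }
    bool isEnabled(uint32_t state) const { return (bits & mask(state)) != 0; }
};

// Usage pattern mirrored from the encoder: every setter enables the bit in
// both a dirty set and a modified set, encodeImpl() tests-and-clears dirty,
// and beginMetalRenderPass() copies modified back into dirty so state that
// was ever set gets re-encoded on the new MTLRenderCommandEncoder.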
MTLWinding _mtlFrontWinding; - MTLTriangleFillMode _mtlFillMode; - MTLDepthClipMode _mtlDepthClipMode; + float _blendConstants[4] = {}; + VkPrimitiveTopology _vkPrimitiveTopology; MVKShaderImplicitRezBinding _reservedVertexAttributeBufferCount; MVKShaderImplicitRezBinding _viewRangeBufferIndex; MVKShaderImplicitRezBinding _outputBufferIndex; - VkPrimitiveTopology _vkPrimitiveTopology; uint32_t _outputControlPointCount; uint32_t _tessCtlPatchOutputBufferIndex = 0; uint32_t _tessCtlLevelBufferIndex = 0; + bool _hasRasterInfo = false; bool _needsVertexSwizzleBuffer = false; bool _needsVertexBufferSizeBuffer = false; bool _needsVertexDynamicOffsetBuffer = false; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 779eb75a..f41077c8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -292,24 +292,20 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage) // Depth stencil state - Cleared _depthStencilInfo values will disable depth testing cmdEncoder->_depthStencilState.setDepthStencilState(_depthStencilInfo); - cmdEncoder->_stencilReferenceValueState.setReferenceValues(_depthStencilInfo); // Rasterization - cmdEncoder->_blendColorState.setBlendColor(_blendConstants[0], _blendConstants[1], - _blendConstants[2], _blendConstants[3], false); - cmdEncoder->_depthBiasState.setDepthBias(_rasterInfo); - cmdEncoder->_viewportState.setViewports(_viewports.contents(), 0, false); - cmdEncoder->_scissorState.setScissors(_scissors.contents(), 0, false); - cmdEncoder->_mtlPrimitiveType = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(_vkPrimitiveTopology); - - [mtlCmdEnc setCullMode: _mtlCullMode]; - [mtlCmdEnc setFrontFacingWinding: _mtlFrontWinding]; - [mtlCmdEnc setTriangleFillMode: _mtlFillMode]; - - if (_device->_enabledFeatures.depthClamp) { - [mtlCmdEnc setDepthClipMode: _mtlDepthClipMode]; - } - + cmdEncoder->_rasterizingState.setPrimitiveTopology(_vkPrimitiveTopology, false); + cmdEncoder->_rasterizingState.setBlendConstants(_blendConstants, false); + cmdEncoder->_rasterizingState.setStencilReferenceValues(_depthStencilInfo); + cmdEncoder->_rasterizingState.setViewports(_viewports.contents(), 0, false); + cmdEncoder->_rasterizingState.setScissors(_scissors.contents(), 0, false); + if (_hasRasterInfo) { + cmdEncoder->_rasterizingState.setCullMode(_rasterInfo.cullMode, false); + cmdEncoder->_rasterizingState.setFrontFace(_rasterInfo.frontFace, false); + cmdEncoder->_rasterizingState.setPolygonMode(_rasterInfo.polygonMode, false); + cmdEncoder->_rasterizingState.setDepthBias(_rasterInfo); + cmdEncoder->_rasterizingState.setDepthClipEnable( !_rasterInfo.depthClampEnable, false ); + } break; } @@ -320,21 +316,6 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage) cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer); } -bool MVKGraphicsPipeline::supportsDynamicState(VkDynamicState state) { - for (auto& ds : _dynamicState) { - if (state == ds) { - // Some dynamic states have other restrictions - switch (state) { - case VK_DYNAMIC_STATE_DEPTH_BIAS: - return _rasterInfo.depthBiasEnable; - default: - return true; - } - } - } - return false; -} - static const char vtxCompilerType[] = "Vertex stage pipeline for tessellation"; bool MVKGraphicsPipeline::compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, @@ -414,6 +395,10 @@ 
MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, const VkGraphicsPipelineCreateInfo* pCreateInfo) : MVKPipeline(device, pipelineCache, (MVKPipelineLayout*)pCreateInfo->layout, pCreateInfo->flags, parent) { + + // Extract dynamic state first, as it can affect many configurations. + initDynamicState(pCreateInfo); + // Determine rasterization early, as various other structs are validated and interpreted in this context. const VkPipelineRenderingCreateInfo* pRendInfo = getRenderingCreateInfo(pCreateInfo); _isRasterizing = !isRasterizationDisabled(pCreateInfo); @@ -509,17 +494,12 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, initMTLRenderPipelineState(pCreateInfo, reflectData, pPipelineFB, pVertexSS, pVertexFB, pTessCtlSS, pTessCtlFB, pTessEvalSS, pTessEvalFB, pFragmentSS, pFragmentFB); if ( !_hasValidMTLPipelineStates ) { return; } - // Track dynamic state - const VkPipelineDynamicStateCreateInfo* pDS = pCreateInfo->pDynamicState; - if (pDS) { - for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) { - _dynamicState.push_back(pDS->pDynamicStates[i]); - } - } - // Blending - must ignore allowed bad pColorBlendState pointer if rasterization disabled or no color attachments if (_isRasterizingColor && pCreateInfo->pColorBlendState) { - memcpy(&_blendConstants, &pCreateInfo->pColorBlendState->blendConstants, sizeof(_blendConstants)); + mvkCopy(_blendConstants, pCreateInfo->pColorBlendState->blendConstants, 4); + } else { + float defaultBlendConstants[4] = { 0, 0.0, 0.0, 1.0 }; + mvkCopy(_blendConstants, defaultBlendConstants, 4); } // Topology @@ -528,21 +508,10 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, : VK_PRIMITIVE_TOPOLOGY_POINT_LIST); // Rasterization - _mtlCullMode = MTLCullModeNone; - _mtlFrontWinding = MTLWindingCounterClockwise; - _mtlFillMode = MTLTriangleFillModeFill; - _mtlDepthClipMode = MTLDepthClipModeClip; - bool hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState); - if (hasRasterInfo) { - _mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(_rasterInfo.cullMode); - _mtlFrontWinding = mvkMTLWindingFromVkFrontFace(_rasterInfo.frontFace); - _mtlFillMode = mvkMTLTriangleFillModeFromVkPolygonMode(_rasterInfo.polygonMode); - if (_rasterInfo.depthClampEnable) { - if (_device->_enabledFeatures.depthClamp) { - _mtlDepthClipMode = MTLDepthClipModeClamp; - } else { - setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device does not support depth clamping.")); - } + _hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState); + if (_hasRasterInfo) { + if (_rasterInfo.depthClampEnable && !_device->_enabledFeatures.depthClamp) { + setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device does not support depth clamping.")); } } @@ -557,26 +526,76 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, // Viewports and scissors - must ignore allowed bad pViewportState pointer if rasterization is disabled auto pVPState = _isRasterizing ? pCreateInfo->pViewportState : nullptr; if (pVPState) { - uint32_t vpCnt = pVPState->viewportCount; + + // If viewports are dynamic, ignore them here. + uint32_t vpCnt = (pVPState->pViewports && !isDynamicState(Viewports)) ? pVPState->viewportCount : 0; _viewports.reserve(vpCnt); for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) { - // If viewport is dyanamic, we still add a dummy so that the count will be tracked. 
- VkViewport vp; - if ( !supportsDynamicState(VK_DYNAMIC_STATE_VIEWPORT) ) { vp = pVPState->pViewports[vpIdx]; } - _viewports.push_back(vp); + _viewports.push_back(pVPState->pViewports[vpIdx]); } - uint32_t sCnt = pVPState->scissorCount; + // If scissors are dynamic, ignore them here. + uint32_t sCnt = (pVPState->pScissors && !isDynamicState(Scissors)) ? pVPState->scissorCount : 0; _scissors.reserve(sCnt); for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) { - // If scissor is dyanamic, we still add a dummy so that the count will be tracked. - VkRect2D sc; - if ( !supportsDynamicState(VK_DYNAMIC_STATE_SCISSOR) ) { sc = pVPState->pScissors[sIdx]; } - _scissors.push_back(sc); + _scissors.push_back(pVPState->pScissors[sIdx]); } } } +static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { + switch (vkDynamicState) { + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: return BlendConstants; + case VK_DYNAMIC_STATE_CULL_MODE: return CullMode; + case VK_DYNAMIC_STATE_DEPTH_BIAS: return DepthBias; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS: return DepthBounds; + case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE: return DepthBoundsTestEnable; + case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT: return DepthClipEnable; + case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT: return DepthClipEnable; + case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP: return DepthCompareOp; + case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: return DepthTestEnable; + case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: return DepthWriteEnable; + case VK_DYNAMIC_STATE_FRONT_FACE: return FrontFace; + case VK_DYNAMIC_STATE_POLYGON_MODE_EXT: return PolygonMode; + case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: return PrimitiveTopology; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: return SampleLocations; + case VK_DYNAMIC_STATE_SCISSOR: return Scissors; + case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: return Scissors; + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: return StencilCompareMask; + case VK_DYNAMIC_STATE_STENCIL_OP: return StencilOp; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: return StencilReference; + case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE: return StencilTestEnable; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: return StencilWriteMask; + case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: return VertexStride; + case VK_DYNAMIC_STATE_VIEWPORT: return Viewports; + case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT: return Viewports; + default: return Unknown; + } +} + +// This is executed first during pipeline creation. Do not depend on any internal state here. +void MVKGraphicsPipeline::initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo) { + const auto* pDS = pCreateInfo->pDynamicState; + if ( !pDS ) { return; } + + for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) { + VkDynamicState vkDynState = pDS->pDynamicStates[i]; + bool isDynamic = true; + + // Some dynamic states have other restrictions + switch (vkDynState) { + case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: + isDynamic = _device->_pMetalFeatures->dynamicVertexStride; + if ( !isDynamic ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device and platform does not support VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE (macOS 14.0 or iOS/tvOS 17.0, plus either Apple4 or Mac2 GPU).")); } + break; + default: + break; + } + + if (isDynamic) { _dynamicState.enable(getRenderStateType(vkDynState)); } + } +} + // Either returns an existing pipeline state or compiles a new one. 
id MVKGraphicsPipeline::getOrCompilePipeline(MTLRenderPipelineDescriptor* plDesc, id& plState) { @@ -613,7 +632,7 @@ void MVKGraphicsPipeline::initCustomSamplePositions(const VkGraphicsPipelineCrea case VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT: { auto* pSampLocnsCreateInfo = (VkPipelineSampleLocationsStateCreateInfoEXT*)next; _isUsingCustomSamplePositions = pSampLocnsCreateInfo->sampleLocationsEnable; - if (_isUsingCustomSamplePositions && !supportsDynamicState(VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT)) { + if (_isUsingCustomSamplePositions && !isDynamicState(SampleLocations)) { for (uint32_t slIdx = 0; slIdx < pSampLocnsCreateInfo->sampleLocationsInfo.sampleLocationsCount; slIdx++) { auto& sl = pSampLocnsCreateInfo->sampleLocationsInfo.pSampleLocations[slIdx]; _customSamplePositions.push_back(MTLSamplePositionMake(sl.x, sl.y)); @@ -1328,8 +1347,9 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, } // Vertex buffer bindings - uint32_t vbCnt = pVI->vertexBindingDescriptionCount; + bool isVtxStrideStatic = !isDynamicState(VertexStride); uint32_t maxBinding = 0; + uint32_t vbCnt = pVI->vertexBindingDescriptionCount; for (uint32_t i = 0; i < vbCnt; i++) { const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i]; if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) { @@ -1352,7 +1372,7 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, vbDesc.stepFunction = (decltype(vbDesc.stepFunction))MTLStepFunctionConstant; vbDesc.stepRate = 0; } else { - vbDesc.stride = pVKVB->stride; + vbDesc.stride = isVtxStrideStatic ? pVKVB->stride : MTLBufferLayoutStrideDynamic; vbDesc.stepFunction = (decltype(vbDesc.stepFunction))mvkMTLStepFunctionFromVkVertexInputRate(pVKVB->inputRate, isTessellationPipeline()); vbDesc.stepRate = 1; } @@ -1903,11 +1923,12 @@ bool MVKGraphicsPipeline::isRenderingPoints(const VkGraphicsPipelineCreateInfo* (pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT))); } -// We disable rasterization if either rasterizerDiscard is enabled or the cull mode dictates it. +// We disable rasterization if either rasterizerDiscard is enabled or the static cull mode dictates it. bool MVKGraphicsPipeline::isRasterizationDisabled(const VkGraphicsPipelineCreateInfo* pCreateInfo) { return (pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->rasterizerDiscardEnable || - ((pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK) && pCreateInfo->pInputAssemblyState && + ((pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK) && !isDynamicState(CullMode) && + pCreateInfo->pInputAssemblyState && (mvkMTLPrimitiveTopologyClassFromVkPrimitiveTopology(pCreateInfo->pInputAssemblyState->topology) == MTLPrimitiveTopologyClassTriangle)))); } diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index d26b53a4..8503b917 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -60,6 +60,9 @@ typedef struct { #pragma mark - #pragma mark Vulkan support +/** A generic 32-bit color permitting float, int32, or uint32 values. */ +typedef VkClearColorValue MVKColor32; + /** Tracks the Vulkan command currently being used. */ typedef enum : uint8_t { kMVKCommandUseNone = 0, /**< No use defined. 
*/ @@ -142,7 +145,7 @@ static inline std::string mvkGetMoltenVKVersionString(uint32_t mvkVersion) { /** Returns whether the specified positive value is a power-of-two. */ template static constexpr bool mvkIsPowerOfTwo(T value) { - return value && ((value & (value - 1)) == 0); + return value > 0 && ((value & (value - 1)) == 0); } /** @@ -278,21 +281,21 @@ void mvkFlipVertically(void* rowMajorData, uint32_t rowCount, size_t bytesPerRow * They are ridiculously large numbers, but low enough to be safely used as both * uint and int values without risking overflowing between positive and negative values. */ -static int32_t kMVKUndefinedLargePositiveInt32 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); -static int32_t kMVKUndefinedLargeNegativeInt32 = -kMVKUndefinedLargePositiveInt32; -static uint32_t kMVKUndefinedLargeUInt32 = kMVKUndefinedLargePositiveInt32; -static int64_t kMVKUndefinedLargePositiveInt64 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); -static int64_t kMVKUndefinedLargeNegativeInt64 = -kMVKUndefinedLargePositiveInt64; -static uint64_t kMVKUndefinedLargeUInt64 = kMVKUndefinedLargePositiveInt64; +static constexpr int32_t kMVKUndefinedLargePositiveInt32 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); +static constexpr int32_t kMVKUndefinedLargeNegativeInt32 = -kMVKUndefinedLargePositiveInt32; +static constexpr uint32_t kMVKUndefinedLargeUInt32 = kMVKUndefinedLargePositiveInt32; +static constexpr int64_t kMVKUndefinedLargePositiveInt64 = mvkEnsurePowerOfTwo(std::numeric_limits::max() / 2); +static constexpr int64_t kMVKUndefinedLargeNegativeInt64 = -kMVKUndefinedLargePositiveInt64; +static constexpr uint64_t kMVKUndefinedLargeUInt64 = kMVKUndefinedLargePositiveInt64; #pragma mark Vulkan structure support functions /** Returns a VkExtent2D created from the width and height of a VkExtent3D. */ -static inline VkExtent2D mvkVkExtent2DFromVkExtent3D(VkExtent3D e) { return {e.width, e.height }; } +static constexpr VkExtent2D mvkVkExtent2DFromVkExtent3D(VkExtent3D e) { return {e.width, e.height }; } /** Returns a VkExtent3D, created from a VkExtent2D, and with depth of 1. */ -static inline VkExtent3D mvkVkExtent3DFromVkExtent2D(VkExtent2D e) { return {e.width, e.height, 1U }; } +static constexpr VkExtent3D mvkVkExtent3DFromVkExtent2D(VkExtent2D e) { return {e.width, e.height, 1U }; } /** Returns whether the two Vulkan extents are equal by comparing their respective components. */ static constexpr bool mvkVkExtent2DsAreEqual(VkExtent2D e1, VkExtent2D e2) { @@ -333,13 +336,13 @@ static constexpr uint32_t mvkPackSwizzle(VkComponentMapping components) { } /** Unpacks a single 32-bit word containing four swizzle components. */ -static inline VkComponentMapping mvkUnpackSwizzle(uint32_t packed) { - VkComponentMapping components; - components.r = (VkComponentSwizzle)((packed >> 0) & 0xFF); - components.g = (VkComponentSwizzle)((packed >> 8) & 0xFF); - components.b = (VkComponentSwizzle)((packed >> 16) & 0xFF); - components.a = (VkComponentSwizzle)((packed >> 24) & 0xFF); - return components; +static constexpr VkComponentMapping mvkUnpackSwizzle(uint32_t packed) { + return { + .r = (VkComponentSwizzle)((packed >> 0) & 0xFF), + .g = (VkComponentSwizzle)((packed >> 8) & 0xFF), + .b = (VkComponentSwizzle)((packed >> 16) & 0xFF), + .a = (VkComponentSwizzle)((packed >> 24) & 0xFF), + }; } /** @@ -353,8 +356,8 @@ static inline VkComponentMapping mvkUnpackSwizzle(uint32_t packed) { * and matches any value. 
*/ static constexpr bool mvkVKComponentSwizzlesMatch(VkComponentSwizzle cs1, - VkComponentSwizzle cs2, - VkComponentSwizzle csPos) { + VkComponentSwizzle cs2, + VkComponentSwizzle csPos) { return ((cs1 == cs2) || ((cs1 == VK_COMPONENT_SWIZZLE_IDENTITY) && (cs2 == csPos)) || ((cs2 == VK_COMPONENT_SWIZZLE_IDENTITY) && (cs1 == csPos)) || @@ -383,25 +386,25 @@ static constexpr bool mvkVkComponentMappingsMatch(VkComponentMapping cm1, VkComp #pragma mark Math /** Rounds the value to nearest integer using half-to-even rounding. */ -static inline double mvkRoundHalfToEven(const double val) { +static constexpr double mvkRoundHalfToEven(const double val) { return val - std::remainder(val, 1.0); // remainder() uses half-to-even rounding, and unfortunately isn't constexpr until C++23. } /** Returns whether the value will fit inside the numeric type. */ template -const bool mvkFits(const Tval& val) { +static constexpr const bool mvkFits(const Tval& val) { return val <= std::numeric_limits::max(); } /** Clamps the value between the lower and upper bounds, inclusive. */ template -const T& mvkClamp(const T& val, const T& lower, const T& upper) { +static constexpr const T& mvkClamp(const T& val, const T& lower, const T& upper) { return std::min(std::max(val, lower), upper); } /** Returns the result of a division, rounded up. */ template -constexpr typename std::common_type::type mvkCeilingDivide(T numerator, U denominator) { +static constexpr typename std::common_type::type mvkCeilingDivide(T numerator, U denominator) { typedef typename std::common_type::type R; // Short circuit very common usecase of dividing by one. return (denominator == 1) ? numerator : (R(numerator) + denominator - 1) / denominator; @@ -427,18 +430,18 @@ struct MVKAbs { /** Returns the absolute value of the difference of two numbers. */ template -constexpr typename std::common_type::type mvkAbsDiff(T x, U y) { +static constexpr typename std::common_type::type mvkAbsDiff(T x, U y) { return x >= y ? x - y : y - x; } /** Returns the greatest common divisor of two numbers. */ template -constexpr T mvkGreatestCommonDivisorImpl(T a, T b) { +static constexpr T mvkGreatestCommonDivisorImpl(T a, T b) { return b == 0 ? a : mvkGreatestCommonDivisorImpl(b, a % b); } template -constexpr typename std::common_type::type mvkGreatestCommonDivisor(T a, U b) { +static constexpr typename std::common_type::type mvkGreatestCommonDivisor(T a, U b) { typedef typename std::common_type::type R; typedef typename std::make_unsigned::type UI; return static_cast(mvkGreatestCommonDivisorImpl(static_cast(MVKAbs::eval(a)), static_cast(MVKAbs::eval(b)))); @@ -446,7 +449,7 @@ constexpr typename std::common_type::type mvkGreatestCommonDivisor(T a, U /** Returns the least common multiple of two numbers. */ template -constexpr typename std::common_type::type mvkLeastCommonMultiple(T a, U b) { +static constexpr typename std::common_type::type mvkLeastCommonMultiple(T a, U b) { typedef typename std::common_type::type R; return (a == 0 && b == 0) ? 0 : MVKAbs::eval(a) / mvkGreatestCommonDivisor(a, b) * MVKAbs::eval(b); } @@ -463,7 +466,7 @@ constexpr typename std::common_type::type mvkLeastCommonMultiple(T a, U b) * value returned by previous calls as the seed in subsequent calls. 
*/ template -std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 5381) { +static constexpr std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 5381) { std::size_t hash = seed; for (std::size_t i = 0; i < count; i++) { hash = ((hash << 5) + hash) ^ pVals[i]; } return hash; @@ -497,7 +500,7 @@ protected: /** Ensures the size of the specified container is at least the specified size. */ template -void mvkEnsureSize(C& container, S size) { +static void mvkEnsureSize(C& container, S size) { if (size > container.size()) { container.resize(size); } } @@ -506,7 +509,7 @@ void mvkEnsureSize(C& container, S size) { * each object, including freeing the object memory, and clearing the container. */ template -void mvkDestroyContainerContents(C& container) { +static void mvkDestroyContainerContents(C& container) { for (auto elem : container) { elem->destroy(); } container.clear(); } @@ -517,7 +520,7 @@ void mvkDestroyContainerContents(C& container) { */ #ifdef __OBJC__ template -void mvkReleaseContainerContents(C& container) { +static void mvkReleaseContainerContents(C& container) { for (auto elem : container) { [elem release]; } container.clear(); } @@ -525,14 +528,14 @@ void mvkReleaseContainerContents(C& container) { /** Returns whether the container contains an item equal to the value. */ template -bool mvkContains(C& container, const T& val) { +static constexpr bool mvkContains(C& container, const T& val) { for (const T& cVal : container) { if (cVal == val) { return true; } } return false; } /** Removes the first occurance of the specified value from the specified container. */ template -void mvkRemoveFirstOccurance(C& container, T val) { +static void mvkRemoveFirstOccurance(C& container, T val) { for (auto iter = container.begin(), end = container.end(); iter != end; iter++) { if( *iter == val ) { container.erase(iter); @@ -543,7 +546,7 @@ void mvkRemoveFirstOccurance(C& container, T val) { /** Removes all occurances of the specified value from the specified container. */ template -void mvkRemoveAllOccurances(C& container, T val) { +static void mvkRemoveAllOccurances(C& container, T val) { container.erase(std::remove(container.begin(), container.end(), val), container.end()); } @@ -552,7 +555,7 @@ void mvkRemoveAllOccurances(C& container, T val) { /** Selects and returns one of the values, based on the platform OS. */ template -const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) { +static constexpr const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) { #if MVK_IOS_OR_TVOS return iOSVal; #endif @@ -566,22 +569,29 @@ const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) { * The optional count allows clearing multiple elements in an array. */ template -void mvkClear(T* pVal, size_t count = 1) { if (pVal) { memset(pVal, 0, sizeof(T) * count); } } +static void mvkClear(T* pDst, size_t count = 1) { + if ( !pDst ) { return; } // Bad pointer + if constexpr(std::is_arithmetic_v) { if (count == 1) { *pDst = static_cast(0); } } // Fast clear of a single primitive + memset(pDst, 0, sizeof(T) * count); // Memory clear of complex content or array +} /** * If pVal is not null, overrides the const declaration, and clears the memory occupied by *pVal * by writing zeros to all bytes. The optional count allows clearing multiple elements in an array. 
 */
 template<typename T>
-void mvkClear(const T* pVal, size_t count = 1) { mvkClear((T*)pVal, count); }
+static void mvkClear(const T* pVal, size_t count = 1) { mvkClear((T*)pVal, count); }
 
 /**
  * If pSrc and pDst are both not null, copies the contents of the source value to the
  * destination value. The optional count allows copying of multiple elements in an array.
  */
 template<typename T>
-void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) {
-    if (pSrc && pDst) { memcpy(pDst, pSrc, sizeof(T) * count); }
+static void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) {
+    if ( !pDst || !pSrc ) { return; }        // Bad pointers
+    if (pDst == pSrc) { return; }            // Same object
+    if constexpr(std::is_arithmetic_v<T>) { if (count == 1) { *pDst = *pSrc; } }    // Fast copy of a single primitive
+    memcpy(pDst, pSrc, sizeof(T) * count);   // Memory copy of complex content or array
 }
 
 /**
@@ -589,8 +599,11 @@ void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) {
  * otherwise returns false. The optional count allows comparing multiple elements in an array.
  */
 template<typename T>
-bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) {
-    return (pV1 && pV2) ? (memcmp(pV1, pV2, sizeof(T) * count) == 0) : false;
+static constexpr bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) {
+    if ( !pV1 || !pV2 ) { return false; }    // Bad pointers
+    if (pV1 == pV2) { return true; }         // Same object
+    if constexpr(std::is_arithmetic_v<T>) { if (count == 1) { return *pV1 == *pV2; } }    // Fast compare of a single primitive
+    return memcmp(pV1, pV2, sizeof(T) * count) == 0;    // Memory compare of complex content or array
 }
 
 /**
@@ -632,10 +645,18 @@ static constexpr bool mvkSetOrClear(T* pDest, const T* pSrc) {
 template<typename Tv, typename Tm>
 void mvkEnableFlags(Tv& value, const Tm bitMask) { value = (Tv)(value | bitMask); }
 
+/** Enables all the flags (sets bits to 1) within the value parameter. */
+template<typename Tv>
+void mvkEnableAllFlags(Tv& value) { value = ~static_cast<Tv>(0); }
+
 /** Disables the flags (sets bits to 0) within the value parameter specified by the bitMask parameter. */
 template<typename Tv, typename Tm>
 void mvkDisableFlags(Tv& value, const Tm bitMask) { value = (Tv)(value & ~(Tv)bitMask); }
 
+/** Disables all the flags (sets bits to 0) within the value parameter. */
+template<typename Tv>
+void mvkDisableAllFlags(Tv& value) { value = static_cast<Tv>(0); }
+
 /** Returns whether the specified value has ANY of the flags specified in bitMask enabled (set to 1).
*/ template static constexpr bool mvkIsAnyFlagEnabled(Tv value, const Tm bitMask) { return ((value & bitMask) != 0); } diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm index 90cb72e0..ef3cf1ca 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm @@ -583,24 +583,33 @@ MTLMultisampleStencilResolveFilter mvkMTLMultisampleStencilResolveFilterFromVkRe } #endif -MVK_PUBLIC_SYMBOL MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport) { - MTLViewport mtlViewport; - mtlViewport.originX = vkViewport.x; - mtlViewport.originY = vkViewport.y; - mtlViewport.width = vkViewport.width; - mtlViewport.height = vkViewport.height; - mtlViewport.znear = vkViewport.minDepth; - mtlViewport.zfar = vkViewport.maxDepth; - return mtlViewport; +MVK_PUBLIC_SYMBOL MTLViewport mvkMTLViewportFromVkViewport(const VkViewport vkViewport) { + return { + .originX = vkViewport.x, + .originY = vkViewport.y, + .width = vkViewport.width, + .height = vkViewport.height, + .znear = vkViewport.minDepth, + .zfar = vkViewport.maxDepth + }; } -MVK_PUBLIC_SYMBOL MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect) { - MTLScissorRect mtlScissor; - mtlScissor.x = vkRect.offset.x; - mtlScissor.y = vkRect.offset.y; - mtlScissor.width = vkRect.extent.width; - mtlScissor.height = vkRect.extent.height; - return mtlScissor; +MVK_PUBLIC_SYMBOL MTLScissorRect mvkMTLScissorRectFromVkRect2D(const VkRect2D vkRect) { + return { + .x = (NSUInteger)max(vkRect.offset.x, 0), + .y = (NSUInteger)max(vkRect.offset.y, 0), + .width = vkRect.extent.width, + .height = vkRect.extent.height + }; +} + +MVK_PUBLIC_SYMBOL VkRect2D mvkVkRect2DFromMTLScissorRect(const MTLScissorRect mtlScissorRect) { + return { + .offset = { .x = (int32_t)mtlScissorRect.x, + .y = (int32_t)mtlScissorRect.y }, + .extent = { .width = (uint32_t)mtlScissorRect.width, + .height = (uint32_t)mtlScissorRect.height } + }; } MVK_PUBLIC_SYMBOL MTLCompareFunction mvkMTLCompareFunctionFromVkCompareOp(VkCompareOp vkOp) { diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index 74819d84..cd6d15bf 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -1564,13 +1564,14 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindIndexBuffer( MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindVertexBuffers( VkCommandBuffer commandBuffer, - uint32_t startBinding, + uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets) { MVKTraceVulkanCallStart(); - MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, startBinding, bindingCount, pBuffers, pOffsets); + MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, + firstBinding, bindingCount, pBuffers, pOffsets, nullptr, nullptr); MVKTraceVulkanCallEnd(); } @@ -2536,7 +2537,8 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindVertexBuffers2( const VkDeviceSize* pStrides) { MVKTraceVulkanCallStart(); - + MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, + firstBinding, bindingCount, pBuffers, pOffsets, pSizes, pStrides); MVKTraceVulkanCallEnd(); } @@ -2643,7 +2645,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBoundsTestEnable( VkBool32 depthBoundsTestEnable) { MVKTraceVulkanCallStart(); - + MVKAddCmd(SetDepthBoundsTestEnable, commandBuffer, depthBoundsTestEnable); MVKTraceVulkanCallEnd(); } @@ -2652,7 +2654,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthCompareOp( VkCompareOp 
depthCompareOp) { MVKTraceVulkanCallStart(); - + MVKAddCmd(SetDepthCompareOp, commandBuffer, depthCompareOp); MVKTraceVulkanCallEnd(); } @@ -2661,7 +2663,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthTestEnable( VkBool32 depthTestEnable) { MVKTraceVulkanCallStart(); - + MVKAddCmd(SetDepthTestEnable, commandBuffer, depthTestEnable); MVKTraceVulkanCallEnd(); } @@ -2670,6 +2672,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthWriteEnable( VkBool32 depthWriteEnable) { MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthWriteEnable, commandBuffer, depthWriteEnable); MVKTraceVulkanCallEnd(); } @@ -2699,6 +2702,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveTopology( VkPrimitiveTopology primitiveTopology) { MVKTraceVulkanCallStart(); + MVKAddCmd(SetPrimitiveTopology, commandBuffer, primitiveTopology); MVKTraceVulkanCallEnd(); } @@ -2723,6 +2727,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilOp( VkCompareOp compareOp) { MVKTraceVulkanCallStart(); + MVKAddCmd(SetStencilOp, commandBuffer, faceMask, failOp, passOp, depthFailOp, compareOp); MVKTraceVulkanCallEnd(); } @@ -2731,6 +2736,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilTestEnable( VkBool32 stencilTestEnable) { MVKTraceVulkanCallStart(); + MVKAddCmd(SetStencilTestEnable, commandBuffer, stencilTestEnable); MVKTraceVulkanCallEnd(); } From f4423428e3b0d1034130451d666ebdf7d63dbaff Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 10 Oct 2023 12:19:15 -0400 Subject: [PATCH 19/41] Add support for VK_EXT_extended_dynamic_state2 extension. - Add MVKPipelineCommandEncoderState subclasses MVKGraphicsPipelineCommandEncoderState & MVKComputePipelineCommandEncoderState, track patch control points in MVKGraphicsPipelineCommandEncoderState, and add getGraphicsPipeline() & getComputePipeline() to simplify casting. - Rename MVKRasterizingCommandEncoderState to MVKRenderingCommandEncoderState, and MVKCommandEncoder::_rasterizingState to _renderingState. - Rename MVKCmdRenderPass.h/mm to MVKCmdRendering.h/mm. - Move MVKCmdExecuteCommands from MVKCmdRenderPass.h/mm to MVKCmdPipeline.h/mm. - While working on vkCmdSetLogicOpEXT(), add support for vkCmdSetLogicOpEnableEXT() from VK_EXT_extended_dynamic_state3. 
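
For context, a minimal sketch of how a client is expected to drive the dynamic state this change adds, assuming the extendedDynamicState2 feature (and, where used, its LogicOp and PatchControlPoints sub-features) has been enabled and the bound pipeline lists the matching VK_DYNAMIC_STATE_* values. recordDynamicState2, cmdBuf, pipeline, and vertexCount are placeholder names, and the *EXT entry points are called directly for brevity; a portable client would normally fetch them with vkGetDeviceProcAddr. Illustrative only, not part of the patch content below.

    #include <vulkan/vulkan.h>

    // Records the extended-dynamic-state-2 commands handled by this change,
    // then issues a draw that picks up that state.
    static void recordDynamicState2(VkCommandBuffer cmdBuf, VkPipeline pipeline, uint32_t vertexCount) {
        vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        vkCmdSetRasterizerDiscardEnableEXT(cmdBuf, VK_FALSE);   // Keep rasterization active
        vkCmdSetDepthBiasEnableEXT(cmdBuf, VK_TRUE);            // Enable depth bias...
        vkCmdSetDepthBias(cmdBuf, 1.25f, 0.0f, 1.75f);          // ...then set constant factor, clamp, slope factor (core 1.0 command)
        vkCmdSetPrimitiveRestartEnableEXT(cmdBuf, VK_FALSE);    // No primitive restart
        vkCmdSetPatchControlPointsEXT(cmdBuf, 3);               // Patch size; only meaningful for tessellation pipelines
        vkCmdSetLogicOpEXT(cmdBuf, VK_LOGIC_OP_COPY);           // Only VK_LOGIC_OP_COPY maps onto Metal (see MVKCmdSetLogicOp)
        vkCmdDraw(cmdBuf, vertexCount, 1, 0, 0);
    }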
--- Docs/MoltenVK_Runtime_UserGuide.md | 3 +- Docs/Whats_New.md | 3 +- MoltenVK/MoltenVK.xcodeproj/project.pbxproj | 40 ++--- MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm | 2 +- MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm | 28 ++-- MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h | 28 ++++ MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm | 27 ++++ .../{MVKCmdRenderPass.h => MVKCmdRendering.h} | 138 +++++++++++++---- ...MVKCmdRenderPass.mm => MVKCmdRendering.mm} | 140 +++++++++++++----- MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm | 2 +- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 8 +- .../MoltenVK/Commands/MVKCommandBuffer.mm | 10 +- .../Commands/MVKCommandEncoderState.h | 58 ++++++-- .../Commands/MVKCommandEncoderState.mm | 79 ++++++---- MoltenVK/MoltenVK/Commands/MVKCommandPool.h | 2 +- .../MoltenVK/Commands/MVKCommandTypePools.def | 8 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 7 + .../GPUObjects/MVKDeviceFeatureStructs.def | 1 + MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 3 + MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 12 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 54 ++++--- MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/Vulkan/vulkan.mm | 67 ++++++++- 23 files changed, 541 insertions(+), 180 deletions(-) rename MoltenVK/MoltenVK/Commands/{MVKCmdRenderPass.h => MVKCmdRendering.h} (86%) rename MoltenVK/MoltenVK/Commands/{MVKCmdRenderPass.mm => MVKCmdRendering.mm} (82%) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 27ed0c2e..79f3f1a5 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -363,7 +363,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_EXT_descriptor_indexing` *(initial release limited to Metal Tier 1: 96/128 textures, 16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using an Intel GPU, and if Metal argument buffers enabled in config)* -- `VK_EXT_extended_dynamic_state` *(requires Metal 3.1)* +- `VK_EXT_extended_dynamic_state` *(requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`)* +- `VK_EXT_extended_dynamic_state2` - `VK_EXT_external_memory_host` - `VK_EXT_fragment_shader_interlock` *(requires Metal 2.0 and Raster Order Groups)* - `VK_EXT_host_query_reset` diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 6da6eb0c..6ab13f7a 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,7 +20,8 @@ Released TBD - Add support for extensions: - `VK_KHR_synchronization2` - - `VK_EXT_extended_dynamic_state` + - `VK_EXT_extended_dynamic_state` *(requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`)* + - `VK_EXT_extended_dynamic_state2` - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. - Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. 
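
Since part of VK_EXT_extended_dynamic_state is gated on Metal 3.1, a client should confirm what the device actually exposes before baking these dynamic states into its pipelines. A minimal sketch, assuming a Vulkan 1.1+ instance; queryExtendedDynamicState and physDev are placeholder names, and this is illustrative only, not part of the patch content below.

    #include <vulkan/vulkan.h>

    // Chains both extended-dynamic-state feature structs through
    // vkGetPhysicalDeviceFeatures2 and inspects the reported booleans.
    static void queryExtendedDynamicState(VkPhysicalDevice physDev) {
        VkPhysicalDeviceExtendedDynamicState2FeaturesEXT eds2Feats = {};
        eds2Feats.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT;

        VkPhysicalDeviceExtendedDynamicStateFeaturesEXT edsFeats = {};
        edsFeats.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
        edsFeats.pNext = &eds2Feats;

        VkPhysicalDeviceFeatures2 feats2 = {};
        feats2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        feats2.pNext = &edsFeats;

        vkGetPhysicalDeviceFeatures2(physDev, &feats2);

        // extendedDynamicState covers cull mode, front face, topology, viewport/scissor
        // counts, and the depth/stencil toggles. Per the user-guide note above, the vertex
        // binding stride dynamic state additionally needs Metal 3.1, and MoltenVK reports an
        // error at pipeline creation when that state is requested without such support.
        // extendedDynamicState2LogicOp is of limited use here, since Metal only honors
        // VK_LOGIC_OP_COPY.
        bool hasEDS  = (edsFeats.extendedDynamicState == VK_TRUE);
        bool hasEDS2 = (eds2Feats.extendedDynamicState2 == VK_TRUE);
        bool hasPatchControlPoints = (eds2Feats.extendedDynamicState2PatchControlPoints == VK_TRUE);
        (void)hasEDS; (void)hasEDS2; (void)hasPatchControlPoints;
    }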
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj index 1dffab36..46ff50fc 100644 --- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj +++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj @@ -46,7 +46,7 @@ 2FEA0A6724902F9F00EEF3AD /* MVKCommonEnvironment.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429D1FB4CF82009FCCB8 /* MVKCommonEnvironment.h */; }; 2FEA0A6824902F9F00EEF3AD /* MVKWatermark.h in Headers */ = {isa = PBXBuildFile; fileRef = A98149491FB6A3F7005F00B4 /* MVKWatermark.h */; }; 2FEA0A6924902F9F00EEF3AD /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; - 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; + 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; 2FEA0A6B24902F9F00EEF3AD /* MVKCmdPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB76E1C7DFB4800632CA3 /* MVKCmdPipeline.h */; }; 2FEA0A6C24902F9F00EEF3AD /* MVKSmallVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */; }; 2FEA0A6D24902F9F00EEF3AD /* MVKPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB78D1C7DFB4800632CA3 /* MVKPipeline.h */; }; @@ -90,7 +90,7 @@ 2FEA0A9424902F9F00EEF3AD /* MVKCommandPool.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB77B1C7DFB4800632CA3 /* MVKCommandPool.mm */; }; 2FEA0A9524902F9F00EEF3AD /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; }; 2FEA0A9624902F9F00EEF3AD /* MVKCommandBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7791C7DFB4800632CA3 /* MVKCommandBuffer.mm */; }; - 2FEA0A9724902F9F00EEF3AD /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; + 2FEA0A9724902F9F00EEF3AD /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; 2FEA0A9824902F9F00EEF3AD /* MVKBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7801C7DFB4800632CA3 /* MVKBuffer.mm */; }; 2FEA0A9924902F9F00EEF3AD /* mvk_datatypes.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A91C7DFB4800632CA3 /* mvk_datatypes.mm */; }; 2FEA0A9A24902F9F00EEF3AD /* MVKExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A909F65E213B190700FCD6BE /* MVKExtensions.mm */; }; @@ -172,10 +172,10 @@ A94FB7C11C7DFB4800632CA3 /* MVKCmdQueries.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */; }; A94FB7C21C7DFB4800632CA3 /* MVKCmdQueries.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */; }; A94FB7C31C7DFB4800632CA3 /* MVKCmdQueries.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */; }; - A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; - A94FB7C51C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; - A94FB7C61C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; - A94FB7C71C7DFB4800632CA3 /* 
MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; + A94FB7C41C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; + A94FB7C51C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; + A94FB7C61C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; + A94FB7C71C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; A94FB7C81C7DFB4800632CA3 /* MVKCmdDraw.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */; }; A94FB7C91C7DFB4800632CA3 /* MVKCmdDraw.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */; }; A94FB7CA1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; }; @@ -420,7 +420,7 @@ DCFD7F0B2A45BC6E007BBBF7 /* MVKCommonEnvironment.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429D1FB4CF82009FCCB8 /* MVKCommonEnvironment.h */; }; DCFD7F0C2A45BC6E007BBBF7 /* MVKWatermark.h in Headers */ = {isa = PBXBuildFile; fileRef = A98149491FB6A3F7005F00B4 /* MVKWatermark.h */; }; DCFD7F0D2A45BC6E007BBBF7 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; - DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; }; + DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; }; DCFD7F0F2A45BC6E007BBBF7 /* MVKCmdPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB76E1C7DFB4800632CA3 /* MVKCmdPipeline.h */; }; DCFD7F102A45BC6E007BBBF7 /* MVKSmallVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */; }; DCFD7F112A45BC6E007BBBF7 /* MVKPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB78D1C7DFB4800632CA3 /* MVKPipeline.h */; }; @@ -466,7 +466,7 @@ DCFD7F3A2A45BC6E007BBBF7 /* MVKCommandPool.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB77B1C7DFB4800632CA3 /* MVKCommandPool.mm */; }; DCFD7F3B2A45BC6E007BBBF7 /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; }; DCFD7F3C2A45BC6E007BBBF7 /* MVKCommandBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7791C7DFB4800632CA3 /* MVKCommandBuffer.mm */; }; - DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; }; + DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; }; DCFD7F3E2A45BC6E007BBBF7 /* MVKBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7801C7DFB4800632CA3 /* MVKBuffer.mm */; }; DCFD7F3F2A45BC6E007BBBF7 /* MVKEnvironment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A9A5E9C525C0822700E9085E /* MVKEnvironment.cpp */; }; DCFD7F402A45BC6E007BBBF7 /* mvk_datatypes.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A91C7DFB4800632CA3 /* mvk_datatypes.mm */; }; @@ -595,8 +595,8 @@ 
A94FB76F1C7DFB4800632CA3 /* MVKCmdPipeline.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdPipeline.mm; sourceTree = ""; }; A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdQueries.h; sourceTree = ""; }; A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdQueries.mm; sourceTree = ""; }; - A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdRenderPass.h; sourceTree = ""; }; - A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdRenderPass.mm; sourceTree = ""; }; + A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdRendering.h; sourceTree = ""; }; + A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdRendering.mm; sourceTree = ""; }; A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdDraw.h; sourceTree = ""; }; A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdDraw.mm; sourceTree = ""; }; A94FB7761C7DFB4800632CA3 /* MVKCommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCommand.h; sourceTree = ""; }; @@ -744,8 +744,8 @@ A94FB76F1C7DFB4800632CA3 /* MVKCmdPipeline.mm */, A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */, A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */, - A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */, - A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */, + A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */, + A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */, A94FB76C1C7DFB4800632CA3 /* MVKCmdTransfer.h */, A94FB76D1C7DFB4800632CA3 /* MVKCmdTransfer.mm */, A94FB7761C7DFB4800632CA3 /* MVKCommand.h */, @@ -995,7 +995,7 @@ 2FEA0A6724902F9F00EEF3AD /* MVKCommonEnvironment.h in Headers */, 2FEA0A6824902F9F00EEF3AD /* MVKWatermark.h in Headers */, 2FEA0A6924902F9F00EEF3AD /* MVKOSExtensions.h in Headers */, - 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRenderPass.h in Headers */, + 2FEA0A6A24902F9F00EEF3AD /* MVKCmdRendering.h in Headers */, 2FEA0A6B24902F9F00EEF3AD /* MVKCmdPipeline.h in Headers */, 2FEA0A6C24902F9F00EEF3AD /* MVKSmallVectorAllocator.h in Headers */, 2FEA0A6D24902F9F00EEF3AD /* MVKPipeline.h in Headers */, @@ -1074,7 +1074,7 @@ A9F042A41FB4CF83009FCCB8 /* MVKCommonEnvironment.h in Headers */, A981495D1FB6A3F7005F00B4 /* MVKWatermark.h in Headers */, A9B51BD9225E986A00AC74D2 /* MVKOSExtensions.h in Headers */, - A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */, + A94FB7C41C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */, A94FB7BC1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DC24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */, @@ -1151,7 +1151,7 @@ A9F042A51FB4CF83009FCCB8 /* MVKCommonEnvironment.h in Headers */, A981495E1FB6A3F7005F00B4 /* MVKWatermark.h in Headers */, A9B51BDA225E986A00AC74D2 /* MVKOSExtensions.h in Headers */, - 
A94FB7C51C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */, + A94FB7C51C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */, A94FB7BD1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DD24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, A94FB7F91C7DFB4800632CA3 /* MVKPipeline.h in Headers */, @@ -1228,7 +1228,7 @@ DCFD7F0B2A45BC6E007BBBF7 /* MVKCommonEnvironment.h in Headers */, DCFD7F0C2A45BC6E007BBBF7 /* MVKWatermark.h in Headers */, DCFD7F0D2A45BC6E007BBBF7 /* MVKOSExtensions.h in Headers */, - DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRenderPass.h in Headers */, + DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRendering.h in Headers */, DCFD7F0F2A45BC6E007BBBF7 /* MVKCmdPipeline.h in Headers */, DCFD7F102A45BC6E007BBBF7 /* MVKSmallVectorAllocator.h in Headers */, DCFD7F112A45BC6E007BBBF7 /* MVKPipeline.h in Headers */, @@ -1674,7 +1674,7 @@ 2FEA0A9424902F9F00EEF3AD /* MVKCommandPool.mm in Sources */, 2FEA0A9524902F9F00EEF3AD /* MVKCmdDraw.mm in Sources */, 2FEA0A9624902F9F00EEF3AD /* MVKCommandBuffer.mm in Sources */, - 2FEA0A9724902F9F00EEF3AD /* MVKCmdRenderPass.mm in Sources */, + 2FEA0A9724902F9F00EEF3AD /* MVKCmdRendering.mm in Sources */, 2FEA0A9824902F9F00EEF3AD /* MVKBuffer.mm in Sources */, 2FEA0A9924902F9F00EEF3AD /* mvk_datatypes.mm in Sources */, 2FEA0A9A24902F9F00EEF3AD /* MVKExtensions.mm in Sources */, @@ -1734,7 +1734,7 @@ A94FB7D61C7DFB4800632CA3 /* MVKCommandPool.mm in Sources */, A94FB7CA1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */, A94FB7D21C7DFB4800632CA3 /* MVKCommandBuffer.mm in Sources */, - A94FB7C61C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */, + A94FB7C61C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */, A94FB7DE1C7DFB4800632CA3 /* MVKBuffer.mm in Sources */, A9A5E9C725C0822700E9085E /* MVKEnvironment.cpp in Sources */, A94FB82A1C7DFB4800632CA3 /* mvk_datatypes.mm in Sources */, @@ -1794,7 +1794,7 @@ A94FB7D71C7DFB4800632CA3 /* MVKCommandPool.mm in Sources */, A94FB7CB1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */, A94FB7D31C7DFB4800632CA3 /* MVKCommandBuffer.mm in Sources */, - A94FB7C71C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */, + A94FB7C71C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */, A94FB7DF1C7DFB4800632CA3 /* MVKBuffer.mm in Sources */, A9A5E9C925C0822700E9085E /* MVKEnvironment.cpp in Sources */, A94FB82B1C7DFB4800632CA3 /* mvk_datatypes.mm in Sources */, @@ -1854,7 +1854,7 @@ DCFD7F3A2A45BC6E007BBBF7 /* MVKCommandPool.mm in Sources */, DCFD7F3B2A45BC6E007BBBF7 /* MVKCmdDraw.mm in Sources */, DCFD7F3C2A45BC6E007BBBF7 /* MVKCommandBuffer.mm in Sources */, - DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRenderPass.mm in Sources */, + DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRendering.mm in Sources */, DCFD7F3E2A45BC6E007BBBF7 /* MVKBuffer.mm in Sources */, DCFD7F3F2A45BC6E007BBBF7 /* MVKEnvironment.cpp in Sources */, DCFD7F402A45BC6E007BBBF7 /* mvk_datatypes.mm in Sources */, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm index 1125963d..020f04b5 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm @@ -46,7 +46,7 @@ void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) { MTLRegion mtlThreadgroupCount = MTLRegionMake3D(_baseGroupX, _baseGroupY, _baseGroupZ, _groupCountX, _groupCountY, _groupCountZ); cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal id mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch); - auto* pipeline = 
(MVKComputePipeline*)cmdEncoder->_computePipelineState.getPipeline(); + auto* pipeline = cmdEncoder->_computePipelineState.getComputePipeline(); if (pipeline->allowsDispatchBase()) { if ([mtlEncoder respondsToSelector: @selector(setStageInRegion:)]) { // We'll use the stage-input region to pass the base along to the shader. diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index a1b71512..a7930a47 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -149,7 +149,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { return; } - auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -172,7 +172,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { } tessParams; uint32_t outControlPointCount = 0; if (pipeline->isTessellationPipeline()) { - tessParams.inControlPointCount = pipeline->getInputControlPointCount(); + tessParams.inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); tessParams.patchCount = mvkCeilingDivide(_vertexCount, tessParams.inControlPointCount) * _instanceCount; } @@ -299,13 +299,13 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType() vertexStart: _firstVertex vertexCount: _vertexCount instanceCount: instanceCount baseInstance: _firstInstance]; } else { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType() vertexStart: _firstVertex vertexCount: _vertexCount instanceCount: instanceCount]; @@ -374,7 +374,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { return; } - auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. 
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -401,7 +401,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { } tessParams; uint32_t outControlPointCount = 0; if (pipeline->isTessellationPipeline()) { - tessParams.inControlPointCount = pipeline->getInputControlPointCount(); + tessParams.inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); tessParams.patchCount = mvkCeilingDivide(_indexCount, tessParams.inControlPointCount) * _instanceCount; } @@ -533,7 +533,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer @@ -542,7 +542,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { baseVertex: _vertexOffset baseInstance: _firstInstance]; } else { - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer @@ -649,7 +649,7 @@ void MVKCmdDrawIndirect::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { - auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -686,7 +686,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { // encoding and execution. So we don't know how big to make the buffers. // We must assume an arbitrarily large number of vertices may be submitted. // But not too many, or we'll exhaust available VRAM. - inControlPointCount = pipeline->getInputControlPointCount(); + inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); @@ -928,7 +928,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->_graphicsResourcesState.beginMetalRenderPass(); cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass(); } else { - [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() + [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indirectBuffer: mtlIndBuff indirectBufferOffset: mtlIndBuffOfst]; mtlIndBuffOfst += needsInstanceAdjustment ? 
sizeof(MTLDrawPrimitivesIndirectArguments) : _mtlIndirectBufferStride; @@ -999,7 +999,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI MVKIndexMTLBufferBinding ibb = ibbOrig; MVKIndexMTLBufferBinding ibbTriFan = ibb; - auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); MVKVertexAdjustments vtxAdjmts; vtxAdjmts.mtlIndexType = ibb.mtlIndexType; @@ -1034,7 +1034,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI // encoding and execution. So we don't know how big to make the buffers. // We must assume an arbitrarily large number of vertices may be submitted. // But not too many, or we'll exhaust available VRAM. - inControlPointCount = pipeline->getInputControlPointCount(); + inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); @@ -1315,7 +1315,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass(); } else { cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _directCmdFirstInstance); - [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_rasterizingState.getPrimitiveType() + [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer indexBufferOffset: ibb.offset diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h index aec8800c..84bc923a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h @@ -30,6 +30,34 @@ class MVKDescriptorSet; class MVKDescriptorUpdateTemplate; +#pragma mark - +#pragma mark MVKCmdExecuteCommands + +/** + * Vulkan command to execute secondary command buffers. + * Template class to balance vector pre-allocations between very common low counts and fewer larger counts. + */ +template +class MVKCmdExecuteCommands : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCommandBuffers); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + MVKSmallVector _secondaryCommandBuffers; +}; + +// Concrete template class implementations. 
+typedef MVKCmdExecuteCommands<1> MVKCmdExecuteCommands1; +typedef MVKCmdExecuteCommands<16> MVKCmdExecuteCommandsMulti; + + #pragma mark - #pragma mark MVKCmdPipelineBarrier diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm index 05e578f6..1a30f550 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm @@ -26,6 +26,33 @@ #include "mvk_datatypes.hpp" +#pragma mark - +#pragma mark MVKCmdExecuteCommands + +template +VkResult MVKCmdExecuteCommands::setContent(MVKCommandBuffer* cmdBuff, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCommandBuffers) { + // Add clear values + _secondaryCommandBuffers.clear(); // Clear for reuse + _secondaryCommandBuffers.reserve(commandBuffersCount); + for (uint32_t cbIdx = 0; cbIdx < commandBuffersCount; cbIdx++) { + _secondaryCommandBuffers.push_back(MVKCommandBuffer::getMVKCommandBuffer(pCommandBuffers[cbIdx])); + } + cmdBuff->recordExecuteCommands(_secondaryCommandBuffers.contents()); + + return VK_SUCCESS; +} + +template +void MVKCmdExecuteCommands::encode(MVKCommandEncoder* cmdEncoder) { + for (auto& cb : _secondaryCommandBuffers) { cmdEncoder->encodeSecondary(cb); } +} + +template class MVKCmdExecuteCommands<1>; +template class MVKCmdExecuteCommands<16>; + + #pragma mark - #pragma mark MVKCmdPipelineBarrier diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h similarity index 86% rename from MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h rename to MoltenVK/MoltenVK/Commands/MVKCmdRendering.h index 1e3bae5c..2b11ae8e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h @@ -1,5 +1,5 @@ /* - * MVKCmdRenderPass.h + * MVKCmdRendering.h * * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) * @@ -207,34 +207,6 @@ protected: }; -#pragma mark - -#pragma mark MVKCmdExecuteCommands - -/** - * Vulkan command to execute secondary command buffers. - * Template class to balance vector pre-allocations between very common low counts and fewer larger counts. - */ -template -class MVKCmdExecuteCommands : public MVKCommand { - -public: - VkResult setContent(MVKCommandBuffer* cmdBuff, - uint32_t commandBuffersCount, - const VkCommandBuffer* pCommandBuffers); - - void encode(MVKCommandEncoder* cmdEncoder) override; - -protected: - MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - - MVKSmallVector _secondaryCommandBuffers; -}; - -// Concrete template class implementations. -typedef MVKCmdExecuteCommands<1> MVKCmdExecuteCommands1; -typedef MVKCmdExecuteCommands<16> MVKCmdExecuteCommandsMulti; - - #pragma mark - #pragma mark MVKCmdSetViewport @@ -337,6 +309,25 @@ protected: }; +#pragma mark - +#pragma mark MVKCmdSetDepthBiasEnable + +/** Vulkan command to dynamically enable or disable depth bias. */ +class MVKCmdSetDepthBiasEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthBiasEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _depthBiasEnable; +}; + + #pragma mark - #pragma mark MVKCmdSetBlendConstants @@ -356,6 +347,40 @@ protected: }; +#pragma mark - +#pragma mark MVKCmdSetLogicOp + +/** Vulkan command to dynamically set the blending logic operation. 
*/ +class MVKCmdSetLogicOp : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkLogicOp logicOp); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; +}; + + +#pragma mark - +#pragma mark MVKCmdSetLogicOpEnable + +/** Vulkan command to dynamically enable or disable the blending logic operation. */ +class MVKCmdSetLogicOpEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 logicOpEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; +}; + + #pragma mark - #pragma mark MVKCmdSetDepthTestEnable @@ -600,6 +625,25 @@ protected: }; +#pragma mark - +#pragma mark MVKCmdSetPatchControlPoints + +/** Vulkan command to dynamically set the number of patch control points. */ +class MVKCmdSetPatchControlPoints : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t patchControlPoints); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + uint32_t _patchControlPoints; +}; + + #pragma mark - #pragma mark MVKCmdSetPrimitiveTopology @@ -618,3 +662,39 @@ protected: VkPrimitiveTopology _primitiveTopology; }; + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveRestartEnable + +/** Vulkan command to dynamically enable or disable primitive restart functionality. */ +class MVKCmdSetPrimitiveRestartEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 primitiveRestartEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; +}; + + +#pragma mark - +#pragma mark MVKCmdSetRasterizerDiscardEnable + +/** Vulkan command to dynamically enable or disable rasterization. */ +class MVKCmdSetRasterizerDiscardEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 rasterizerDiscardEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _rasterizerDiscardEnable; +}; + diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm similarity index 82% rename from MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm rename to MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm index b5befc3a..33078a02 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm @@ -1,5 +1,5 @@ /* - * MVKCmdRenderPass.mm + * MVKCmdRendering.mm * * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) * @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include "MVKCmdRenderPass.h" +#include "MVKCmdRendering.h" #include "MVKCommandBuffer.h" #include "MVKCommandPool.h" #include "MVKFramebuffer.h" @@ -231,33 +231,6 @@ void MVKCmdSetSampleLocations::encode(MVKCommandEncoder* cmdEncoder) { } -#pragma mark - -#pragma mark MVKCmdExecuteCommands - -template -VkResult MVKCmdExecuteCommands::setContent(MVKCommandBuffer* cmdBuff, - uint32_t commandBuffersCount, - const VkCommandBuffer* pCommandBuffers) { - // Add clear values - _secondaryCommandBuffers.clear(); // Clear for reuse - _secondaryCommandBuffers.reserve(commandBuffersCount); - for (uint32_t cbIdx = 0; cbIdx < commandBuffersCount; cbIdx++) { - _secondaryCommandBuffers.push_back(MVKCommandBuffer::getMVKCommandBuffer(pCommandBuffers[cbIdx])); - } - cmdBuff->recordExecuteCommands(_secondaryCommandBuffers.contents()); - - return VK_SUCCESS; -} - -template -void MVKCmdExecuteCommands::encode(MVKCommandEncoder* cmdEncoder) { - for (auto& cb : _secondaryCommandBuffers) { cmdEncoder->encodeSecondary(cb); } -} - -template class MVKCmdExecuteCommands<1>; -template class MVKCmdExecuteCommands<16>; - - #pragma mark - #pragma mark MVKCmdSetViewport @@ -278,7 +251,7 @@ VkResult MVKCmdSetViewport::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdSetViewport::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setViewports(_viewports.contents(), _firstViewport, true); + cmdEncoder->_renderingState.setViewports(_viewports.contents(), _firstViewport, true); } template class MVKCmdSetViewport<1>; @@ -305,7 +278,7 @@ VkResult MVKCmdSetScissor::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdSetScissor::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setScissors(_scissors.contents(), _firstScissor, true); + cmdEncoder->_renderingState.setScissors(_scissors.contents(), _firstScissor, true); } template class MVKCmdSetScissor<1>; @@ -345,12 +318,26 @@ VkResult MVKCmdSetDepthBias::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetDepthBias::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setDepthBias(_depthBiasConstantFactor, + cmdEncoder->_renderingState.setDepthBias(_depthBiasConstantFactor, _depthBiasSlopeFactor, _depthBiasClamp); } +#pragma mark - +#pragma mark MVKCmdSetDepthBiasEnable + +VkResult MVKCmdSetDepthBiasEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthBiasEnable) { + _depthBiasEnable = depthBiasEnable; + return VK_SUCCESS; +} + +void MVKCmdSetDepthBiasEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setDepthBiasEnable(_depthBiasEnable); +} + + #pragma mark - #pragma mark MVKCmdSetBlendConstants @@ -361,10 +348,42 @@ VkResult MVKCmdSetBlendConstants::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetBlendConstants::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setBlendConstants(_blendConstants, true); + cmdEncoder->_renderingState.setBlendConstants(_blendConstants, true); } +#pragma mark - +#pragma mark MVKCmdSetLogicOp + +VkResult MVKCmdSetLogicOp::setContent(MVKCommandBuffer* cmdBuff, + VkLogicOp logicOp) { + // Validate + if (logicOp != VK_LOGIC_OP_COPY) { + return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support blending using logic operations."); + } + + return VK_SUCCESS; +} + +void MVKCmdSetLogicOp::encode(MVKCommandEncoder* cmdEncoder) {} + + +#pragma mark - +#pragma mark MVKCmdSetLogicOpEnable + +VkResult MVKCmdSetLogicOpEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 logicOpEnable) { + // Validate 
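+	// Metal has no logic-op blending stage, so an enabled logic op cannot be supported or emulated.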
+ if (logicOpEnable) { + return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support blending using logic operations."); + } + + return VK_SUCCESS; +} + +void MVKCmdSetLogicOpEnable::encode(MVKCommandEncoder* cmdEncoder) {} + + #pragma mark - #pragma mark MVKCmdSetDepthTestEnable @@ -528,7 +547,7 @@ VkResult MVKCmdSetStencilReference::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetStencilReference::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setStencilReferenceValues(_faceMask, _stencilReference); + cmdEncoder->_renderingState.setStencilReferenceValues(_faceMask, _stencilReference); } @@ -542,7 +561,7 @@ VkResult MVKCmdSetCullMode::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetCullMode::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setCullMode(_cullMode, true); + cmdEncoder->_renderingState.setCullMode(_cullMode, true); } @@ -556,7 +575,21 @@ VkResult MVKCmdSetFrontFace::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetFrontFace::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setFrontFace(_frontFace, true); + cmdEncoder->_renderingState.setFrontFace(_frontFace, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetPatchControlPoints + +VkResult MVKCmdSetPatchControlPoints::setContent(MVKCommandBuffer* cmdBuff, + uint32_t patchControlPoints) { + _patchControlPoints = patchControlPoints; + return VK_SUCCESS; +} + +void MVKCmdSetPatchControlPoints::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_graphicsPipelineState.setPatchControlPoints(_patchControlPoints); } @@ -570,6 +603,39 @@ VkResult MVKCmdSetPrimitiveTopology::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetPrimitiveTopology::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_rasterizingState.setPrimitiveTopology(_primitiveTopology, true); + cmdEncoder->_renderingState.setPrimitiveTopology(_primitiveTopology, true); } + +#pragma mark - +#pragma mark MVKCmdSetPrimitiveRestartEnable + +VkResult MVKCmdSetPrimitiveRestartEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 primitiveRestartEnable) { + // Validate + // In Metal, primitive restart cannot be disabled. + // Just issue warning here, as it is very likely the app is not actually expecting + // to use primitive restart at all, and is just setting this as a "just-in-case", + // and forcing an error here would be unexpected to the app (including CTS). 
+ if ( !primitiveRestartEnable ) { + reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support disabling primitive restart."); + } + + return VK_SUCCESS; +} + +void MVKCmdSetPrimitiveRestartEnable::encode(MVKCommandEncoder* cmdEncoder) {} + + +#pragma mark - +#pragma mark MVKCmdSetRasterizerDiscardEnable + +VkResult MVKCmdSetRasterizerDiscardEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 rasterizerDiscardEnable) { + _rasterizerDiscardEnable = rasterizerDiscardEnable; + return VK_SUCCESS; +} + +void MVKCmdSetRasterizerDiscardEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setRasterizerDiscardEnable(_rasterizerDiscardEnable, true); +} diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 5ae4fee3..124859bd 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -1507,7 +1507,7 @@ void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { // Return to the previous rendering state on the next render activity cmdEncoder->_graphicsPipelineState.markDirty(); cmdEncoder->_depthStencilState.markDirty(); - cmdEncoder->_rasterizingState.markDirty(); + cmdEncoder->_renderingState.markDirty(); } template diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index cc1d7539..94f4585c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -432,13 +432,13 @@ public: id _mtlRenderEncoder; /** Tracks the current graphics pipeline bound to the encoder. */ - MVKPipelineCommandEncoderState _graphicsPipelineState; + MVKGraphicsPipelineCommandEncoderState _graphicsPipelineState; /** Tracks the current graphics resources state of the encoder. */ MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState; /** Tracks the current compute pipeline bound to the encoder. */ - MVKPipelineCommandEncoderState _computePipelineState; + MVKComputePipelineCommandEncoderState _computePipelineState; /** Tracks the current compute resources state of the encoder. */ MVKComputeResourcesCommandEncoderState _computeResourcesState; @@ -446,8 +446,8 @@ public: /** Tracks the current depth stencil state of the encoder. */ MVKDepthStencilCommandEncoderState _depthStencilState; - /** Tracks the current rasterizing states of the encoder. */ - MVKRasterizingCommandEncoderState _rasterizingState; + /** Tracks the current rendering states of the encoder. */ + MVKRenderingCommandEncoderState _renderingState; /** The size of the threadgroup for the compute shader. 
*/ MTLSize _mtlThreadgroupSize; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index d640730c..55127489 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -25,7 +25,7 @@ #include "MVKFoundation.h" #include "MTLRenderPassDescriptor+MoltenVK.h" #include "MVKCmdDraw.h" -#include "MVKCmdRenderPass.h" +#include "MVKCmdRendering.h" #include using namespace std; @@ -608,7 +608,7 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { _graphicsPipelineState.beginMetalRenderPass(); _graphicsResourcesState.beginMetalRenderPass(); _depthStencilState.beginMetalRenderPass(); - _rasterizingState.beginMetalRenderPass(); + _renderingState.beginMetalRenderPass(); _vertexPushConstants.beginMetalRenderPass(); _tessCtlPushConstants.beginMetalRenderPass(); _tessEvalPushConstants.beginMetalRenderPass(); @@ -729,7 +729,7 @@ void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) { _graphicsPipelineState.encode(stage); // Must do first..it sets others _graphicsResourcesState.encode(stage); // Before push constants, to allow them to override. _depthStencilState.encode(stage); - _rasterizingState.encode(stage); + _renderingState.encode(stage); _vertexPushConstants.encode(stage); _tessCtlPushConstants.encode(stage); _tessEvalPushConstants.encode(stage); @@ -823,7 +823,7 @@ void MVKCommandEncoder::endMetalRenderEncoding() { _graphicsPipelineState.endMetalRenderPass(); _graphicsResourcesState.endMetalRenderPass(); _depthStencilState.endMetalRenderPass(); - _rasterizingState.endMetalRenderPass(); + _renderingState.endMetalRenderPass(); _vertexPushConstants.endMetalRenderPass(); _tessCtlPushConstants.endMetalRenderPass(); _tessEvalPushConstants.endMetalRenderPass(); @@ -1124,7 +1124,7 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer, _computePipelineState(this), _computeResourcesState(this), _depthStencilState(this), - _rasterizingState(this), + _renderingState(this), _vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT), _tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT), _tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT), diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index c8919538..c518c54a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -119,20 +119,15 @@ protected: #pragma mark - #pragma mark MVKPipelineCommandEncoderState -/** Holds encoder state established by pipeline commands. */ +/** Abstract class to hold encoder state established by pipeline commands. */ class MVKPipelineCommandEncoderState : public MVKCommandEncoderState { public: + virtual void bindPipeline(MVKPipeline* pipeline); - /** Binds the pipeline. */ - void bindPipeline(MVKPipeline* pipeline); - - /** Returns the currently bound pipeline. */ MVKPipeline* getPipeline(); - /** Constructs this instance for the specified command encoder. */ - MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) - : MVKCommandEncoderState(cmdEncoder) {} + MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} protected: void encodeImpl(uint32_t stage) override; @@ -141,6 +136,42 @@ protected: }; +#pragma mark - +#pragma mark MVKGraphicsPipelineCommandEncoderState + +/** Holds encoder state established by graphics pipeline commands. 
*/ +class MVKGraphicsPipelineCommandEncoderState : public MVKPipelineCommandEncoderState { + +public: + void bindPipeline(MVKPipeline* pipeline) override; + + MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)getPipeline(); } + + void setPatchControlPoints(uint32_t patchControlPoints); + uint32_t getPatchControlPoints(); + + MVKGraphicsPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKPipelineCommandEncoderState(cmdEncoder) {} + +protected: + uint32_t _patchControlPoints[StateScope::Count] = {}; +}; + + +#pragma mark - +#pragma mark MVKComputePipelineCommandEncoderState + +/** Holds encoder state established by compute pipeline commands. */ +class MVKComputePipelineCommandEncoderState : public MVKPipelineCommandEncoderState { + +public: + MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)getPipeline(); } + + MVKComputePipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKPipelineCommandEncoderState(cmdEncoder) {} + +protected: +}; + + #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -233,7 +264,7 @@ protected: #pragma mark - -#pragma mark MVKRasterizingCommandEncoderState +#pragma mark MVKRenderingCommandEncoderState struct MVKDepthBias { float depthBiasConstantFactor; @@ -256,8 +287,8 @@ struct MVKMTLScissors { uint32_t scissorCount; }; -/** Holds encoder state established by various state commands. */ -class MVKRasterizingCommandEncoderState : public MVKCommandEncoderState { +/** Holds encoder state established by various rendering state commands. */ +class MVKRenderingCommandEncoderState : public MVKCommandEncoderState { public: void setCullMode(VkCullModeFlags cullMode, bool isDynamic); @@ -281,9 +312,11 @@ public: void setViewports(const MVKArrayRef viewports, uint32_t firstViewport, bool isDynamic); void setScissors(const MVKArrayRef scissors, uint32_t firstScissor, bool isDynamic); + void setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic); + void beginMetalRenderPass() override; - MVKRasterizingCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} + MVKRenderingCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} protected: void encodeImpl(uint32_t stage) override; @@ -312,6 +345,7 @@ protected: MVKRenderStateFlags _dirtyStates; MVKRenderStateFlags _modifiedStates; bool _mtlDepthBiasEnable[StateScope::Count] = {}; + bool _mtlRasterizerDiscardEnable[StateScope::Count] = {}; bool _cullBothFaces[StateScope::Count] = {}; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 828d7c09..72db24d6 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -36,7 +36,7 @@ MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEn MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); } bool MVKCommandEncoderState::isDynamicState(MVKRenderStateType state) { - auto* gpl = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* gpl = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); return !gpl || gpl->isDynamicState(state); } @@ -59,6 +59,23 @@ void MVKPipelineCommandEncoderState::encodeImpl(uint32_t stage) { } +#pragma mark - +#pragma mark MVKGraphicsPipelineCommandEncoderState + +void MVKGraphicsPipelineCommandEncoderState::bindPipeline(MVKPipeline* pipeline) { + 
MVKPipelineCommandEncoderState::bindPipeline(pipeline); + _patchControlPoints[StateScope::Static] = getGraphicsPipeline()->_tessInfo.patchControlPoints; +} + +void MVKGraphicsPipelineCommandEncoderState::setPatchControlPoints(uint32_t patchControlPoints) { + _patchControlPoints[StateScope::Dynamic] = patchControlPoints; +} + +uint32_t MVKGraphicsPipelineCommandEncoderState::getPatchControlPoints() { + return getContent(_patchControlPoints, PatchControlPoints); +} + + #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -147,7 +164,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { } bool MVKPushConstantsCommandEncoderState::isTessellating() { - MVKGraphicsPipeline* gp = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline(); + auto* gp = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); return gp ? gp->isTessellationPipeline() : false; } @@ -291,32 +308,32 @@ void MVKDepthStencilCommandEncoderState::encodeImpl(uint32_t stage) { #pragma mark - -#pragma mark MVKRasterizingCommandEncoderState +#pragma mark MVKRenderingCommandEncoderState #define getContent(state) getContent(_mtl##state, state) #define setContent(state) setContent(_mtl##state, &mtl##state, state, isDynamic) -void MVKRasterizingCommandEncoderState::setCullMode(VkCullModeFlags cullMode, bool isDynamic) { +void MVKRenderingCommandEncoderState::setCullMode(VkCullModeFlags cullMode, bool isDynamic) { auto mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(cullMode); setContent(CullMode); _cullBothFaces[isDynamic ? StateScope::Dynamic : StateScope::Static] = (cullMode == VK_CULL_MODE_FRONT_AND_BACK); } -void MVKRasterizingCommandEncoderState::setFrontFace(VkFrontFace frontFace, bool isDynamic) { +void MVKRenderingCommandEncoderState::setFrontFace(VkFrontFace frontFace, bool isDynamic) { auto mtlFrontFace = mvkMTLWindingFromVkFrontFace(frontFace); setContent(FrontFace); } -void MVKRasterizingCommandEncoderState::setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic) { +void MVKRenderingCommandEncoderState::setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic) { auto mtlPrimitiveTopology = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(topology); setContent(PrimitiveTopology); } -MTLPrimitiveType MVKRasterizingCommandEncoderState::getPrimitiveType() { +MTLPrimitiveType MVKRenderingCommandEncoderState::getPrimitiveType() { return getContent(PrimitiveTopology); } -bool MVKRasterizingCommandEncoderState::isDrawingTriangles() { +bool MVKRenderingCommandEncoderState::isDrawingTriangles() { switch (getPrimitiveType()) { case MTLPrimitiveTypeTriangle: return true; case MTLPrimitiveTypeTriangleStrip: return true; @@ -324,18 +341,18 @@ bool MVKRasterizingCommandEncoderState::isDrawingTriangles() { } } -void MVKRasterizingCommandEncoderState::setPolygonMode(VkPolygonMode polygonMode, bool isDynamic) { +void MVKRenderingCommandEncoderState::setPolygonMode(VkPolygonMode polygonMode, bool isDynamic) { auto mtlPolygonMode = mvkMTLTriangleFillModeFromVkPolygonMode(polygonMode); setContent(PolygonMode); } -void MVKRasterizingCommandEncoderState::setBlendConstants(float blendConstants[4], bool isDynamic) { +void MVKRenderingCommandEncoderState::setBlendConstants(float blendConstants[4], bool isDynamic) { MVKColor32 mtlBlendConstants; mvkCopy(mtlBlendConstants.float32, blendConstants, 4); setContent(BlendConstants); } -void MVKRasterizingCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) { +void 
MVKRenderingCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) { bool isDynamic = false; bool mtlDepthBiasEnable = static_cast(vkRasterInfo.depthBiasEnable); @@ -349,7 +366,7 @@ void MVKRasterizingCommandEncoderState::setDepthBias(const VkPipelineRasterizati setContent(DepthBias); } -void MVKRasterizingCommandEncoderState::setDepthBias(float depthBiasConstantFactor, +void MVKRenderingCommandEncoderState::setDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor, float depthBiasClamp) { bool isDynamic = true; @@ -361,18 +378,18 @@ void MVKRasterizingCommandEncoderState::setDepthBias(float depthBiasConstantFact setContent(DepthBias); } -void MVKRasterizingCommandEncoderState::setDepthBiasEnable(VkBool32 depthBiasEnable) { +void MVKRenderingCommandEncoderState::setDepthBiasEnable(VkBool32 depthBiasEnable) { bool isDynamic = true; bool mtlDepthBiasEnable = static_cast(depthBiasEnable); setContent(DepthBiasEnable); } -void MVKRasterizingCommandEncoderState::setDepthClipEnable(bool depthClip, bool isDynamic) { +void MVKRenderingCommandEncoderState::setDepthClipEnable(bool depthClip, bool isDynamic) { auto mtlDepthClipEnable = depthClip ? MTLDepthClipModeClip : MTLDepthClipModeClamp; setContent(DepthClipEnable); } -void MVKRasterizingCommandEncoderState::setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { +void MVKRenderingCommandEncoderState::setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { bool isDynamic = false; MVKStencilReference mtlStencilReference = { .frontFaceValue = vkDepthStencilInfo.front.reference, @@ -381,7 +398,7 @@ void MVKRasterizingCommandEncoderState::setStencilReferenceValues(const VkPipeli setContent(StencilReference); } -void MVKRasterizingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference) { +void MVKRenderingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference) { bool isDynamic = true; MVKStencilReference mtlStencilReference = _mtlStencilReference[StateScope::Dynamic]; if (shouldUpdateFace(FRONT)) { mtlStencilReference.frontFaceValue = stencilReference; } @@ -389,7 +406,7 @@ void MVKRasterizingCommandEncoderState::setStencilReferenceValues(VkStencilFaceF setContent(StencilReference); } -void MVKRasterizingCommandEncoderState::setViewports(const MVKArrayRef viewports, +void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef viewports, uint32_t firstViewport, bool isDynamic) { uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; @@ -404,7 +421,7 @@ void MVKRasterizingCommandEncoderState::setViewports(const MVKArrayRef scissors, +void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef scissors, uint32_t firstScissor, bool isDynamic) { uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; @@ -419,7 +436,14 @@ void MVKRasterizingCommandEncoderState::setScissors(const MVKArrayRef setContent(Scissors); } -void MVKRasterizingCommandEncoderState::encodeImpl(uint32_t stage) { +void MVKRenderingCommandEncoderState::setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic) { + bool mtlRasterizerDiscardEnable = static_cast(rasterizerDiscardEnable); + setContent(RasterizerDiscardEnable); +} + +#pragma mark Encoding + +void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { if (stage != kMVKGraphicsStageRasterization) { return; } auto& rendEnc = 
_cmdEncoder->_mtlRenderEncoder; @@ -460,14 +484,15 @@ void MVKRasterizingCommandEncoderState::encodeImpl(uint32_t stage) { } } + // If rasterizing discard has been dynamically enabled, or culling has been dynamically + // set to front-and-back, emulate this by using zeroed scissor rectangles. if (isDirty(Scissors)) { - auto mtlScissors = getContent(Scissors); - - // If culling has been dynamically set to front-and-back, emulate this by using zeroed scissor rectangles. static MTLScissorRect zeroRect = {}; - bool cullBothFaces = isDrawingTriangles() && _cullBothFaces[StateScope::Dynamic] && isDynamicState(CullMode); + auto mtlScissors = getContent(Scissors); + bool shouldDiscard = ((_mtlRasterizerDiscardEnable[StateScope::Dynamic] && isDynamicState(RasterizerDiscardEnable)) || + (isDrawingTriangles() && _cullBothFaces[StateScope::Dynamic] && isDynamicState(CullMode))); for (uint32_t sIdx = 0; sIdx < mtlScissors.scissorCount; sIdx++) { - mtlScissors.scissors[sIdx] = cullBothFaces ? zeroRect : _cmdEncoder->clipToRenderArea(mtlScissors.scissors[sIdx]); + mtlScissors.scissors[sIdx] = shouldDiscard ? zeroRect : _cmdEncoder->clipToRenderArea(mtlScissors.scissors[sIdx]); } if (_cmdEncoder->_pDeviceFeatures->multiViewport) { @@ -481,13 +506,13 @@ void MVKRasterizingCommandEncoderState::encodeImpl(uint32_t stage) { } // Return whether state is dirty, and mark it not dirty -bool MVKRasterizingCommandEncoderState::isDirty(MVKRenderStateType state) { +bool MVKRenderingCommandEncoderState::isDirty(MVKRenderStateType state) { bool rslt = _dirtyStates.isEnabled(state); _dirtyStates.disable(state); return rslt; } -void MVKRasterizingCommandEncoderState::beginMetalRenderPass() { +void MVKRenderingCommandEncoderState::beginMetalRenderPass() { MVKCommandEncoderState::beginMetalRenderPass(); _dirtyStates = _modifiedStates; } @@ -803,7 +828,7 @@ void MVKGraphicsResourcesCommandEncoderState::markDirty() { void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { - MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)getPipeline(); + auto* pipeline = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle; bool forTessellation = pipeline->isTessellationPipeline(); bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h index a6b1a38b..e2325857 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h @@ -23,7 +23,7 @@ #include "MVKCommandEncodingPool.h" #include "MVKCommand.h" #include "MVKCmdPipeline.h" -#include "MVKCmdRenderPass.h" +#include "MVKCmdRendering.h" #include "MVKCmdDispatch.h" #include "MVKCmdDraw.h" #include "MVKCmdTransfer.h" diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def index 3035677f..880f5551 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def +++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def @@ -87,8 +87,11 @@ MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BindDescriptorSetsDynamic, 4) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetViewport, 1) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetScissor, 1) MVK_CMD_TYPE_POOL(SetLineWidth) -MVK_CMD_TYPE_POOL(SetDepthBias) MVK_CMD_TYPE_POOL(SetBlendConstants) +MVK_CMD_TYPE_POOL(SetLogicOp) +MVK_CMD_TYPE_POOL(SetLogicOpEnable) +MVK_CMD_TYPE_POOL(SetDepthBias) +MVK_CMD_TYPE_POOL(SetDepthBiasEnable) 
MVK_CMD_TYPE_POOL(SetDepthTestEnable) MVK_CMD_TYPE_POOL(SetDepthWriteEnable) MVK_CMD_TYPE_POOL(SetDepthCompareOp) @@ -102,6 +105,9 @@ MVK_CMD_TYPE_POOL(SetStencilReference) MVK_CMD_TYPE_POOL(SetCullMode) MVK_CMD_TYPE_POOL(SetFrontFace) MVK_CMD_TYPE_POOL(SetPrimitiveTopology) +MVK_CMD_TYPE_POOL(SetPatchControlPoints) +MVK_CMD_TYPE_POOL(SetPrimitiveRestartEnable) +MVK_CMD_TYPE_POOL(SetRasterizerDiscardEnable) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2) MVK_CMD_TYPE_POOL(BindIndexBuffer) MVK_CMD_TYPE_POOL(Draw) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index c2fe1e86..a660be0a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -392,6 +392,13 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { extDynState->extendedDynamicState = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: { + auto* extDynState2 = (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT*)next; + extDynState2->extendedDynamicState2 = true; + extDynState2->extendedDynamicState2LogicOp = false; + extDynState2->extendedDynamicState2PatchControlPoints = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: { auto* interlockFeatures = (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT*)next; interlockFeatures->fragmentShaderSampleInterlock = _metalFeatures.rasterOrderGroups; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index 4e9f3bed..b9792833 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -65,6 +65,7 @@ MVK_DEVICE_FEATURE_EXTN(FragmentShaderBarycentric, FRAGMENT_SHADER_BARYCENTRI MVK_DEVICE_FEATURE_EXTN(PortabilitySubset, PORTABILITY_SUBSET, KHR, 15) MVK_DEVICE_FEATURE_EXTN(4444Formats, 4444_FORMATS, EXT, 2) MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, EXT, 1) +MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, EXT, 3) MVK_DEVICE_FEATURE_EXTN(FragmentShaderInterlock, FRAGMENT_SHADER_INTERLOCK, EXT, 3) MVK_DEVICE_FEATURE_EXTN(PipelineCreationCacheControl, PIPELINE_CREATION_CACHE_CONTROL, EXT, 1) MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, EXT, 3) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index acf6670f..b08f6741 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -698,6 +698,9 @@ void MVKInstance::initProcAddrs() { ADD_DVC_EXT_ENTRY_POINT(vkReleaseSwapchainImagesEXT, EXT_SWAPCHAIN_MAINTENANCE_1); ADD_DVC_EXT_ENTRY_POINT(vkGetRefreshCycleDurationGOOGLE, GOOGLE_DISPLAY_TIMING); ADD_DVC_EXT_ENTRY_POINT(vkGetPastPresentationTimingGOOGLE, GOOGLE_DISPLAY_TIMING); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEXT, EXT_EXTENDED_DYNAMIC_STATE_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetPatchControlPointsEXT, EXT_EXTENDED_DYNAMIC_STATE_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); } void MVKInstance::logVersions() { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index c7f67db1..68e10dd8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -232,8 +232,13 @@ enum MVKRenderStateType { DepthTestEnable, DepthWriteEnable, FrontFace, 
+ LogicOp, + LogicOpEnable, + PatchControlPoints, PolygonMode, + PrimitiveRestartEnable, PrimitiveTopology, + RasterizerDiscardEnable, SampleLocations, Scissors, StencilCompareMask, @@ -273,9 +278,6 @@ public: /** Returns whether this pipeline has tessellation shaders. */ bool isTessellationPipeline() { return _tessInfo.patchControlPoints > 0; } - /** Returns the number of input tessellation patch control points. */ - uint32_t getInputControlPointCount() { return _tessInfo.patchControlPoints; } - /** Returns the number of output tessellation patch control points. */ uint32_t getOutputControlPointCount() { return _outputControlPointCount; } @@ -351,6 +353,8 @@ public: ~MVKGraphicsPipeline() override; protected: + friend class MVKGraphicsPipelineCommandEncoderState; + typedef MVKSmallVector SPIRVShaderOutputs; typedef MVKSmallVector SPIRVShaderInputs; @@ -414,10 +418,10 @@ protected: id _mtlPipelineState = nil; float _blendConstants[4] = {}; - VkPrimitiveTopology _vkPrimitiveTopology; MVKShaderImplicitRezBinding _reservedVertexAttributeBufferCount; MVKShaderImplicitRezBinding _viewRangeBufferIndex; MVKShaderImplicitRezBinding _outputBufferIndex; + VkPrimitiveTopology _vkPrimitiveTopology; uint32_t _outputControlPointCount; uint32_t _tessCtlPatchOutputBufferIndex = 0; uint32_t _tessCtlLevelBufferIndex = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index f41077c8..e361f1ea 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -294,17 +294,17 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage) cmdEncoder->_depthStencilState.setDepthStencilState(_depthStencilInfo); // Rasterization - cmdEncoder->_rasterizingState.setPrimitiveTopology(_vkPrimitiveTopology, false); - cmdEncoder->_rasterizingState.setBlendConstants(_blendConstants, false); - cmdEncoder->_rasterizingState.setStencilReferenceValues(_depthStencilInfo); - cmdEncoder->_rasterizingState.setViewports(_viewports.contents(), 0, false); - cmdEncoder->_rasterizingState.setScissors(_scissors.contents(), 0, false); + cmdEncoder->_renderingState.setPrimitiveTopology(_vkPrimitiveTopology, false); + cmdEncoder->_renderingState.setBlendConstants(_blendConstants, false); + cmdEncoder->_renderingState.setStencilReferenceValues(_depthStencilInfo); + cmdEncoder->_renderingState.setViewports(_viewports.contents(), 0, false); + cmdEncoder->_renderingState.setScissors(_scissors.contents(), 0, false); if (_hasRasterInfo) { - cmdEncoder->_rasterizingState.setCullMode(_rasterInfo.cullMode, false); - cmdEncoder->_rasterizingState.setFrontFace(_rasterInfo.frontFace, false); - cmdEncoder->_rasterizingState.setPolygonMode(_rasterInfo.polygonMode, false); - cmdEncoder->_rasterizingState.setDepthBias(_rasterInfo); - cmdEncoder->_rasterizingState.setDepthClipEnable( !_rasterInfo.depthClampEnable, false ); + cmdEncoder->_renderingState.setCullMode(_rasterInfo.cullMode, false); + cmdEncoder->_renderingState.setFrontFace(_rasterInfo.frontFace, false); + cmdEncoder->_renderingState.setPolygonMode(_rasterInfo.polygonMode, false); + cmdEncoder->_renderingState.setDepthBias(_rasterInfo); + cmdEncoder->_renderingState.setDepthClipEnable( !_rasterInfo.depthClampEnable, false ); } break; } @@ -497,8 +497,13 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, // Blending - must ignore allowed bad pColorBlendState pointer if rasterization disabled or no color attachments if (_isRasterizingColor && 
pCreateInfo->pColorBlendState) { mvkCopy(_blendConstants, pCreateInfo->pColorBlendState->blendConstants, 4); + + // Metal does not support blending with logic operations. + if (pCreateInfo->pColorBlendState->logicOpEnable && pCreateInfo->pColorBlendState->logicOp != VK_LOGIC_OP_COPY) { + setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support blending using logic operations.")); + } } else { - float defaultBlendConstants[4] = { 0, 0.0, 0.0, 1.0 }; + static float defaultBlendConstants[4] = { 0, 0.0, 0.0, 1.0 }; mvkCopy(_blendConstants, defaultBlendConstants, 4); } @@ -507,6 +512,14 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, ? pCreateInfo->pInputAssemblyState->topology : VK_PRIMITIVE_TOPOLOGY_POINT_LIST); + // In Metal, primitive restart cannot be disabled. + // Just issue warning here, as it is very likely the app is not actually expecting + // to use primitive restart at all, and is just setting this as a "just-in-case", + // and forcing an error here would be unexpected to the app (including CTS). + if (pCreateInfo->pInputAssemblyState && !pCreateInfo->pInputAssemblyState->primitiveRestartEnable) { + reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateGraphicsPipeline(): Metal does not support disabling primitive restart."); + } + // Rasterization _hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState); if (_hasRasterInfo) { @@ -548,6 +561,7 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { case VK_DYNAMIC_STATE_BLEND_CONSTANTS: return BlendConstants; case VK_DYNAMIC_STATE_CULL_MODE: return CullMode; case VK_DYNAMIC_STATE_DEPTH_BIAS: return DepthBias; + case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE: return DepthBiasEnable; case VK_DYNAMIC_STATE_DEPTH_BOUNDS: return DepthBounds; case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE: return DepthBoundsTestEnable; case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT: return DepthClipEnable; @@ -556,9 +570,13 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: return DepthTestEnable; case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: return DepthWriteEnable; case VK_DYNAMIC_STATE_FRONT_FACE: return FrontFace; + case VK_DYNAMIC_STATE_LOGIC_OP_EXT: return LogicOp; + case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: return LogicOpEnable; + case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: return PatchControlPoints; case VK_DYNAMIC_STATE_POLYGON_MODE_EXT: return PolygonMode; + case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE: return PrimitiveRestartEnable; case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: return PrimitiveTopology; - case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: return SampleLocations; + case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE: return RasterizerDiscardEnable; case VK_DYNAMIC_STATE_SCISSOR: return Scissors; case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: return Scissors; case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: return StencilCompareMask; @@ -579,12 +597,12 @@ void MVKGraphicsPipeline::initDynamicState(const VkGraphicsPipelineCreateInfo* p if ( !pDS ) { return; } for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) { - VkDynamicState vkDynState = pDS->pDynamicStates[i]; + auto dynStateType = getRenderStateType(pDS->pDynamicStates[i]); bool isDynamic = true; // Some dynamic states have other restrictions - switch (vkDynState) { - case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: + switch (dynStateType) { + case VertexStride: isDynamic = _device->_pMetalFeatures->dynamicVertexStride; if ( !isDynamic ) { 
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device and platform does not support VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE (macOS 14.0 or iOS/tvOS 17.0, plus either Apple4 or Mac2 GPU).")); } break; @@ -592,7 +610,7 @@ void MVKGraphicsPipeline::initDynamicState(const VkGraphicsPipelineCreateInfo* p break; } - if (isDynamic) { _dynamicState.enable(getRenderStateType(vkDynState)); } + if (isDynamic) { _dynamicState.enable(dynStateType); } } } @@ -1923,10 +1941,10 @@ bool MVKGraphicsPipeline::isRenderingPoints(const VkGraphicsPipelineCreateInfo* (pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT))); } -// We disable rasterization if either rasterizerDiscard is enabled or the static cull mode dictates it. +// We disable rasterization if either static rasterizerDiscard is enabled or the static cull mode dictates it. bool MVKGraphicsPipeline::isRasterizationDisabled(const VkGraphicsPipelineCreateInfo* pCreateInfo) { return (pCreateInfo->pRasterizationState && - (pCreateInfo->pRasterizationState->rasterizerDiscardEnable || + ((pCreateInfo->pRasterizationState->rasterizerDiscardEnable && !isDynamicState(RasterizerDiscardEnable)) || ((pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK) && !isDynamicState(CullMode) && pCreateInfo->pInputAssemblyState && (mvkMTLPrimitiveTopologyClassFromVkPrimitiveTopology(pCreateInfo->pInputAssemblyState->topology) == MTLPrimitiveTopologyClassTriangle)))); diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index 944b81a6..f63ecf98 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -104,6 +104,7 @@ MVK_EXTENSION(EXT_debug_report, EXT_DEBUG_REPORT, MVK_EXTENSION(EXT_debug_utils, EXT_DEBUG_UTILS, INSTANCE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_descriptor_indexing, EXT_DESCRIPTOR_INDEXING, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_extended_dynamic_state, EXT_EXTENDED_DYNAMIC_STATE, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(EXT_extended_dynamic_state2, EXT_EXTENDED_DYNAMIC_STATE_2, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_external_memory_host, EXT_EXTERNAL_MEMORY_HOST, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, DEVICE, 10.13, 11.0, 1.0) MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, DEVICE, 10.15, MVK_NA, MVK_NA) diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index cd6d15bf..642ad040 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2638,7 +2638,14 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetCullMode( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthBiasEnable, void, VkCommandBuffer, VkBool32) +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBiasEnable( + VkCommandBuffer commandBuffer, + VkBool32 depthBiasEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthBiasEnable, commandBuffer, depthBiasEnable); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBoundsTestEnable( VkCommandBuffer commandBuffer, @@ -2695,7 +2702,14 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetFrontFace( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveRestartEnable, void, VkCommandBuffer, VkBool32) +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveRestartEnable( + VkCommandBuffer commandBuffer, + VkBool32 primitiveRestartEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetPrimitiveRestartEnable, 
commandBuffer, primitiveRestartEnable); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveTopology( VkCommandBuffer commandBuffer, @@ -2706,7 +2720,14 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveTopology( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdSetRasterizerDiscardEnable, void, VkCommandBuffer, VkBool32) +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizerDiscardEnable( + VkCommandBuffer commandBuffer, + VkBool32 rasterizerDiscardEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetRasterizerDiscardEnable, commandBuffer, rasterizerDiscardEnable); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetScissorWithCount( VkCommandBuffer commandBuffer, @@ -2842,7 +2863,6 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateData( } - #pragma mark - #pragma mark VK_KHR_bind_memory2 extension @@ -3607,6 +3627,45 @@ MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetStencilTestEnable, EXT); MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetViewportWithCount, EXT); +#pragma mark - +#pragma mark VK_EXT_extended_dynamic_state2 +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthBiasEnable, EXT); + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEXT( + VkCommandBuffer commandBuffer, + VkLogicOp logicOp) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetLogicOp, commandBuffer, logicOp); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPatchControlPointsEXT( + VkCommandBuffer commandBuffer, + uint32_t patchControlPoints) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetPatchControlPoints, commandBuffer, patchControlPoints); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetPrimitiveRestartEnable, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetRasterizerDiscardEnable, EXT); + + +#pragma mark - +#pragma mark VK_EXT_extended_dynamic_state3 + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 logicOpEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetLogicOpEnable, commandBuffer, logicOpEnable); + MVKTraceVulkanCallEnd(); +} + + #pragma mark - #pragma mark VK_EXT_external_memory_host extension From 0584aa00831a1a537285618ea0033dc3705c199f Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 10 Oct 2023 15:02:06 -0400 Subject: [PATCH 20/41] Fix deadlock when reporting debug message on MVKInstance destruction. --- MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index e3c94135..6e768161 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -723,11 +723,12 @@ MVKInstance::~MVKInstance() { _useCreationCallbacks = true; mvkDestroyContainerContents(_physicalDevices); - lock_guard lock(_dcbLock); - mvkDestroyContainerContents(_debugReportCallbacks); - - MVKLogInfo("Destroyed VkInstance for Vulkan version %s with %d Vulkan extensions enabled.", + // Since this message may invoke debug callbacks, do it before locking callbacks. 
+ MVKLogInfo("Destroying VkInstance for Vulkan version %s with %d Vulkan extensions enabled.", mvkGetVulkanVersionString(_appInfo.apiVersion).c_str(), _enabledExtensions.getEnabledCount()); + + lock_guard lock(_dcbLock); + mvkDestroyContainerContents(_debugReportCallbacks); } From 659b1cecd798aac65a9084bab12e6774911dfabc Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 10 Oct 2023 13:58:08 -0400 Subject: [PATCH 21/41] VK_EXT_extended_dynamic_state fix build fail before Xcode 15. --- MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm | 4 ++++ MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 4 ++++ MoltenVK/MoltenVK/Utility/MVKFoundation.h | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 72db24d6..d01dc5ef 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -826,6 +826,10 @@ void MVKGraphicsResourcesCommandEncoderState::markDirty() { } } +#if !MVK_XCODE_15 +static const NSUInteger MTLAttributeStrideStatic = NSUIntegerMax; +#endif + void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { auto* pipeline = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index e361f1ea..6660e9b7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -1348,6 +1348,10 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto return true; } +#if !MVK_XCODE_15 +static const NSUInteger MTLBufferLayoutStrideDynamic = NSUIntegerMax; +#endif + template bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 8503b917..c729c153 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -386,8 +386,8 @@ static constexpr bool mvkVkComponentMappingsMatch(VkComponentMapping cm1, VkComp #pragma mark Math /** Rounds the value to nearest integer using half-to-even rounding. */ -static constexpr double mvkRoundHalfToEven(const double val) { - return val - std::remainder(val, 1.0); // remainder() uses half-to-even rounding, and unfortunately isn't constexpr until C++23. +static inline double mvkRoundHalfToEven(const double val) { + return val - std::remainder(val, 1.0); // remainder() uses half-to-even rounding, but unfortunately isn't constexpr until C++23. } /** Returns whether the value will fit inside the numeric type. */ From eb7cfa1006adaf5d5baf2702c4d5d72af13bba3b Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 11 Oct 2023 11:37:50 -0400 Subject: [PATCH 22/41] Fixes based on VK_EXT_extended_dynamic_state code review. - Fix runtime failure on Metal versions that don't support dynamic attribute stride. - Add MVKCommandEncoder::encodeVertexAttributeBuffer() consolidation function. - Remove unnecessary validations that will be caught by Vulkan validation layers. - To reduce memory, remove command class and pools for rendering commands that are not supported, and perform no validation. - Document extension conformance limitations in MoltenVK_Runtime_UserGuide.md. 
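For context, a minimal C sketch (not part of the patch) of how an application might exercise the VK_EXT_extended_dynamic_state2 support wired up in this series. The physicalDevice and cmdBuf handles and the chosen values are illustrative assumptions, and the vkCmdSet*EXT calls assume the extension is enabled on the device and the matching VK_DYNAMIC_STATE_* values are listed on the bound pipeline:

#include <vulkan/vulkan.h>

static void useExtendedDynamicState2(VkPhysicalDevice physicalDevice, VkCommandBuffer cmdBuf) {
	// Query the feature bits that MVKPhysicalDevice::getFeatures() populates.
	VkPhysicalDeviceExtendedDynamicState2FeaturesEXT eds2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT
	};
	VkPhysicalDeviceFeatures2 features2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
		.pNext = &eds2
	};
	vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
	// Per this series: extendedDynamicState2 and extendedDynamicState2PatchControlPoints are reported,
	// while extendedDynamicState2LogicOp is not, because Metal has no logic-op blending.

	vkCmdSetPatchControlPointsEXT(cmdBuf, 3);             // illustrative value
	vkCmdSetDepthBiasEnableEXT(cmdBuf, VK_FALSE);
	vkCmdSetRasterizerDiscardEnableEXT(cmdBuf, VK_FALSE); // dynamic discard is emulated with zeroed scissor rects
	vkCmdSetPrimitiveRestartEnableEXT(cmdBuf, VK_TRUE);   // VK_FALSE only logs a warning; Metal cannot disable restart
}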
--- Docs/MoltenVK_Runtime_UserGuide.md | 86 +++++++++++------ MoltenVK/MoltenVK/API/mvk_datatypes.h | 6 +- MoltenVK/MoltenVK/Commands/MVKCmdRendering.h | 93 ------------------- MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm | 88 ------------------ MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 3 + .../MoltenVK/Commands/MVKCommandBuffer.mm | 36 +++++++ .../Commands/MVKCommandEncoderState.mm | 64 +++---------- .../MoltenVK/Commands/MVKCommandTypePools.def | 5 - MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 10 -- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 2 +- MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm | 6 +- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 5 - 12 files changed, 114 insertions(+), 290 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 79f3f1a5..080b4d0c 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -312,7 +312,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_16bit_storage` - `VK_KHR_8bit_storage` - `VK_KHR_bind_memory2` -- `VK_KHR_buffer_device_address` *(requires GPU Tier 2 argument buffers support)* +- `VK_KHR_buffer_device_address` + - *Requires GPU Tier 2 argument buffers support.* - `VK_KHR_copy_commands2` - `VK_KHR_create_renderpass2` - `VK_KHR_dedicated_allocation` @@ -322,7 +323,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_device_group_creation` - `VK_KHR_driver_properties` - `VK_KHR_dynamic_rendering` -- `VK_KHR_fragment_shader_barycentric` *(requires Metal 2.2 on Mac or Metal 2.3 on iOS)* +- `VK_KHR_fragment_shader_barycentric` + - *Requires Metal 2.2 on Mac or Metal 2.3 on iOS.* - `VK_KHR_get_memory_requirements2` - `VK_KHR_get_physical_device_properties2` - `VK_KHR_get_surface_capabilities2` @@ -337,14 +339,16 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_portability_subset` - `VK_KHR_push_descriptor` - `VK_KHR_relaxed_block_layout` -- `VK_KHR_sampler_mirror_clamp_to_edge` *(requires a Mac GPU or Apple family 7 GPU)* +- `VK_KHR_sampler_mirror_clamp_to_edge` + - *Requires a Mac GPU or Apple family 7 GPU.* - `VK_KHR_sampler_ycbcr_conversion` - `VK_KHR_separate_depth_stencil_layouts` - `VK_KHR_shader_draw_parameters` - `VK_KHR_shader_float_controls` - `VK_KHR_shader_float16_int8` - `VK_KHR_shader_non_semantic_info` -- `VK_KHR_shader_subgroup_extended_types` *(requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS)* +- `VK_KHR_shader_subgroup_extended_types` + - *Requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS.* - `VK_KHR_spirv_1_4` - `VK_KHR_storage_buffer_storage_class` - `VK_KHR_surface` @@ -354,55 +358,78 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_timeline_semaphore` - `VK_KHR_uniform_buffer_standard_layout` - `VK_KHR_variable_pointers` -- `VK_EXT_4444_formats` *(requires 16-bit formats and either native texture swizzling or manual swizzling to be enabled)* -- `VK_EXT_buffer_device_address` *(requires GPU Tier 2 argument buffers support)* -- `VK_EXT_calibrated_timestamps` *(requires Metal 2.2)* +- `VK_EXT_4444_formats` + - *Requires 16-bit formats and either native texture swizzling or manual swizzling to be enabled.* +- `VK_EXT_buffer_device_address` + - *Requires GPU Tier 2 argument buffers support.* +- `VK_EXT_calibrated_timestamps` + - *Requires Metal 2.2.* - `VK_EXT_debug_marker` - `VK_EXT_debug_report` - `VK_EXT_debug_utils` -- `VK_EXT_descriptor_indexing` 
*(initial release limited to Metal Tier 1: 96/128 textures, - 16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using - an Intel GPU, and if Metal argument buffers enabled in config)* -- `VK_EXT_extended_dynamic_state` *(requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`)* +- `VK_EXT_descriptor_indexing` + - *Initial release limited to Metal Tier 1: 96/128 textures, + 16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using + an Intel GPU, and if Metal argument buffers enabled in config.* +- `VK_EXT_extended_dynamic_state` + - *Requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`.* - `VK_EXT_extended_dynamic_state2` + - *Primitive restart is always enabled, as Metal does not support disabling it (`VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT`).* - `VK_EXT_external_memory_host` -- `VK_EXT_fragment_shader_interlock` *(requires Metal 2.0 and Raster Order Groups)* +- `VK_EXT_fragment_shader_interlock` + - *Requires Metal 2.0 and Raster Order Groups.* - `VK_EXT_host_query_reset` - `VK_EXT_image_robustness` - `VK_EXT_inline_uniform_block` -- `VK_EXT_memory_budget` *(requires Metal 2.0)* +- `VK_EXT_memory_budget` + - *Requires Metal 2.0.* - `VK_EXT_metal_objects` - `VK_EXT_metal_surface` - `VK_EXT_pipeline_creation_cache_control` - `VK_EXT_pipeline_creation_feedback` -- `VK_EXT_post_depth_coverage` *(iOS and macOS, requires family 4 (A11) or better Apple GPU)* +- `VK_EXT_post_depth_coverage` + - *iOS and macOS, requires family 4 (A11) or better Apple GPU.* - `VK_EXT_private_data ` - `VK_EXT_robustness2` - `VK_EXT_sample_locations` - `VK_EXT_scalar_block_layout` - `VK_EXT_separate_stencil_usage` -- `VK_EXT_shader_atomic_float` *(requires Metal 3.0)* -- `VK_EXT_shader_demote_to_helper_invocation` *(requires Metal Shading Language 2.3)* -- `VK_EXT_shader_stencil_export` *(requires Mac GPU family 2 or iOS GPU family 5)* -- `VK_EXT_shader_subgroup_ballot` *(requires Mac GPU family 2 or Apple GPU family 4)* -- `VK_EXT_shader_subgroup_vote` *(requires Mac GPU family 2 or Apple GPU family 4)* +- `VK_EXT_shader_atomic_float` + - *Requires Metal 3.0.* +- `VK_EXT_shader_demote_to_helper_invocation` + - *Requires Metal Shading Language 2.3.* +- `VK_EXT_shader_stencil_export` + - *Requires Mac GPU family 2 or iOS GPU family 5.* +- `VK_EXT_shader_subgroup_ballot` + - *Requires Mac GPU family 2 or Apple GPU family 4.* +- `VK_EXT_shader_subgroup_vote` + - *Requires Mac GPU family 2 or Apple GPU family 4.* - `VK_EXT_shader_viewport_index_layer` -- `VK_EXT_subgroup_size_control` *(requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS)* +- `VK_EXT_subgroup_size_control` + - *Requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS.* - `VK_EXT_surface_maintenance1` - `VK_EXT_swapchain_colorspace` - `VK_EXT_swapchain_maintenance1` - `VK_EXT_vertex_attribute_divisor` -- `VK_EXT_texel_buffer_alignment` *(requires Metal 2.0)* -- `VK_EXT_texture_compression_astc_hdr` *(iOS and macOS, requires family 6 (A13) or better Apple GPU)* -- `VK_MVK_ios_surface` *(iOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)* -- `VK_MVK_macos_surface` *(macOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)* +- `VK_EXT_texel_buffer_alignment` + - *Requires Metal 2.0.* +- `VK_EXT_texture_compression_astc_hdr` + - *iOS and macOS, requires family 6 (A13) or better Apple GPU.* +- `VK_MVK_ios_surface` + - *Obsolete. Use `VK_EXT_metal_surface` instead.* +- `VK_MVK_macos_surface` + - *Obsolete. 
Use `VK_EXT_metal_surface` instead.* - `VK_AMD_gpu_shader_half_float` - `VK_AMD_negative_viewport_height` -- `VK_AMD_shader_image_load_store_lod` *(requires Apple GPU)* -- `VK_AMD_shader_trinary_minmax` *(requires Metal 2.1)* -- `VK_IMG_format_pvrtc` *(requires Apple GPU)* +- `VK_AMD_shader_image_load_store_lod` + - *Requires Apple GPU.* +- `VK_AMD_shader_trinary_minmax` + - *Requires Metal 2.1.* +- `VK_IMG_format_pvrtc` + - *Requires Apple GPU.* - `VK_INTEL_shader_integer_functions2` -- `VK_NV_fragment_shader_barycentric` *(requires Metal 2.2 on Mac or Metal 2.3 on iOS)* +- `VK_NV_fragment_shader_barycentric` + - *Requires Metal 2.2 on Mac or Metal 2.3 on iOS.* - `VK_NV_glsl_shader` In order to visibly display your content on *macOS*, *iOS*, or *tvOS*, you must enable the @@ -624,6 +651,8 @@ Known **MoltenVK** Limitations ------------------------------ This section documents the known limitations in this version of **MoltenVK**. + +- See [above](#interaction) for known limitations for specific Vulkan extensions. - On *macOS* versions prior to *macOS 10.15.6*, native host-coherent image device memory is not available. Because of this, changes made to `VkImage VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` device memory by the CPU @@ -647,4 +676,3 @@ This section documents the known limitations in this version of **MoltenVK**. use the *Vulkan Loader and Layers* from the *[Vulkan SDK](https://vulkan.lunarg.com/sdk/home)*. Refer to the *Vulkan SDK [Getting Started](https://vulkan.lunarg.com/doc/sdk/latest/mac/getting_started.html)* document for more info. - diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h index 4bbed323..eabb5aa3 100644 --- a/MoltenVK/MoltenVK/API/mvk_datatypes.h +++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h @@ -378,13 +378,13 @@ MTLMultisampleStencilResolveFilter mvkMTLMultisampleStencilResolveFilterFromVkRe #endif /** Returns the Metal MTLViewport corresponding to the specified Vulkan VkViewport. */ -MTLViewport mvkMTLViewportFromVkViewport(const VkViewport vkViewport); +MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport); /** Returns the Metal MTLScissorRect corresponding to the specified Vulkan VkRect2D. */ -MTLScissorRect mvkMTLScissorRectFromVkRect2D(const VkRect2D vkRect); +MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect); /** Returns the Vulkan VkRect2D corresponding to the specified Metal MTLScissorRect. */ -VkRect2D mvkVkRect2DFromMTLScissorRect(const MTLScissorRect mtlScissorRect); +VkRect2D mvkVkRect2DFromMTLScissorRect(MTLScissorRect mtlScissorRect); /** Returns the Metal MTLCompareFunction corresponding to the specified Vulkan VkCompareOp, */ MTLCompareFunction mvkMTLCompareFunctionFromVkCompareOp(VkCompareOp vkOp); diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h index 2b11ae8e..fc67422e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h @@ -267,25 +267,6 @@ typedef MVKCmdSetScissor<1> MVKCmdSetScissor1; typedef MVKCmdSetScissor MVKCmdSetScissorMulti; -#pragma mark - -#pragma mark MVKCmdSetLineWidth - -/** Vulkan command to set the line width. 
*/ -class MVKCmdSetLineWidth : public MVKCommand { - -public: - VkResult setContent(MVKCommandBuffer* cmdBuff, - float lineWidth); - - void encode(MVKCommandEncoder* cmdEncoder) override; - -protected: - MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - - float _lineWidth; -}; - - #pragma mark - #pragma mark MVKCmdSetDepthBias @@ -347,40 +328,6 @@ protected: }; -#pragma mark - -#pragma mark MVKCmdSetLogicOp - -/** Vulkan command to dynamically set the blending logic operation. */ -class MVKCmdSetLogicOp : public MVKCommand { - -public: - VkResult setContent(MVKCommandBuffer* cmdBuff, - VkLogicOp logicOp); - - void encode(MVKCommandEncoder* cmdEncoder) override; - -protected: - MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; -}; - - -#pragma mark - -#pragma mark MVKCmdSetLogicOpEnable - -/** Vulkan command to dynamically enable or disable the blending logic operation. */ -class MVKCmdSetLogicOpEnable : public MVKCommand { - -public: - VkResult setContent(MVKCommandBuffer* cmdBuff, - VkBool32 logicOpEnable); - - void encode(MVKCommandEncoder* cmdEncoder) override; - -protected: - MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; -}; - - #pragma mark - #pragma mark MVKCmdSetDepthTestEnable @@ -438,46 +385,6 @@ protected: }; -#pragma mark - -#pragma mark MVKCmdSetDepthBounds - -/** Vulkan command to set depth bounds. */ -class MVKCmdSetDepthBounds : public MVKCommand { - -public: - VkResult setContent(MVKCommandBuffer* cmdBuff, - float minDepthBounds, - float maxDepthBounds); - - void encode(MVKCommandEncoder* cmdEncoder) override; - -protected: - MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - - float _minDepthBounds; - float _maxDepthBounds; -}; - - -#pragma mark - -#pragma mark MVKCmdSetDepthBoundsTestEnable - -/** Vulkan command to enable depth bounds testing. 
*/ -class MVKCmdSetDepthBoundsTestEnable : public MVKCommand { - -public: - VkResult setContent(MVKCommandBuffer* cmdBuff, - VkBool32 depthBoundsTestEnable); - - void encode(MVKCommandEncoder* cmdEncoder) override; - -protected: - MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - - bool _depthBoundsTestEnable; -}; - - #pragma mark - #pragma mark MVKCmdSetStencilTestEnable diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm index 33078a02..1aa4aa5c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm @@ -285,24 +285,6 @@ template class MVKCmdSetScissor<1>; template class MVKCmdSetScissor; -#pragma mark - -#pragma mark MVKCmdSetLineWidth - -VkResult MVKCmdSetLineWidth::setContent(MVKCommandBuffer* cmdBuff, - float lineWidth) { - _lineWidth = lineWidth; - - // Validate - if (_lineWidth != 1.0 || cmdBuff->getDevice()->_enabledFeatures.wideLines) { - return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetLineWidth(): The current device does not support wide lines."); - } - - return VK_SUCCESS; -} - -void MVKCmdSetLineWidth::encode(MVKCommandEncoder* cmdEncoder) {} - - #pragma mark - #pragma mark MVKCmdSetDepthBias @@ -352,38 +334,6 @@ void MVKCmdSetBlendConstants::encode(MVKCommandEncoder* cmdEncoder) { } -#pragma mark - -#pragma mark MVKCmdSetLogicOp - -VkResult MVKCmdSetLogicOp::setContent(MVKCommandBuffer* cmdBuff, - VkLogicOp logicOp) { - // Validate - if (logicOp != VK_LOGIC_OP_COPY) { - return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support blending using logic operations."); - } - - return VK_SUCCESS; -} - -void MVKCmdSetLogicOp::encode(MVKCommandEncoder* cmdEncoder) {} - - -#pragma mark - -#pragma mark MVKCmdSetLogicOpEnable - -VkResult MVKCmdSetLogicOpEnable::setContent(MVKCommandBuffer* cmdBuff, - VkBool32 logicOpEnable) { - // Validate - if (logicOpEnable) { - return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support blending using logic operations."); - } - - return VK_SUCCESS; -} - -void MVKCmdSetLogicOpEnable::encode(MVKCommandEncoder* cmdEncoder) {} - - #pragma mark - #pragma mark MVKCmdSetDepthTestEnable @@ -426,44 +376,6 @@ void MVKCmdSetDepthCompareOp::encode(MVKCommandEncoder* cmdEncoder) { } -#pragma mark - -#pragma mark MVKCmdSetDepthBounds - -VkResult MVKCmdSetDepthBounds::setContent(MVKCommandBuffer* cmdBuff, - float minDepthBounds, - float maxDepthBounds) { - _minDepthBounds = minDepthBounds; - _maxDepthBounds = maxDepthBounds; - - // Validate - if (cmdBuff->getDevice()->_enabledFeatures.depthBounds) { - return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetDepthBounds(): The current device does not support setting depth bounds."); - } - - return VK_SUCCESS; -} - -void MVKCmdSetDepthBounds::encode(MVKCommandEncoder* cmdEncoder) {} - - -#pragma mark - -#pragma mark MVKCmdSetDepthBoundsTestEnable - -VkResult MVKCmdSetDepthBoundsTestEnable::setContent(MVKCommandBuffer* cmdBuff, - VkBool32 depthBoundsTestEnable) { - _depthBoundsTestEnable = static_cast(depthBoundsTestEnable); - - // Validate - if (cmdBuff->getDevice()->_enabledFeatures.depthBounds) { - return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetDepthBoundsTestEnable(): The current device does not support testing depth bounds."); - } - - return VK_SUCCESS; -} - -void MVKCmdSetDepthBoundsTestEnable::encode(MVKCommandEncoder* cmdEncoder) {} - - #pragma mark - #pragma mark MVKCmdSetStencilTestEnable diff --git 
a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 94f4585c..39580131 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -358,6 +358,9 @@ public: /** Returns the push constants associated with the specified shader stage. */ MVKPushConstantsCommandEncoderState* getPushConstants(VkShaderStageFlagBits shaderStage); + /** Encode the buffer binding as a vertex attribute buffer. */ + void encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride); + /** * Copy bytes into the Metal encoder at a Metal vertex buffer index, and optionally indicate * that this binding might override a desriptor binding. If so, the descriptor binding will diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 55127489..9575bb76 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -919,6 +919,42 @@ void MVKCommandEncoder::setVertexBytes(id mtlEncoder, } } +void MVKCommandEncoder::encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride) { + if (_device->_pMetalFeatures->dynamicVertexStride) { +#if MVK_XCODE_15 + NSUInteger mtlStride = isDynamicStride ? b.stride : MTLAttributeStrideStatic; + if (b.isInline) { + [_mtlRenderEncoder setVertexBytes: b.mtlBytes + length: b.size + attributeStride: mtlStride + atIndex: b.index]; + } else if (b.justOffset) { + [_mtlRenderEncoder setVertexBufferOffset: b.offset + attributeStride: mtlStride + atIndex: b.index]; + } else { + [_mtlRenderEncoder setVertexBuffer: b.mtlBuffer + offset: b.offset + attributeStride: mtlStride + atIndex: b.index]; + } +#endif + } else { + if (b.isInline) { + [_mtlRenderEncoder setVertexBytes: b.mtlBytes + length: b.size + atIndex: b.index]; + } else if (b.justOffset) { + [_mtlRenderEncoder setVertexBufferOffset: b.offset + atIndex: b.index]; + } else { + [_mtlRenderEncoder setVertexBuffer: b.mtlBuffer + offset: b.offset + atIndex: b.index]; + } + } +} + void MVKCommandEncoder::setFragmentBytes(id mtlEncoder, const void* bytes, NSUInteger length, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index d01dc5ef..35f0b6e8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -464,7 +464,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { [rendEnc setDepthBias: 0 slopeScale: 0 clamp: 0]; } } - if (isDirty(DepthClipEnable) && getDevice()->_enabledFeatures.depthClamp) { + if (isDirty(DepthClipEnable) && _cmdEncoder->_pDeviceFeatures->depthClamp) { [rendEnc setDepthClipMode: getContent(DepthClipEnable)]; } @@ -876,40 +876,19 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { // We must not bind those extra buffers to the shader because they might overwrite // any implicit buffers used by the pipeline. if (pipeline->isValidVertexBufferIndex(kMVKShaderStageVertex, b.index)) { - NSUInteger mtlStride = isDynamicVertexStride ? 
b.stride : MTLAttributeStrideStatic; - if (b.isInline) { - [cmdEncoder->_mtlRenderEncoder setVertexBytes: b.mtlBytes - length: b.size -#if MVK_XCODE_15 - attributeStride: mtlStride -#endif - atIndex: b.index]; - } else { - if (b.justOffset) { - [cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset -#if MVK_XCODE_15 - attributeStride: mtlStride -#endif - atIndex: b.index]; - } else { - [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer - offset: b.offset -#if MVK_XCODE_15 - attributeStride: mtlStride -#endif - atIndex: b.index]; - } + cmdEncoder->encodeVertexAttributeBuffer(b, isDynamicVertexStride); - // Add any translated vertex bindings for this binding + // Add any translated vertex bindings for this binding + if ( !b.isInline ) { auto xltdVtxBindings = pipeline->getTranslatedVertexBindings(); for (auto& xltdBind : xltdVtxBindings) { if (b.index == pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.binding)) { - [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer - offset: b.offset + xltdBind.translationOffset -#if MVK_XCODE_15 - attributeStride: mtlStride -#endif - atIndex: pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)]; + MVKMTLBufferBinding bx = { + .mtlBuffer = b.mtlBuffer, + .offset = b.offset + xltdBind.translationOffset, + .stride = b.stride, + .index = static_cast(pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)) }; + cmdEncoder->encodeVertexAttributeBuffer(bx, isDynamicVertexStride); } } } @@ -970,28 +949,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { if (forTessellation && stage == kMVKGraphicsStageRasterization) { encodeBindings(kMVKShaderStageTessEval, "tessellation evaluation", fullImageViewSwizzle, [isDynamicVertexStride](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { - NSUInteger mtlStride = isDynamicVertexStride ? 
b.stride : MTLAttributeStrideStatic; - if (b.isInline) { - [cmdEncoder->_mtlRenderEncoder setVertexBytes: b.mtlBytes - length: b.size -#if MVK_XCODE_15 - attributeStride: mtlStride -#endif - atIndex: b.index]; - } else if (b.justOffset) { - [cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset -#if MVK_XCODE_15 - attributeStride: mtlStride -#endif - atIndex: b.index]; - } else { - [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer - offset: b.offset -#if MVK_XCODE_15 - attributeStride: mtlStride -#endif - atIndex: b.index]; - } + cmdEncoder->encodeVertexAttributeBuffer(b, isDynamicVertexStride); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def index 880f5551..65683f84 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def +++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def @@ -86,17 +86,12 @@ MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindDescriptorSetsStatic, 1, 4) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BindDescriptorSetsDynamic, 4) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetViewport, 1) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetScissor, 1) -MVK_CMD_TYPE_POOL(SetLineWidth) MVK_CMD_TYPE_POOL(SetBlendConstants) -MVK_CMD_TYPE_POOL(SetLogicOp) -MVK_CMD_TYPE_POOL(SetLogicOpEnable) MVK_CMD_TYPE_POOL(SetDepthBias) MVK_CMD_TYPE_POOL(SetDepthBiasEnable) MVK_CMD_TYPE_POOL(SetDepthTestEnable) MVK_CMD_TYPE_POOL(SetDepthWriteEnable) MVK_CMD_TYPE_POOL(SetDepthCompareOp) -MVK_CMD_TYPE_POOL(SetDepthBounds) -MVK_CMD_TYPE_POOL(SetDepthBoundsTestEnable) MVK_CMD_TYPE_POOL(SetStencilTestEnable) MVK_CMD_TYPE_POOL(SetStencilOp) MVK_CMD_TYPE_POOL(SetStencilCompareMask) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 6660e9b7..cb0acac2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -497,11 +497,6 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, // Blending - must ignore allowed bad pColorBlendState pointer if rasterization disabled or no color attachments if (_isRasterizingColor && pCreateInfo->pColorBlendState) { mvkCopy(_blendConstants, pCreateInfo->pColorBlendState->blendConstants, 4); - - // Metal does not support blending with logic operations. 
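
Since the pipeline-creation check here is being removed, it may help to note how applications normally avoid this limitation. The sketch below is illustrative only (function and variable names are hypothetical, not MoltenVK code): a portable Vulkan app gates logic-op blending on the `logicOp` device feature, which an implementation whose backend has no logic-op blending will leave disabled.

```cpp
#include <vulkan/vulkan.h>

// Illustrative app-side sketch only: gate logic-op blending on the logicOp device feature.
VkPipelineColorBlendStateCreateInfo makeColorBlendState(VkPhysicalDevice physDev) {
	VkPhysicalDeviceFeatures features = {};
	vkGetPhysicalDeviceFeatures(physDev, &features);

	VkPipelineColorBlendStateCreateInfo cb = {};
	cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
	// When logicOpEnable is VK_FALSE, logicOp is ignored and normal blending applies,
	// so this degrades gracefully on Metal-backed implementations.
	cb.logicOpEnable = features.logicOp ? VK_TRUE : VK_FALSE;
	cb.logicOp       = VK_LOGIC_OP_COPY;
	// attachmentCount / pAttachments are filled in by the caller.
	return cb;
}
```
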
- if (pCreateInfo->pColorBlendState->logicOpEnable && pCreateInfo->pColorBlendState->logicOp != VK_LOGIC_OP_COPY) { - setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support blending using logic operations.")); - } } else { static float defaultBlendConstants[4] = { 0, 0.0, 0.0, 1.0 }; mvkCopy(_blendConstants, defaultBlendConstants, 4); @@ -522,11 +517,6 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, // Rasterization _hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState); - if (_hasRasterInfo) { - if (_rasterInfo.depthClampEnable && !_device->_enabledFeatures.depthClamp) { - setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device does not support depth clamping.")); - } - } // Must run after _isRasterizing and _dynamicState are populated initCustomSamplePositions(pCreateInfo); diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index c729c153..f5820721 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -392,7 +392,7 @@ static inline double mvkRoundHalfToEven(const double val) { /** Returns whether the value will fit inside the numeric type. */ template -static constexpr const bool mvkFits(const Tval& val) { +static constexpr bool mvkFits(const Tval& val) { return val <= std::numeric_limits::max(); } diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm index ef3cf1ca..7644f49c 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm @@ -583,7 +583,7 @@ MTLMultisampleStencilResolveFilter mvkMTLMultisampleStencilResolveFilterFromVkRe } #endif -MVK_PUBLIC_SYMBOL MTLViewport mvkMTLViewportFromVkViewport(const VkViewport vkViewport) { +MVK_PUBLIC_SYMBOL MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport) { return { .originX = vkViewport.x, .originY = vkViewport.y, @@ -594,7 +594,7 @@ MVK_PUBLIC_SYMBOL MTLViewport mvkMTLViewportFromVkViewport(const VkViewport vkVi }; } -MVK_PUBLIC_SYMBOL MTLScissorRect mvkMTLScissorRectFromVkRect2D(const VkRect2D vkRect) { +MVK_PUBLIC_SYMBOL MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect) { return { .x = (NSUInteger)max(vkRect.offset.x, 0), .y = (NSUInteger)max(vkRect.offset.y, 0), @@ -603,7 +603,7 @@ MVK_PUBLIC_SYMBOL MTLScissorRect mvkMTLScissorRectFromVkRect2D(const VkRect2D vk }; } -MVK_PUBLIC_SYMBOL VkRect2D mvkVkRect2DFromMTLScissorRect(const MTLScissorRect mtlScissorRect) { +MVK_PUBLIC_SYMBOL VkRect2D mvkVkRect2DFromMTLScissorRect(MTLScissorRect mtlScissorRect) { return { .offset = { .x = (int32_t)mtlScissorRect.x, .y = (int32_t)mtlScissorRect.y }, diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index 642ad040..d1c6fbbe 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -1466,7 +1466,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineWidth( float lineWidth) { MVKTraceVulkanCallStart(); - MVKAddCmd(SetLineWidth, commandBuffer, lineWidth); MVKTraceVulkanCallEnd(); } @@ -1496,7 +1495,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBounds( float maxDepthBounds) { MVKTraceVulkanCallStart(); - MVKAddCmd(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds); MVKTraceVulkanCallEnd(); } @@ -2652,7 +2650,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBoundsTestEnable( VkBool32 depthBoundsTestEnable) { MVKTraceVulkanCallStart(); - MVKAddCmd(SetDepthBoundsTestEnable, 
commandBuffer, depthBoundsTestEnable); MVKTraceVulkanCallEnd(); } @@ -3636,7 +3633,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEXT( VkLogicOp logicOp) { MVKTraceVulkanCallStart(); - MVKAddCmd(SetLogicOp, commandBuffer, logicOp); MVKTraceVulkanCallEnd(); } @@ -3661,7 +3657,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEnableEXT( VkBool32 logicOpEnable) { MVKTraceVulkanCallStart(); - MVKAddCmd(SetLogicOpEnable, commandBuffer, logicOpEnable); MVKTraceVulkanCallEnd(); } From d706ed0a63eb170b54de0c55a96b819c0122815c Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 16 Oct 2023 17:24:03 -0400 Subject: [PATCH 23/41] Fixes for VK_EXT_extended_dynamic_state. - MVKPipeline only work around zero stride if stride is static. - Ensure dynamic vertex stride is not enabled on builds before Xcode 15. - Add MVKRenderStateType::LineWidth for track all default options (unrelated). --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 4 + MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 1 + MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 87 +++++++++++---------- 3 files changed, 49 insertions(+), 43 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index a660be0a..77b628ca 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1711,7 +1711,11 @@ void MVKPhysicalDevice::initMetalFeatures() { _metalFeatures.maxPerStageStorageTextureCount = 8; _metalFeatures.vertexStrideAlignment = supportsMTLGPUFamily(Apple5) ? 1 : 4; + +#if MVK_XCODE_15 + // Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU. _metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2)); +#endif // GPU-specific features switch (_properties.vendorID) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 68e10dd8..3bb08735 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -232,6 +232,7 @@ enum MVKRenderStateType { DepthTestEnable, DepthWriteEnable, FrontFace, + LineWidth, LogicOp, LogicOpEnable, PatchControlPoints, diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index cb0acac2..2f8a299e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -560,6 +560,7 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: return DepthTestEnable; case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: return DepthWriteEnable; case VK_DYNAMIC_STATE_FRONT_FACE: return FrontFace; + case VK_DYNAMIC_STATE_LINE_WIDTH: return LineWidth; case VK_DYNAMIC_STATE_LOGIC_OP_EXT: return LogicOp; case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: return LogicOpEnable; case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: return PatchControlPoints; @@ -1366,18 +1367,16 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i]; if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) { - // Vulkan allows any stride, but Metal only allows multiples of 4. - // TODO: We could try to expand the buffer to the required alignment in that case. 
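
To make the 4-byte requirement concrete, here is a hedged, app-side sketch (the helper name is hypothetical): rounding the binding stride up to the alignment reported for the GPU (4 bytes below Apple5, 1 byte otherwise, per `vertexStrideAlignment` above) keeps pipeline creation from failing this check.

```cpp
#include <vulkan/vulkan.h>

// Illustrative only: pad a tightly packed stride up to the required alignment.
// The vertex data itself must, of course, be laid out with the padded stride.
static constexpr uint32_t alignVertexStride(uint32_t tightStride, uint32_t alignment) {
	return (tightStride + alignment - 1) / alignment * alignment;	// e.g. 14 -> 16 when alignment is 4
}

static const VkVertexInputBindingDescription exampleBinding = {
	0,                              // binding
	alignVertexStride(14, 4),       // stride, padded to a 4-byte multiple
	VK_VERTEX_INPUT_RATE_VERTEX,    // inputRate
};
```
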
- VkDeviceSize mtlVtxStrideAlignment = _device->_pMetalFeatures->vertexStrideAlignment; - if ((pVKVB->stride % mtlVtxStrideAlignment) != 0) { - setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", mtlVtxStrideAlignment)); + // Vulkan allows any stride, but Metal requires multiples of 4 on older GPUs. + if (isVtxStrideStatic && (pVKVB->stride % _device->_pMetalFeatures->vertexStrideAlignment) != 0) { + setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", _device->_pMetalFeatures->vertexStrideAlignment)); return false; } maxBinding = max(pVKVB->binding, maxBinding); uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); auto vbDesc = inputDesc.layouts[vbIdx]; - if (pVKVB->stride == 0) { + if (isVtxStrideStatic && pVKVB->stride == 0) { // Stride can't be 0, it will be set later to attributes' maximum offset + size // to prevent it from being larger than the underlying buffer permits. vbDesc.stride = 0; @@ -1418,52 +1417,54 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, if (shaderConfig.isShaderInputLocationUsed(pVKVA->location)) { uint32_t vaBinding = pVKVA->binding; uint32_t vaOffset = pVKVA->offset; + auto vaDesc = inputDesc.attributes[pVKVA->location]; + auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format); // Vulkan allows offsets to exceed the buffer stride, but Metal doesn't. // If this is the case, fetch a translated artificial buffer binding, using the same MTLBuffer, // but that is translated so that the reduced VA offset fits into the binding stride. - const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions; - uint32_t attrSize = 0; - for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) { - if (pVKVB->binding == pVKVA->binding) { - attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format); - if (pVKVB->stride == 0) { - // The step is set to constant, but we need to change stride to be non-zero for metal. - // Look for the maximum offset + size to set as the stride. - uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); - auto vbDesc = inputDesc.layouts[vbIdx]; - uint32_t strideLowBound = vaOffset + attrSize; - if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound; - } else if (vaOffset && vaOffset + attrSize > pVKVB->stride) { - // Move vertex attribute offset into the stride. This vertex attribute may be - // combined with other vertex attributes into the same translated buffer binding. - // But if the reduced offset combined with the vertex attribute size still won't - // fit into the buffer binding stride, force the vertex attribute offset to zero, - // effectively dedicating this vertex attribute to its own buffer binding. - uint32_t origOffset = vaOffset; - vaOffset %= pVKVB->stride; - if (vaOffset + attrSize > pVKVB->stride) { - vaOffset = 0; + if (isVtxStrideStatic) { + const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions; + uint32_t attrSize = 0; + for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) { + if (pVKVB->binding == pVKVA->binding) { + attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format); + if (pVKVB->stride == 0) { + // The step is set to constant, but we need to change stride to be non-zero for metal. + // Look for the maximum offset + size to set as the stride. 
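
The offset folding performed in the `else if` branch just below is easiest to follow with concrete numbers; the short worked example here uses illustrative values only.

```cpp
#include <cstdint>

// Worked example of the offset folding described above; numbers are illustrative.
static void offsetFoldingExample() {
	uint32_t stride   = 16;		// binding stride
	uint32_t vaOffset = 36;		// Vulkan attribute offset: exceeds the stride, which Metal rejects
	uint32_t attrSize = 8;		// bytes consumed by the attribute's format

	uint32_t origOffset = vaOffset;
	vaOffset %= stride;			// 36 % 16 = 4
	// 4 + 8 <= 16, so the reduced offset fits: the attribute is redirected to a
	// translated binding that reuses the same MTLBuffer, advanced by origOffset - vaOffset.
	uint32_t translationOffset = origOffset - vaOffset;	// 32
	// Had attrSize been 14, then 4 + 14 > 16 would force vaOffset back to 0 and
	// translationOffset to the full 36, dedicating a buffer binding to this attribute.
	(void)attrSize; (void)translationOffset;
}
```
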
+ uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); + auto vbDesc = inputDesc.layouts[vbIdx]; + uint32_t strideLowBound = vaOffset + attrSize; + if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound; + } else if (vaOffset && vaOffset + attrSize > pVKVB->stride) { + // Move vertex attribute offset into the stride. This vertex attribute may be + // combined with other vertex attributes into the same translated buffer binding. + // But if the reduced offset combined with the vertex attribute size still won't + // fit into the buffer binding stride, force the vertex attribute offset to zero, + // effectively dedicating this vertex attribute to its own buffer binding. + uint32_t origOffset = vaOffset; + vaOffset %= pVKVB->stride; + if (vaOffset + attrSize > pVKVB->stride) { + vaOffset = 0; + } + vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding); + if (zeroDivisorBindings.count(pVKVB->binding)) { + zeroDivisorBindings.insert(vaBinding); + } } - vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding); - if (zeroDivisorBindings.count(pVKVB->binding)) { - zeroDivisorBindings.insert(vaBinding); - } + break; } - break; + } + if (pVKVB->stride && attrSize > pVKVB->stride) { + /* Metal does not support overlapping loads. Truncate format vector length to prevent an assertion + * and hope it's not used by the shader. */ + MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride); + reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). Changing descriptor format from %s to %s.", + attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat)); + mtlFormat = (decltype(vaDesc.format))newFormat; } } - auto vaDesc = inputDesc.attributes[pVKVA->location]; - auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format); - if (pVKVB->stride && attrSize > pVKVB->stride) { - /* Metal does not support overlapping loads. Truncate format vector length to prevent an assertion - * and hope it's not used by the shader. */ - MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride); - reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). Changing descriptor format from %s to %s.", - attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat)); - mtlFormat = (decltype(vaDesc.format))newFormat; - } vaDesc.format = mtlFormat; vaDesc.bufferIndex = (decltype(vaDesc.bufferIndex))getMetalBufferIndexForVertexAttributeBinding(vaBinding); vaDesc.offset = vaOffset; From 3a77f4ea97bb81965d96427d79faeb49a36c35bb Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 16 Oct 2023 22:59:21 -0400 Subject: [PATCH 24/41] Fixes to determination of VkPhysicalDeviceLimits::timestampPeriod. - On Apple GPUs, set timestampPeriod to 1.0. - On non-Apple GPUs, calculate timestampPeriod each time it is retrieved. - On older devices that do not support GPU timestamps, use nanosecond CPU timestamps to be consistent with timestampPeriod of 1.0. - Change MVKConfiguration::timestampPeriodLowPassAlpha and environment variable MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA to 1.0, to use only latest value by default. - Add build-time verification that MVKConfigMembers.def includes all members of MVKConfiguration (unrelated). 
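
The smoothing mentioned above is a single-pole exponential low-pass filter. A minimal sketch of the behaviour, using illustrative names rather than MoltenVK API:

```cpp
// With alpha == 1.0 (the new default) the filter passes the latest measurement straight
// through; smaller alpha values make timestampPeriod change more slowly over time.
static double filterTimestampPeriod(double current, double measured, double alpha) {
	return (1.0 - alpha) * current + alpha * measured;
}

// Example: starting from 1.0 ns/tick, a measured 41.7 ns/tick yields 41.7 with
// alpha = 1.0, but only 0.95 * 1.0 + 0.05 * 41.7 = 3.035 with the old alpha = 0.05.
```

Defaulting the alpha to 1.0 matches the note above: callers of `vkGetPhysicalDeviceProperties()` see the freshest measurement, while apps that prefer smoothing can lower the alpha through the config or environment variable.
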
--- Docs/MoltenVK_Runtime_UserGuide.md | 6 +++--- Docs/Whats_New.md | 4 +++- MoltenVK/MoltenVK/API/mvk_config.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 5 +++-- MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm | 2 +- MoltenVK/MoltenVK/Utility/MVKConfigMembers.def | 7 ++----- MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp | 15 +++++++++++++-- MoltenVK/MoltenVK/Utility/MVKEnvironment.h | 2 +- 8 files changed, 27 insertions(+), 16 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 080b4d0c..c2f54985 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -617,9 +617,9 @@ vailable when you request it, resulting in frame delays and visual stuttering. ### Timestamping -On non-Apple Silicon devices (older Mac devices), the GPU can switch power and performance -states as required by usage. This affects the GPU timestamps retrievable through the Vulkan -API. As a result, the value of `VkPhysicalDeviceLimits::timestampPeriod` can vary over time. +On non-Apple GPUs (older Mac devices), the GPU can switch power and performance states as +required by usage. This affects the GPU timestamps retrievable through the Vulkan API. +As a result, the value of `VkPhysicalDeviceLimits::timestampPeriod` can vary over time. Consider calling `vkGetPhysicalDeviceProperties()`, when needed, and retrieve the current value of `VkPhysicalDeviceLimits::timestampPeriod`, to help you calibrate recent GPU timestamps queried through the Vulkan API. diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 6ab13f7a..201d483c 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -29,7 +29,9 @@ Released TBD - Fix MSL code used in `vkCmdBlitImage()` on depth-stencil formats. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. -- Add configurable lowpass filter for `VkPhysicalDeviceLimits::timestampPeriod`. +- Add `MVKConfiguration::timestampPeriodLowPassAlpha` and environment variable + `MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA`, to add a configurable lowpass filter + for varying `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple GPUs. - Deprecate `MVK_DEBUG` env var, and add `MVK_CONFIG_DEBUG` env var to replace it. - Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index f72ef777..21041efc 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -955,7 +955,7 @@ typedef struct { * The initial value or this parameter is set by the * MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA * runtime environment variable or MoltenVK compile-time build setting. - * If neither is set, this parameter is set to 0.05 by default, + * If neither is set, this parameter is set to 1.0 by default, * indicating that the timestampPeriod will vary relatively slowly, * with the expectation that the app is querying this value infrequently. */ diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 77b628ca..7d342f68 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1601,7 +1601,8 @@ void MVKPhysicalDevice::updateTimestampPeriod() { // Basic lowpass filter TPout = (1 - A)TPout + (A * TPin). // The lower A is, the slower TPout will change over time. 
- float a = mvkConfig().timestampPeriodLowPassAlpha; + // First time through, just use the measured value directly. + float a = earlierCPUTs ? mvkConfig().timestampPeriodLowPassAlpha : 1.0; _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); } } @@ -2637,7 +2638,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.optimalBufferCopyRowPitchAlignment = 1; _properties.limits.timestampComputeAndGraphics = VK_TRUE; - _properties.limits.timestampPeriod = mvkGetTimestampPeriod(); // Will be 1.0 on Apple Silicon + _properties.limits.timestampPeriod = 1.0; // On non-Apple GPU's, this can vary over time, and is calculated based on actual GPU activity. _properties.limits.pointSizeRange[0] = 1; switch (_properties.vendorID) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm index 1bd0a6d0..7b16cb81 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm @@ -381,7 +381,7 @@ void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncod // If not using MTLCounterSampleBuffer, update timestamp values, then mark queries as available void MVKTimestampQueryPool::finishQueries(MVKArrayRef queries) { if ( !_mtlCounterBuffer ) { - uint64_t ts = mvkGetTimestamp(); + uint64_t ts = mvkGetElapsedNanoseconds(); for (uint32_t qry : queries) { _timestamps[qry] = ts; } } MVKQueryPool::finishQueries(queries); diff --git a/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def index aff0cf33..e148b1ac 100644 --- a/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def +++ b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def @@ -23,16 +23,13 @@ // To use this file, define the macros: // // MVK_CONFIG_MEMBER(member, mbrType, name) -// MVK_CONFIG_MEMBER_STRING(member, strObj, name) +// MVK_CONFIG_MEMBER_STRING(member, mbrType, name) // // then #include this file inline with your code. // // The name prameter is the name of the configuration parameter, which is used as the name // of the environment variable, and build setting, that sets the config value, and is entered // here without the "MVK_CONFIG_" prefix. -// -// Since string members are set from char pointers, the text must be copied to a std::string -// object, which is passed as a parameter to MVK_CONFIG_MEMBER_STRING. 
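
As a purely illustrative aside (not part of this patch), this is roughly how a consumer of the X-macro list described above looks; it assumes MoltenVK's config headers, which declare the member types, are already included.

```cpp
#include <cstdio>

// Define the two macros, include the .def file, and every listed member expands
// into one statement -- here, a printf of the parameter name and the member's size.
#define MVK_CONFIG_MEMBER(member, mbrType, name) \
	std::printf("MVK_CONFIG_%s (%zu bytes)\n", #name, sizeof(mbrType));
#define MVK_CONFIG_MEMBER_STRING(member, mbrType, name) \
	MVK_CONFIG_MEMBER(member, mbrType, name)

static void dumpConfigMemberList() {
#include "MVKConfigMembers.def"
}

// #undef of an undefined macro is a no-op, so this is safe whether or not the
// .def file cleans up after itself.
#undef MVK_CONFIG_MEMBER
#undef MVK_CONFIG_MEMBER_STRING
```
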
#ifndef MVK_CONFIG_MEMBER @@ -69,7 +66,7 @@ MVK_CONFIG_MEMBER(forceLowPowerGPU, VkBool32, MVK_CONFIG_MEMBER(semaphoreUseMTLFence, VkBool32, ALLOW_METAL_FENCES) // Deprecated legacy MVK_CONFIG_MEMBER(semaphoreSupportStyle, MVKVkSemaphoreSupportStyle, VK_SEMAPHORE_SUPPORT_STYLE) MVK_CONFIG_MEMBER(autoGPUCaptureScope, MVKConfigAutoGPUCaptureScope, AUTO_GPU_CAPTURE_SCOPE) -MVK_CONFIG_MEMBER_STRING(autoGPUCaptureOutputFilepath, evGPUCapFileStrObj, AUTO_GPU_CAPTURE_OUTPUT_FILE) +MVK_CONFIG_MEMBER_STRING(autoGPUCaptureOutputFilepath, char*, AUTO_GPU_CAPTURE_OUTPUT_FILE) MVK_CONFIG_MEMBER(texture1DAs2D, VkBool32, TEXTURE_1D_AS_2D) MVK_CONFIG_MEMBER(preallocateDescriptors, VkBool32, PREALLOCATE_DESCRIPTORS) MVK_CONFIG_MEMBER(useCommandPooling, VkBool32, USE_COMMAND_POOLING) diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp index f2fa9e86..fefceddd 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp @@ -20,8 +20,19 @@ #include "MVKOSExtensions.h" #include "MVKFoundation.h" +// Return the expected size of MVKConfiguration, based on contents of MVKConfigMembers.def. +static constexpr uint32_t getExpectedMVKConfigurationSize() { +#define MVK_CONFIG_MEMBER_STRING(member, mbrType, name) MVK_CONFIG_MEMBER(member, mbrType, name) +#define MVK_CONFIG_MEMBER(member, mbrType, name) cfgSize += sizeof(mbrType); + uint32_t cfgSize = 0; +#include "MVKConfigMembers.def" + return cfgSize; +} + static bool _mvkConfigInitialized = false; static void mvkInitConfigFromEnvVars() { + static_assert(getExpectedMVKConfigurationSize() == sizeof(MVKConfiguration), "MVKConfigMembers.def does not match the members of MVKConfiguration."); + _mvkConfigInitialized = true; MVKConfiguration evCfg; @@ -32,8 +43,8 @@ static void mvkInitConfigFromEnvVars() { #define MVK_CONFIG_MEMBER(member, mbrType, name) \ evCfg.member = (mbrType)mvkGetEnvVarNumber(STR(MVK_CONFIG_##name), MVK_CONFIG_##name); -#define MVK_CONFIG_MEMBER_STRING(member, strObj, name) \ - evCfg.member = mvkGetEnvVarString(STR(MVK_CONFIG_##name), strObj, MVK_CONFIG_##name); +#define MVK_CONFIG_MEMBER_STRING(member, mbrType, name) \ + evCfg.member = mvkGetEnvVarString(STR(MVK_CONFIG_##name), evGPUCapFileStrObj, MVK_CONFIG_##name); #include "MVKConfigMembers.def" diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h index f6f1ae9e..6c50582a 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h @@ -323,5 +323,5 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig); * This can be set to a float between 0.0 and 1.0. */ #ifndef MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA -# define MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA 0.05 +# define MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA 1.0 #endif From 49c6e8bae36f6b561ecac2b20e59a78245f65999 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 17 Oct 2023 14:36:26 -0400 Subject: [PATCH 25/41] Update dependency libraries to match Vulkan SDK 1.3.268. 
--- Docs/Whats_New.md | 19 +++++++++++++----- ExternalRevisions/SPIRV-Cross_repo_revision | 2 +- .../Vulkan-Headers_repo_revision | 2 +- ExternalRevisions/Vulkan-Tools_repo_revision | 2 +- ExternalRevisions/glslang_repo_revision | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 2 ++ .../MoltenVKShaderConverterTool.cpp | 1 + Templates/spirv-tools/build.zip | Bin 54542 -> 55923 bytes 8 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 201d483c..2308d5d6 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -16,24 +16,33 @@ Copyright (c) 2015-2023 [The Brenwill Workshop Ltd.](http://www.brenwill.com) MoltenVK 1.2.6 -------------- -Released TBD +Released 2023/10/17 - Add support for extensions: - `VK_KHR_synchronization2` - `VK_EXT_extended_dynamic_state` *(requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`)* - `VK_EXT_extended_dynamic_state2` -- Fix rare case where vertex attribute buffers are not bound to Metal - when no other bindings change between pipelines. +- Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. - Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. - Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. -- Fix MSL code used in `vkCmdBlitImage()` on depth-stencil formats. +- Fix deadlock when reporting debug message on `MVKInstance` destruction. +- Fix MSL code used in `vkCmdBlitImage()` on depth-stencil formats. - Improve behavior of swapchain image presentation stalls caused by Metal regression. -- Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. +- `VkPhysicalDeviceLimits::timestampPeriod` set to 1.0 on Apple GPUs, and calculated dynamically on non-Apple GPUs. - Add `MVKConfiguration::timestampPeriodLowPassAlpha` and environment variable `MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA`, to add a configurable lowpass filter for varying `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple GPUs. +- Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. - Deprecate `MVK_DEBUG` env var, and add `MVK_CONFIG_DEBUG` env var to replace it. - Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. +- Update dependency libraries to match _Vulkan SDK 1.3.268_. +- Update to latest SPIRV-Cross: + - MSL: Workaround Metal 3.1 regression bug on recursive input structs. + - MSL: fix extraction of global variables, in case of atomics. + - MSL: Workaround bizarre crash on macOS. + - MSL: runtime array over argument buffers. + - MSL: Make rw texture fences optional. + - MSL: Prevent RAW hazards on read_write textures. 
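
For application developers picking up this release, a brief, hedged sketch of how the newly listed `VK_EXT_extended_dynamic_state` support is consumed from the Vulkan side (all handles are assumed valid):

```cpp
#include <vulkan/vulkan.h>

// Illustrative application-side sketch (not MoltenVK internals). Assumes a device
// created with the VK_EXT_extended_dynamic_state extension enabled and with
// VkPhysicalDeviceExtendedDynamicStateFeaturesEXT::extendedDynamicState = VK_TRUE
// chained into VkDeviceCreateInfo::pNext, plus a bound pipeline whose
// VkPipelineDynamicStateCreateInfo lists VK_DYNAMIC_STATE_CULL_MODE_EXT and
// VK_DYNAMIC_STATE_FRONT_FACE_EXT. Depending on the loader, the EXT entry points
// may need to be retrieved with vkGetDeviceProcAddr() instead of linked directly.
static void recordDynamicRasterState(VkCommandBuffer cmdBuf) {
	vkCmdSetCullModeEXT(cmdBuf, VK_CULL_MODE_BACK_BIT);
	vkCmdSetFrontFaceEXT(cmdBuf, VK_FRONT_FACE_COUNTER_CLOCKWISE);
	// Subsequent vkCmdDraw*() calls in this command buffer use these values,
	// without requiring a separate pipeline per cull-mode/winding combination.
}
```
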
diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision index 590969e2..fa4ca520 100644 --- a/ExternalRevisions/SPIRV-Cross_repo_revision +++ b/ExternalRevisions/SPIRV-Cross_repo_revision @@ -1 +1 @@ -bccaa94db814af33d8ef05c153e7c34d8bd4d685 +2de1265fca722929785d9acdec4ab728c47a0254 diff --git a/ExternalRevisions/Vulkan-Headers_repo_revision b/ExternalRevisions/Vulkan-Headers_repo_revision index a8ebecec..e37595bd 100644 --- a/ExternalRevisions/Vulkan-Headers_repo_revision +++ b/ExternalRevisions/Vulkan-Headers_repo_revision @@ -1 +1 @@ -85c2334e92e215cce34e8e0ed8b2dce4700f4a50 +7b3466a1f47a9251ac1113efbe022ff016e2f95b diff --git a/ExternalRevisions/Vulkan-Tools_repo_revision b/ExternalRevisions/Vulkan-Tools_repo_revision index bf77fda0..6ecf5015 100644 --- a/ExternalRevisions/Vulkan-Tools_repo_revision +++ b/ExternalRevisions/Vulkan-Tools_repo_revision @@ -1 +1 @@ -300d9bf6b3cf7b237ee5e2c1d0ae10b9236f82d3 +1532001f7edae559af1988293eec90bc5e2607d5 diff --git a/ExternalRevisions/glslang_repo_revision b/ExternalRevisions/glslang_repo_revision index aba7fbb1..cd54f4f3 100644 --- a/ExternalRevisions/glslang_repo_revision +++ b/ExternalRevisions/glslang_repo_revision @@ -1 +1 @@ -76b52ebf77833908dc4c0dd6c70a9c357ac720bd +be564292f00c5bf0d7251c11f1c9618eb1117762 diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 2f8a299e..49399b06 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -1734,6 +1734,7 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu shaderConfig.options.mslOptions.multiview = mvkIsMultiview(pRendInfo->viewMask); shaderConfig.options.mslOptions.multiview_layered_rendering = getPhysicalDevice()->canUseInstancingForMultiview(); shaderConfig.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT); + shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); #if MVK_MACOS shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute; #endif @@ -2139,6 +2140,7 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI shaderConfig.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageCompute]; shaderConfig.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute]; shaderConfig.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute]; + shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderConfig, pSS->pSpecializationInfo, this, pStageFB); if ( !func.getMTLFunction() ) { diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp index 9a911e3d..2eb1c48c 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp +++ b/MoltenVKShaderConverter/MoltenVKShaderConverterTool/MoltenVKShaderConverterTool.cpp @@ -221,6 +221,7 @@ bool MoltenVKShaderConverterTool::convertSPIRV(const vector& spv, mslContext.options.mslOptions.argument_buffers = _useMetalArgumentBuffers; 
mslContext.options.mslOptions.force_active_argument_buffer_resources = _useMetalArgumentBuffers; mslContext.options.mslOptions.pad_argument_buffer_resources = _useMetalArgumentBuffers; + mslContext.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); SPIRVToMSLConverter spvConverter; spvConverter.setSPIRV(spv); diff --git a/Templates/spirv-tools/build.zip b/Templates/spirv-tools/build.zip index 3879c9b8368fa5cf774fce1db6774bc26225b43d..d087d8e2b16e86191b8180ef03b827b132dcc4ae 100644 GIT binary patch delta 42339 zcmZU(18gtM6E0kvr?yUQ+qP}Dr?&g6ZQHg^ZQHhOyZ5~Ri?~{wEkfxQSm=*s#z*p#Ps5ZiQ}O3{};Ov7;G8@ z;XiN}h^EE`Fd!ge#KbKKlK(0E2b>56Ne=KPSyhoF&LBpkvw7J$Nmf((v*FZu!K+kN zp)O7E^Yx**xe4$=&QC`35Pa?t3HSXv;vgzebOmuT&OJt{X8Btdop{Hrvxutdm0oPq z**={&<{dZO+S-!%cuZ4N9TXoga%H*@A5E+jwg>W6&8~GuA$Lg96|U#GrbwfcQ3jY$ z&4wG{t>Rvb^J7njn)CQ>TyHE`QG0JZZMr(t4mmhr+*2Vjp18Key2tU|gdIx7PheBr zSWDbUC0A5D*|l^oAYDL2=H?_VbZs^OKXQpp1+G1n-5?Xj*f}>r;yXe@sGvh$o^U9J z=gOh4U*3#om&0S06VInL9Z*}5HUan_6m(vgyCEsbR58W74FO(_p!H^Tqa0Cob25>(ggTC-xkc>V|is7NmT%8lGk|5b4s zhdtLug|!YuUX3K88y`mwK2G?j3EG6rmY;9j@VnoRbU9KB;u^O8h<*+|H~`G!<`cr= zcfmqYd5r?qUlp7gh85G z^`E7;FY0KgucIV5`_LM2SqbpifnvnTAjIw6a?0q$YjHH&rzOnLd?LAdD-QwS7)7i| z=ukeg76fQ~0`1!-(JDg1*IB&Dw!uyCa{MuErkgfX-2RxnU<%ra>I(p#7L5%Dlyx*0 zT@{|HRt#$ZqFo-%B8aSD$X~lC)D`gyYmY*C^kM<;y!r_S%=)Sxb{_1~UON~PgJvxgHw?y2wdA7DK`N<5I2h>wXvEt4RohjM(<^Kferq~SLcOw8m;JW!n7mf_d> zfNv!A6$`b8qx7%X2{Skl!t$4ecCJvlh>NJ zqXDxW@_A$bk6z`AhRzBPFO&DQ(GjsH3s-Rj7K{%;_K=nC5wlZjUFdDyxb-rvTbJjW z`H*T{9jg+gkMZ~)?GCsP_eDgwnt=$&n@Ky5` z-f4#ivB8e{;GZ6I0gG{gV7n(G=kdp@#6Yqg3D1=*QC_;K!ya>-X-U~I4Hxk6E+nlr zJhh^kFveQlU(3@&+qEBhA+;Al{MJAK}C%FID5#;=zCi#0kP(}+P z4ee61Ti>uo-KL3%RG&1`r=U1PKep zA=78np*ObieSnr|2h%ozs-F1Yit`8uk#ngp0I#WK!tz`pmDLj~*N~~V3hQ!5`{b)*ozTY_<94NlTb7N=lBnf(o9U(im5`m;ao9xZY1;9~EA_z$xD)>4z>=Cu zQ|v|HnU1B7;!8lUB)wZD1>;MH<$RT#Q|rbX;rRYhEdX&&4G(sU@`9MF>~b-tafCSB zDiGw7nX0peigF>**G9{-vim$7_vM!W=Y=ukCTlvrgJeE;7l=M^4Ejf!troj#;W_!T zVj1x>n`}VEWCR#)L=nizSy=^Wy^aUcw3b7}Q<3qooD%xTYf$>5L)O;9Ub$2vhn0AD zIt1I4Eg-Z7)89$&-5g^j*jBxzs>>5^NV|zTA7r|6g=vY8Z^t3v&#O259 zi}zqq2*yTTE1@V|b1UMP&WU2(tZ=qjg%zf;4+sn`f5~~Ir38EBS==u4O-~Ta(bd4! 
zRcNL(@5pI#uL!Ox4x_M~K~e8BQ6&?v!*O`}6kuN_c27OX2Z;7|PUZNGFS}QL*O0YyBY@5=Tjuo`nNPP&(#ubw4vDWWvfEiY>iV&=*YdGiq02XP0XBLw^n*YiaPHIR|iw3 z@OJ}zh`U~pve9qGZJ1r*Q@=Y^VGDES#^x@(;{HRWrwC& z@dQ@SCl*6LJq$M4scpG3!pEj+IpH4z9e{>Jz76#yo3cR4u^AiE4oWGVb@15N!=a>m zD!$fvA%$E(I`tsT6@klf=}I8S+*XqjK~`wanee+ONaw74d2iULD3tY+qHGI#BJ3Za zqw1SRqr!#Mkye?5XatrH>4uC}e zHx?*V4Z2neLxJ07zDg$b^s+z*g>6r@(=k%UNw=&_(g$~{iYBsUei6ZaNqHb{JOSlq zFDSdKvS!Q8A+2}KeKOsw!5Y?Z5iYkd>ffpPx1G8BDkq|HJdyal+Ekf3p$g-#+b0ZP zU?3h7=%tP9-d6m%3h@{f#J8^V;Q+0s@Cl}1W)&c^x^?Sk)48lLMP?y)tLW=z(?m=v z7i!%vdp!T1a}GyIN%VzCLZD;bC}#7L`>q&^C#UG}bfh5I&ay$IO0#B*vaz8dMOVQ4 z9i8WsmU+|wt*{sm^E)759#}lX+|D`eX{1ZjJw?` zLwqjqqz9(iu8pF2sZ!|+w8T51!gM=AC?7`KHSiF6R)r6o`OT_HduYiyteG{KeagL2 zXD+>|cY~0^Buh=-$2bWdGT@wpLbfRy4>QD1VER7F2NGo3ldLQp(DuYJ2#St`aaGxNWs_$tGSb}EBG>b4l8#3LH_D|tn3d@ibmIeHDYKLM1YV_Ep zb6uERU|GN{irxm2Ul2NqweZ{s)RoP|>x7C$FE140)VsY{5{`a;4gd;oS)U+E%v2G@ zDgY8i+veU`J!!;HlO^&u*GYnuQEY7RE*)H0W7*C!9S`Ch5uIikPgau4d!$+fLlb^SJZWNrjRHW>%+_|1rXkK6>n z`TT_(i!VsEfcM;w2e4yCR(JzzZa@GWRlk11dZ1D?{#Xw`74Joesv(~M1`NiO(o#O% zdm!SHtS-_89%6bq122Hnng>e~*fZmSh|H-y^R3N?(5(%lNx zR&6M6o5-qxgUoVJ7!g}4^OsT2q+n#;8bpbRyma8L5ti3g6rh+ZWwL++4$CX!#ac`y z^|@8xo}POyp|(PDsl0I1pp%BA{1EHMAO<`Zbms2+S^B_!8?4A%>6=|!+_w9wc)OM_4qRc; zT=mR-*q5$303E1LUg8JncWQs))Q0neR&HEX8aQx4o;opNE~|!UGqFR%fIADa`H4rC z30G-@;f?p5A0Zv15oiNdIlB9Sf%lp>jO#+8r)+B(0gCL=g@Dj%k!UE?{t7%(S?eND ziPK_<^=Ke<8YMzV5(;OVKs8e1374yvVS-d?4JbgNL+8Ys`qrRQ%{w70*Ke(5y>sAS zE;rV{*27E*3W=J~_0oblndmX5k~m2<@Ib2zAN77QQK(}sGQRDXV~-^gx`+^s*Mh(x z3joVQ0UD(3g($V@Q7T@S=z#jyN@0(DtD$4^)(xrnHY)|kLP*RaX(XC-arS_|?jh7Q z1isj-SnLNi;{718T|crXwqv@;LESd+ChHuobsk^3@rb%DullpH&DB|KiMUh4tMxG; zFE_b>+&7kjTeVBgR}soc-2-(HT}f!MdNDMV0OtC$?bo(9njm(G(DM>o$JLS$;a&ssF~9j5!x?OTX9`vPFSsbBC=u)#NYL4@q@ z@MWL3?ec>gP!R$x4*0ML;R5&AAkDZ3PBlSLdX#9VjB=naxtvN z0Sr*R$v{H1o$%mp*PtxBeq!@-9iv|=l*8QWM?Wm_jo#Eu~wnqJYOF+3vYK!Q3(#j4Eg z`!b-#f(sdf^AdxaL4gwEA|u?a5yS1?i*djh#W6U~kQy4rs`eXD^aN3@%AmktbCARN zHOiNXjS3qo#KjBsA+VzrfTX~L06Gnk`JBKD`*G2S*R0W_?A>WG_vsL!AZ8-LVTAFa zgn_?dA=YIuqcAxkG@=@fF}DxK4AMCMh_*6gp4M4#6df(tJIGVj0mT{z-cf9b_1EJr znD0sijSLZK1SAWy==jC}3(DXX8gX!Ag4DY){5rWa@a{b&s*BqQ^v9jR0Q{-P!DtixoltDeUJ+alchWIyn^gHWrK6&_tG8gU100+?3OJ{di8)xst zSU+d@i|6_`Ru?=^Eq3Y#TaQ!}2?(e+dsI7l4eSb^vomJh=l0nuz6@_xDgHa6t4#Bw zjvcgpjcj7T<-Uc+5_v@}Ju(&Fi^tL3W^TzUpNnJ|d+9X;E=q3|2Zh^#xhtGP;aGz} zwb(hDEBa)rwfO_rfSNC-D}%qLIHnM_YbkSLlQN%rSyf%%Q;|Fl>xGpb361q0;9{Qdc-S><*&xUfPvB z7hC8*@xQOPkR9=;aY0wUImZtN|FSVY`MlTWp%F^3q!*BZ0)Td=_?kAjczaBxg#c(Ve;8=~uUcY^}t=qkTI^HSJ9;3Oli8tNO zKsK>x{7kl>0I|%N;0ZY}Sal+kg9#TrWSVYg0FKI4CEIRqP?=OaOw#N;`U@^O1Tsa0 zCp^Y48fRbiGjX%$&imh=729ZuC2E$wC^>xFy2+if>KLON?k;^VAqs6hGfbo_G^HY{k$$l_JP<9st0tZZ> z*1ocFjGO=&i^uZP_<;0L^!cl+G5Riv9HUbxM0q*v4@`om*3ej% zMpJ87K)~Q}O4y7FAG!y@kw4|c?;PWE6#Gt4pmbx$@|_O7&U1#pBK}U3J&=ejz=BSb z+XRoBxDYoC=U5>(D$CQROpu_SF@+B`$0U+tRYU%!8)VUh)~ z{SKAoM3Ss<@yBOD13KO9qIhE3iGQ5vP(|wL02p~*NjBP&@zOUGuq-?`(0a9gJ`rgI z+m}ra1+O_Jw?XAIVZ7+7=1xL`m3XxJo|MlB%5dE;#JT~~LKcX?O0gFMv4H~NJty6z zvU;w@Q)$pSH z+P{GCVl2Z={Hd^u=0EU!z656&RHjJ~*#s$WY1p$&Is3eE9FMzo;;d@-&mY>$&RtnK zN2U|{^6&4S#MnBwKh+Ci-TJ8}@w9_GcT9gUm8tVwvyJY-s#{RmR$B@HenOIY5`uF-e(ZG*HsqDU6j* z2+S{-cuQz87EIE+*;i@6F>{P9X3&_y>*^OXUl6JEH$pr*Ig@j6ktay--oxEk~YW@XVGZ z7V`~GJ-Aj%!jV8CEZen{km)B|FKN9_q6W?qH5J0S^PyMHL7$^N*yn7*(%m}iWV>{_ z<)>|3xqmF7%?3mnRaCWUc&-|o+G6wX{90rKM6Y^IyUR%R-h$GEWot^Q)D4IU0d)b$ z!A{rRSqOIRh|zSzv8jDmyqK)!?3zkDgbMM$1-OV6*Dl{t#iSEprkrUOGBl%@yIehZ z<;s|Ae}}$MDx_Gz( zC3ViItALJ^7{{vNEHQo()HX>cG*J0mF==k1kybyBW4hs)^7a|Hyb?&HyXF*xoeivS zPY#7)k0PtC!puMq8He4we_E~BU3v{u1)kH5{+5-XVs 
zq!V)@6OZ!mXVl!-Nq4Y@FJ64o&dZ+`Y*C{xEv?dSzkn~hP?B+5DQ;g7UVgRUW5~(L z2`;ey`99Df-9)B`%N7fa^`P#cdNH}@R|`TElmivww7WdOX#eGm*plf#<9tM;DFnzz z5N?+lq1ZaTtONj(Z@1zNo`)Kb1(7V9Km0PzqzS`psz+I1l41hBb=Yj-RvDI^_4{Zn zxT1dJrM);Usod^523d2{U#}d(p$Mhn6`v{6ek9Ha3{b?`+$9B7k34*V#cJjaw_o1} zXKo{$!}v{QJ582nG?L+}`{&qPtMh)jY~%Gp$TJg4H3S2iE@LWi2figsPc$X|qg(tK{y2uw1*eyJD1xj-`r8%drZm-ul)4Il}>d+hdnpNF4Zvh4gF$pf|-w7<`C zXbb*B1QZ6~(KeAqKr-W;d=XYXd$S)UZ(uW$nD4tPUzSH;vz$0wqwzWb4Kj3PpZez7 zL92Y;G_3PM3c1eg`gyqfG;t3IGIVY?L%_T05f5T!CJ=D@dIno38u}xge734J%U8?D z{o5z#jRJ)0Ajg-cq8mipo*O@{4MjLH?IAaQncK*cukv(S_Lx`_XgEXfSzJLsmRzCm z7@30K2ZF%M=z$!pNFCA1og>{Y7>cVmW~3HcRz+=@{-^o%LVs#evGQQ-Fe|cM!`VD{ zC9ggHr6+|Qe~gW##0Bq@=Hv(T{|P=P<`Z%M-vnIZA<@MD@^8WJo}x`MKtK;SiCx6h z|BFcH{{JJ=S3cTK2OJ4cADL#qj0>;wcdd(%oC4qpIYD?9z`(U6F8 zsCm0K$OaqGcsn?pweF3#c=Xsa%LCzXO%-}1OnY+SV*uMl=ffLZFSIw{btBD>H6~dB zTU@(?1o`^mT1DL5wJxyJJROuezJ^@;otJ`MH{s+}00JyqfK&EYFVPZ@Hd&8#ycp8A zErGbZALvQ2icepQKfw)oOsFDZa&dbCC-#m}6SP&su1iKd)UE7gYy3fQ*O3ZLCiM~& zh3i(uO$OyWn2{eZ|7!Fmhqu;8&p~hME^d+)f>@JMSRjUG?0s-XUnd6}z9(RiBc;$} zoCPu;AS?+jD9r)_HTwb%Gm9Y35m!zBCUj+3@^1j%{OAK4^Oy7=%jDl0kbGtgJt&}8d2=|Q(@D2CSv4HbZi^EF=Y_;y8{vg zvToV)cRe(B$+MDPp8_xmL{CLj4n&c9HP=o+r;ze%-=98=Nvchuu@XPyo8U;Bz03Lw zcax%XH@!kvWuuhd-Va$PnxrIKxXTYOyp5VBYvozW z?YBPdndURw0+{9^XK8~gK{c&ztXI@7guq_J#ke-5Xur`EkjH!YHO95WTz})4Yw2&m z>%>ZNl!zuQskE*~p$!L$G39U5(f+a2SyHSMCF$2XHRv%NVVlY36`TWzJzvGcKH`b{Xd zXKRfx@tRoTY!_j*Zg`HTek5s4NDg&CGxRC#^JvY=@s-5vf9LZ$_=&h(1ax`ACcCne z7M5|-K#II;eJ(SLlgZD~>UR6mMGz<33(+&*g928VJs}_kpf$a#3D62G-fv%2sM$HO zwo#^u@=dZHASXE?_0)zOEy+9UK{@iAtTC6Q0FZUc8BozHV2a866WHYznE_G?#Hc|O z{pRG3p{bJofE~Ugj&_{@3b`$;KY+g+$Db1NMsQ}EZMJWOug3`#gijN|2VJlE5Rzhrg(o$?7k8E;&R~F%t zrFH6iF3Cx@5S_7wdZO4jsJL`uxzfeZJQpC+}^>J7)KRy(3sbVi3O*8ytkdb>U zi(HY)rC_pO@+fF5r~b!lBuQEEU1P|D}7 zYP+d{Z=$K!idO}p;T7gtQ1y(^a=^Tk2oSL|avoJDRZVt9Mfa7+wjjY>ljmZhRI@<% z2?#`Li|k$FL?VJcmptf@uwo_NAK5m$itQ2~Au(0CFxH$Kj&t6(zIEhP$uBNm2mPu6 zfAk{BTaC;D;E~}(Rfn>VN!6g1{C8_;Y=O~RII#%)8~a!)0%_KjstYRSV?ff0=74Sr z$>$760|T+Apasf6T9T6L(jBMz*iJx6q29Vg52Qw1Yh1JdPYsQTgOoNsIx=|6KnzmA$9W%wb(7wVxGmqI)@P zd>Har|As-ke^yri?3DN%IW{en-2yqAk-VV>4+&4ZTzk{}*GzYL_aN{j?#h?* z&gH;BAzihEifT;S(iC`JuU*>|Df}TAn_0tl>(L6~V~s;?`PHq!S(IEgwA2W|`Bi$b z>+a=PD0#H)t=03T;Mi%NmNwbgqI*zgm&iAL+XWm9(-CX*on)|VyR0sef{Cjw?ZltZ0rKrKgjgAez|mWBBe;lZ8<0m{T~48I z+6-n@9}`aNq1jWpU)fJfG*-C)QB@6Ye6@VxCXG_sP~KR!LTU<{Qp8J(1c1ia43`Y4 zYGp`5k=BOqJ#vJFBX6@PM^D47n6#`kwOelmcxesh6>hACPpz!x-FZkFW8uu$)=W8B zQn_iEc-NLRId`l~VZ5+!UUpUA?`o5@L!GjyGt*&0bLee2Qtb+UY5X zM&|a`^hrx%{?lOy9pk)40}NPB`a=cXs1^1uO6cv^_H%M>hekI%+fyTQ`h8aAvfgjq zzxYHK6@MY^jM0dy#kh>1&!M8`nnj$hoCnY*7i`#YjBHX0f}cO}2P4F>{RAYUyKbUDlldbwug3j}ax@3kPwv3poQ2{Daz7Cw4|HO0c-g4E~6c{MEB- z2C!YLMY`>@gN7n0sCYkqX3SUVOw1w@-Hun3)h2#J+nypsuQLVYuTU8TS9ux8WP|vP z42KB|Wbum({ym~1ruDe_)+BO?LFbN?wKicG8@3uXC5h5{3rGZj%%cuMvcIjBDuMHc zrovThdwP@6*v?FU;qFFX3!f$D=gb-^{){6Trxu@B6Lty7Dtmha^qok#tIWeDV(=Zb z!ptA4`brcQ7;?!5&hl!ALKOyn+_k%D_yP`7lmn%aTQ zyjH@iAW^~Op8O>w(LmwctEx&Mq$66yVC)B5hg#2K)#@3d*ia<=tbUnTLMjoFx*3<8A>;(5z>CV z$k<(hsL!VagvR>ddHt=D(?(!Rx5cE{!hn-2wjQPcj~vjl@vtA+VSA~G3lMDf*0%m^ zCnv#pn&FF4VS>GT{L%PuIH)4PD+m!xhs zC80@(CeZxo-5GQ}@=q%I_q0v4UoO2&%-%Akm`gMO-L%gL_j+9>3qh2AiTyT;y!`P8 zFF;q!+5kN4`*jUkXnCROQPl(2OiJ+^)LSgnNN7WV1hhzopwpo9Y-B zFA0cKd`r;dE1*hcI>sspN?mq2wc z^lgFtFrN|&Y}6`@!-Qrl*}-G*5UnZE1=hWo9T+_&&Ur%G&T}NoMq9d7o&VsXhr90T zKZI(-U@&g!+|pHs4(k5x!Ol>y;@ls{!bw1}o0yA*>fgDLgUj!h#iW*Jp8m#R4;fR? 
zZ70X)txJQ??0%oZMiQYu$&S2tjur0DN(zXHTP^}B3FBH0=C&%3%3~iN=+?h%jKt++ zD4DF{T1er(5SDGHq;8c4r*C@2%BKQ;*^2!fRr6vckPA0BD&6)JZ#UUfL4l z)aGw?6}|KZp^c0Rc&Mq^Pn!E}bu*&jy%Z9jn7Rj=;WOl;)*~O8(K6_n5bedcGwolh zw4@QI)CY(#V#&k8c(Baa-th>l2Js3^N%d?$C+fxUWY(`HpnGctrbpB8(4-w_i+u8q z_I4PdZ~#xHFPDlJ8TBd`iHqMRM=DyaoeNor!BhqT6B;DDm2`3XoG8DY7bE7BQZ-1C zL-U2ORRU80EmaAItX`fvZJc@PG5#$>J<_B`BgP{@RFL^maw|qpnveN-nP2UW_&RG? z6=~dntehjwvu)H7{4^zo?{Y=wIb}mL6^tt?(;!`m-zA-cNo$xUiiOOk^PUREg9BYy zF#oVGXjNF>Vd5(6w`QCEtnFT;5d))Yqq@5G6w489DNkHn3 z-E&SMODjm;>hf!n;tRCmI1*2?tL!)IzgB&KR6wE5}r8g!l( zrIcG#Ve#QSFlFu)?zi&7N0mj`iyBk9d_<|S$ThyUw*k{6t@8wYL@lCnnGJ)pt}}xZ z&}rVV*Nu@m33-9(X4FYzgA=M+r}E0?P06a8&(37yj56#h;{-I=ez4h*F-PxcVz`8I zeII2N&xYbef59xZ*?mp_eS%es`iu4_a;sLg3c|ye@Ya_L4*FUmX2~2>8^}TpLP{1p z)XZCklcK6BY${iRO56#VlrA&r&?+x1!~D@i-vx)1#_uYta^dL`E^b9cX3}CVXuc*n zV77TjqYLUe0uCm47kRUWH<#l!Ax{@h$Zvm5eWH>cB;C+Cbd!Nyp3WN@?J@bGPSY@t zFqe}yLBY)4wX!A4eR{IP9xcz#a8isiqOdeOFRgHvoNye8Z-kB5(a$3Ti{0mu_Juqt z=zusK~Uv`pyLq7K|?(YvhqqFMVY+Vi}?!yj4e~^AMx)iU*ieysxvg8=h zTh=a)K;tX*AbvEem2gGfSFfaYitSqEe93_CG72_dnB};n5Zn1W%OUH*ne^eYNGfQV z?Yw7Z1oJ_0+0(}8i2Iz-XCixdo^`IfD3r-$o8x&U}-sGx>(KKgyQUh(T5JN^idl;&1&qi`-Th`I@MqZ zb~8CRlag>$wbjHKw5!wK-2NIb5!&#EVe#Cq$z4P@l<#19hsbMHjgK6O)Bi|j|9fm8 z4os%fbeax$WpU`=q_U+KAoNo~xbuhXBL{bxUO?cZ-R=DT)NLZF5AN|-M$zL{`$b17 zDqlL8u7y5Q{!>tH3oRNp-z~cn5fEBqN;wDar|U{UuTy`!x?Y{gD5aDmi|osNvEZbav_1i-gaji#yR>Wi}hXs)N`} z`(#+dpsiWOk1w0lem36CoOJWhk7Zo+WEuNQ zBQfKcy))_wCAnzea6}u836`t~BOfQLlSfDz!^?K|^sc~&V0~$;cL?4y_fMatNXGbw zQ(|*-1GWRvUYi^(uRZS0i?oA{@vYF0@48(PS_(x$EsO+Y8CfiH4#QR5qq5Y6rcxg~ z347$W+>G{Gg?Q6S2iiX9>n*IiPfaSKWP~ z*`;ccx1>^cGG2D1pnU+&kI<5svQ7fmJRNK;X|726a2Vm~PPua~-k1_CzALz6dX)FW zbEj(=v_NEa;oAt!gr659w2ojuuC4C#FExrM?zTUOYh1nh?Ns0_PJRS5w)+djFF_@~ zea$)!ALgW#9PfR64_#6a&_e>rZ5{Opw?x}%MS~XNU+;;5UHE}J(2}yBCO${7wUUgG zOtvP#*j?dvg)x+_`wNZC!jj6v#Acu zlPxsNxdv`b*`p3ET{qPR-Nhv+Lu(vGX)<3u24yAm$0t*s8hKI|=Ld$Spvhm`p&OHU zpS5uQ%35)&gUJ@^rx7*1$Z?vYvtd|?TtJc>MoaBXgCSzGJ{)5w`CCbkVDiQBL^^pB zy&DPd>Twb?6jc|`*0ul@2qHiEC}q+?BFnTrW&%z=#Q#tcqs1Z}aIBlPkZ|(y`}deF z_Bkb1L&jIbQ(G8jzpLq=!J;QBMTdlj+=ptLg*aWrY&|3k_J0yG?EeDUNO+J>?a;$V zrV?Rw2^!gOpf6+Hi|845eL#1f@vHd(s2ZNYsdd&>0>R$~cUr(h*n0}!mP~7#8!%td z+oO_RcNK3h{@(_sG+{@)d9v^w+RDj`?g~{4oxN!0=6WCzt>SXfP0aT$nMX^Y_Csw^IboLJ?vMQEy?q(^wPLbGT z+nwoeJ&1i;u(5?HFa8$q*>nS#G=vG>8wCSHWGP7Yo4j73lIVUEw@E~nXRr^-l0+u0 zFE~gX-UdD@ec9KL*F`axe)HKh4rteko^KSQQAgHn|Cr0Pfh{z}XJrHHr7crb_yS9eKK*M;Ip9X@S$Yms$r; z0{+_x)j}#sbwGYZNsrx@lmvIuH{&Le44N!2KXKm?P1xKkx4wCT9Hi4R_V02ONqdLx zUV!Vxps48B6rt_@erLCI)$+Y-LKA7j&N%YJs1iC_+a?rmK{8QMjwrwTWG?%)Vngb4 zuGR;s2k9V&pe`cA(jX!55!dW62M3SmK!)rGizdvUt@X3Dpg`5!g_^Hqrf(H+VRJ9o zu6HY3kw$NJCmAOUCkvh-ARO0FoPHU6npW6UjO~5p_kEzfvbU`)#kogG{k2t)zHSyt z3-QV7n}RgO+Z~3a^PR10(!B>9KVE}=*|QVxirc^6E&ypd+0K0)&Hxhu0% zvQn^XXKq~~HJ}%54dLcYb5dD(DmNxFC-PoV0`sH1KwMcUE7>5rjxm6MY9@)LhT_K5 z0nOC6vO!8;gT9B1#g?+#`Q#m(`5+-llOh%MSj?5nZ~4yMfb-=bPnZ!|QL#93JClYH zliScB(rFt5E)Jz{K=g@i{epjIAvOKAf9OPZ1OJ00m_JaA@@H0a zME)-PP06BU>fr>t5W@I>Rw@ynkI@1* z0K>P2^B*Hn0Nm*Lzaeh&@L4nAxC~JP0F3`=CkH|ZXF2|A|H1vgxoSxNYXcJ_01*Ee z+W(VH&4wKzEquXo6ac`pfBXlxK~`tk|N8%fB-b1TB2pJbWsddl{GYgQ`YzicC;-5k z@c-Vu`z`>)cYx~u#MSJOmhMFa08*a9!Tu-i`T*p3oCxAR^54m-&C@&~9D-v8_%DMK Bz*GPL From 37e4fefe5fb326c0dd3221930edecb9fa72e5bee Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 18 Oct 2023 09:58:11 -0400 Subject: [PATCH 26/41] Emit primitiveRestartEnable disabled warning only for strip topology. - Move check and warning to MVKRenderingCommandEncoderState. - Pass primitiveRestartEnable to MVKRenderingCommandEncoderState. 
---
 MoltenVK/MoltenVK/Commands/MVKCmdRendering.h     |  2 ++
 MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm    | 14 ++++----------
 .../MoltenVK/Commands/MVKCommandEncoderState.h   |  3 +++
 .../MoltenVK/Commands/MVKCommandEncoderState.mm  | 16 ++++++++++++++++
 MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h       |  1 +
 MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm      |  9 ++-------
 6 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h
index fc67422e..7f1df4b4 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h
@@ -584,6 +584,8 @@ public:
 
 protected:
     MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override;
+
+    VkBool32 _primitiveRestartEnable;
 };
 
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm
index 1aa4aa5c..c4bb7548 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm
@@ -524,19 +524,13 @@ void MVKCmdSetPrimitiveTopology::encode(MVKCommandEncoder* cmdEncoder) {
 
 VkResult MVKCmdSetPrimitiveRestartEnable::setContent(MVKCommandBuffer* cmdBuff,
                                                      VkBool32 primitiveRestartEnable) {
-    // Validate
-    // In Metal, primitive restart cannot be disabled.
-    // Just issue warning here, as it is very likely the app is not actually expecting
-    // to use primitive restart at all, and is just setting this as a "just-in-case",
-    // and forcing an error here would be unexpected to the app (including CTS).
-    if ( !primitiveRestartEnable ) {
-        reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support disabling primitive restart.");
-    }
-
+    _primitiveRestartEnable = primitiveRestartEnable;
     return VK_SUCCESS;
 }
 
-void MVKCmdSetPrimitiveRestartEnable::encode(MVKCommandEncoder* cmdEncoder) {}
+void MVKCmdSetPrimitiveRestartEnable::encode(MVKCommandEncoder* cmdEncoder) {
+    cmdEncoder->_renderingState.setPrimitiveRestartEnable(_primitiveRestartEnable, true);
+}
 
 
 #pragma mark -
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index c518c54a..82ea4eab 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -312,6 +312,8 @@ public:
     void setViewports(const MVKArrayRef<VkViewport> viewports, uint32_t firstViewport, bool isDynamic);
     void setScissors(const MVKArrayRef<VkRect2D> scissors, uint32_t firstScissor, bool isDynamic);
 
+    void setPrimitiveRestartEnable(VkBool32 primitiveRestartEnable, bool isDynamic);
+
     void setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic);
 
     void beginMetalRenderPass() override;
@@ -345,6 +347,7 @@ protected:
     MVKRenderStateFlags _dirtyStates;
     MVKRenderStateFlags _modifiedStates;
     bool _mtlDepthBiasEnable[StateScope::Count] = {};
+    bool _mtlPrimitiveRestartEnable[StateScope::Count] = {};
     bool _mtlRasterizerDiscardEnable[StateScope::Count] = {};
     bool _cullBothFaces[StateScope::Count] = {};
 };
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 35f0b6e8..9e17aa99 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -436,6 +436,11 @@ void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef sc
     setContent(Scissors);
 }
 
+void MVKRenderingCommandEncoderState::setPrimitiveRestartEnable(VkBool32 primitiveRestartEnable, bool isDynamic) {
+    bool mtlPrimitiveRestartEnable = static_cast<bool>(primitiveRestartEnable);
+    setContent(PrimitiveRestartEnable);
+}
+
 void MVKRenderingCommandEncoderState::setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic) {
     bool mtlRasterizerDiscardEnable = static_cast<bool>(rasterizerDiscardEnable);
     setContent(RasterizerDiscardEnable);
 }
@@ -473,6 +478,17 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) {
         [rendEnc setStencilFrontReferenceValue: sr.frontFaceValue backReferenceValue: sr.backFaceValue];
     }
 
+    // Validate
+    // In Metal, primitive restart cannot be disabled.
+    // Just issue warning here, as it is very likely the app is not actually expecting
+    // to use primitive restart at all, and is just setting this as a "just-in-case",
+    // and forcing an error here would be unexpected to the app (including CTS).
+    auto mtlPrimType = getPrimitiveType();
+    if (isDirty(PrimitiveRestartEnable) && !getContent(PrimitiveRestartEnable) &&
+        (mtlPrimType == MTLPrimitiveTypeTriangleStrip || mtlPrimType == MTLPrimitiveTypeLineStrip)) {
+        reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support disabling primitive restart.");
+    }
+
     if (isDirty(Viewports)) {
         auto& mtlViewports = getContent(Viewports);
         if (_cmdEncoder->_pDeviceFeatures->multiViewport) {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 3bb08735..062b646b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -427,6 +427,7 @@ protected:
     uint32_t _tessCtlPatchOutputBufferIndex = 0;
     uint32_t _tessCtlLevelBufferIndex = 0;
 
+    bool _primitiveRestartEnable = true;
     bool _hasRasterInfo = false;
     bool _needsVertexSwizzleBuffer = false;
     bool _needsVertexBufferSizeBuffer = false;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 49399b06..e6e05fa5 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -295,6 +295,7 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage)
 
     // Rasterization
     cmdEncoder->_renderingState.setPrimitiveTopology(_vkPrimitiveTopology, false);
+    cmdEncoder->_renderingState.setPrimitiveRestartEnable(_primitiveRestartEnable, false);
     cmdEncoder->_renderingState.setBlendConstants(_blendConstants, false);
     cmdEncoder->_renderingState.setStencilReferenceValues(_depthStencilInfo);
     cmdEncoder->_renderingState.setViewports(_viewports.contents(), 0, false);
@@ -507,13 +508,7 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device,
                                       ? pCreateInfo->pInputAssemblyState->topology
                                       : VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
 
-    // In Metal, primitive restart cannot be disabled.
-    // Just issue warning here, as it is very likely the app is not actually expecting
-    // to use primitive restart at all, and is just setting this as a "just-in-case",
-    // and forcing an error here would be unexpected to the app (including CTS).
-    if (pCreateInfo->pInputAssemblyState && !pCreateInfo->pInputAssemblyState->primitiveRestartEnable) {
-        reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateGraphicsPipeline(): Metal does not support disabling primitive restart.");
-    }
+    _primitiveRestartEnable = pCreateInfo->pInputAssemblyState ? pCreateInfo->pInputAssemblyState->primitiveRestartEnable : true;
 
     // Rasterization
     _hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState);

From e693a0a2be23a5b59804137986d34a458a0fe298 Mon Sep 17 00:00:00 2001
From: Bill Hollings
Date: Mon, 23 Oct 2023 20:04:04 -0400
Subject: [PATCH 27/41] Reduce disk space consumed after running fetchDependencies.

- Add --keep-cache option to control whether or not to retain the External/build/Intermediates directory (default not retained).
- Export KEEP_CACHE & SKIP_PACKAGING to be available within scripts used by ExternalDependencies Xcode builds.
- Move BLD_SPECIFIED to build() instead of build_impl() to avoid updating it from background thread (which will fail).
- Update MoltenVK version to 1.2.7 (unrelated).
- Add CompilerMSL::Options::replace_recursive_inputs to pipeline cache (unrelated; sketched below).
- Update GitHub CI to Xcode 15.0.
- Update Whats_New.md document.
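A note on the pipeline-cache bullet above: the pipeline cache persists the SPIRV-Cross MSL options field by field, so a newly added option such as replace_recursive_inputs must be appended to that field list or it is silently dropped from the cached state. A simplified, hypothetical sketch of that pattern follows (the struct and archive types are stand-ins, not MoltenVK's actual serialization machinery):

    #include <cstdint>
    #include <vector>

    struct ShaderConversionOptions {            // stand-in for CompilerMSL::Options
        bool check_discarded_frag_stores = false;
        bool sample_dref_lod_array_as_grad = false;
        bool replace_recursive_inputs = false;  // the newly added option
    };

    struct Archive {                            // stand-in for the cache writer
        std::vector<uint8_t> bytes;
        void writeBool(bool v) { bytes.push_back(v ? 1 : 0); }
    };

    // Each option field is written explicitly; omitting the new field would make
    // the serialized pipeline-cache state incomplete.
    static void serialize(Archive& ar, const ShaderConversionOptions& opt) {
        ar.writeBool(opt.check_discarded_frag_stores);
        ar.writeBool(opt.sample_dref_lod_array_as_grad);
        ar.writeBool(opt.replace_recursive_inputs);
    }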
+ + + MoltenVK 1.2.6 -------------- diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index 21041efc..077a4a1f 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -45,7 +45,7 @@ extern "C" { */ #define MVK_VERSION_MAJOR 1 #define MVK_VERSION_MINOR 2 -#define MVK_VERSION_PATCH 6 +#define MVK_VERSION_PATCH 7 #define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch)) #define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index e6e05fa5..7cb4a3ce 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -2506,7 +2506,8 @@ namespace SPIRV_CROSS_NAMESPACE { opt.force_sample_rate_shading, opt.manual_helper_invocation_updates, opt.check_discarded_frag_stores, - opt.sample_dref_lod_array_as_grad); + opt.sample_dref_lod_array_as_grad, + opt.replace_recursive_inputs); } template diff --git a/README.md b/README.md index e44e34a6..5c205b08 100644 --- a/README.md +++ b/README.md @@ -165,8 +165,8 @@ platforms and simulators. The `--visionos` and `--visionossim` selections must b with a separate invocation of `fetchDependencies`, because those selections require Xcode 15+, and will cause a multi-platform build on older versions of Xcode to abort. -Running `fetchDependencies` repeatedly with different platforms will accumulate -targets in the `XCFramework`. +Running `fetchDependencies` repeatedly with different platforms will accumulate targets +in the `XCFramework`, if the `--keep-cache` option is used on each invocation. For more information about the external open-source libraries used by **MoltenVK**, see the [`ExternalRevisions/README.md`](ExternalRevisions/README.md) document. diff --git a/Scripts/create_ext_lib_xcframeworks.sh b/Scripts/create_ext_lib_xcframeworks.sh index 33cd977d..9cacc5f0 100755 --- a/Scripts/create_ext_lib_xcframeworks.sh +++ b/Scripts/create_ext_lib_xcframeworks.sh @@ -1,5 +1,7 @@ #!/bin/bash +if [ "${SKIP_PACKAGING}" = "Y" ]; then exit 0; fi + . "${PROJECT_DIR}/Scripts/create_xcframework_func.sh" export MVK_EXT_DIR="${PROJECT_DIR}/External" diff --git a/Scripts/package_ext_libs_finish.sh b/Scripts/package_ext_libs_finish.sh index d44a2d30..ec4d4753 100755 --- a/Scripts/package_ext_libs_finish.sh +++ b/Scripts/package_ext_libs_finish.sh @@ -1,5 +1,7 @@ #!/bin/bash +if [ "${SKIP_PACKAGING}" = "Y" ]; then exit 0; fi + set -e export MVK_EXT_LIB_DST_PATH="${PROJECT_DIR}/External/build/" @@ -7,6 +9,12 @@ export MVK_EXT_LIB_DST_PATH="${PROJECT_DIR}/External/build/" # Assign symlink to Latest ln -sfn "${CONFIGURATION}" "${MVK_EXT_LIB_DST_PATH}/Latest" +# Remove the large Intermediates directory if no longer needed +if [ "${KEEP_CACHE}" != "Y" ]; then + echo Removing Intermediates library at "${MVK_EXT_LIB_DST_PATH}/Intermediates" + rm -rf "${MVK_EXT_LIB_DST_PATH}/Intermediates" +fi + # Clean MoltenVK to ensure the next MoltenVK build will use the latest external library versions. make --quiet clean diff --git a/fetchDependencies b/fetchDependencies index f03f36c8..8b80bf90 100755 --- a/fetchDependencies +++ b/fetchDependencies @@ -67,6 +67,12 @@ # --no-parallel-build # Build the external libraries serially instead of in parallel. This is the default. # +# --keep-cache +# Do not remove the External/build/Intermediates cache directory after building. 
+# Removing the Intermediates directory returns significant disk space after the +# build, and is the default behaviour. Use this option if you intend to run this +# script repeatedly to incrementally build one platform at a time. +# # --glslang-root path # "path" specifies a directory path to a KhronosGroup/glslang repository. # This repository does need to be built and the build directory must be in the @@ -117,6 +123,7 @@ V_HEADERS_ROOT="" SPIRV_CROSS_ROOT="" GLSLANG_ROOT="" BLD_SPV_TLS="" +export KEEP_CACHE="" while (( "$#" )); do case "$1" in @@ -191,6 +198,10 @@ while (( "$#" )); do XC_USE_BCKGND="" shift 1 ;; + --keep-cache) + KEEP_CACHE="Y" + shift 1 + ;; -v) XC_BUILD_VERBOSITY="" shift 1 ;; @@ -410,7 +421,6 @@ function execute_xcodebuild_command () { # 2 - Platform # 3 - Destination (Optional. Defaults to same as platform) function build_impl() { - BLD_SPECIFIED="Y" XC_OS=${1} XC_PLTFM=${2} if [ "${3}" != "" ]; then @@ -442,7 +452,9 @@ function build_impl() { # Select whether or not to run the build in parallel. # 1 - OS # 2 - platform +# 3 - Destination (Optional. Defaults to same as platform) function build() { + BLD_SPECIFIED="Y" if [ "$XC_USE_BCKGND" != "" ]; then build_impl "${1}" "${2}" "${3}" & else @@ -453,6 +465,7 @@ function build() { EXT_DEPS=ExternalDependencies XC_PROJ="${EXT_DEPS}.xcodeproj" XC_DD_PATH="${EXT_DIR}/build" +export SKIP_PACKAGING="Y" # Determine if xcpretty is present XCPRETTY_PATH=$(command -v xcpretty 2> /dev/null || true) # ignore failures @@ -512,9 +525,10 @@ if [ "$XC_USE_BCKGND" != "" ]; then fi if [ "$BLD_SPECIFIED" != "" ]; then - # Build XCFrameworks, update latest symlink, and clean MoltenVK for rebuild + # Build XCFrameworks, update latest symlink, remove intermediates, and clean MoltenVK for rebuild PROJECT_DIR="." CONFIGURATION=${XC_CONFIG} + SKIP_PACKAGING="" . "./Scripts/create_ext_lib_xcframeworks.sh" . "./Scripts/package_ext_libs_finish.sh" else From 40baeaa965494e0d0c5fcb527dc603c726f35272 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 6 Nov 2023 16:56:06 -0500 Subject: [PATCH 28/41] Fix regression error in argument buffer runtime arrays. - Update to latest SPIRV-Cross containing the fix. - Modify CTS options in runcts script to avoid outputting full CTS log, and use less file caching, all to reduce memory and file space consumption, and possibly improve performance (unrelated). - Update MoltenVKShaderConverter tool to include Metal 3.1 support and improved argument buffer settings (unrelated). - Force GitHub CI to use Python 3.11, to avoid crash in glslang::update_glslang_sources.py due to use of distutils, removed in Python 3.12 (unrelated). - Small unrelated non-functional edits.
--- .github/workflows/CI.yml | 5 +++++ Docs/Whats_New.md | 3 +++ ExternalRevisions/SPIRV-Cross_repo_revision | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 2 ++ .../xcschemes/MoltenVKShaderConverter.xcscheme | 2 +- .../MoltenVKShaderConverterTool.cpp | 8 ++++++-- Scripts/runcts | 4 ++++ 7 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3ae3d142..9ee53ee4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -39,6 +39,11 @@ jobs: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v3 + # Python 3.12 removed distutils, which is used by glslang::update_glslang_sources.py called from fetchDependencies + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Select Xcode version run: sudo xcode-select -switch "${XCODE_DEV_PATH}" diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index ff09d596..f69d4203 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -19,6 +19,8 @@ MoltenVK 1.2.7 Released TBD - Reduce disk space consumed after running `fetchDependencies` script by removing intermediate file caches. +- Update to latest SPIRV-Cross: + - MSL: Fix regression error in argument buffer runtime arrays. @@ -61,6 +63,7 @@ MoltenVK 1.2.5 Released 2023/08/15 - Add support for extensions: + - `VK_KHR_deferred_host_operations` - `VK_KHR_incremental_present` - `VK_KHR_shader_non_semantic_info` - `VK_EXT_4444_formats` diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision index fa4ca520..9f579560 100644 --- a/ExternalRevisions/SPIRV-Cross_repo_revision +++ b/ExternalRevisions/SPIRV-Cross_repo_revision @@ -1 +1 @@ -2de1265fca722929785d9acdec4ab728c47a0254 +4818f7e7ef7b7078a3a7a5a52c4a338e0dda22f4 diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 7d342f68..99ac0ef3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -2208,6 +2208,8 @@ void MVKPhysicalDevice::initMetalFeatures() { if ([_mtlDevice respondsToSelector: @selector(argumentBuffersSupport)]) { _metalFeatures.argumentBuffersTier = _mtlDevice.argumentBuffersSupport; + } else { + _metalFeatures.argumentBuffersTier = MTLArgumentBuffersTier1; } #define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \ diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme index eeee1a62..9f64b980 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme @@ -97,7 +97,7 @@ isEnabled = "NO"> & spv, mslContext.options.shouldFlipVertexY = _shouldFlipVertexY; mslContext.options.mslOptions.argument_buffers = _useMetalArgumentBuffers; mslContext.options.mslOptions.force_active_argument_buffer_resources = _useMetalArgumentBuffers; - mslContext.options.mslOptions.pad_argument_buffer_resources = _useMetalArgumentBuffers; + mslContext.options.mslOptions.pad_argument_buffer_resources = false; + mslContext.options.mslOptions.argument_buffers_tier = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::ArgumentBuffersTier::Tier2; mslContext.options.mslOptions.replace_recursive_inputs = 
mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); SPIRVToMSLConverter spvConverter; @@ -425,7 +426,10 @@ MoltenVKShaderConverterTool::MoltenVKShaderConverterTool(int argc, const char* a _quietMode = false; _useMetalArgumentBuffers = false; - if (mvkOSVersionIsAtLeast(13.0)) { + if (mvkOSVersionIsAtLeast(14.0)) { + _mslVersionMajor = 3; + _mslVersionMinor = 1; + } else if (mvkOSVersionIsAtLeast(13.0)) { _mslVersionMajor = 3; _mslVersionMinor = 0; } else if (mvkOSVersionIsAtLeast(12.0)) { diff --git a/Scripts/runcts b/Scripts/runcts index 73aee7c6..dfac7a13 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -122,8 +122,12 @@ start_time=${SECONDS} "${cts_vk_dir}/deqp-vk" \ --deqp-archive-dir="${cts_vk_dir}/.." \ +--deqp-log-filename="/dev/null" \ --deqp-log-images=disable \ --deqp-log-shader-sources=disable \ +--deqp-shadercache=disable \ +--deqp-log-decompiled-spirv=disable \ +--deqp-log-flush=disable \ --deqp-caselist-file="${caselist_file}" \ &> "${results_file}" From 36e57f434859d96f3749c61414cf5eee6723f706 Mon Sep 17 00:00:00 2001 From: Jan Sikorski Date: Tue, 14 Nov 2023 16:09:13 +0100 Subject: [PATCH 29/41] Fix MVKBitArray::getIndexOfFirstSetBit() skipping over entries. Reset startIndex so that we only skip over bits in the section that contains it. --- MoltenVK/MoltenVK/Utility/MVKBitArray.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h index 46bf41cd..a1441bc5 100755 --- a/MoltenVK/MoltenVK/Utility/MVKBitArray.h +++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h @@ -90,7 +90,11 @@ public: * and optionally clears that bit. If no bits are set, returns the size() of this bit array. */ size_t getIndexOfFirstSetBit(size_t startIndex, bool shouldClear) { - size_t startSecIdx = std::max(getIndexOfSection(startIndex), _clearedSectionCount); + size_t startSecIdx = getIndexOfSection(startIndex); + if (startSecIdx < _clearedSectionCount) { + startSecIdx = _clearedSectionCount; + startIndex = 0; + } size_t bitIdx = startSecIdx << SectionMaskSize; size_t secCnt = getSectionCount(); for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) { @@ -101,6 +105,7 @@ public: if (shouldClear) { clearBit(bitIdx); } return std::min(bitIdx, _bitCount); } + startIndex = 0; } return std::min(bitIdx, _bitCount); } From 037ac4b76c41a1a08b20fd217a335101e0768e2a Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 15 Nov 2023 17:14:57 -0500 Subject: [PATCH 30/41] Fix rare deadlock during launch via dlopen(). - Don't create global MVKPixelFormats instance during launch, as this triggers a call to MTLCopyAllDevices(), which can deadlock if app is also launching other services that use Metal. --- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm | 63 ++++++++++++----------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index f69d4203..4b091229 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -19,6 +19,7 @@ MoltenVK 1.2.7 Released TBD - Reduce disk space consumed after running `fetchDependencies` script by removing intermediate file caches. +- Fix rare deadlock during launch via `dlopen()`. - Update to latest SPIRV-Cross: - MSL: Fix regression error in argument buffer runtime arrays. 
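The mvk_datatypes.mm diff below applies a conventional lazy-construction pattern: the global pixel-format table is no longer built while the library is being loaded, but on first use, so MTLCopyAllDevices() can never run inside dlopen(). A minimal sketch of that pattern, using hypothetical names rather than MoltenVK's actual types:

    // Sketch only: defer construction of a global whose constructor queries Metal
    // until the first call that needs it, so nothing GPU-related runs at load time.
    #include <memory>

    struct FormatTables {
        FormatTables() { /* assumed expensive: e.g. enumerates Metal devices */ }
    };

    static std::unique_ptr<FormatTables> _formatTables;

    static FormatTables* getFormatTables() {
        if ( !_formatTables ) { _formatTables.reset(new FormatTables()); }   // built lazily, not during library load
        return _formatTables.get();
    }

Every public format query is then routed through the getter instead of referencing the static instance directly, which is the mechanical change the diff makes.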
diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm index 7644f49c..8fdbc1a6 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm @@ -32,111 +32,116 @@ using namespace std; #pragma mark Pixel formats -static MVKPixelFormats _platformPixelFormats; +static std::unique_ptr _platformPixelFormats; + +static MVKPixelFormats* getPlatformPixelFormats() { + if ( !_platformPixelFormats ) { _platformPixelFormats.reset(new MVKPixelFormats()); } + return _platformPixelFormats.get(); +} MVK_PUBLIC_SYMBOL bool mvkVkFormatIsSupported(VkFormat vkFormat) { - return _platformPixelFormats.isSupported(vkFormat); + return getPlatformPixelFormats()->isSupported(vkFormat); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsSupported(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isSupported(mtlFormat); + return getPlatformPixelFormats()->isSupported(mtlFormat); } MVK_PUBLIC_SYMBOL MVKFormatType mvkFormatTypeFromVkFormat(VkFormat vkFormat) { - return _platformPixelFormats.getFormatType(vkFormat); + return getPlatformPixelFormats()->getFormatType(vkFormat); } MVK_PUBLIC_SYMBOL MVKFormatType mvkFormatTypeFromMTLPixelFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getFormatType(mtlFormat); + return getPlatformPixelFormats()->getFormatType(mtlFormat); } MVK_PUBLIC_SYMBOL MTLPixelFormat mvkMTLPixelFormatFromVkFormat(VkFormat vkFormat) { - return _platformPixelFormats.getMTLPixelFormat(vkFormat); + return getPlatformPixelFormats()->getMTLPixelFormat(vkFormat); } MVK_PUBLIC_SYMBOL VkFormat mvkVkFormatFromMTLPixelFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getVkFormat(mtlFormat); + return getPlatformPixelFormats()->getVkFormat(mtlFormat); } MVK_PUBLIC_SYMBOL uint32_t mvkVkFormatBytesPerBlock(VkFormat vkFormat) { - return _platformPixelFormats.getBytesPerBlock(vkFormat); + return getPlatformPixelFormats()->getBytesPerBlock(vkFormat); } MVK_PUBLIC_SYMBOL uint32_t mvkMTLPixelFormatBytesPerBlock(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getBytesPerBlock(mtlFormat); + return getPlatformPixelFormats()->getBytesPerBlock(mtlFormat); } MVK_PUBLIC_SYMBOL VkExtent2D mvkVkFormatBlockTexelSize(VkFormat vkFormat) { - return _platformPixelFormats.getBlockTexelSize(vkFormat); + return getPlatformPixelFormats()->getBlockTexelSize(vkFormat); } MVK_PUBLIC_SYMBOL VkExtent2D mvkMTLPixelFormatBlockTexelSize(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getBlockTexelSize(mtlFormat); + return getPlatformPixelFormats()->getBlockTexelSize(mtlFormat); } MVK_PUBLIC_SYMBOL float mvkVkFormatBytesPerTexel(VkFormat vkFormat) { - return _platformPixelFormats.getBytesPerTexel(vkFormat); + return getPlatformPixelFormats()->getBytesPerTexel(vkFormat); } MVK_PUBLIC_SYMBOL float mvkMTLPixelFormatBytesPerTexel(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getBytesPerTexel(mtlFormat); + return getPlatformPixelFormats()->getBytesPerTexel(mtlFormat); } MVK_PUBLIC_SYMBOL size_t mvkVkFormatBytesPerRow(VkFormat vkFormat, uint32_t texelsPerRow) { - return _platformPixelFormats.getBytesPerRow(vkFormat, texelsPerRow); + return getPlatformPixelFormats()->getBytesPerRow(vkFormat, texelsPerRow); } MVK_PUBLIC_SYMBOL size_t mvkMTLPixelFormatBytesPerRow(MTLPixelFormat mtlFormat, uint32_t texelsPerRow) { - return _platformPixelFormats.getBytesPerRow(mtlFormat, texelsPerRow); + return getPlatformPixelFormats()->getBytesPerRow(mtlFormat, texelsPerRow); } MVK_PUBLIC_SYMBOL size_t 
mvkVkFormatBytesPerLayer(VkFormat vkFormat, size_t bytesPerRow, uint32_t texelRowsPerLayer) { - return _platformPixelFormats.getBytesPerLayer(vkFormat, bytesPerRow, texelRowsPerLayer); + return getPlatformPixelFormats()->getBytesPerLayer(vkFormat, bytesPerRow, texelRowsPerLayer); } MVK_PUBLIC_SYMBOL size_t mvkMTLPixelFormatBytesPerLayer(MTLPixelFormat mtlFormat, size_t bytesPerRow, uint32_t texelRowsPerLayer) { - return _platformPixelFormats.getBytesPerLayer(mtlFormat, bytesPerRow, texelRowsPerLayer); + return getPlatformPixelFormats()->getBytesPerLayer(mtlFormat, bytesPerRow, texelRowsPerLayer); } MVK_PUBLIC_SYMBOL VkFormatProperties mvkVkFormatProperties(VkFormat vkFormat) { - return _platformPixelFormats.getVkFormatProperties(vkFormat); + return getPlatformPixelFormats()->getVkFormatProperties(vkFormat); } MVK_PUBLIC_SYMBOL const char* mvkVkFormatName(VkFormat vkFormat) { - return _platformPixelFormats.getName(vkFormat); + return getPlatformPixelFormats()->getName(vkFormat); } MVK_PUBLIC_SYMBOL const char* mvkMTLPixelFormatName(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getName(mtlFormat); + return getPlatformPixelFormats()->getName(mtlFormat); } MVK_PUBLIC_SYMBOL MTLVertexFormat mvkMTLVertexFormatFromVkFormat(VkFormat vkFormat) { - return _platformPixelFormats.getMTLVertexFormat(vkFormat); + return getPlatformPixelFormats()->getMTLVertexFormat(vkFormat); } MVK_PUBLIC_SYMBOL MTLClearColor mvkMTLClearColorFromVkClearValue(VkClearValue vkClearValue, VkFormat vkFormat) { - return _platformPixelFormats.getMTLClearColor(vkClearValue, vkFormat); + return getPlatformPixelFormats()->getMTLClearColor(vkClearValue, vkFormat); } MVK_PUBLIC_SYMBOL double mvkMTLClearDepthFromVkClearValue(VkClearValue vkClearValue) { - return _platformPixelFormats.getMTLClearDepthValue(vkClearValue); + return getPlatformPixelFormats()->getMTLClearDepthValue(vkClearValue); } MVK_PUBLIC_SYMBOL uint32_t mvkMTLClearStencilFromVkClearValue(VkClearValue vkClearValue) { - return _platformPixelFormats.getMTLClearStencilValue(vkClearValue); + return getPlatformPixelFormats()->getMTLClearStencilValue(vkClearValue); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsDepthFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isDepthFormat(mtlFormat); + return getPlatformPixelFormats()->isDepthFormat(mtlFormat); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsStencilFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isStencilFormat(mtlFormat); + return getPlatformPixelFormats()->isStencilFormat(mtlFormat); } MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsPVRTCFormat(MTLPixelFormat mtlFormat) { - return _platformPixelFormats.isPVRTCFormat(mtlFormat); + return getPlatformPixelFormats()->isPVRTCFormat(mtlFormat); } MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageType(VkImageType vkImageType, @@ -192,11 +197,11 @@ MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageViewType(VkImageVie } MVK_PUBLIC_SYMBOL MTLTextureUsage mvkMTLTextureUsageFromVkImageUsageFlags(VkImageUsageFlags vkImageUsageFlags, MTLPixelFormat mtlPixFmt) { - return _platformPixelFormats.getMTLTextureUsage(vkImageUsageFlags, mtlPixFmt); + return getPlatformPixelFormats()->getMTLTextureUsage(vkImageUsageFlags, mtlPixFmt); } MVK_PUBLIC_SYMBOL VkImageUsageFlags mvkVkImageUsageFlagsFromMTLTextureUsage(MTLTextureUsage mtlUsage, MTLPixelFormat mtlFormat) { - return _platformPixelFormats.getVkImageUsageFlags(mtlUsage, mtlFormat); + return getPlatformPixelFormats()->getVkImageUsageFlags(mtlUsage, mtlFormat); } MVK_PUBLIC_SYMBOL 
uint32_t mvkSampleCountFromVkSampleCountFlagBits(VkSampleCountFlagBits vkSampleCountFlag) { From 13998affe86a8acba245c1be30fe7e8529fd10c8 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 15 Nov 2023 11:06:19 -0500 Subject: [PATCH 31/41] Add support for VK_EXT_extended_dynamic_state3 extension. - Move patch point tracking from pipeline state to render state, and remove MVKPipelineCommandEncoderState subclasses no longer needed. - Move sample location tracking from renderpass input to pipeline static or dynamic state tracking. - Restart Metal render pass when sample locations change, and enable VkPhysicalDeviceSampleLocationsPropertiesEXT::variableSampleLocations. - Fix regression that broke VK_POLYGON_MODE_LINE (unrelated). - Fix regression in marking MVKRenderingCommandEncoderState dirty after vkCmdClearAttachments() (unrelated). --- Docs/MoltenVK_Runtime_UserGuide.md | 2 + Docs/Whats_New.md | 4 + MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm | 25 ++- MoltenVK/MoltenVK/Commands/MVKCmdRendering.h | 60 ++++- MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm | 91 ++++---- MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm | 2 + MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 23 +- .../MoltenVK/Commands/MVKCommandBuffer.mm | 37 +-- .../Commands/MVKCommandEncoderState.h | 70 +++--- .../Commands/MVKCommandEncoderState.mm | 210 ++++++++++++------ .../MoltenVK/Commands/MVKCommandTypePools.def | 5 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 22 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 52 ++++- .../GPUObjects/MVKDeviceFeatureStructs.def | 1 + MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 20 ++ MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 29 +-- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 34 ++- MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/Utility/MVKFoundation.cpp | 2 +- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 6 +- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 173 +++++++++++++++ 21 files changed, 608 insertions(+), 261 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index c2f54985..9b00360e 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -375,6 +375,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - *Requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`.* - `VK_EXT_extended_dynamic_state2` - *Primitive restart is always enabled, as Metal does not support disabling it (`VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT`).* +- `VK_EXT_extended_dynamic_state3` + - *Metal does not support `VK_POLYGON_MODE_POINT`* - `VK_EXT_external_memory_host` - `VK_EXT_fragment_shader_interlock` - *Requires Metal 2.0 and Raster Order Groups.* diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index f69d4203..f7c4815d 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -18,6 +18,10 @@ MoltenVK 1.2.7 Released TBD +- Add support for extensions: + - `VK_EXT_extended_dynamic_state3` *(Metal does not support `VK_POLYGON_MODE_POINT`)* +- Fix regression that broke `VK_POLYGON_MODE_LINE`. +- Fix regression in marking rendering state dirty after `vkCmdClearAttachments()`. - Reduce disk space consumed after running `fetchDependencies` script by removing intermediate file caches. - Update to latest SPIRV-Cross: - MSL: Fix regression error in argument buffer runtime arrays. 
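For context, a hedged sketch of how an application would exercise the dynamic states this patch wires up; the helper name and parameters are hypothetical, and the entry points are the standard VK_EXT_extended_dynamic_state3 commands fetched in the usual way, not anything MoltenVK-specific:

    #include <vulkan/vulkan.h>

    // Assumes the device enabled VK_EXT_extended_dynamic_state3 and the bound
    // pipeline declared the matching VK_DYNAMIC_STATE_* values.
    static void recordExtendedDynamicState3(VkDevice device, VkCommandBuffer cmdBuf) {
        auto setPolygonMode = (PFN_vkCmdSetPolygonModeEXT)
            vkGetDeviceProcAddr(device, "vkCmdSetPolygonModeEXT");
        auto setDepthClipEnable = (PFN_vkCmdSetDepthClipEnableEXT)
            vkGetDeviceProcAddr(device, "vkCmdSetDepthClipEnableEXT");
        auto setSampleLocationsEnable = (PFN_vkCmdSetSampleLocationsEnableEXT)
            vkGetDeviceProcAddr(device, "vkCmdSetSampleLocationsEnableEXT");

        setPolygonMode(cmdBuf, VK_POLYGON_MODE_LINE);   // handled by the new MVKCmdSetPolygonMode command
        setDepthClipEnable(cmdBuf, VK_TRUE);            // maps to an MTLDepthClipMode at encode time
        setSampleLocationsEnable(cmdBuf, VK_TRUE);      // may require restarting the Metal render pass
    }

Because Metal fixes sample positions for an entire render pass, a mid-pass change to sample locations is what drives the restartMetalRenderPassIfNeeded() path added in the diffs that follow.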
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index a7930a47..87515ba1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -144,10 +144,9 @@ void MVKCmdDraw::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { - if (_vertexCount == 0 || _instanceCount == 0) { - // Nothing to do. - return; - } + if (_vertexCount == 0 || _instanceCount == 0) { return; } // Nothing to do. + + cmdEncoder->restartMetalRenderPassIfNeeded(); auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); @@ -172,7 +171,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { } tessParams; uint32_t outControlPointCount = 0; if (pipeline->isTessellationPipeline()) { - tessParams.inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); + tessParams.inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); tessParams.patchCount = mvkCeilingDivide(_vertexCount, tessParams.inControlPointCount) * _instanceCount; } @@ -369,10 +368,9 @@ void MVKCmdDrawIndexed::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { - if (_indexCount == 0 || _instanceCount == 0) { - // Nothing to do. - return; - } + if (_indexCount == 0 || _instanceCount == 0) { return; } // Nothing to do. + + cmdEncoder->restartMetalRenderPassIfNeeded(); auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); @@ -401,7 +399,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { } tessParams; uint32_t outControlPointCount = 0; if (pipeline->isTessellationPipeline()) { - tessParams.inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); + tessParams.inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); tessParams.patchCount = mvkCeilingDivide(_indexCount, tessParams.inControlPointCount) * _instanceCount; } @@ -649,6 +647,8 @@ void MVKCmdDrawIndirect::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->restartMetalRenderPassIfNeeded(); + auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead. @@ -686,7 +686,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { // encoding and execution. So we don't know how big to make the buffers. // We must assume an arbitrarily large number of vertices may be submitted. // But not too many, or we'll exhaust available VRAM. 
- inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); + inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); @@ -990,6 +990,7 @@ VkResult MVKCmdDrawIndexedIndirect::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->restartMetalRenderPassIfNeeded(); encode(cmdEncoder, cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding); } @@ -1034,7 +1035,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI // encoding and execution. So we don't know how big to make the buffers. // We must assume an arbitrarily large number of vertices may be submitted. // But not too many, or we'll exhaust available VRAM. - inControlPointCount = cmdEncoder->_graphicsPipelineState.getPatchControlPoints(); + inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints(); outControlPointCount = pipeline->getOutputControlPointCount(); vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h index 7f1df4b4..16e4863b 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.h @@ -46,7 +46,6 @@ public: protected: - MVKSmallVector> _subpassSamplePositions; MVKRenderPass* _renderPass; MVKFramebuffer* _framebuffer; VkRect2D _renderArea; @@ -203,7 +202,26 @@ public: protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - MVKSmallVector _samplePositions; + MVKSmallVector _sampleLocations; +}; + + +#pragma mark - +#pragma mark MVKCmdSetSampleLocationsEnable + +/** Vulkan command to dynamically enable custom sample locations. */ +class MVKCmdSetSampleLocationsEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 sampleLocationsEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _sampleLocationsEnable; }; @@ -366,6 +384,25 @@ protected: }; +#pragma mark - +#pragma mark MVKCmdSetDepthClipEnable + +/** Vulkan command to dynamically enable depth clip. */ +class MVKCmdSetDepthClipEnable : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthClipEnable); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkBool32 _depthClipEnable; +}; + + #pragma mark - #pragma mark MVKCmdSetDepthCompareOp @@ -551,6 +588,25 @@ protected: }; +#pragma mark - +#pragma mark MVKCmdSetPolygonMode + +/** Vulkan command to dynamically set the polygon mode. 
*/ +class MVKCmdSetPolygonMode : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkPolygonMode polygonMode); + + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + VkPolygonMode _polygonMode; +}; + + #pragma mark - #pragma mark MVKCmdSetPrimitiveTopology diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm index c4bb7548..a2492ace 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRendering.mm @@ -36,30 +36,6 @@ VkResult MVKCmdBeginRenderPassBase::setContent(MVKCommandBuffer* cmdBuff, _renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass; _framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer; _renderArea = pRenderPassBegin->renderArea; - _subpassSamplePositions.clear(); - - for (const auto* next = (VkBaseInStructure*)pRenderPassBegin->pNext; next; next = next->pNext) { - switch (next->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT: { - // Build an array of arrays, one array of sample positions for each subpass index. - // For subpasses not included in VkRenderPassSampleLocationsBeginInfoEXT, the resulting array of samples will be empty. - _subpassSamplePositions.resize(_renderPass->getSubpassCount()); - auto* pRPSampLocnsInfo = (VkRenderPassSampleLocationsBeginInfoEXT*)next; - for (uint32_t spSLIdx = 0; spSLIdx < pRPSampLocnsInfo->postSubpassSampleLocationsCount; spSLIdx++) { - auto& spsl = pRPSampLocnsInfo->pPostSubpassSampleLocations[spSLIdx]; - uint32_t spIdx = spsl.subpassIndex; - auto& spSampPosns = _subpassSamplePositions[spIdx]; - for (uint32_t slIdx = 0; slIdx < spsl.sampleLocationsInfo.sampleLocationsCount; slIdx++) { - auto& sl = spsl.sampleLocationsInfo.pSampleLocations[slIdx]; - spSampPosns.push_back(MTLSamplePositionMake(sl.x, sl.y)); - } - } - break; - } - default: - break; - } - } cmdBuff->_currentSubpassInfo.beginRenderpass(_renderPass); @@ -86,15 +62,6 @@ VkResult MVKCmdBeginRenderPass::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdBeginRenderPass::encode(MVKCommandEncoder* cmdEncoder) { - - // Convert the sample position array of arrays to an array of array-references, - // so that it can be passed to the command encoder. 
- size_t spSPCnt = _subpassSamplePositions.size(); - MVKArrayRef spSPRefs[spSPCnt]; - for (uint32_t spSPIdx = 0; spSPIdx < spSPCnt; spSPIdx++) { - spSPRefs[spSPIdx] = _subpassSamplePositions[spSPIdx].contents(); - } - cmdEncoder->beginRenderpass(this, _contents, _renderPass, @@ -102,7 +69,7 @@ void MVKCmdBeginRenderPass::encode(MVKCommandEncoder* cmdEncoder) { _renderArea, _clearValues.contents(), _attachments.contents(), - MVKArrayRef(spSPRefs, spSPCnt)); + kMVKCommandUseBeginRenderPass); } template class MVKCmdBeginRenderPass<1, 0>; @@ -217,17 +184,29 @@ void MVKCmdEndRendering::encode(MVKCommandEncoder* cmdEncoder) { VkResult MVKCmdSetSampleLocations::setContent(MVKCommandBuffer* cmdBuff, const VkSampleLocationsInfoEXT* pSampleLocationsInfo) { - + _sampleLocations.clear(); for (uint32_t slIdx = 0; slIdx < pSampleLocationsInfo->sampleLocationsCount; slIdx++) { - auto& sl = pSampleLocationsInfo->pSampleLocations[slIdx]; - _samplePositions.push_back(MTLSamplePositionMake(sl.x, sl.y)); + _sampleLocations.push_back(pSampleLocationsInfo->pSampleLocations[slIdx]); } - return VK_SUCCESS; } void MVKCmdSetSampleLocations::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->setDynamicSamplePositions(_samplePositions.contents()); + cmdEncoder->_renderingState.setSampleLocations(_sampleLocations.contents(), true); +} + + +#pragma mark - +#pragma mark MVKCmdSetSampleLocationsEnable + +VkResult MVKCmdSetSampleLocationsEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 sampleLocationsEnable) { + _sampleLocationsEnable = sampleLocationsEnable; + return VK_SUCCESS; +} + +void MVKCmdSetSampleLocationsEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setSampleLocationsEnable(_sampleLocationsEnable, true); } @@ -240,7 +219,7 @@ VkResult MVKCmdSetViewport::setContent(MVKCommandBuffer* cmdBuff, uint32_t viewportCount, const VkViewport* pViewports) { _firstViewport = firstViewport; - _viewports.clear(); // Clear for reuse + _viewports.clear(); _viewports.reserve(viewportCount); for (uint32_t vpIdx = 0; vpIdx < viewportCount; vpIdx++) { _viewports.push_back(pViewports[vpIdx]); @@ -267,7 +246,7 @@ VkResult MVKCmdSetScissor::setContent(MVKCommandBuffer* cmdBuff, uint32_t scissorCount, const VkRect2D* pScissors) { _firstScissor = firstScissor; - _scissors.clear(); // Clear for reuse + _scissors.clear(); _scissors.reserve(scissorCount); for (uint32_t sIdx = 0; sIdx < scissorCount; sIdx++) { _scissors.push_back(pScissors[sIdx]); @@ -362,6 +341,20 @@ void MVKCmdSetDepthWriteEnable::encode(MVKCommandEncoder* cmdEncoder) { } +#pragma mark - +#pragma mark MVKCmdSetDepthClipEnable + +VkResult MVKCmdSetDepthClipEnable::setContent(MVKCommandBuffer* cmdBuff, + VkBool32 depthClipEnable) { + _depthClipEnable = depthClipEnable; + return VK_SUCCESS; +} + +void MVKCmdSetDepthClipEnable::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setDepthClipEnable(_depthClipEnable, true); +} + + #pragma mark - #pragma mark MVKCmdSetDepthCompareOp @@ -501,7 +494,21 @@ VkResult MVKCmdSetPatchControlPoints::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdSetPatchControlPoints::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->_graphicsPipelineState.setPatchControlPoints(_patchControlPoints); + cmdEncoder->_renderingState.setPatchControlPoints(_patchControlPoints, true); +} + + +#pragma mark - +#pragma mark MVKCmdSetPolygonMode + +VkResult MVKCmdSetPolygonMode::setContent(MVKCommandBuffer* cmdBuff, + VkPolygonMode polygonMode) { + _polygonMode = polygonMode; + return VK_SUCCESS; 
+} + +void MVKCmdSetPolygonMode::encode(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setPolygonMode(_polygonMode, true); } diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 124859bd..52dcb78f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -1506,8 +1506,10 @@ void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { // Return to the previous rendering state on the next render activity cmdEncoder->_graphicsPipelineState.markDirty(); + cmdEncoder->_graphicsResourcesState.markDirty(); cmdEncoder->_depthStencilState.markDirty(); cmdEncoder->_renderingState.markDirty(); + cmdEncoder->_occlusionQueryState.markDirty(); } template diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 39580131..92d02e77 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -250,21 +250,23 @@ public: const VkRect2D& renderArea, MVKArrayRef clearValues, MVKArrayRef attachments, - MVKArrayRef> subpassSamplePositions, - MVKCommandUse cmdUse = kMVKCommandUseBeginRenderPass); + MVKCommandUse cmdUse); /** Begins the next render subpass. */ void beginNextSubpass(MVKCommand* subpassCmd, VkSubpassContents renderpassContents); - /** Sets the dynamic custom sample positions to use when rendering. */ - void setDynamicSamplePositions(MVKArrayRef dynamicSamplePositions); - /** Begins dynamic rendering. */ void beginRendering(MVKCommand* rendCmd, const VkRenderingInfo* pRenderingInfo); /** Begins a Metal render pass for the current render subpass. */ void beginMetalRenderPass(MVKCommandUse cmdUse); + /** + * If a Metal render pass has started, and it needs to be restarted, + * then end the existing Metal render pass, and start a new one. + */ + void restartMetalRenderPassIfNeeded(); + /** If a render encoder is active, encodes store actions for all attachments to it. */ void encodeStoreActions(bool storeOverride = false); @@ -435,13 +437,13 @@ public: id _mtlRenderEncoder; /** Tracks the current graphics pipeline bound to the encoder. */ - MVKGraphicsPipelineCommandEncoderState _graphicsPipelineState; + MVKPipelineCommandEncoderState _graphicsPipelineState; /** Tracks the current graphics resources state of the encoder. */ MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState; /** Tracks the current compute pipeline bound to the encoder. */ - MVKComputePipelineCommandEncoderState _computePipelineState; + MVKPipelineCommandEncoderState _computePipelineState; /** Tracks the current compute resources state of the encoder. */ MVKComputeResourcesCommandEncoderState _computeResourcesState; @@ -452,6 +454,9 @@ public: /** Tracks the current rendering states of the encoder. */ MVKRenderingCommandEncoderState _renderingState; + /** Tracks the occlusion query state of the encoder. */ + MVKOcclusionQueryCommandEncoderState _occlusionQueryState; + /** The size of the threadgroup for the compute shader. 
*/ MTLSize _mtlThreadgroupSize; @@ -479,7 +484,6 @@ protected: void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints); void encodeTimestampStageCounterSamples(); id getStageCountersMTLFence(); - MVKArrayRef getCustomSamplePositions(); NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse); template void retainIfImmediatelyEncoding(T& mtlEnc); template void endMetalEncoding(T& mtlEnc); @@ -495,8 +499,6 @@ protected: MVKSmallVector _timestampStageCounterQueries; MVKSmallVector _clearValues; MVKSmallVector _attachments; - MVKSmallVector _dynamicSamplePositions; - MVKSmallVector> _subpassSamplePositions; id _mtlComputeEncoder; id _mtlBlitEncoder; id _stageCountersMTLFence; @@ -505,7 +507,6 @@ protected: MVKPushConstantsCommandEncoderState _tessEvalPushConstants; MVKPushConstantsCommandEncoderState _fragmentPushConstants; MVKPushConstantsCommandEncoderState _computePushConstants; - MVKOcclusionQueryCommandEncoderState _occlusionQueryState; MVKPrefillMetalCommandBuffersStyle _prefillStyle; VkSubpassContents _subpassContents; uint32_t _renderSubpassIndex; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 9575bb76..44f0204e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -442,7 +442,6 @@ void MVKCommandEncoder::beginRendering(MVKCommand* rendCmd, const VkRenderingInf pRenderingInfo->renderArea, MVKArrayRef(clearValues, attCnt), MVKArrayRef(imageViews, attCnt), - MVKArrayRef>(), kMVKCommandUseBeginRendering); // If we've just created new transient objects, once retained by this encoder, @@ -462,7 +461,6 @@ void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd, const VkRect2D& renderArea, MVKArrayRef clearValues, MVKArrayRef attachments, - MVKArrayRef> subpassSamplePositions, MVKCommandUse cmdUse) { _pEncodingContext->setRenderingContext(renderPass, framebuffer); _renderArea = renderArea; @@ -471,13 +469,6 @@ void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd, _clearValues.assign(clearValues.begin(), clearValues.end()); _attachments.assign(attachments.begin(), attachments.end()); - // Copy the sample positions array of arrays, one array of sample positions for each subpass index. - _subpassSamplePositions.resize(subpassSamplePositions.size()); - for (uint32_t spSPIdx = 0; spSPIdx < subpassSamplePositions.size(); spSPIdx++) { - _subpassSamplePositions[spSPIdx].assign(subpassSamplePositions[spSPIdx].begin(), - subpassSamplePositions[spSPIdx].end()); - } - setSubpass(passCmd, subpassContents, 0, cmdUse); } @@ -518,10 +509,6 @@ void MVKCommandEncoder::beginNextMultiviewPass() { beginMetalRenderPass(kMVKCommandUseNextSubpass); } -void MVKCommandEncoder::setDynamicSamplePositions(MVKArrayRef dynamicSamplePositions) { - _dynamicSamplePositions.assign(dynamicSamplePositions.begin(), dynamicSamplePositions.end()); -} - // Retain encoders when prefilling, because prefilling may span multiple autorelease pools. template void MVKCommandEncoder::retainIfImmediatelyEncoding(T& mtlEnc) { @@ -536,7 +523,6 @@ void MVKCommandEncoder::endMetalEncoding(T& mtlEnc) { mtlEnc = nil; } - // Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder. 
void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { @@ -592,8 +578,8 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { // If no custom sample positions are established, size will be zero, // and Metal will default to using default sample postions. if (_pDeviceMetalFeatures->programmableSamplePositions) { - auto cstmSampPosns = getCustomSamplePositions(); - [mtlRPDesc setSamplePositions: cstmSampPosns.data() count: cstmSampPosns.size()]; + auto sampPosns = _renderingState.getSamplePositions(); + [mtlRPDesc setSamplePositions: sampPosns.data() count: sampPosns.size()]; } _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; @@ -616,16 +602,13 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { _occlusionQueryState.beginMetalRenderPass(); } -// If custom sample positions have been set, return them, otherwise return an empty array. -// For Metal, VkPhysicalDeviceSampleLocationsPropertiesEXT::variableSampleLocations is false. -// As such, Vulkan requires that sample positions must be established at the beginning of -// a renderpass, and that both pipeline and dynamic sample locations must be the same as those -// set for each subpass. Therefore, the only sample positions of use are those set for each -// subpass when the renderpass begins. The pipeline and dynamic sample positions are ignored. -MVKArrayRef MVKCommandEncoder::getCustomSamplePositions() { - return (_renderSubpassIndex < _subpassSamplePositions.size() - ? _subpassSamplePositions[_renderSubpassIndex].contents() - : MVKArrayRef()); +void MVKCommandEncoder::restartMetalRenderPassIfNeeded() { + if ( !_mtlRenderEncoder ) { return; } + + if (_renderingState.needsMetalRenderPassRestart()) { + encodeStoreActions(true); + beginMetalRenderPass(kMVKCommandUseRestartSubpass); + } } void MVKCommandEncoder::encodeStoreActions(bool storeOverride) { @@ -1161,12 +1144,12 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer, _computeResourcesState(this), _depthStencilState(this), _renderingState(this), + _occlusionQueryState(this), _vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT), _tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT), _tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT), _fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT), _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT), - _occlusionQueryState(this), _prefillStyle(prefillStyle){ _pDeviceFeatures = &_device->_enabledFeatures; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 82ea4eab..4ac895d4 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -106,8 +106,11 @@ protected: virtual void encodeImpl(uint32_t stage) = 0; MVKDevice* getDevice(); bool isDynamicState(MVKRenderStateType state); + template T& getContent(T* iVarAry, bool isDynamic) { + return iVarAry[isDynamic ? StateScope::Dynamic : StateScope::Static]; + } template T& getContent(T* iVarAry, MVKRenderStateType state) { - return iVarAry[isDynamicState(state) ? 
StateScope::Dynamic : StateScope::Static]; + return getContent(iVarAry, isDynamicState(state)); } MVKCommandEncoder* _cmdEncoder; @@ -123,9 +126,11 @@ protected: class MVKPipelineCommandEncoderState : public MVKCommandEncoderState { public: - virtual void bindPipeline(MVKPipeline* pipeline); + void bindPipeline(MVKPipeline* pipeline); MVKPipeline* getPipeline(); + MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)getPipeline(); } + MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)getPipeline(); } MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} @@ -136,42 +141,6 @@ protected: }; -#pragma mark - -#pragma mark MVKGraphicsPipelineCommandEncoderState - -/** Holds encoder state established by graphics pipeline commands. */ -class MVKGraphicsPipelineCommandEncoderState : public MVKPipelineCommandEncoderState { - -public: - void bindPipeline(MVKPipeline* pipeline) override; - - MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)getPipeline(); } - - void setPatchControlPoints(uint32_t patchControlPoints); - uint32_t getPatchControlPoints(); - - MVKGraphicsPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKPipelineCommandEncoderState(cmdEncoder) {} - -protected: - uint32_t _patchControlPoints[StateScope::Count] = {}; -}; - - -#pragma mark - -#pragma mark MVKComputePipelineCommandEncoderState - -/** Holds encoder state established by compute pipeline commands. */ -class MVKComputePipelineCommandEncoderState : public MVKPipelineCommandEncoderState { - -public: - MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)getPipeline(); } - - MVKComputePipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKPipelineCommandEncoderState(cmdEncoder) {} - -protected: -}; - - #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -257,7 +226,7 @@ protected: VkStencilOp passOp, VkStencilOp depthFailOp, VkCompareOp compareOp); MVKMTLDepthStencilDescriptorData _depthStencilData[StateScope::Count]; - bool _depthTestEnabled[StateScope::Count]; + bool _depthTestEnabled[StateScope::Count] = {}; bool _hasDepthAttachment = false; bool _hasStencilAttachment = false; }; @@ -294,9 +263,6 @@ public: void setFrontFace(VkFrontFace frontFace, bool isDynamic); - void setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic); - MTLPrimitiveType getPrimitiveType(); - void setPolygonMode(VkPolygonMode polygonMode, bool isDynamic); void setBlendConstants(float blendConstants[4], bool isDynamic); @@ -316,13 +282,26 @@ public: void setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic); + void setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic); + MTLPrimitiveType getPrimitiveType(); + + void setPatchControlPoints(uint32_t patchControlPoints, bool isDynamic); + uint32_t getPatchControlPoints(); + + void setSampleLocationsEnable(VkBool32 sampleLocationsEnable, bool isDynamic); + void setSampleLocations(const MVKArrayRef sampleLocations, bool isDynamic); + MVKArrayRef getSamplePositions(); + void beginMetalRenderPass() override; + bool needsMetalRenderPassRestart(); + + bool isDirty(MVKRenderStateType state); + void markDirty() override; MVKRenderingCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} protected: void encodeImpl(uint32_t stage) override; - bool isDirty(MVKRenderStateType state); bool isDrawingTriangles(); template void setContent(T* iVarAry, T* pVal, MVKRenderStateType 
state, bool isDynamic) { auto* pIVar = &iVarAry[isDynamic ? StateScope::Dynamic : StateScope::Static]; @@ -330,10 +309,11 @@ protected: *pIVar = *pVal; _dirtyStates.enable(state); _modifiedStates.enable(state); - markDirty(); + MVKCommandEncoderState::markDirty(); // Avoid local markDirty() as it marks all states dirty. } } + MVKSmallVector _mtlSampleLocations[StateScope::Count] = {}; MVKMTLViewports _mtlViewports[StateScope::Count] = {}; MVKMTLScissors _mtlScissors[StateScope::Count] = {}; MVKColor32 _mtlBlendConstants[StateScope::Count] = {}; @@ -344,8 +324,10 @@ protected: MTLPrimitiveType _mtlPrimitiveTopology[StateScope::Count] = { MTLPrimitiveTypePoint, MTLPrimitiveTypePoint }; MTLDepthClipMode _mtlDepthClipEnable[StateScope::Count] = { MTLDepthClipModeClip, MTLDepthClipModeClip }; MTLTriangleFillMode _mtlPolygonMode[StateScope::Count] = { MTLTriangleFillModeFill, MTLTriangleFillModeFill }; + uint32_t _mtlPatchControlPoints[StateScope::Count] = {}; MVKRenderStateFlags _dirtyStates; MVKRenderStateFlags _modifiedStates; + bool _mtlSampleLocationsEnable[StateScope::Count] = {}; bool _mtlDepthBiasEnable[StateScope::Count] = {}; bool _mtlPrimitiveRestartEnable[StateScope::Count] = {}; bool _mtlRasterizerDiscardEnable[StateScope::Count] = {}; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 9e17aa99..c7246a51 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -45,8 +45,11 @@ bool MVKCommandEncoderState::isDynamicState(MVKRenderStateType state) { #pragma mark MVKPipelineCommandEncoderState void MVKPipelineCommandEncoderState::bindPipeline(MVKPipeline* pipeline) { - if (pipeline != _pipeline) markDirty(); - _pipeline = pipeline; + if (pipeline == _pipeline) { return; } + + _pipeline = pipeline; + _pipeline->wasBound(_cmdEncoder); + markDirty(); } MVKPipeline* MVKPipelineCommandEncoderState::getPipeline() { return _pipeline; } @@ -59,23 +62,6 @@ void MVKPipelineCommandEncoderState::encodeImpl(uint32_t stage) { } -#pragma mark - -#pragma mark MVKGraphicsPipelineCommandEncoderState - -void MVKGraphicsPipelineCommandEncoderState::bindPipeline(MVKPipeline* pipeline) { - MVKPipelineCommandEncoderState::bindPipeline(pipeline); - _patchControlPoints[StateScope::Static] = getGraphicsPipeline()->_tessInfo.patchControlPoints; -} - -void MVKGraphicsPipelineCommandEncoderState::setPatchControlPoints(uint32_t patchControlPoints) { - _patchControlPoints[StateScope::Dynamic] = patchControlPoints; -} - -uint32_t MVKGraphicsPipelineCommandEncoderState::getPatchControlPoints() { - return getContent(_patchControlPoints, PatchControlPoints); -} - - #pragma mark - #pragma mark MVKPushConstantsCommandEncoderState @@ -310,60 +296,43 @@ void MVKDepthStencilCommandEncoderState::encodeImpl(uint32_t stage) { #pragma mark - #pragma mark MVKRenderingCommandEncoderState -#define getContent(state) getContent(_mtl##state, state) -#define setContent(state) setContent(_mtl##state, &mtl##state, state, isDynamic) +#define getMTLContent(state) getContent(_mtl##state, state) +#define setMTLContent(state) setContent(_mtl##state, &mtl##state, state, isDynamic) void MVKRenderingCommandEncoderState::setCullMode(VkCullModeFlags cullMode, bool isDynamic) { auto mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(cullMode); - setContent(CullMode); + setMTLContent(CullMode); _cullBothFaces[isDynamic ? 
StateScope::Dynamic : StateScope::Static] = (cullMode == VK_CULL_MODE_FRONT_AND_BACK); } void MVKRenderingCommandEncoderState::setFrontFace(VkFrontFace frontFace, bool isDynamic) { auto mtlFrontFace = mvkMTLWindingFromVkFrontFace(frontFace); - setContent(FrontFace); -} - -void MVKRenderingCommandEncoderState::setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic) { - auto mtlPrimitiveTopology = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(topology); - setContent(PrimitiveTopology); -} - -MTLPrimitiveType MVKRenderingCommandEncoderState::getPrimitiveType() { - return getContent(PrimitiveTopology); -} - -bool MVKRenderingCommandEncoderState::isDrawingTriangles() { - switch (getPrimitiveType()) { - case MTLPrimitiveTypeTriangle: return true; - case MTLPrimitiveTypeTriangleStrip: return true; - default: return false; - } + setMTLContent(FrontFace); } void MVKRenderingCommandEncoderState::setPolygonMode(VkPolygonMode polygonMode, bool isDynamic) { auto mtlPolygonMode = mvkMTLTriangleFillModeFromVkPolygonMode(polygonMode); - setContent(PolygonMode); + setMTLContent(PolygonMode); } void MVKRenderingCommandEncoderState::setBlendConstants(float blendConstants[4], bool isDynamic) { MVKColor32 mtlBlendConstants; mvkCopy(mtlBlendConstants.float32, blendConstants, 4); - setContent(BlendConstants); + setMTLContent(BlendConstants); } void MVKRenderingCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) { bool isDynamic = false; bool mtlDepthBiasEnable = static_cast(vkRasterInfo.depthBiasEnable); - setContent(DepthBiasEnable); + setMTLContent(DepthBiasEnable); MVKDepthBias mtlDepthBias = { .depthBiasConstantFactor = vkRasterInfo.depthBiasConstantFactor, .depthBiasSlopeFactor = vkRasterInfo.depthBiasSlopeFactor, .depthBiasClamp = vkRasterInfo.depthBiasClamp }; - setContent(DepthBias); + setMTLContent(DepthBias); } void MVKRenderingCommandEncoderState::setDepthBias(float depthBiasConstantFactor, @@ -375,18 +344,18 @@ void MVKRenderingCommandEncoderState::setDepthBias(float depthBiasConstantFactor .depthBiasSlopeFactor = depthBiasSlopeFactor, .depthBiasClamp = depthBiasClamp }; - setContent(DepthBias); + setMTLContent(DepthBias); } void MVKRenderingCommandEncoderState::setDepthBiasEnable(VkBool32 depthBiasEnable) { bool isDynamic = true; bool mtlDepthBiasEnable = static_cast(depthBiasEnable); - setContent(DepthBiasEnable); + setMTLContent(DepthBiasEnable); } void MVKRenderingCommandEncoderState::setDepthClipEnable(bool depthClip, bool isDynamic) { auto mtlDepthClipEnable = depthClip ? 
MTLDepthClipModeClip : MTLDepthClipModeClamp; - setContent(DepthClipEnable); + setMTLContent(DepthClipEnable); } void MVKRenderingCommandEncoderState::setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) { @@ -395,7 +364,7 @@ void MVKRenderingCommandEncoderState::setStencilReferenceValues(const VkPipeline .frontFaceValue = vkDepthStencilInfo.front.reference, .backFaceValue = vkDepthStencilInfo.back.reference }; - setContent(StencilReference); + setMTLContent(StencilReference); } void MVKRenderingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference) { @@ -403,7 +372,7 @@ void MVKRenderingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFla MVKStencilReference mtlStencilReference = _mtlStencilReference[StateScope::Dynamic]; if (shouldUpdateFace(FRONT)) { mtlStencilReference.frontFaceValue = stencilReference; } if (shouldUpdateFace(BACK)) { mtlStencilReference.backFaceValue = stencilReference; } - setContent(StencilReference); + setMTLContent(StencilReference); } void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef viewports, @@ -418,7 +387,7 @@ void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef mtlViewports.viewports[firstViewport + vpIdx] = mvkMTLViewportFromVkViewport(viewports[vpIdx]); mtlViewports.viewportCount = max(mtlViewports.viewportCount, vpIdx + 1); } - setContent(Viewports); + setMTLContent(Viewports); } void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef scissors, @@ -433,17 +402,118 @@ void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef sc mtlScissors.scissors[firstScissor + sIdx] = mvkMTLScissorRectFromVkRect2D(scissors[sIdx]); mtlScissors.scissorCount = max(mtlScissors.scissorCount, sIdx + 1); } - setContent(Scissors); + setMTLContent(Scissors); } void MVKRenderingCommandEncoderState::setPrimitiveRestartEnable(VkBool32 primitiveRestartEnable, bool isDynamic) { bool mtlPrimitiveRestartEnable = static_cast(primitiveRestartEnable); - setContent(PrimitiveRestartEnable); + setMTLContent(PrimitiveRestartEnable); } void MVKRenderingCommandEncoderState::setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic) { bool mtlRasterizerDiscardEnable = static_cast(rasterizerDiscardEnable); - setContent(RasterizerDiscardEnable); + setMTLContent(RasterizerDiscardEnable); +} + +// This value is retrieved, not encoded, so don't mark this encoder as dirty. +void MVKRenderingCommandEncoderState::setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic) { + getContent(_mtlPrimitiveTopology, isDynamic) = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(topology); +} + +MTLPrimitiveType MVKRenderingCommandEncoderState::getPrimitiveType() { + return getMTLContent(PrimitiveTopology); +} + +bool MVKRenderingCommandEncoderState::isDrawingTriangles() { + switch (getPrimitiveType()) { + case MTLPrimitiveTypeTriangle: return true; + case MTLPrimitiveTypeTriangleStrip: return true; + default: return false; + } +} + +// This value is retrieved, not encoded, so don't mark this encoder as dirty. 
+void MVKRenderingCommandEncoderState::setPatchControlPoints(uint32_t patchControlPoints, bool isDynamic) { + getContent(_mtlPatchControlPoints, isDynamic) = patchControlPoints; +} + +uint32_t MVKRenderingCommandEncoderState::getPatchControlPoints() { + return getMTLContent(PatchControlPoints); +} + +void MVKRenderingCommandEncoderState::setSampleLocationsEnable(VkBool32 sampleLocationsEnable, bool isDynamic) { + bool slEnbl = static_cast(sampleLocationsEnable); + auto& mtlSampLocEnbl = getContent(_mtlSampleLocationsEnable, isDynamic); + + if (slEnbl == mtlSampLocEnbl) { return; } + + mtlSampLocEnbl = slEnbl; + + // This value is retrieved, not encoded, so don't mark this encoder as dirty. + _dirtyStates.enable(SampleLocationsEnable); +} + +void MVKRenderingCommandEncoderState::setSampleLocations(MVKArrayRef sampleLocations, bool isDynamic) { + auto& mtlSampPosns = getContent(_mtlSampleLocations, isDynamic); + size_t slCnt = sampleLocations.size(); + + // When comparing new vs current, make use of fact that MTLSamplePosition & VkSampleLocationEXT have same memory footprint. + if (slCnt == mtlSampPosns.size() && + mvkAreEqual((MTLSamplePosition*)sampleLocations.data(), + mtlSampPosns.data(), slCnt)) { + return; + } + + mtlSampPosns.clear(); + for (uint32_t slIdx = 0; slIdx < slCnt; slIdx++) { + auto& sl = sampleLocations[slIdx]; + mtlSampPosns.push_back(MTLSamplePositionMake(mvkClamp(sl.x, kMVKMinSampleLocationCoordinate, kMVKMaxSampleLocationCoordinate), + mvkClamp(sl.y, kMVKMinSampleLocationCoordinate, kMVKMaxSampleLocationCoordinate))); + } + + // This value is retrieved, not encoded, so don't mark this encoder as dirty. + _dirtyStates.enable(SampleLocations); +} + +MVKArrayRef MVKRenderingCommandEncoderState::getSamplePositions() { + return getMTLContent(SampleLocationsEnable) ? getMTLContent(SampleLocations).contents() : MVKArrayRef(); +} + +// Return whether state is dirty, and mark it not dirty +bool MVKRenderingCommandEncoderState::isDirty(MVKRenderStateType state) { + bool rslt = _dirtyStates.isEnabled(state); + _dirtyStates.disable(state); + return rslt; +} + +// Don't force sample location & sample location enable to become dirty if they weren't already, because +// this may cause needsMetalRenderPassRestart() to trigger an unnecessary Metal renderpass restart. +void MVKRenderingCommandEncoderState::markDirty() { + MVKCommandEncoderState::markDirty(); + + bool wasSLDirty = _dirtyStates.isEnabled(SampleLocations); + bool wasSLEnblDirty = _dirtyStates.isEnabled(SampleLocationsEnable); + + _dirtyStates.enableAll(); + + _dirtyStates.set(SampleLocations, wasSLDirty); + _dirtyStates.set(SampleLocationsEnable, wasSLEnblDirty); +} + +// Don't call parent beginMetalRenderPass() because it +// will call local markDirty() which is too aggressive. +void MVKRenderingCommandEncoderState::beginMetalRenderPass() { + if (_isModified) { + _dirtyStates = _modifiedStates; + MVKCommandEncoderState::markDirty(); + } +} + +// Don't use || on isDirty calls, to ensure they both get called, so that the dirty flag of each will be cleared. 
+bool MVKRenderingCommandEncoderState::needsMetalRenderPassRestart() { + bool isSLDirty = isDirty(SampleLocations); + bool isSLEnblDirty = isDirty(SampleLocationsEnable); + return isSLDirty || isSLEnblDirty; } #pragma mark Encoding @@ -453,15 +523,16 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { auto& rendEnc = _cmdEncoder->_mtlRenderEncoder; - if (isDirty(CullMode)) { [rendEnc setCullMode: getContent(CullMode)]; } - if (isDirty(FrontFace)) { [rendEnc setFrontFacingWinding: getContent(FrontFace)]; } + if (isDirty(PolygonMode)) { [rendEnc setTriangleFillMode: getMTLContent(PolygonMode)]; } + if (isDirty(CullMode)) { [rendEnc setCullMode: getMTLContent(CullMode)]; } + if (isDirty(FrontFace)) { [rendEnc setFrontFacingWinding: getMTLContent(FrontFace)]; } if (isDirty(BlendConstants)) { - auto& bcFlt = getContent(BlendConstants).float32; + auto& bcFlt = getMTLContent(BlendConstants).float32; [rendEnc setBlendColorRed: bcFlt[0] green: bcFlt[1] blue: bcFlt[2] alpha: bcFlt[3]]; } if (isDirty(DepthBiasEnable) || isDirty(DepthBias)) { - if (getContent(DepthBiasEnable)) { - auto& db = getContent(DepthBias); + if (getMTLContent(DepthBiasEnable)) { + auto& db = getMTLContent(DepthBias); [rendEnc setDepthBias: db.depthBiasConstantFactor slopeScale: db.depthBiasSlopeFactor clamp: db.depthBiasClamp]; @@ -470,11 +541,11 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { } } if (isDirty(DepthClipEnable) && _cmdEncoder->_pDeviceFeatures->depthClamp) { - [rendEnc setDepthClipMode: getContent(DepthClipEnable)]; + [rendEnc setDepthClipMode: getMTLContent(DepthClipEnable)]; } if (isDirty(StencilReference)) { - auto& sr = getContent(StencilReference); + auto& sr = getMTLContent(StencilReference); [rendEnc setStencilFrontReferenceValue: sr.frontFaceValue backReferenceValue: sr.backFaceValue]; } @@ -484,13 +555,13 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { // to use primitive restart at all, and is just setting this as a "just-in-case", // and forcing an error here would be unexpected to the app (including CTS). auto mtlPrimType = getPrimitiveType(); - if (isDirty(PrimitiveRestartEnable) && !getContent(PrimitiveRestartEnable) && + if (isDirty(PrimitiveRestartEnable) && !getMTLContent(PrimitiveRestartEnable) && (mtlPrimType == MTLPrimitiveTypeTriangleStrip || mtlPrimType == MTLPrimitiveTypeLineStrip)) { reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support disabling primitive restart."); } if (isDirty(Viewports)) { - auto& mtlViewports = getContent(Viewports); + auto& mtlViewports = getMTLContent(Viewports); if (_cmdEncoder->_pDeviceFeatures->multiViewport) { #if MVK_MACOS_OR_IOS [rendEnc setViewports: mtlViewports.viewports count: mtlViewports.viewportCount]; @@ -504,7 +575,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { // set to front-and-back, emulate this by using zeroed scissor rectangles. 
if (isDirty(Scissors)) { static MTLScissorRect zeroRect = {}; - auto mtlScissors = getContent(Scissors); + auto mtlScissors = getMTLContent(Scissors); bool shouldDiscard = ((_mtlRasterizerDiscardEnable[StateScope::Dynamic] && isDynamicState(RasterizerDiscardEnable)) || (isDrawingTriangles() && _cullBothFaces[StateScope::Dynamic] && isDynamicState(CullMode))); for (uint32_t sIdx = 0; sIdx < mtlScissors.scissorCount; sIdx++) { @@ -521,17 +592,8 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { } } -// Return whether state is dirty, and mark it not dirty -bool MVKRenderingCommandEncoderState::isDirty(MVKRenderStateType state) { - bool rslt = _dirtyStates.isEnabled(state); - _dirtyStates.disable(state); - return rslt; -} - -void MVKRenderingCommandEncoderState::beginMetalRenderPass() { - MVKCommandEncoderState::beginMetalRenderPass(); - _dirtyStates = _modifiedStates; -} +#undef getMTLContent +#undef setMTLContent #pragma mark - diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def index 65683f84..6703a0ba 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def +++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def @@ -81,6 +81,7 @@ MVK_CMD_TYPE_POOL(EndRenderPass) MVK_CMD_TYPE_POOLS_FROM_3_THRESHOLDS(BeginRendering, 1, 2, 4) MVK_CMD_TYPE_POOL(EndRendering) MVK_CMD_TYPE_POOL(SetSampleLocations) +MVK_CMD_TYPE_POOL(SetSampleLocationsEnable) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ExecuteCommands, 1) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindDescriptorSetsStatic, 1, 4) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BindDescriptorSetsDynamic, 4) @@ -91,6 +92,7 @@ MVK_CMD_TYPE_POOL(SetDepthBias) MVK_CMD_TYPE_POOL(SetDepthBiasEnable) MVK_CMD_TYPE_POOL(SetDepthTestEnable) MVK_CMD_TYPE_POOL(SetDepthWriteEnable) +MVK_CMD_TYPE_POOL(SetDepthClipEnable) MVK_CMD_TYPE_POOL(SetDepthCompareOp) MVK_CMD_TYPE_POOL(SetStencilTestEnable) MVK_CMD_TYPE_POOL(SetStencilOp) @@ -100,8 +102,9 @@ MVK_CMD_TYPE_POOL(SetStencilReference) MVK_CMD_TYPE_POOL(SetCullMode) MVK_CMD_TYPE_POOL(SetFrontFace) MVK_CMD_TYPE_POOL(SetPrimitiveTopology) -MVK_CMD_TYPE_POOL(SetPatchControlPoints) MVK_CMD_TYPE_POOL(SetPrimitiveRestartEnable) +MVK_CMD_TYPE_POOL(SetPolygonMode) +MVK_CMD_TYPE_POOL(SetPatchControlPoints) MVK_CMD_TYPE_POOL(SetRasterizerDiscardEnable) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2) MVK_CMD_TYPE_POOL(BindIndexBuffer) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 125bf9aa..be9c08fb 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -73,16 +73,22 @@ class MVKPrivateDataSlot; /** The buffer index to use for vertex content. */ -const static uint32_t kMVKVertexContentBufferIndex = 0; +static constexpr uint32_t kMVKVertexContentBufferIndex = 0; // Parameters to define the sizing of inline collections -const static uint32_t kMVKQueueFamilyCount = 4; -const static uint32_t kMVKQueueCountPerQueueFamily = 1; // Must be 1. See comments in MVKPhysicalDevice::getQueueFamilies() -const static uint32_t kMVKMinSwapchainImageCount = 2; -const static uint32_t kMVKMaxSwapchainImageCount = 3; -const static uint32_t kMVKMaxColorAttachmentCount = 8; -const static uint32_t kMVKMaxViewportScissorCount = 16; -const static uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers; +static constexpr uint32_t kMVKQueueFamilyCount = 4; +static constexpr uint32_t kMVKQueueCountPerQueueFamily = 1; // Must be 1. 
See comments in MVKPhysicalDevice::getQueueFamilies() +static constexpr uint32_t kMVKMinSwapchainImageCount = 2; +static constexpr uint32_t kMVKMaxSwapchainImageCount = 3; +static constexpr uint32_t kMVKMaxColorAttachmentCount = 8; +static constexpr uint32_t kMVKMaxViewportScissorCount = 16; +static constexpr uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers; +static constexpr uint32_t kMVKMaxSampleCount = 8; +static constexpr uint32_t kMVKSampleLocationCoordinateGridSize = 16; +static constexpr float kMVKMinSampleLocationCoordinate = 0.0; +static constexpr float kMVKMaxSampleLocationCoordinate = (float)(kMVKSampleLocationCoordinateGridSize - 1) / (float)kMVKSampleLocationCoordinateGridSize; +static constexpr VkExtent2D kMVKSampleLocationPixelGridSize = { 1, 1 }; +static constexpr VkExtent2D kMVKSampleLocationPixelGridSizeNotSupported = { 0, 0 }; #if !MVK_XCODE_12 typedef NSUInteger MTLTimestamp; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 99ac0ef3..0dd865f2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -75,9 +75,6 @@ static const uint32_t kAMDRadeonRX5500DeviceId = 0x7340; static const uint32_t kAMDRadeonRX6800DeviceId = 0x73bf; static const uint32_t kAMDRadeonRX6700DeviceId = 0x73df; -static const VkExtent2D kMetalSamplePositionGridSize = { 1, 1 }; -static const VkExtent2D kMetalSamplePositionGridSizeNotSupported = { 0, 0 }; - static const uint32_t kMaxTimeDomains = 2; #pragma clang diagnostic pop @@ -399,6 +396,41 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { extDynState2->extendedDynamicState2PatchControlPoints = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: { + auto* extDynState3 = (VkPhysicalDeviceExtendedDynamicState3FeaturesEXT*)next; + extDynState3->extendedDynamicState3TessellationDomainOrigin = false; + extDynState3->extendedDynamicState3DepthClampEnable = true; + extDynState3->extendedDynamicState3PolygonMode = true; + extDynState3->extendedDynamicState3RasterizationSamples = false; + extDynState3->extendedDynamicState3SampleMask = false; + extDynState3->extendedDynamicState3AlphaToCoverageEnable = false; + extDynState3->extendedDynamicState3AlphaToOneEnable = false; + extDynState3->extendedDynamicState3LogicOpEnable = false; + extDynState3->extendedDynamicState3ColorBlendEnable = false; + extDynState3->extendedDynamicState3ColorBlendEquation = false; + extDynState3->extendedDynamicState3ColorWriteMask = false; + extDynState3->extendedDynamicState3RasterizationStream = false; + extDynState3->extendedDynamicState3ConservativeRasterizationMode = false; + extDynState3->extendedDynamicState3ExtraPrimitiveOverestimationSize = false; + extDynState3->extendedDynamicState3DepthClipEnable = true; + extDynState3->extendedDynamicState3SampleLocationsEnable = true; + extDynState3->extendedDynamicState3ColorBlendAdvanced = false; + extDynState3->extendedDynamicState3ProvokingVertexMode = false; + extDynState3->extendedDynamicState3LineRasterizationMode = false; + extDynState3->extendedDynamicState3LineStippleEnable = false; + extDynState3->extendedDynamicState3DepthClipNegativeOneToOne = false; + extDynState3->extendedDynamicState3ViewportWScalingEnable = false; + extDynState3->extendedDynamicState3ViewportSwizzle = false; + extDynState3->extendedDynamicState3CoverageToColorEnable = false; + extDynState3->extendedDynamicState3CoverageToColorLocation = false; + 
extDynState3->extendedDynamicState3CoverageModulationMode = false; + extDynState3->extendedDynamicState3CoverageModulationTableEnable = false; + extDynState3->extendedDynamicState3CoverageModulationTable = false; + extDynState3->extendedDynamicState3CoverageReductionMode = false; + extDynState3->extendedDynamicState3RepresentativeFragmentTestEnable = false; + extDynState3->extendedDynamicState3ShadingRateImageEnable = false; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: { auto* interlockFeatures = (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT*)next; interlockFeatures->fragmentShaderSampleInterlock = _metalFeatures.rasterOrderGroups; @@ -747,11 +779,11 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: { auto* sampLocnProps = (VkPhysicalDeviceSampleLocationsPropertiesEXT*)next; sampLocnProps->sampleLocationSampleCounts = _metalFeatures.supportedSampleCounts; - sampLocnProps->maxSampleLocationGridSize = kMetalSamplePositionGridSize; - sampLocnProps->sampleLocationCoordinateRange[0] = 0.0; - sampLocnProps->sampleLocationCoordinateRange[1] = (15.0 / 16.0); - sampLocnProps->sampleLocationSubPixelBits = 4; - sampLocnProps->variableSampleLocations = VK_FALSE; + sampLocnProps->maxSampleLocationGridSize = kMVKSampleLocationPixelGridSize; + sampLocnProps->sampleLocationCoordinateRange[0] = kMVKMinSampleLocationCoordinate; + sampLocnProps->sampleLocationCoordinateRange[1] = kMVKMaxSampleLocationCoordinate; + sampLocnProps->sampleLocationSubPixelBits = mvkPowerOfTwoExponent(kMVKSampleLocationCoordinateGridSize); + sampLocnProps->variableSampleLocations = true; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { @@ -860,8 +892,8 @@ void MVKPhysicalDevice::getMultisampleProperties(VkSampleCountFlagBits samples, VkMultisamplePropertiesEXT* pMultisampleProperties) { if (pMultisampleProperties) { pMultisampleProperties->maxSampleLocationGridSize = (mvkIsOnlyAnyFlagEnabled(samples, _metalFeatures.supportedSampleCounts) - ? kMetalSamplePositionGridSize - : kMetalSamplePositionGridSizeNotSupported); + ? 
kMVKSampleLocationPixelGridSize + : kMVKSampleLocationPixelGridSizeNotSupported); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index b9792833..d3856e8c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -66,6 +66,7 @@ MVK_DEVICE_FEATURE_EXTN(PortabilitySubset, PORTABILITY_SUBSET, MVK_DEVICE_FEATURE_EXTN(4444Formats, 4444_FORMATS, EXT, 2) MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, EXT, 1) MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, EXT, 3) +MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, EXT, 31) MVK_DEVICE_FEATURE_EXTN(FragmentShaderInterlock, FRAGMENT_SHADER_INTERLOCK, EXT, 3) MVK_DEVICE_FEATURE_EXTN(PipelineCreationCacheControl, PIPELINE_CREATION_CACHE_CONTROL, EXT, 1) MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, EXT, 3) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index 45068c19..ea815201 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -700,7 +700,27 @@ void MVKInstance::initProcAddrs() { ADD_DVC_EXT_ENTRY_POINT(vkGetPastPresentationTimingGOOGLE, GOOGLE_DISPLAY_TIMING); ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEXT, EXT_EXTENDED_DYNAMIC_STATE_2); ADD_DVC_EXT_ENTRY_POINT(vkCmdSetPatchControlPointsEXT, EXT_EXTENDED_DYNAMIC_STATE_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetAlphaToCoverageEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetAlphaToOneEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendAdvancedEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendEquationEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorWriteMaskEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetConservativeRasterizationModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClampEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClipEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClipNegativeOneToOneEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetExtraPrimitiveOverestimationSizeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLineRasterizationModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLineStippleEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetPolygonModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetProvokingVertexModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetRasterizationSamplesEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetRasterizationStreamEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleLocationsEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleMaskEXT, EXT_EXTENDED_DYNAMIC_STATE_3); + ADD_DVC_EXT_ENTRY_POINT(vkCmdSetTessellationDomainOriginEXT, EXT_EXTENDED_DYNAMIC_STATE_3); } void MVKInstance::logVersions() { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 062b646b..6827b5b9 
100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -132,7 +132,10 @@ public: /** Returns the debug report object type of this object. */ VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT; } - /** Binds this pipeline to the specified command encoder. */ + /** Called when the pipeline has been bound to the command encoder. */ + virtual void wasBound(MVKCommandEncoder* cmdEncoder) {} + + /** Encodes this pipeline to the command encoder. */ virtual void encode(MVKCommandEncoder* cmdEncoder, uint32_t stage = 0) = 0; /** Binds the push constants to a command encoder. */ @@ -241,6 +244,7 @@ enum MVKRenderStateType { PrimitiveTopology, RasterizerDiscardEnable, SampleLocations, + SampleLocationsEnable, Scissors, StencilCompareMask, StencilOp, @@ -249,17 +253,22 @@ enum MVKRenderStateType { StencilWriteMask, VertexStride, Viewports, + MVKRenderStateTypeCount }; /** Boolean tracking of rendering state. */ struct MVKRenderStateFlags { void enable(MVKRenderStateType rs) { if (rs) { mvkEnableFlags(_stateFlags, getFlagMask(rs)); } } void disable(MVKRenderStateType rs) { if (rs) { mvkDisableFlags(_stateFlags, getFlagMask(rs)); } } + void set(MVKRenderStateType rs, bool val) { val? enable(rs) : disable(rs); } + void enableAll() { mvkEnableAllFlags(_stateFlags); } + void disableAll() { mvkDisableAllFlags(_stateFlags); } bool isEnabled(MVKRenderStateType rs) { return mvkIsAnyFlagEnabled(_stateFlags, getFlagMask(rs)); } protected: uint32_t getFlagMask(MVKRenderStateType rs) { return rs ? (1u << (rs - 1u)) : 0; } // Ignore Unknown type uint32_t _stateFlags = 0; + static_assert(sizeof(_stateFlags) * 8 >= MVKRenderStateTypeCount - 1, "_stateFlags is too small to support the number of flags in MVKRenderStateType."); // Ignore Unknown type }; /** Represents an Vulkan graphics pipeline. */ @@ -270,7 +279,8 @@ public: /** Returns the number and order of stages in this pipeline. Draws commands must encode this pipeline once per stage. */ void getStages(MVKPiplineStages& stages); - /** Binds this pipeline to the specified command encoder. */ + virtual void wasBound(MVKCommandEncoder* cmdEncoder) override; + void encode(MVKCommandEncoder* cmdEncoder, uint32_t stage = 0) override; /** Returns whether this pipeline permits dynamic setting of the state. */ @@ -312,9 +322,6 @@ public: /** Returns true if the tessellation control shader needs a buffer to store its per-patch output. */ bool needsTessCtlPatchOutputBuffer() { return _needsTessCtlPatchOutputBuffer; } - /** Returns whether this pipeline has custom sample positions enabled. */ - bool isUsingCustomSamplePositions() { return _isUsingCustomSamplePositions; } - /** Returns the Vulkan primitive topology. */ VkPrimitiveTopology getVkPrimitiveTopology() { return _vkPrimitiveTopology; } @@ -327,9 +334,6 @@ public: */ bool isValidVertexBufferIndex(MVKShaderStage stage, uint32_t mtlBufferIndex); - /** Returns the custom samples used by this pipeline. */ - MVKArrayRef getCustomSamplePositions() { return _customSamplePositions.contents(); } - /** Returns the Metal vertex buffer index to use for the specified vertex attribute binding number. 
*/ uint32_t getMetalBufferIndexForVertexAttributeBinding(uint32_t binding) { return _device->getMetalBufferIndexForVertexAttributeBinding(binding); } @@ -354,8 +358,6 @@ public: ~MVKGraphicsPipeline() override; protected: - friend class MVKGraphicsPipelineCommandEncoderState; - typedef MVKSmallVector SPIRVShaderOutputs; typedef MVKSmallVector SPIRVShaderInputs; @@ -364,7 +366,7 @@ protected: bool compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, MVKMTLFunction* pVtxFunctions, VkPipelineCreationFeedback* pVertexFB); bool compileTessControlStageState(MTLComputePipelineDescriptor* tcPLDesc, VkPipelineCreationFeedback* pTessCtlFB); void initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo); - void initCustomSamplePositions(const VkGraphicsPipelineCreateInfo* pCreateInfo); + void initSampleLocations(const VkGraphicsPipelineCreateInfo* pCreateInfo); void initMTLRenderPipelineState(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, VkPipelineCreationFeedback* pPipelineFB, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo* pTessCtlSS, VkPipelineCreationFeedback* pTessCtlFB, const VkPipelineShaderStageCreateInfo* pTessEvalSS, VkPipelineCreationFeedback* pTessEvalFB, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB); void initShaderConversionConfig(SPIRVToMSLConversionConfiguration& shaderConfig, const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData); void initReservedVertexAttributeBufferCount(const VkGraphicsPipelineCreateInfo* pCreateInfo); @@ -404,7 +406,7 @@ protected: MVKSmallVector _viewports; MVKSmallVector _scissors; - MVKSmallVector _customSamplePositions; + MVKSmallVector _sampleLocations; MVKSmallVector _translatedVertexBindings; MVKSmallVector _zeroDivisorVertexBindings; MVKSmallVector _mtlArgumentEncoders; @@ -449,7 +451,7 @@ protected: bool _needsFragmentViewRangeBuffer = false; bool _isRasterizing = false; bool _isRasterizingColor = false; - bool _isUsingCustomSamplePositions = false; + bool _sampleLocationsEnable = false; }; @@ -461,7 +463,6 @@ class MVKComputePipeline : public MVKPipeline { public: - /** Binds this pipeline to the specified command encoder. */ void encode(MVKCommandEncoder* cmdEncoder, uint32_t = 0) override; /** Returns if this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). */ diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 7cb4a3ce..0b266340 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -229,6 +229,13 @@ MVKPipeline::MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVK #pragma mark - #pragma mark MVKGraphicsPipeline +// Set retrieve-only rendering state when pipeline is bound, as it's too late at draw command. 
+void MVKGraphicsPipeline::wasBound(MVKCommandEncoder* cmdEncoder) { + cmdEncoder->_renderingState.setPatchControlPoints(_tessInfo.patchControlPoints, false); + cmdEncoder->_renderingState.setSampleLocations(_sampleLocations.contents(), false); + cmdEncoder->_renderingState.setSampleLocationsEnable(_sampleLocationsEnable, false); +} + void MVKGraphicsPipeline::getStages(MVKPiplineStages& stages) { if (isTessellationPipeline()) { stages.push_back(kMVKGraphicsStageVertex); @@ -514,7 +521,7 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device, _hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState); // Must run after _isRasterizing and _dynamicState are populated - initCustomSamplePositions(pCreateInfo); + initSampleLocations(pCreateInfo); // Depth stencil content - clearing will disable depth and stencil testing // Must ignore allowed bad pDepthStencilState pointer if rasterization disabled or no depth or stencil attachment format @@ -563,6 +570,8 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE: return PrimitiveRestartEnable; case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: return PrimitiveTopology; case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE: return RasterizerDiscardEnable; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: return SampleLocations; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT: return SampleLocationsEnable; case VK_DYNAMIC_STATE_SCISSOR: return Scissors; case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: return Scissors; case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: return StencilCompareMask; @@ -626,7 +635,7 @@ id MVKGraphicsPipeline::getOrCompilePipeline(MTLCompute } // Must run after _isRasterizing and _dynamicState are populated -void MVKGraphicsPipeline::initCustomSamplePositions(const VkGraphicsPipelineCreateInfo* pCreateInfo) { +void MVKGraphicsPipeline::initSampleLocations(const VkGraphicsPipelineCreateInfo* pCreateInfo) { // Must ignore allowed bad pMultisampleState pointer if rasterization disabled if ( !(_isRasterizing && pCreateInfo->pMultisampleState) ) { return; } @@ -635,12 +644,9 @@ void MVKGraphicsPipeline::initCustomSamplePositions(const VkGraphicsPipelineCrea switch (next->sType) { case VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT: { auto* pSampLocnsCreateInfo = (VkPipelineSampleLocationsStateCreateInfoEXT*)next; - _isUsingCustomSamplePositions = pSampLocnsCreateInfo->sampleLocationsEnable; - if (_isUsingCustomSamplePositions && !isDynamicState(SampleLocations)) { - for (uint32_t slIdx = 0; slIdx < pSampLocnsCreateInfo->sampleLocationsInfo.sampleLocationsCount; slIdx++) { - auto& sl = pSampLocnsCreateInfo->sampleLocationsInfo.pSampleLocations[slIdx]; - _customSamplePositions.push_back(MTLSamplePositionMake(sl.x, sl.y)); - } + _sampleLocationsEnable = pSampLocnsCreateInfo->sampleLocationsEnable; + for (uint32_t slIdx = 0; slIdx < pSampLocnsCreateInfo->sampleLocationsInfo.sampleLocationsCount; slIdx++) { + _sampleLocations.push_back(pSampLocnsCreateInfo->sampleLocationsInfo.pSampleLocations[slIdx]); } break; } @@ -1635,7 +1641,7 @@ void MVKGraphicsPipeline::addFragmentOutputToPipeline(MTLRenderPipelineDescripto // Multisampling - must ignore allowed bad pMultisampleState pointer if rasterization disabled if (_isRasterizing && pCreateInfo->pMultisampleState) { - plDesc.sampleCount = mvkSampleCountFromVkSampleCountFlagBits(pCreateInfo->pMultisampleState->rasterizationSamples); + plDesc.rasterSampleCount = 
mvkSampleCountFromVkSampleCountFlagBits(pCreateInfo->pMultisampleState->rasterizationSamples); plDesc.alphaToCoverageEnabled = pCreateInfo->pMultisampleState->alphaToCoverageEnable; plDesc.alphaToOneEnabled = pCreateInfo->pMultisampleState->alphaToOneEnable; @@ -1926,10 +1932,14 @@ void MVKGraphicsPipeline::addPrevStageOutputToShaderConversionConfig(SPIRVToMSLC } } -// We render points if either the topology or polygon fill mode dictate it +// We render points if either the static topology or static polygon fill mode dictate it bool MVKGraphicsPipeline::isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo) { - return ((pCreateInfo->pInputAssemblyState && (pCreateInfo->pInputAssemblyState->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST)) || - (pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT))); + return ((pCreateInfo->pInputAssemblyState && + (pCreateInfo->pInputAssemblyState->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) && + !isDynamicState(PrimitiveTopology)) || + (pCreateInfo->pRasterizationState && + (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT) && + !isDynamicState(PolygonMode))); } // We disable rasterization if either static rasterizerDiscard is enabled or the static cull mode dictates it. diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index f63ecf98..d8c222bd 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -105,6 +105,7 @@ MVK_EXTENSION(EXT_debug_utils, EXT_DEBUG_UTILS, MVK_EXTENSION(EXT_descriptor_indexing, EXT_DESCRIPTOR_INDEXING, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_extended_dynamic_state, EXT_EXTENDED_DYNAMIC_STATE, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_extended_dynamic_state2, EXT_EXTENDED_DYNAMIC_STATE_2, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(EXT_extended_dynamic_state3, EXT_EXTENDED_DYNAMIC_STATE_3, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_external_memory_host, EXT_EXTERNAL_MEMORY_HOST, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, DEVICE, 10.13, 11.0, 1.0) MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, DEVICE, 10.15, MVK_NA, MVK_NA) diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp index 85ad7d5b..d00fb397 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp @@ -33,7 +33,7 @@ const char* mvkVkCommandName(MVKCommandUse cmdUse) { case kMVKCommandUseBeginRendering: return "vkCmdBeginRendering"; case kMVKCommandUseBeginRenderPass: return "vkCmdBeginRenderPass"; case kMVKCommandUseNextSubpass: return "vkCmdNextSubpass"; - case kMVKCommandUseRestartSubpass: return "Metal renderpass restart on barrier"; + case kMVKCommandUseRestartSubpass: return "Metal renderpass restart"; case kMVKCommandUsePipelineBarrier: return "vkCmdPipelineBarrier"; case kMVKCommandUseBlitImage: return "vkCmdBlitImage"; case kMVKCommandUseCopyImage: return "vkCmdCopyImage"; diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index f5820721..d3aa660a 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -76,7 +76,7 @@ typedef enum : uint8_t { kMVKCommandUseBeginRendering, /**< vkCmdBeginRendering. */ kMVKCommandUseBeginRenderPass, /**< vkCmdBeginRenderPass. */ kMVKCommandUseNextSubpass, /**< vkCmdNextSubpass. 
*/ - kMVKCommandUseRestartSubpass, /**< Restart a subpass because of explicit or implicit barrier. */ + kMVKCommandUseRestartSubpass, /**< Create a new Metal renderpass due to Metal requirements. */ kMVKCommandUsePipelineBarrier, /**< vkCmdPipelineBarrier. */ kMVKCommandUseBlitImage, /**< vkCmdBlitImage. */ kMVKCommandUseCopyImage, /**< vkCmdCopyImage. */ @@ -102,8 +102,8 @@ typedef enum : uint8_t { /** Represents a given stage of a graphics pipeline. */ enum MVKGraphicsStage { - kMVKGraphicsStageVertex = 0, /**< The vertex shader stage. */ - kMVKGraphicsStageTessControl, /**< The tessellation control shader stage. */ + kMVKGraphicsStageVertex = 0, /**< The tessellation vertex compute shader stage. */ + kMVKGraphicsStageTessControl, /**< The tessellation control compute shader stage. */ kMVKGraphicsStageRasterization /**< The rest of the pipeline. */ }; diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index d1c6fbbe..c08c5b3a 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -3652,6 +3652,120 @@ MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetRasterizerDiscardEnable, EXT); #pragma mark - #pragma mark VK_EXT_extended_dynamic_state3 +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetAlphaToCoverageEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 alphaToCoverageEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetAlphaToOneEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 alphaToOneEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendAdvancedEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorBlendAdvancedEXT* pColorBlendAdvanced) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendEnableEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkBool32* pColorBlendEnables) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendEquationEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorBlendEquationEXT* pColorBlendEquations) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorWriteMaskEXT( + VkCommandBuffer commandBuffer, + uint32_t firstAttachment, + uint32_t attachmentCount, + const VkColorComponentFlags* pColorWriteMasks) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetConservativeRasterizationModeEXT( + VkCommandBuffer commandBuffer, + VkConservativeRasterizationModeEXT conservativeRasterizationMode) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClampEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthClampEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthClipEnable, commandBuffer, !depthClampEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClipEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthClipEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetDepthClipEnable, commandBuffer, depthClipEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClipNegativeOneToOneEXT( + VkCommandBuffer commandBuffer, + VkBool32 negativeOneToOne) { + + 
MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetExtraPrimitiveOverestimationSizeEXT( + VkCommandBuffer commandBuffer, + float extraPrimitiveOverestimationSize) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineRasterizationModeEXT( + VkCommandBuffer commandBuffer, + VkLineRasterizationModeEXT lineRasterizationMode) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineStippleEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 stippledLineEnable) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEnableEXT( VkCommandBuffer commandBuffer, VkBool32 logicOpEnable) { @@ -3660,6 +3774,65 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEnableEXT( MVKTraceVulkanCallEnd(); } +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPolygonModeEXT( + VkCommandBuffer commandBuffer, + VkPolygonMode polygonMode) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetPolygonMode, commandBuffer, polygonMode); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetProvokingVertexModeEXT( + VkCommandBuffer commandBuffer, + VkProvokingVertexModeEXT provokingVertexMode) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizationSamplesEXT( + VkCommandBuffer commandBuffer, + VkSampleCountFlagBits rasterizationSamples) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizationStreamEXT( + VkCommandBuffer commandBuffer, + uint32_t rasterizationStream) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetSampleLocationsEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 sampleLocationsEnable) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetSampleLocationsEnable, commandBuffer, sampleLocationsEnable); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetSampleMaskEXT( + VkCommandBuffer commandBuffer, + VkSampleCountFlagBits samples, + const VkSampleMask* pSampleMask) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetTessellationDomainOriginEXT( + VkCommandBuffer commandBuffer, + VkTessellationDomainOrigin domainOrigin) { + + MVKTraceVulkanCallStart(); + MVKTraceVulkanCallEnd(); +} + #pragma mark - #pragma mark VK_EXT_external_memory_host extension From 6b9371ff059207d8684712a4f958e2e408541d82 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 19 Nov 2023 22:51:08 -0800 Subject: [PATCH 32/41] MVKBuffer, MVKImage: Also ignore no external handle types specified. Previously, we were erroneously failing create calls that specified zero external handle types (as opposed to omitting external memory info entirely). 
Fixes 6 tests under `dEQP-VK.api.buffer_memory_requirements.create_no_flags.ext_mem_flags_included.*` --- MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm | 1 + MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 1 + 2 files changed, 2 insertions(+) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm index 41ee4cef..e14ff7d5 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm @@ -232,6 +232,7 @@ MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) : } void MVKBuffer::initExternalMemory(VkExternalMemoryHandleTypeFlags handleTypes) { + if ( !handleTypes ) { return; } if (mvkIsOnlyAnyFlagEnabled(handleTypes, VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR)) { _externalMemoryHandleTypes = handleTypes; auto& xmProps = getPhysicalDevice()->getExternalBufferProperties(VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index c605f45a..323918f2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1142,6 +1142,7 @@ bool MVKImage::validateLinear(const VkImageCreateInfo* pCreateInfo, bool isAttac } void MVKImage::initExternalMemory(VkExternalMemoryHandleTypeFlags handleTypes) { + if ( !handleTypes ) { return; } if (mvkIsOnlyAnyFlagEnabled(handleTypes, VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR)) { auto& xmProps = getPhysicalDevice()->getExternalImageProperties(VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR); for(auto& memoryBinding : _memoryBindings) { From 52ce6a2c6ccd3ad14d507f445f375c911a2d445f Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 20 Nov 2023 14:07:04 -0800 Subject: [PATCH 33/41] MVKDevice: Correct `lineWidthGranularity`. If the `wideLines` feature isn't supported, then `lineWidthGranularity` must be zero. Fixes one part of the `dEQP-VK.info.device_properties` CTS test. --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0dd865f2..59e0b349 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -2692,7 +2692,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.pointSizeGranularity = 1; _properties.limits.lineWidthRange[0] = 1; _properties.limits.lineWidthRange[1] = 1; - _properties.limits.lineWidthGranularity = 1; + _properties.limits.lineWidthGranularity = 0; _properties.limits.standardSampleLocations = VK_TRUE; _properties.limits.strictLines = _properties.vendorID == kIntelVendorId || _properties.vendorID == kNVVendorId; From 3836b003094ae3c0922193c3b8d8c6fc268598b4 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Mon, 20 Nov 2023 14:08:28 -0800 Subject: [PATCH 34/41] MVKDevice: Change `maxDrawIndexedIndexValue` back to `UINT32_MAX`. If the `fullDrawIndexUint32` feature is supported, then `maxDrawIndexedIndexValue` must be `UINT32_MAX`. I had originally done this when I turned the feature on, but for a while now, we've been setting it to one less, because primitive restart can't be disabled and the value is defined to exclude primitive restart. The wording in the spec is ambiguous. The description of `maxDrawIndexedIndexValue` says: > * `maxDrawIndexedIndexValue` is the maximum index value that **can** > be used for indexed draw calls when using 32-bit indices. 
*This > excludes the primitive restart index value of 0xFFFFFFFF.* [emphasis > added] But the description of `fullDrawIndexUint32` says: > * `fullDrawIndexUint32` specifies the full 32-bit range of indices is > supported for indexed draw calls when using a VkIndexType of > `VK_INDEX_TYPE_UINT32`. `maxDrawIndexedIndexValue` is the maximum > index value that **may** [sic] be used *(aside from the primitive > restart index, which is always 2^32-1 when the VkIndexType > is `VK_INDEX_TYPE_UINT32`)*. If this feature is supported, > `maxDrawIndexedIndexValue` **must** be 2^32-1; otherwise > it **must** be no smaller than 2^24-1. [emphasis added] It's unclear whether it means that the primitive restart index is to be ignored, or the maximum draw index must account for it. The alternative is to disable `fullDrawIndexUint32` because we cannot set `maxDrawIndexedIndexValue` to `UINT32_MAX`; but that might mislead programs into thinking that we only support 24-bit vertex indices. Fixes the rest of the `dEQP-VK.info.device_properties` test. --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 59e0b349..d43d7d99 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -2748,7 +2748,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.maxComputeWorkGroupCount[1] = kMVKUndefinedLargeUInt32; _properties.limits.maxComputeWorkGroupCount[2] = kMVKUndefinedLargeUInt32; - _properties.limits.maxDrawIndexedIndexValue = numeric_limits<uint32_t>::max() - 1; // Support both fullDrawIndexUint32 and automatic primitive restart. + _properties.limits.maxDrawIndexedIndexValue = numeric_limits<uint32_t>::max(); _properties.limits.maxDrawIndirectCount = kMVKUndefinedLargeUInt32; From e09ca0186f7d600125008431652c82046288e3ca Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 19 Nov 2023 17:45:12 -0800 Subject: [PATCH 35/41] MVKDevice: Expose `VK_EXT_debug_utils` device functions as device functions. Despite being an instance extension, `VK_EXT_debug_utils` provides some functions that belong to the device, not the instance. Therefore, when checking device functions, we must also check if an enabled *instance* extension provides the function. Also, only enable two-extension functions if *both* extensions or the requisite core version are enabled. In all cases, these functions require both extensions or the first extension plus some core version. Fixes two CTS tests: `dEQP-VK.info.instance_extension_device_functions` `dEQP-VK.api.version_check.entry_points` --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 11 ++++---- MoltenVK/MoltenVK/GPUObjects/MVKInstance.h | 11 ++++++-- MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 31 +++++++++++---------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0dd865f2..a86877a4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -3432,12 +3432,13 @@ MVKPhysicalDevice::~MVKPhysicalDevice() { // Returns core device commands and enabled extension device commands.
PFN_vkVoidFunction MVKDevice::getProcAddr(const char* pName) { - MVKEntryPoint* pMVKPA = _physicalDevice->_mvkInstance->getEntryPoint(pName); - uint32_t apiVersion = _physicalDevice->_mvkInstance->_appInfo.apiVersion; + MVKInstance* pMVKInst = _physicalDevice->_mvkInstance; + MVKEntryPoint* pMVKPA = pMVKInst->getEntryPoint(pName); + uint32_t apiVersion = pMVKInst->_appInfo.apiVersion; - bool isSupported = (pMVKPA && // Command exists and... - pMVKPA->isDevice && // ...is a device command and... - pMVKPA->isEnabled(apiVersion, _enabledExtensions)); // ...is a core or enabled extension command. + bool isSupported = (pMVKPA && // Command exists and... + pMVKPA->isDevice && // ...is a device command and... + pMVKPA->isEnabled(apiVersion, _enabledExtensions, &pMVKInst->_enabledExtensions)); // ...is a core or enabled extension command. return isSupported ? pMVKPA->functionPointer : nullptr; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h index aa831845..f6ca8e78 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h @@ -41,9 +41,14 @@ typedef struct MVKEntryPoint { bool isDevice; bool isCore() { return !ext1Name && !ext2Name; } - bool isEnabled(uint32_t enabledVersion, const MVKExtensionList& extList) { - return ((isCore() && MVK_VULKAN_API_VERSION_CONFORM(enabledVersion) >= apiVersion) || - extList.isEnabled(ext1Name) || extList.isEnabled(ext2Name)); + bool isEnabled(uint32_t enabledVersion, const MVKExtensionList& extList, const MVKExtensionList* instExtList = nullptr) { + bool isAPISupported = MVK_VULKAN_API_VERSION_CONFORM(enabledVersion) >= apiVersion; + auto isExtnSupported = [this, isAPISupported](const MVKExtensionList& extList) { + return extList.isEnabled(this->ext1Name) && (isAPISupported || !this->ext2Name || extList.isEnabled(this->ext2Name)); + }; + return ((isCore() && isAPISupported) || + isExtnSupported(extList) || + (instExtList && isExtnSupported(*instExtList))); } } MVKEntryPoint; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index ea815201..543a1fe7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -380,8 +380,8 @@ void MVKInstance::initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo) { #define ADD_INST_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, VK_##EXT##_EXTENSION_NAME, nullptr, false) #define ADD_DVC_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, VK_##EXT##_EXTENSION_NAME, nullptr, true) -#define ADD_INST_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, false) -#define ADD_DVC_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, true) +#define ADD_INST_EXT2_ENTRY_POINT(func, API, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_API_VERSION_##API, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, false) +#define ADD_DVC_EXT2_ENTRY_POINT(func, API, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_API_VERSION_##API, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, true) // Add an open function, not tied to core or an extension. 
#define ADD_INST_OPEN_ENTRY_POINT(func) ADD_ENTRY_POINT(func, 0, nullptr, nullptr, false) @@ -429,14 +429,15 @@ void MVKInstance::initProcAddrs() { ADD_INST_EXT_ENTRY_POINT(vkCreateDebugReportCallbackEXT, EXT_DEBUG_REPORT); ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, EXT_DEBUG_REPORT); ADD_INST_EXT_ENTRY_POINT(vkDebugReportMessageEXT, EXT_DEBUG_REPORT); - ADD_INST_EXT_ENTRY_POINT(vkSetDebugUtilsObjectNameEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkSetDebugUtilsObjectTagEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkQueueBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkQueueEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkQueueInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkCmdBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkCmdEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkCmdInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + // n.b. Despite that VK_EXT_debug_utils is an instance extension, these functions are device functions. + ADD_DVC_EXT_ENTRY_POINT(vkSetDebugUtilsObjectNameEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkSetDebugUtilsObjectTagEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkQueueBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkQueueEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkQueueInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkCmdBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkCmdEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS); + ADD_DVC_EXT_ENTRY_POINT(vkCmdInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkCreateDebugUtilsMessengerEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugUtilsMessengerEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkSubmitDebugUtilsMessageEXT, EXT_DEBUG_UTILS); @@ -669,16 +670,16 @@ void MVKInstance::initProcAddrs() { ADD_DVC_EXT_ENTRY_POINT(vkMapMemory2KHR, KHR_MAP_MEMORY_2); ADD_DVC_EXT_ENTRY_POINT(vkUnmapMemory2KHR, KHR_MAP_MEMORY_2); ADD_DVC_EXT_ENTRY_POINT(vkCmdPushDescriptorSetKHR, KHR_PUSH_DESCRIPTOR); - ADD_DVC_EXT2_ENTRY_POINT(vkCmdPushDescriptorSetWithTemplateKHR, KHR_PUSH_DESCRIPTOR, KHR_DESCRIPTOR_UPDATE_TEMPLATE); + ADD_DVC_EXT2_ENTRY_POINT(vkCmdPushDescriptorSetWithTemplateKHR, 1_1, KHR_PUSH_DESCRIPTOR, KHR_DESCRIPTOR_UPDATE_TEMPLATE); ADD_DVC_EXT_ENTRY_POINT(vkCreateSwapchainKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkDestroySwapchainKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkGetSwapchainImagesKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkAcquireNextImageKHR, KHR_SWAPCHAIN); ADD_DVC_EXT_ENTRY_POINT(vkQueuePresentKHR, KHR_SWAPCHAIN); - ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupPresentCapabilitiesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); - ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); - ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); - ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupPresentCapabilitiesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); 
ADD_DVC_EXT_ENTRY_POINT(vkGetCalibratedTimestampsEXT, EXT_CALIBRATED_TIMESTAMPS); ADD_DVC_EXT_ENTRY_POINT(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, EXT_CALIBRATED_TIMESTAMPS); ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectTagEXT, EXT_DEBUG_MARKER); From ff21180909f122eaaaed958f1c0b59589b10c17b Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 19 Nov 2023 18:50:08 -0800 Subject: [PATCH 36/41] Enable some mandatory `VK_EXT_descriptor_indexing` features. These features must be supported if the extension or `descriptorIndexing` feature is. Conversely, disable the extension and the feature if we cannot support the features. Fixes `dEQP-VK.info.device_mandatory_features`.
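As a minimal sketch (not part of this patch) of how an application would observe the features this commit toggles, assuming Vulkan 1.2 headers, a Vulkan 1.1+ instance, and an already-selected VkPhysicalDevice; the helper name is invented for illustration:

    #include <vulkan/vulkan.h>
    #include <stdio.h>

    // Chain the descriptor-indexing feature struct through vkGetPhysicalDeviceFeatures2()
    // and log the two non-uniform-indexing features enabled in the diff below.
    static void logDescriptorIndexingFeatures(VkPhysicalDevice physDev) {
        VkPhysicalDeviceDescriptorIndexingFeatures diFeats = {};
        diFeats.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES;

        VkPhysicalDeviceFeatures2 feats2 = {};
        feats2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        feats2.pNext = &diFeats;

        vkGetPhysicalDeviceFeatures2(physDev, &feats2);

        printf("shaderUniformBufferArrayNonUniformIndexing: %u\n",
               diFeats.shaderUniformBufferArrayNonUniformIndexing);
        printf("shaderStorageBufferArrayNonUniformIndexing: %u\n",
               diFeats.shaderStorageBufferArrayNonUniformIndexing);
    }

With this change, both values are reported as true whenever the extension itself is advertised, since the extension is now disabled when the required Metal array-of-textures/samplers features are missing.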
* `dEQP-VK.texture.explicit_lod.2d.formats.b4g4r4a4*` --- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 8 ++++++++ MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index c605f45a..05cfe613 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1660,6 +1660,14 @@ VkResult MVKImageViewPlane::initSwizzledMTLPixelFormat(const VkImageViewCreateIn adjustAnyComponentSwizzleValue(a, R, A, B, G, R); break; + case VK_FORMAT_B4G4R4A4_UNORM_PACK16: + // Metal doesn't support this directly, so use a swizzle to get the ordering right. + adjustAnyComponentSwizzleValue(r, B, B, G, R, A); + adjustAnyComponentSwizzleValue(g, G, B, G, R, A); + adjustAnyComponentSwizzleValue(b, R, B, G, R, A); + adjustAnyComponentSwizzleValue(a, A, B, G, R, A); + break; + default: break; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm index 7e1c1a19..fa76befa 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm @@ -529,6 +529,7 @@ MTLClearColor MVKPixelFormats::getMTLClearColor(VkClearValue vkClearValue, VkFor #define OFFSET_SNORM(COLOR, BIT_WIDTH) OFFSET_NORM(-1.0, COLOR, BIT_WIDTH - 1) switch (vkFormat) { case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_B4G4R4A4_UNORM_PACK16: case VK_FORMAT_A4R4G4B4_UNORM_PACK16: case VK_FORMAT_A4B4G4R4_UNORM_PACK16: OFFSET_UNORM(red, 4) @@ -831,7 +832,7 @@ void MVKPixelFormats::initVkFormatCapabilities() { addVkFormatDesc( R4G4_UNORM_PACK8, Invalid, Invalid, Invalid, Invalid, 1, 1, 1, ColorFloat ); addVkFormatDesc( R4G4B4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat ); - addVkFormatDesc( B4G4R4A4_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat ); + addVkFormatDescSwizzled( B4G4R4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, B, G, R, A ); addVkFormatDescSwizzled( A4R4G4B4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, G, B, A, R ); addVkFormatDescSwizzled( A4B4G4R4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, A, B, G, R ); From a7372edafec07abff4d1e1684ed2088dc1c0ee32 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 22 Nov 2023 16:42:29 -0500 Subject: [PATCH 38/41] Fix initial value of VkPhysicalDeviceLimits::timestampPeriod on non-Apple Silicon GPUs. - Don't update value of timestampPeriod on first measurement. - Force that first measurement upon creation of MVKPhysicalDevice, so an accurate value for timestampPeriod will be calculated when next queried. --- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index e7b23f2c..e0abbf70 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -24,6 +24,7 @@ Released TBD - Fix regression in marking rendering state dirty after `vkCmdClearAttachments()`. - Reduce disk space consumed after running `fetchDependencies` script by removing intermediate file caches. - Fix rare deadlock during launch via `dlopen()`. +- Fix initial value of `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple Silicon GPUs. - Update to latest SPIRV-Cross: - MSL: Fix regression error in argument buffer runtime arrays. 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 95eb0821..cbbbfab4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1628,14 +1628,15 @@ void MVKPhysicalDevice::updateTimestampPeriod() { [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; - if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero - float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; - + + // Don't update period the first time through, or if no time elapsed. + if (earlierCPUTs && elapsedCPUNanos && elapsedGPUTicks) { // Basic lowpass filter TPout = (1 - A)TPout + (A * TPin). // The lower A is, the slower TPout will change over time. - // First time through, just use the measured value directly. - float a = earlierCPUTs ? mvkConfig().timestampPeriodLowPassAlpha : 1.0; - _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); + auto& vkTsp = _properties.limits.timestampPeriod; + float a = mvkConfig().timestampPeriodLowPassAlpha; + float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; + vkTsp = ((1.0 - a) * vkTsp) + (a * tsPeriod); } } } @@ -2672,7 +2673,10 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.optimalBufferCopyRowPitchAlignment = 1; _properties.limits.timestampComputeAndGraphics = VK_TRUE; - _properties.limits.timestampPeriod = 1.0; // On non-Apple GPU's, this can vary over time, and is calculated based on actual GPU activity. + + // On non-Apple GPU's, this can vary over time, and is calculated based on actual GPU activity. + _properties.limits.timestampPeriod = 1.0; + updateTimestampPeriod(); _properties.limits.pointSizeRange[0] = 1; switch (_properties.vendorID) { From 44b361390a9f3b1a37fe80ae31310bee3e1a7d35 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 19 Nov 2023 18:58:15 -0800 Subject: [PATCH 39/41] MVKDevice: Clamp max per-set descriptor limit to minimum 1024. As required by the Vulkan spec. Fixes the CTS tests `dEQP-VK.api.info.vulkan1p2_limits_validation.khr_maintenance_3` and `dEQP-VK.api.maintenance3_check.maintenance3_properties`. --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 1 + MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index be9c08fb..f40201cc 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -403,6 +403,7 @@ protected: uint64_t getRecommendedMaxWorkingSetSize(); uint64_t getCurrentAllocatedSize(); uint32_t getMaxSamplerCount(); + uint32_t getMaxPerSetDescriptorCount(); void initExternalMemoryProperties(); void initExtensions(); void initCounterSets(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0dd865f2..fe9a9f6a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -525,9 +525,7 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { supportedProps11.maxMultiviewViewCount = 32; supportedProps11.maxMultiviewInstanceIndex = canUseInstancingForMultiview() ? 
uintMax / 32 : uintMax; supportedProps11.protectedNoFault = false; - supportedProps11.maxPerSetDescriptors = 4 * (_metalFeatures.maxPerStageBufferCount + - _metalFeatures.maxPerStageTextureCount + - _metalFeatures.maxPerStageSamplerCount); + supportedProps11.maxPerSetDescriptors = getMaxPerSetDescriptorCount(); supportedProps11.maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize; // Create a SSOT for these Vulkan 1.2 properties, which can be queried via two mechanisms here. @@ -3146,6 +3144,13 @@ uint32_t MVKPhysicalDevice::getMaxSamplerCount() { } } +// Vulkan imposes a minimum maximum of 1024 descriptors per set. +uint32_t MVKPhysicalDevice::getMaxPerSetDescriptorCount() { + return max(4 * (_metalFeatures.maxPerStageBufferCount + + _metalFeatures.maxPerStageTextureCount + + _metalFeatures.maxPerStageSamplerCount), 1024u); +} + void MVKPhysicalDevice::initExternalMemoryProperties() { // Common @@ -3499,7 +3504,7 @@ void MVKDevice::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateI for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { descriptorCount += pCreateInfo->pBindings[i].descriptorCount; } - pSupport->supported = (descriptorCount < ((_physicalDevice->_metalFeatures.maxPerStageBufferCount + _physicalDevice->_metalFeatures.maxPerStageTextureCount + _physicalDevice->_metalFeatures.maxPerStageSamplerCount) * 2)); + pSupport->supported = (descriptorCount < _physicalDevice->getMaxPerSetDescriptorCount()); // Check whether the layout has a variable-count descriptor, and if so, whether we can support it. for (auto* next = (VkBaseOutStructure*)pSupport->pNext; next; next = next->pNext) { From d0dba760e72b93a17e94b94a2d3038f781f03ffc Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 28 Nov 2023 14:05:21 -0800 Subject: [PATCH 40/41] MVKPipeline: Enable the cube texture gradient workaround for Apple Silicon. Update SPIRV-Cross to pull in the code for the workaround. Fixes 8 tests under `dEQP-VK.glsl.texture_functions.texturegrad.*`. --- Docs/Whats_New.md | 1 + ExternalRevisions/SPIRV-Cross_repo_revision | 2 +- MoltenVK/MoltenVK/API/mvk_private_api.h | 1 + MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 1 + MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 4 +++- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index e0abbf70..724532ae 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -27,6 +27,7 @@ Released TBD - Fix initial value of `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple Silicon GPUs. - Update to latest SPIRV-Cross: - MSL: Fix regression error in argument buffer runtime arrays. + - MSL: Work around broken cube texture gradients on Apple Silicon. diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision index 9f579560..cd24745d 100644 --- a/ExternalRevisions/SPIRV-Cross_repo_revision +++ b/ExternalRevisions/SPIRV-Cross_repo_revision @@ -1 +1 @@ -4818f7e7ef7b7078a3a7a5a52c4a338e0dda22f4 +50e90dd74e0e43e243f12a70f0326d2cf8ed3945 diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index af6c3ffc..dd0b31ca 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -152,6 +152,7 @@ typedef struct { VkBool32 needsSampleDrefLodArrayWorkaround; /**< If true, sampling from arrayed depth images with explicit LoD is broken and needs a workaround. */ VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. 
*/ VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */ + VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */ } MVKPhysicalDeviceMetalFeatures; /** diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index cca95d57..dfad9cf0 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1759,6 +1759,7 @@ void MVKPhysicalDevice::initMetalFeatures() { if (!mvkOSVersionIsAtLeast(14.0, 17.0, 1.0)) { _metalFeatures.needsSampleDrefLodArrayWorkaround = true; } + _metalFeatures.needsCubeGradWorkaround = true; // fallthrough case kIntelVendorId: case kNVVendorId: diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 0b266340..c85ef684 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -1680,6 +1680,7 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff; shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; + shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = _device->_pMetalFeatures->needsCubeGradWorkaround; MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout; layout->populateShaderConversionConfig(shaderConfig); @@ -2517,7 +2518,8 @@ namespace SPIRV_CROSS_NAMESPACE { opt.manual_helper_invocation_updates, opt.check_discarded_frag_stores, opt.sample_dref_lod_array_as_grad, - opt.replace_recursive_inputs); + opt.replace_recursive_inputs, + opt.agx_manual_cube_grad_fixup); } template From 90eb1af19f2351329d7025e8c121dda3fd7a15ca Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Sat, 2 Dec 2023 19:20:31 -0500 Subject: [PATCH 41/41] Add support for extension VK_EXT_headless_surface. - Consolidate info about CAMetalLayer and headless in MVKSurface. - MVKSwapchainImage remove getCAMetalDrawable() and focus on abstracting getMTLTexture(). - MVKPresentableSwapchainImage::getCAMetalDrawable() return nil if headless. - Add MVKPresentableSwapchainImage::_mtlTextureHeadless to support a fixed MTLTexture that is not retrieved from a CAMetalDrawable. - MVKPresentableSwapchainImage refactor signalling semaphores and fences. - MVKPresentableSwapchainImage don't lock when signalling semaphores and fences. - If no present occurs, actualPresentTime will be zero. Set it to current time, instead of to desiredPresentTime, since it's more accurate. - Rework timestamps: - Remove _mvkTimestampBase so mvkGetTimestamp() is equal to mach_absolute_time(), which is used in presentation timing. - Add mvkGetRuntimeNanoseconds(). - Rename mvkGetAbsoluteTime() to mvkGetContinuousNanoseconds(). - Remove mvkGetTimestampPeriod() as unused. - MVKSemaphoreMTLEvent::encodeDeferredSignal remove redundant nil test (unrelated). - Fix swapchain and surface bugs when windowing system is accessed from off the main thread (unrelated). - Log warning when deprecated functions vkCreateMacOSSurfaceMVK() or vkCreateIOSSurfaceMVK() are used (unrelated). - Remove documentation for visionos, as support is not ready (unrelated). 
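For context, a minimal usage sketch of the new extension from the application side (not part of this patch; the helper name is illustrative). The entry point is fetched through `vkGetInstanceProcAddr`, and the surface carries no CAMetalLayer; its extent is later taken from the swapchain created against it:

```cpp
#include <vulkan/vulkan.h>

// Create a headless VkSurfaceKHR on an instance that enabled
// VK_EXT_HEADLESS_SURFACE_EXTENSION_NAME. Hypothetical helper for illustration.
static VkSurfaceKHR createHeadlessSurface(VkInstance instance) {
    auto pfnCreateHeadlessSurface = reinterpret_cast<PFN_vkCreateHeadlessSurfaceEXT>(
        vkGetInstanceProcAddr(instance, "vkCreateHeadlessSurfaceEXT"));
    if ( !pfnCreateHeadlessSurface ) { return VK_NULL_HANDLE; }

    VkHeadlessSurfaceCreateInfoEXT createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_HEADLESS_SURFACE_CREATE_INFO_EXT;

    VkSurfaceKHR surface = VK_NULL_HANDLE;
    if (pfnCreateHeadlessSurface(instance, &createInfo, nullptr, &surface) != VK_SUCCESS) {
        return VK_NULL_HANDLE;
    }
    // No CAMetalLayer backs this surface; per this patch, each swapchain image is
    // backed by a fixed private MTLTexture rather than a CAMetalDrawable.
    return surface;
}
```

From the app's perspective, surface queries, swapchain creation, and presentation then behave normally, which is what allows off-screen test harnesses to run without a windowing system attached.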
--- Common/MVKOSExtensions.h | 42 ++++--- Common/MVKOSExtensions.mm | 43 ++++--- Docs/MoltenVK_Runtime_UserGuide.md | 3 + Docs/Whats_New.md | 2 + MoltenVK/MoltenVK.xcodeproj/project.pbxproj | 20 ++-- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 27 ++--- MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 20 +--- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 108 ++++++++++-------- MoltenVK/MoltenVK/GPUObjects/MVKInstance.h | 3 + MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 8 +- .../MoltenVK/GPUObjects/MVKPixelFormats.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKSurface.h | 26 +++-- MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm | 56 +++++++-- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h | 24 ++-- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 102 ++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKSync.mm | 2 +- MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h | 13 ++- ...er+MoltenVK.m => CAMetalLayer+MoltenVK.mm} | 8 ++ MoltenVK/MoltenVK/Vulkan/vulkan.mm | 20 ++++ README.md | 20 +--- 21 files changed, 313 insertions(+), 237 deletions(-) rename MoltenVK/MoltenVK/OS/{CAMetalLayer+MoltenVK.m => CAMetalLayer+MoltenVK.mm} (92%) diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index f9faba91..2c40602a 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -24,6 +24,9 @@ #include +#pragma mark - +#pragma mark Operating System versions + typedef float MVKOSVersion; /*** Constant indicating unsupported functionality in an OS. */ @@ -66,20 +69,31 @@ static inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, #endif } + +#pragma mark - +#pragma mark Timestamps + /** - * Returns a monotonic timestamp value for use in Vulkan and performance timestamping. + * Returns a monotonic tick value for use in Vulkan and performance timestamping. * - * The returned value corresponds to the number of CPU "ticks" since the app was initialized. - * - * Calling this value twice, subtracting the first value from the second, and then multiplying - * the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the - * number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds() - * can be used to perform this calculation. + * The returned value corresponds to the number of CPU ticks since an arbitrary + * point in the past, and does not increment while the system is asleep. */ uint64_t mvkGetTimestamp(); -/** Returns the number of nanoseconds between each increment of the value returned by mvkGetTimestamp(). */ -double mvkGetTimestampPeriod(); +/** + * Returns the number of runtime nanoseconds since an arbitrary point in the past, + * excluding any time spent while the system is asleep. + * + * This value corresponds to the timestamps returned by Metal presentation timings. + */ +uint64_t mvkGetRuntimeNanoseconds(); + +/** + * Returns the number of nanoseconds since an arbitrary point in the past, + * including any time spent while the system is asleep. + */ +uint64_t mvkGetContinuousNanoseconds(); /** * Returns the number of nanoseconds elapsed between startTimestamp and endTimestamp, @@ -97,12 +111,6 @@ uint64_t mvkGetElapsedNanoseconds(uint64_t startTimestamp = 0, uint64_t endTimes */ double mvkGetElapsedMilliseconds(uint64_t startTimestamp = 0, uint64_t endTimestamp = 0); -/** Returns the current absolute time in nanoseconds. */ -uint64_t mvkGetAbsoluteTime(); - -/** Ensures the block is executed on the main thread. 
*/ -void mvkDispatchToMainAndWait(dispatch_block_t block); - #pragma mark - #pragma mark Process environment @@ -141,8 +149,12 @@ uint64_t mvkGetUsedMemorySize(); /** Returns the size of a page of host memory on this platform. */ uint64_t mvkGetHostMemoryPageSize(); + #pragma mark - #pragma mark Threading /** Returns the amount of avaliable CPU cores. */ uint32_t mvkGetAvaliableCPUCores(); + +/** Ensures the block is executed on the main thread. */ +void mvkDispatchToMainAndWait(dispatch_block_t block); diff --git a/Common/MVKOSExtensions.mm b/Common/MVKOSExtensions.mm index 93025f2b..8d33f3d4 100644 --- a/Common/MVKOSExtensions.mm +++ b/Common/MVKOSExtensions.mm @@ -29,6 +29,10 @@ using namespace std; + +#pragma mark - +#pragma mark Operating System versions + MVKOSVersion mvkOSVersion() { static MVKOSVersion _mvkOSVersion = 0; if ( !_mvkOSVersion ) { @@ -38,43 +42,35 @@ MVKOSVersion mvkOSVersion() { return _mvkOSVersion; } -static uint64_t _mvkTimestampBase; -static double _mvkTimestampPeriod; + +#pragma mark - +#pragma mark Timestamps + static mach_timebase_info_data_t _mvkMachTimebase; -uint64_t mvkGetTimestamp() { return mach_absolute_time() - _mvkTimestampBase; } +uint64_t mvkGetTimestamp() { return mach_absolute_time(); } -double mvkGetTimestampPeriod() { return _mvkTimestampPeriod; } +uint64_t mvkGetRuntimeNanoseconds() { return mach_absolute_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } + +uint64_t mvkGetContinuousNanoseconds() { return mach_continuous_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } uint64_t mvkGetElapsedNanoseconds(uint64_t startTimestamp, uint64_t endTimestamp) { if (endTimestamp == 0) { endTimestamp = mvkGetTimestamp(); } - return (endTimestamp - startTimestamp) * _mvkTimestampPeriod; + return (endTimestamp - startTimestamp) * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } double mvkGetElapsedMilliseconds(uint64_t startTimestamp, uint64_t endTimestamp) { return mvkGetElapsedNanoseconds(startTimestamp, endTimestamp) / 1e6; } -uint64_t mvkGetAbsoluteTime() { return mach_continuous_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; } - -// Initialize timestamping capabilities on app startup. -//Called automatically when the framework is loaded and initialized. +// Initialize timestamp capabilities on app startup. +// Called automatically when the framework is loaded and initialized. 
static bool _mvkTimestampsInitialized = false; __attribute__((constructor)) static void MVKInitTimestamps() { if (_mvkTimestampsInitialized ) { return; } _mvkTimestampsInitialized = true; - _mvkTimestampBase = mach_absolute_time(); mach_timebase_info(&_mvkMachTimebase); - _mvkTimestampPeriod = (double)_mvkMachTimebase.numer / (double)_mvkMachTimebase.denom; -} - -void mvkDispatchToMainAndWait(dispatch_block_t block) { - if (NSThread.isMainThread) { - block(); - } else { - dispatch_sync(dispatch_get_main_queue(), block); - } } @@ -145,6 +141,7 @@ uint64_t mvkGetUsedMemorySize() { uint64_t mvkGetHostMemoryPageSize() { return sysconf(_SC_PAGESIZE); } + #pragma mark - #pragma mark Threading @@ -152,3 +149,11 @@ uint64_t mvkGetHostMemoryPageSize() { return sysconf(_SC_PAGESIZE); } uint32_t mvkGetAvaliableCPUCores() { return (uint32_t)[[NSProcessInfo processInfo] activeProcessorCount]; } + +void mvkDispatchToMainAndWait(dispatch_block_t block) { + if (NSThread.isMainThread) { + block(); + } else { + dispatch_sync(dispatch_get_main_queue(), block); + } +} diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 9b00360e..ee93c801 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -380,6 +380,9 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_EXT_external_memory_host` - `VK_EXT_fragment_shader_interlock` - *Requires Metal 2.0 and Raster Order Groups.* +- `VK_EXT_hdr_metadata` + - *macOS only.* +- `VK_EXT_headless_surface` - `VK_EXT_host_query_reset` - `VK_EXT_image_robustness` - `VK_EXT_inline_uniform_block` diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index e0abbf70..ebafdde4 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,11 +20,13 @@ Released TBD - Add support for extensions: - `VK_EXT_extended_dynamic_state3` *(Metal does not support `VK_POLYGON_MODE_POINT`)* + - `VK_EXT_headless_surface` - Fix regression that broke `VK_POLYGON_MODE_LINE`. - Fix regression in marking rendering state dirty after `vkCmdClearAttachments()`. - Reduce disk space consumed after running `fetchDependencies` script by removing intermediate file caches. - Fix rare deadlock during launch via `dlopen()`. - Fix initial value of `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple Silicon GPUs. +- Fix swapchain and surface bugs when windowing system is accessed from off the main thread. - Update to latest SPIRV-Cross: - MSL: Fix regression error in argument buffer runtime arrays. 
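The reworked timestamp helpers in `MVKOSExtensions.mm` above all reduce to the same conversion: Mach ticks scaled by the cached Mach timebase ratio. A stand-alone sketch of that conversion (illustrative names; the real code initializes the timebase once in a load-time constructor):

```cpp
#include <cstdint>
#include <mach/mach_time.h>

// Sketch of the tick-to-nanosecond conversion behind mvkGetRuntimeNanoseconds()
// and mvkGetContinuousNanoseconds(): ticks * numer / denom of the Mach timebase.
static uint64_t machTicksToNanoseconds(uint64_t ticks) {
    static mach_timebase_info_data_t timebase = [] {
        mach_timebase_info_data_t tb;
        mach_timebase_info(&tb);
        return tb;
    }();
    return ticks * timebase.numer / timebase.denom;
}

// mach_absolute_time() does not advance while the system sleeps (runtime clock);
// mach_continuous_time() keeps counting through sleep (continuous clock).
static uint64_t runtimeNanos()    { return machTicksToNanoseconds(mach_absolute_time()); }
static uint64_t continuousNanos() { return machTicksToNanoseconds(mach_continuous_time()); }
```

Keeping `mvkGetTimestamp()` equal to raw `mach_absolute_time()` is what lets it be compared directly against the timestamps Metal reports in presentation timings.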
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj index 46ff50fc..be7ca325 100644 --- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj +++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj @@ -117,7 +117,7 @@ 2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A11C7DFB4800632CA3 /* MVKLayers.mm */; }; 2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7881C7DFB4800632CA3 /* MVKFramebuffer.mm */; }; 2FEA0AB124902F9F00EEF3AD /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; }; - 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; + 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; }; 2FEA0AB324902F9F00EEF3AD /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; }; 2FEA0AB424902F9F00EEF3AD /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; }; 45003E73214AD4E500E989CB /* MVKExtensions.def in Headers */ = {isa = PBXBuildFile; fileRef = 45003E6F214AD4C900E989CB /* MVKExtensions.def */; }; @@ -360,8 +360,8 @@ A9E53DE62100B197002781DD /* NSString+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD42100B197002781DD /* NSString+MoltenVK.mm */; }; A9E53DE72100B197002781DD /* MTLTextureDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */; }; A9E53DE82100B197002781DD /* MTLTextureDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */; }; - A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; - A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; + A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; }; + A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; }; A9E53DF32100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */; }; A9E53DF42100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */; }; A9E53DF52100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DF22100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m */; }; @@ -495,7 +495,7 @@ DCFD7F572A45BC6E007BBBF7 /* MVKFramebuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7881C7DFB4800632CA3 /* MVKFramebuffer.mm */; }; DCFD7F582A45BC6E007BBBF7 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = 453638302508A4C6000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m */; }; 
DCFD7F592A45BC6E007BBBF7 /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; }; - DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; }; + DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; }; DCFD7F5B2A45BC6E007BBBF7 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; }; DCFD7F5C2A45BC6E007BBBF7 /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; }; /* End PBXBuildFile section */ @@ -691,7 +691,7 @@ A9E53DD32100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "MTLSamplerDescriptor+MoltenVK.h"; sourceTree = ""; }; A9E53DD42100B197002781DD /* NSString+MoltenVK.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "NSString+MoltenVK.mm"; sourceTree = ""; }; A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLTextureDescriptor+MoltenVK.m"; sourceTree = ""; }; - A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "CAMetalLayer+MoltenVK.m"; sourceTree = ""; }; + A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "CAMetalLayer+MoltenVK.mm"; sourceTree = ""; }; A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "MTLRenderPassDescriptor+MoltenVK.h"; sourceTree = ""; }; A9E53DF22100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLRenderPassDescriptor+MoltenVK.m"; sourceTree = ""; }; A9E53DFA21064F84002781DD /* MTLRenderPipelineDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLRenderPipelineDescriptor+MoltenVK.m"; sourceTree = ""; }; @@ -889,7 +889,7 @@ isa = PBXGroup; children = ( A9E53DD12100B197002781DD /* CAMetalLayer+MoltenVK.h */, - A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */, + A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */, 453638312508A4C7000EFFD3 /* MTLRenderPassDepthAttachmentDescriptor+MoltenVK.h */, 4536382F2508A4C6000EFFD3 /* MTLRenderPassDepthAttachmentDescriptor+MoltenVK.m */, A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */, @@ -1703,7 +1703,7 @@ 2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */, 2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */, 2FEA0AB124902F9F00EEF3AD /* MVKMTLBufferAllocation.mm in Sources */, - 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.m in Sources */, + 2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.mm in Sources */, 2FEA0AB324902F9F00EEF3AD /* MVKCmdDispatch.mm in Sources */, 2FEA0AB424902F9F00EEF3AD /* MVKCmdDebug.mm in Sources */, ); @@ -1763,7 +1763,7 @@ A94FB7EE1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */, 453638382508A4C7000EFFD3 /* 
MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */, A9C96DD21DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */, - A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */, + A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */, A9096E5E1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */, A99C90F0229455B300A061DA /* MVKCmdDebug.mm in Sources */, ); @@ -1823,7 +1823,7 @@ A94FB7EF1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */, 4536383A2508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */, A9C96DD31DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */, - A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */, + A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */, A9096E5F1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */, A99C90F1229455B300A061DA /* MVKCmdDebug.mm in Sources */, ); @@ -1883,7 +1883,7 @@ DCFD7F572A45BC6E007BBBF7 /* MVKFramebuffer.mm in Sources */, DCFD7F582A45BC6E007BBBF7 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */, DCFD7F592A45BC6E007BBBF7 /* MVKMTLBufferAllocation.mm in Sources */, - DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.m in Sources */, + DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.mm in Sources */, DCFD7F5B2A45BC6E007BBBF7 /* MVKCmdDispatch.mm in Sources */, DCFD7F5C2A45BC6E007BBBF7 /* MVKCmdDebug.mm in Sources */, ); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index cbbbfab4..401c880f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1204,8 +1204,8 @@ VkResult MVKPhysicalDevice::getSurfaceSupport(uint32_t queueFamilyIndex, isHeadless = getMTLDevice().isHeadless; #endif - // If this device is headless or the surface does not have a CAMetalLayer, it is not supported. - *pSupported = !(isHeadless || (surface->getCAMetalLayer() == nil)); + // If this device is headless, the surface must be headless. + *pSupported = isHeadless ? surface->isHeadless() : wasConfigurationSuccessful(); return *pSupported ? VK_SUCCESS : surface->getConfigurationResult(); } @@ -1264,13 +1264,12 @@ VkResult MVKPhysicalDevice::getSurfaceCapabilities( const VkPhysicalDeviceSurfac // The CAlayer underlying the surface must be a CAMetalLayer. MVKSurface* surface = (MVKSurface*)pSurfaceInfo->surface; - CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } VkSurfaceCapabilitiesKHR& surfCaps = pSurfaceCapabilities->surfaceCapabilities; surfCaps.minImageCount = _metalFeatures.minSwapchainImageCount; surfCaps.maxImageCount = _metalFeatures.maxSwapchainImageCount; - surfCaps.currentExtent = mvkGetNaturalExtent(mtlLayer); + surfCaps.currentExtent = surface->getNaturalExtent(); surfCaps.minImageExtent = { 1, 1 }; surfCaps.maxImageExtent = { _properties.limits.maxImageDimension2D, _properties.limits.maxImageDimension2D }; surfCaps.maxImageArrayLayers = 1; @@ -1349,9 +1348,7 @@ VkResult MVKPhysicalDevice::getSurfaceFormats(MVKSurface* surface, uint32_t* pCount, VkSurfaceFormatKHR* pSurfaceFormats) { - // The layer underlying the surface view must be a CAMetalLayer. 
- CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } #define addSurfFmt(MTL_FMT) \ do { \ @@ -1474,9 +1471,7 @@ VkResult MVKPhysicalDevice::getSurfacePresentModes(MVKSurface* surface, uint32_t* pCount, VkPresentModeKHR* pPresentModes) { - // The layer underlying the surface view must be a CAMetalLayer. - CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } #define ADD_VK_PRESENT_MODE(VK_PM) \ do { \ @@ -1504,9 +1499,7 @@ VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface, uint32_t* pRectCount, VkRect2D* pRects) { - // The layer underlying the surface view must be a CAMetalLayer. - CAMetalLayer* mtlLayer = surface->getCAMetalLayer(); - if ( !mtlLayer ) { return surface->getConfigurationResult(); } + if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); } if ( !pRects ) { *pRectCount = 1; @@ -1518,7 +1511,7 @@ VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface, *pRectCount = 1; pRects[0].offset = { 0, 0 }; - pRects[0].extent = mvkGetNaturalExtent(mtlLayer); + pRects[0].extent = surface->getNaturalExtent(); return VK_SUCCESS; } @@ -3666,14 +3659,14 @@ void MVKDevice::getCalibratedTimestamps(uint32_t timestampCount, MTLTimestamp cpuStamp, gpuStamp; uint64_t cpuStart, cpuEnd; - cpuStart = mvkGetAbsoluteTime(); + cpuStart = mvkGetContinuousNanoseconds(); [getMTLDevice() sampleTimestamps: &cpuStamp gpuTimestamp: &gpuStamp]; // Sample again to calculate the maximum deviation. Note that the // -[MTLDevice sampleTimestamps:gpuTimestamp:] method guarantees that CPU // timestamps are in nanoseconds. We don't want to call the method again, // because that could result in an expensive syscall to query the GPU time- // stamp. - cpuEnd = mvkGetAbsoluteTime(); + cpuEnd = mvkGetContinuousNanoseconds(); for (uint32_t tsIdx = 0; tsIdx < timestampCount; ++tsIdx) { switch (pTimestampInfos[tsIdx].timeDomain) { case VK_TIME_DOMAIN_DEVICE_EXT: diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 900b10ff..058876d6 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -378,14 +378,8 @@ class MVKSwapchainImage : public MVKImage { public: - /** Binds this resource to the specified offset within the specified memory allocation. */ VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset, uint8_t planeIndex) override; -#pragma mark Metal - - /** Returns the Metal texture used by the CAMetalDrawable underlying this image. */ - id getMTLTexture(uint8_t planeIndex) override; - #pragma mark Construction @@ -399,7 +393,6 @@ public: protected: friend class MVKPeerSwapchainImage; - virtual id getCAMetalDrawable() = 0; void detachSwapchain(); std::mutex _detachmentLock; @@ -445,6 +438,8 @@ public: #pragma mark Metal + id getMTLTexture(uint8_t planeIndex) override; + /** Presents the contained drawable to the OS. 
*/ VkResult presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); @@ -468,16 +463,16 @@ public: protected: friend MVKSwapchain; - id getCAMetalDrawable() override; + id getCAMetalDrawable(); void addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo, MVKSwapchainSignaler signaler); void releaseMetalDrawable(); MVKSwapchainImageAvailability getAvailability(); - void makeAvailable(const MVKSwapchainSignaler& signaler); void makeAvailable(); VkResult acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); MVKSwapchainSignaler getPresentationSignaler(); id _mtlDrawable = nil; + id _mtlTextureHeadless = nil; MVKSwapchainImageAvailability _availability; MVKSmallVector _availabilitySignalers; MVKSwapchainSignaler _preSignaler = {}; @@ -494,7 +489,8 @@ class MVKPeerSwapchainImage : public MVKSwapchainImage { public: - /** Binds this resource according to the specified bind information. */ + id getMTLTexture(uint8_t planeIndex) override; + VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo) override; @@ -504,10 +500,6 @@ public: const VkImageCreateInfo* pCreateInfo, MVKSwapchain* swapchain, uint32_t swapchainIndex); - -protected: - id getCAMetalDrawable() override; - }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 323918f2..1c132a4d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -25,8 +25,10 @@ #include "MVKFoundation.h" #include "MVKOSExtensions.h" #include "MVKCodec.h" + #import "MTLTextureDescriptor+MoltenVK.h" #import "MTLSamplerDescriptor+MoltenVK.h" +#import "CAMetalLayer+MoltenVK.h" using namespace std; using namespace SPIRV_CROSS_NAMESPACE; @@ -1169,12 +1171,6 @@ VkResult MVKSwapchainImage::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSi } -#pragma mark Metal - -// Overridden to always retrieve the MTLTexture directly from the CAMetalDrawable. -id MVKSwapchainImage::getMTLTexture(uint8_t planeIndex) { return [getCAMetalDrawable() texture]; } - - #pragma mark Construction MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device, @@ -1212,39 +1208,33 @@ MVKSwapchainImageAvailability MVKPresentableSwapchainImage::getAvailability() { return _availability; } -// If present, signal the semaphore for the first waiter for the given image. -static void signalPresentationSemaphore(const MVKSwapchainSignaler& signaler, id mtlCmdBuff) { - if (signaler.semaphore) { signaler.semaphore->encodeDeferredSignal(mtlCmdBuff, signaler.semaphoreSignalToken); } -} - -// Signal either or both of the semaphore and fence in the specified tracker pair. -static void signal(const MVKSwapchainSignaler& signaler, id mtlCmdBuff) { - if (signaler.semaphore) { signaler.semaphore->encodeDeferredSignal(mtlCmdBuff, signaler.semaphoreSignalToken); } - if (signaler.fence) { signaler.fence->signal(); } -} - // Tell the semaphore and fence that they are being tracked for future signaling. -static void markAsTracked(const MVKSwapchainSignaler& signaler) { +static void track(const MVKSwapchainSignaler& signaler) { if (signaler.semaphore) { signaler.semaphore->retain(); } if (signaler.fence) { signaler.fence->retain(); } } -// Tell the semaphore and fence that they are no longer being tracked for future signaling. 
-static void unmarkAsTracked(const MVKSwapchainSignaler& signaler) { - if (signaler.semaphore) { signaler.semaphore->release(); } - if (signaler.fence) { signaler.fence->release(); } +static void signal(MVKSemaphore* semaphore, uint64_t semaphoreSignalToken, id mtlCmdBuff) { + if (semaphore) { semaphore->encodeDeferredSignal(mtlCmdBuff, semaphoreSignalToken); } } -static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { - signal(signaler, nil); - unmarkAsTracked(signaler); +static void signal(MVKFence* fence) { + if (fence) { fence->signal(); } +} + +// Signal the semaphore and fence and tell them that they are no longer being tracked for future signaling. +static void signalAndUntrack(const MVKSwapchainSignaler& signaler) { + signal(signaler.semaphore, signaler.semaphoreSignalToken, nil); + if (signaler.semaphore) { signaler.semaphore->release(); } + + signal(signaler.fence); + if (signaler.fence) { signaler.fence->release(); } } VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { // Now that this image is being acquired, release the existing drawable and its texture. // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. - // This may trigger a delayed presentation callback, which uses the _availabilityLock, also used below. releaseMetalDrawable(); lock_guard lock(_availabilityLock); @@ -1267,7 +1257,8 @@ VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphor mtlCmdBuff = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage); if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } } - signal(signaler, mtlCmdBuff); + signal(signaler.semaphore, signaler.semaphoreSignalToken, mtlCmdBuff); + signal(signaler.fence); [mtlCmdBuff commit]; } @@ -1275,7 +1266,7 @@ VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphor } else { _availabilitySignalers.push_back(signaler); } - markAsTracked(signaler); + track(signaler); return getConfigurationResult(); } @@ -1284,6 +1275,9 @@ VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphor // Attempt several times to retrieve a good drawable, and set an error to trigger the // swapchain to be re-established if one cannot be retrieved. id MVKPresentableSwapchainImage::getCAMetalDrawable() { + + if (_mtlTextureHeadless) { return nil; } // If headless, there is no drawable. + if ( !_mtlDrawable ) { @autoreleasepool { bool hasInvalidFormat = false; @@ -1305,6 +1299,11 @@ id MVKPresentableSwapchainImage::getCAMetalDrawable() { return _mtlDrawable; } +// If not headless, retrieve the MTLTexture directly from the CAMetalDrawable. +id MVKPresentableSwapchainImage::getMTLTexture(uint8_t planeIndex) { + return _mtlTextureHeadless ? _mtlTextureHeadless : getCAMetalDrawable().texture; +} + // Present the drawable and make myself available only once the command buffer has completed. // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. 
VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, @@ -1343,15 +1342,13 @@ VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(idretain(); } [mtlCmdBuff addCompletedHandler: ^(id mcb) { - if (fence) { - fence->signal(); - fence->release(); - } + signal(fence); + if (fence) { fence->release(); } [mtlDrwbl release]; release(); }]; - signalPresentationSemaphore(signaler, mtlCmdBuff); + signal(signaler.semaphore, signaler.semaphoreSignalToken, mtlCmdBuff); return getConfigurationResult(); } @@ -1408,6 +1405,13 @@ void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo, const MVKSwapchainSignaler& signaler, uint64_t actualPresentTime) { + + // If the presentation time is not available, use the current nanosecond runtime clock, + // which should be reasonably accurate (sub-ms) to the presentation time. The presentation + // time will not be available if the presentation did not actually happen, such as when + // running headless, or on a test harness that is not attached to the windowing system. + if (actualPresentTime == 0) { actualPresentTime = mvkGetRuntimeNanoseconds(); } + { // Scope to avoid deadlock if release() is run within detachment lock // If I have become detached from the swapchain, it means the swapchain, and possibly the // VkDevice, have been destroyed by the time of this callback, so do not reference them. @@ -1415,7 +1419,11 @@ void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& pr if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } } - makeAvailable(signaler); + + // Makes an image available for acquisition by the app. + // If any semaphores are waiting to be signaled when this image becomes available, the + // earliest semaphore is signaled, and this image remains unavailable for other uses. + signalAndUntrack(signaler); release(); } @@ -1425,15 +1433,6 @@ void MVKPresentableSwapchainImage::releaseMetalDrawable() { _mtlDrawable = nil; } -// Makes an image available for acquisition by the app. -// If any semaphores are waiting to be signaled when this image becomes available, the -// earliest semaphore is signaled, and this image remains unavailable for other uses. -void MVKPresentableSwapchainImage::makeAvailable(const MVKSwapchainSignaler& signaler) { - lock_guard lock(_availabilityLock); - - signalAndUnmarkAsTracked(signaler); -} - // Signal, untrack, and release any signalers that are tracking. // Release the drawable before the lock, as it may trigger completion callback. 
void MVKPresentableSwapchainImage::makeAvailable() { @@ -1441,9 +1440,9 @@ void MVKPresentableSwapchainImage::makeAvailable() { lock_guard lock(_availabilityLock); if ( !_availability.isAvailable ) { - signalAndUnmarkAsTracked(_preSignaler); + signalAndUntrack(_preSignaler); for (auto& sig : _availabilitySignalers) { - signalAndUnmarkAsTracked(sig); + signalAndUntrack(sig); } _availabilitySignalers.clear(); _availability.isAvailable = true; @@ -1460,11 +1459,26 @@ MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device, _availability.acquisitionID = _swapchain->getNextAcquisitionID(); _availability.isAvailable = true; + + if (swapchain->isHeadless()) { + @autoreleasepool { + MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: getMTLPixelFormat() + width: pCreateInfo->extent.width + height: pCreateInfo->extent.height + mipmapped: NO]; + mtlTexDesc.usageMVK = MTLTextureUsageRenderTarget; + mtlTexDesc.storageModeMVK = MTLStorageModePrivate; + + _mtlTextureHeadless = [[getMTLDevice() newTextureWithDescriptor: mtlTexDesc] retain]; // retained + } + } } void MVKPresentableSwapchainImage::destroy() { releaseMetalDrawable(); + [_mtlTextureHeadless release]; + _mtlTextureHeadless = nil; MVKSwapchainImage::destroy(); } @@ -1498,8 +1512,8 @@ VkResult MVKPeerSwapchainImage::bindDeviceMemory2(const VkBindImageMemoryInfo* p #pragma mark Metal -id MVKPeerSwapchainImage::getCAMetalDrawable() { - return ((MVKSwapchainImage*)_swapchain->getPresentableImage(_swapchainIndex))->getCAMetalDrawable(); +id MVKPeerSwapchainImage::getMTLTexture(uint8_t planeIndex) { + return ((MVKSwapchainImage*)_swapchain->getPresentableImage(_swapchainIndex))->getMTLTexture(planeIndex); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h index f6ca8e78..6a2fa92f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h @@ -117,6 +117,9 @@ public: MVKSurface* createSurface(const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator); + MVKSurface* createSurface(const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator); + MVKSurface* createSurface(const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index 543a1fe7..de9ad021 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -102,6 +102,11 @@ MVKSurface* MVKInstance::createSurface(const VkMetalSurfaceCreateInfoEXT* pCreat return new MVKSurface(this, pCreateInfo, pAllocator); } +MVKSurface* MVKInstance::createSurface(const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator) { + return new MVKSurface(this, pCreateInfo, pAllocator); +} + MVKSurface* MVKInstance::createSurface(const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator) { return new MVKSurface(this, pCreateInfo, pAllocator); @@ -426,6 +431,8 @@ void MVKInstance::initProcAddrs() { ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfacePresentModesKHR, KHR_SURFACE); ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfaceCapabilities2KHR, KHR_GET_SURFACE_CAPABILITIES_2); ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfaceFormats2KHR, KHR_GET_SURFACE_CAPABILITIES_2); + ADD_INST_EXT_ENTRY_POINT(vkCreateHeadlessSurfaceEXT, EXT_HEADLESS_SURFACE); 
+ ADD_INST_EXT_ENTRY_POINT(vkCreateMetalSurfaceEXT, EXT_METAL_SURFACE); ADD_INST_EXT_ENTRY_POINT(vkCreateDebugReportCallbackEXT, EXT_DEBUG_REPORT); ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, EXT_DEBUG_REPORT); ADD_INST_EXT_ENTRY_POINT(vkDebugReportMessageEXT, EXT_DEBUG_REPORT); @@ -441,7 +448,6 @@ void MVKInstance::initProcAddrs() { ADD_INST_EXT_ENTRY_POINT(vkCreateDebugUtilsMessengerEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugUtilsMessengerEXT, EXT_DEBUG_UTILS); ADD_INST_EXT_ENTRY_POINT(vkSubmitDebugUtilsMessageEXT, EXT_DEBUG_UTILS); - ADD_INST_EXT_ENTRY_POINT(vkCreateMetalSurfaceEXT, EXT_METAL_SURFACE); #ifdef VK_USE_PLATFORM_IOS_MVK ADD_INST_EXT_ENTRY_POINT(vkCreateIOSSurfaceMVK, MVK_IOS_SURFACE); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h index 479965b4..5d23225b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h @@ -20,7 +20,7 @@ #include "MVKBaseObject.h" #include "MVKOSExtensions.h" -#include "mvk_datatypes.h" +#include "mvk_datatypes.hpp" #include #include diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h index 5746bfbf..453eac66 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h @@ -24,16 +24,6 @@ #import #import -#ifdef VK_USE_PLATFORM_IOS_MVK -# define PLATFORM_VIEW_CLASS UIView -# import -#endif - -#ifdef VK_USE_PLATFORM_MACOS_MVK -# define PLATFORM_VIEW_CLASS NSView -# import -#endif - class MVKInstance; class MVKSwapchain; @@ -59,6 +49,14 @@ public: /** Returns the CAMetalLayer underlying this surface. */ CAMetalLayer* getCAMetalLayer(); + /** Returns the extent of this surface. */ + VkExtent2D getExtent(); + + /** Returns the extent for which the underlying CAMetalLayer will not need to be scaled when composited. */ + VkExtent2D getNaturalExtent(); + + /** Returns whether this surface is headless. 
*/ + bool isHeadless() { return !_mtlCAMetalLayer && wasConfigurationSuccessful(); } #pragma mark Construction @@ -66,6 +64,10 @@ public: const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator); + MVKSurface(MVKInstance* mvkInstance, + const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator); + MVKSurface(MVKInstance* mvkInstance, const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator); @@ -76,7 +78,8 @@ protected: friend class MVKSwapchain; void propagateDebugName() override {} - void initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName); + void setActiveSwapchain(MVKSwapchain* swapchain); + void initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName, bool isHeadless); void releaseLayer(); std::mutex _layerLock; @@ -84,5 +87,6 @@ protected: CAMetalLayer* _mtlCAMetalLayer = nil; MVKBlockObserver* _layerObserver = nil; MVKSwapchain* _activeSwapchain = nullptr; + VkExtent2D _headlessExtent = {0xFFFFFFFF, 0xFFFFFFFF}; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm index 3899ab69..04855718 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm @@ -17,11 +17,26 @@ */ #include "MVKSurface.h" +#include "MVKSwapchain.h" #include "MVKInstance.h" #include "MVKFoundation.h" #include "MVKOSExtensions.h" +#include "mvk_datatypes.hpp" + +#import "CAMetalLayer+MoltenVK.h" #import "MVKBlockObserver.h" +#ifdef VK_USE_PLATFORM_IOS_MVK +# define PLATFORM_VIEW_CLASS UIView +# import +#endif + +#ifdef VK_USE_PLATFORM_MACOS_MVK +# define PLATFORM_VIEW_CLASS NSView +# import +#endif + + // We need to double-dereference the name to first convert to the platform symbol, then to a string. #define STR_PLATFORM(NAME) #NAME #define STR(NAME) STR_PLATFORM(NAME) @@ -34,38 +49,55 @@ CAMetalLayer* MVKSurface::getCAMetalLayer() { return _mtlCAMetalLayer; } +VkExtent2D MVKSurface::getExtent() { + return _mtlCAMetalLayer ? mvkVkExtent2DFromCGSize(_mtlCAMetalLayer.drawableSize) : _headlessExtent; +} + +VkExtent2D MVKSurface::getNaturalExtent() { + return _mtlCAMetalLayer ? mvkVkExtent2DFromCGSize(_mtlCAMetalLayer.naturalDrawableSizeMVK) : _headlessExtent; +} + +// Per spec, headless surface extent is set from the swapchain. +void MVKSurface::setActiveSwapchain(MVKSwapchain* swapchain) { + _activeSwapchain = swapchain; + _headlessExtent = swapchain->getImageExtent(); +} + MVKSurface::MVKSurface(MVKInstance* mvkInstance, const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { - initLayer((CAMetalLayer*)pCreateInfo->pLayer, "vkCreateMetalSurfaceEXT"); + initLayer((CAMetalLayer*)pCreateInfo->pLayer, "vkCreateMetalSurfaceEXT", false); +} + +MVKSurface::MVKSurface(MVKInstance* mvkInstance, + const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { + initLayer(nil, "vkCreateHeadlessSurfaceEXT", true); } // pCreateInfo->pView can be either a CAMetalLayer or a view (NSView/UIView). MVKSurface::MVKSurface(MVKInstance* mvkInstance, const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { + MVKLogWarn("%s() is deprecated. 
Use vkCreateMetalSurfaceEXT() from the VK_EXT_metal_surface extension.", STR(vkCreate_PLATFORM_SurfaceMVK)); // Get the platform object contained in pView - id obj = (id)pCreateInfo->pView; - // If it's a view (NSView/UIView), extract the layer, otherwise assume it's already a CAMetalLayer. + id obj = (id)pCreateInfo->pView; if ([obj isKindOfClass: [PLATFORM_VIEW_CLASS class]]) { - obj = ((PLATFORM_VIEW_CLASS*)obj).layer; - if ( !NSThread.isMainThread ) { - MVKLogWarn("%s(): You are not calling this function from the main thread. %s should only be accessed from the main thread. When using this function outside the main thread, consider passing the CAMetalLayer itself in %s::pView, instead of the %s.", - STR(vkCreate_PLATFORM_SurfaceMVK), STR(PLATFORM_VIEW_CLASS), STR(Vk_PLATFORM_SurfaceCreateInfoMVK), STR(PLATFORM_VIEW_CLASS)); - } + __block id layer; + mvkDispatchToMainAndWait(^{ layer = ((PLATFORM_VIEW_CLASS*)obj).layer; }); + obj = layer; } // Confirm that we were provided with a CAMetalLayer - initLayer([obj isKindOfClass: CAMetalLayer.class] ? (CAMetalLayer*)obj : nil, - STR(vkCreate_PLATFORM_SurfaceMVK)); + initLayer([obj isKindOfClass: CAMetalLayer.class] ? (CAMetalLayer*)obj : nil, STR(vkCreate_PLATFORM_SurfaceMVK), false); } -void MVKSurface::initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName) { +void MVKSurface::initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName, bool isHeadless) { _mtlCAMetalLayer = [mtlLayer retain]; // retained - if ( !_mtlCAMetalLayer ) { setConfigurationResult(reportError(VK_ERROR_SURFACE_LOST_KHR, "%s(): On-screen rendering requires a layer of type CAMetalLayer.", vkFuncName)); } + if ( !_mtlCAMetalLayer && !isHeadless ) { setConfigurationResult(reportError(VK_ERROR_SURFACE_LOST_KHR, "%s(): On-screen rendering requires a layer of type CAMetalLayer.", vkFuncName)); } // Sometimes, the owning view can replace its CAMetalLayer. // When that happens, the app needs to recreate the surface. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h index cd418bd1..d8eb535f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h @@ -23,7 +23,6 @@ #include "MVKSmallVector.h" #include -#import "CAMetalLayer+MoltenVK.h" #import class MVKWatermark; @@ -46,9 +45,15 @@ public: /** Returns the CAMetalLayer underlying the surface used by this swapchain. */ CAMetalLayer* getCAMetalLayer(); + /** Returns whether the surface is headless. */ + bool isHeadless(); + /** Returns the number of images in this swapchain. */ uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); } + /** Returns the size of the images in this swapchain. */ + VkExtent2D getImageExtent() { return _imageExtent; } + /** Returns the image at the specified index. */ MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; } @@ -126,7 +131,7 @@ protected: std::atomic _currentAcquisitionID = 0; std::mutex _presentHistoryLock; uint64_t _lastFrameTime = 0; - VkExtent2D _mtlLayerDrawableExtent = {0, 0}; + VkExtent2D _imageExtent = {0, 0}; std::atomic _unpresentedImageCount = 0; uint32_t _currentPerfLogFrameCount = 0; uint32_t _presentHistoryCount = 0; @@ -134,18 +139,3 @@ protected: uint32_t _presentHistoryHeadIndex = 0; bool _isDeliberatelyScaled = false; }; - - -#pragma mark - -#pragma mark Support functions - -/** - * Returns the natural extent of the CAMetalLayer. 
- * - * The natural extent is the size of the bounds property of the layer, - * multiplied by the contentsScale property of the layer, rounded - * to nearest integer using half-to-even rounding. - */ -static inline VkExtent2D mvkGetNaturalExtent(CAMetalLayer* mtlLayer) { - return mvkVkExtent2DFromCGSize(mtlLayer.naturalDrawableSizeMVK); -} diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index 63c3ac78..5beeee04 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -26,9 +26,11 @@ #include "MVKWatermarkTextureContent.h" #include "MVKWatermarkShaderSource.h" #include "mvk_datatypes.hpp" +#include + +#import "CAMetalLayer+MoltenVK.h" #import "MVKBlockObserver.h" -#include using namespace std; @@ -49,6 +51,8 @@ void MVKSwapchain::propagateDebugName() { CAMetalLayer* MVKSwapchain::getCAMetalLayer() { return _surface->getCAMetalLayer(); } +bool MVKSwapchain::isHeadless() { return _surface->isHeadless(); } + VkResult MVKSwapchain::getImages(uint32_t* pCount, VkImage* pSwapchainImages) { // Get the number of surface images @@ -124,16 +128,15 @@ VkResult MVKSwapchain::getSurfaceStatus() { return VK_SUCCESS; } -// This swapchain is optimally sized for the surface if the app has specified deliberate -// swapchain scaling, or the CAMetalLayer drawableSize has not changed since the swapchain -// was created, and the CAMetalLayer will not need to be scaled when composited. +// This swapchain is optimally sized for the surface if the app has specified +// deliberate swapchain scaling, or the surface extent has not changed since the +// swapchain was created, and the surface will not need to be scaled when composited. bool MVKSwapchain::hasOptimalSurface() { if (_isDeliberatelyScaled) { return true; } - auto* mtlLayer = getCAMetalLayer(); - VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(mtlLayer.drawableSize); - return (mvkVkExtent2DsAreEqual(drawExtent, _mtlLayerDrawableExtent) && - mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(mtlLayer))); + VkExtent2D surfExtent = _surface->getExtent(); + return (mvkVkExtent2DsAreEqual(surfExtent, _imageExtent) && + mvkVkExtent2DsAreEqual(surfExtent, _surface->getNaturalExtent())); } @@ -187,30 +190,29 @@ void MVKSwapchain::markFrameInterval() { VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - auto* mtlLayer = getCAMetalLayer(); -#if MVK_VISIONOS - // TODO: See if this can be obtained from OS instead - NSInteger framesPerSecond = 90; + auto* screen = getCAMetalLayer().screenMVK; // Will be nil if headless +#if MVK_MACOS && !MVK_MACCAT + double framesPerSecond = 60; + if (screen) { + CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue]; + CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId); + framesPerSecond = CGDisplayModeGetRefreshRate(mode); + CGDisplayModeRelease(mode); +#if MVK_XCODE_13 + if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)]) + framesPerSecond = [screen maximumFramesPerSecond]; +#endif + // Builtin panels, e.g., on MacBook, report a zero refresh rate. 
@@ -260,12 +262,6 @@ void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint6
 		_presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory;
 	}
 
-	// If actual present time is not available, use desired time instead, and if that
-	// hasn't been set, use the current time, which should be reasonably accurate (sub-ms),
-	// since we are here as part of the addPresentedHandler: callback.
-	if (actualPresentTime == 0) { actualPresentTime = presentInfo.desiredPresentTime; }
-	if (actualPresentTime == 0) { actualPresentTime = CACurrentMediaTime() * 1.0e9; }
-
 	_presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID;
 	_presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime;
 	_presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime;
@@ -380,12 +376,13 @@ void MVKSwapchain::setHDRMetadataEXT(const VkHdrMetadataEXT& metadata) {
 
 MVKSwapchain::MVKSwapchain(MVKDevice* device,
 						   const VkSwapchainCreateInfoKHR* pCreateInfo) :
 	MVKVulkanAPIDeviceObject(device),
-	_surface((MVKSurface*)pCreateInfo->surface) {
+	_surface((MVKSurface*)pCreateInfo->surface),
+	_imageExtent(pCreateInfo->imageExtent) {
 
 	// Check if oldSwapchain is properly set
 	auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain;
 	if (oldSwapchain == _surface->_activeSwapchain) {
-		_surface->_activeSwapchain = this;
+		_surface->setActiveSwapchain(this);
 	} else {
 		setConfigurationResult(reportError(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR, "vkCreateSwapchainKHR(): pCreateInfo->oldSwapchain does not match the VkSwapchain that is in use by the surface"));
 		return;
@@ -470,10 +467,11 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo,
 									VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo,
 									uint32_t imgCnt) {
 
-	if ( getIsSurfaceLost() ) { return; }
-
 	auto* mtlLayer = getCAMetalLayer();
+	if ( !mtlLayer || getIsSurfaceLost() ) { return; }
+
 	auto minMagFilter = mvkConfig().swapchainMinMagFilterUseNearest ? kCAFilterNearest : kCAFilterLinear;
+	mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_imageExtent);
 	mtlLayer.device = getMTLDevice();
 	mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat);
 	mtlLayer.maximumDrawableCountMVK = imgCnt;
@@ -491,15 +489,10 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo,
 	// presentations on the oldSwapchain to complete and call back, but if the drawableSize
 	// is not changing from the previous, we force those completions first.
 	auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain;
-	if (oldSwapchain && mvkVkExtent2DsAreEqual(pCreateInfo->imageExtent, mvkVkExtent2DFromCGSize(mtlLayer.drawableSize))) {
+	if (oldSwapchain && mvkVkExtent2DsAreEqual(pCreateInfo->imageExtent, _surface->getExtent())) {
 		oldSwapchain->forceUnpresentedImageCompletion();
 	}
 
-	// Remember the extent to later detect if it has changed under the covers,
-	// and set the drawable size of the CAMetalLayer from the extent.
-	_mtlLayerDrawableExtent = pCreateInfo->imageExtent;
-	mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_mtlLayerDrawableExtent);
-
 	if (pCreateInfo->compositeAlpha != VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
 		mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
 	}
@@ -585,14 +578,13 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo
 		}
 	}
 
-	auto* mtlLayer = getCAMetalLayer();
 	VkExtent2D imgExtent = pCreateInfo->imageExtent;
 	VkImageCreateInfo imgInfo = {
 		.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
 		.pNext = VK_NULL_HANDLE,
 		.imageType = VK_IMAGE_TYPE_2D,
-		.format = getPixelFormats()->getVkFormat(mtlLayer.pixelFormat),
-		.extent = { imgExtent.width, imgExtent.height, 1 },
+		.format = pCreateInfo->imageFormat,
+		.extent = mvkVkExtent3DFromVkExtent2D(imgExtent),
 		.mipLevels = 1,
 		.arrayLayers = 1,
 		.samples = VK_SAMPLE_COUNT_1_BIT,
@@ -618,14 +610,20 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo
 		_presentableImages.push_back(_device->createPresentableSwapchainImage(&imgInfo, this, imgIdx, nullptr));
 	}
 
-	NSString* screenName = @"Main Screen";
+	auto* mtlLayer = getCAMetalLayer();
+	if (mtlLayer) {
+		NSString* screenName = @"Main Screen";
 #if MVK_MACOS && !MVK_MACCAT
-	if ([mtlLayer.screenMVK respondsToSelector:@selector(localizedName)]) {
-		screenName = mtlLayer.screenMVK.localizedName;
-	}
+		auto* screen = mtlLayer.screenMVK;
+		if ([screen respondsToSelector:@selector(localizedName)]) {
+			screenName = screen.localizedName;
+		}
 #endif
-	MVKLogInfo("Created %d swapchain images with size (%d, %d) and contents scale %.1f in layer %s (%p) on screen %s.",
-			   imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, mtlLayer.name.UTF8String, mtlLayer, screenName.UTF8String);
+		MVKLogInfo("Created %d swapchain images with size (%d, %d) and contents scale %.1f in layer %s (%p) on screen %s.",
+				   imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, mtlLayer.name.UTF8String, mtlLayer, screenName.UTF8String);
+	} else {
+		MVKLogInfo("Created %d swapchain images with size (%d, %d) on headless surface.", imgCnt, imgExtent.width, imgExtent.height);
+	}
 }
 
 void MVKSwapchain::destroy() {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
index b7a4a64d..dfb536bb 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
@@ -128,7 +128,7 @@ uint64_t MVKSemaphoreMTLEvent::deferSignal() {
 }
 
 void MVKSemaphoreMTLEvent::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t deferToken) {
-	if (mtlCmdBuff) { [mtlCmdBuff encodeSignalEvent: _mtlEvent value: deferToken]; }
+	[mtlCmdBuff encodeSignalEvent: _mtlEvent value: deferToken];
 }
 
 MVKSemaphoreMTLEvent::MVKSemaphoreMTLEvent(MVKDevice* device,
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index d8c222bd..777d7252 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -109,6 +109,7 @@ MVK_EXTENSION(EXT_extended_dynamic_state3, EXT_EXTENDED_DYNAMIC_STATE
 MVK_EXTENSION(EXT_external_memory_host, EXT_EXTERNAL_MEMORY_HOST, DEVICE, 10.11, 8.0, 1.0)
 MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, DEVICE, 10.13, 11.0, 1.0)
 MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, DEVICE, 10.15, MVK_NA, MVK_NA)
+MVK_EXTENSION(EXT_headless_surface, EXT_HEADLESS_SURFACE, INSTANCE, 10.11, 8.0, 1.0)
 MVK_EXTENSION(EXT_host_query_reset, EXT_HOST_QUERY_RESET, DEVICE, 10.11, 8.0, 1.0)
 MVK_EXTENSION(EXT_image_robustness, EXT_IMAGE_ROBUSTNESS, DEVICE, 10.11, 8.0, 1.0)
 MVK_EXTENSION(EXT_inline_uniform_block, EXT_INLINE_UNIFORM_BLOCK, DEVICE, 10.11, 8.0, 1.0)
diff --git a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h
index 61a5c439..c78128ba 100644
--- a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h
+++ b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h
@@ -23,12 +23,10 @@
 #import
 
 #if MVK_IOS_OR_TVOS || MVK_MACCAT
-#	define PLATFORM_SCREEN_CLASS	UIScreen
 #	include
 #endif
 
 #if MVK_MACOS && !MVK_MACCAT
-#	define PLATFORM_SCREEN_CLASS	NSScreen
 #	include
 #endif
 
@@ -76,9 +74,16 @@
  */
 @property(nonatomic, readwrite) CFStringRef colorspaceNameMVK;
 
-#if !MVK_VISIONOS
+#if MVK_IOS_OR_TVOS || MVK_MACCAT
 /** Returns the screen on which this layer is rendering. */
-@property(nonatomic, readonly) PLATFORM_SCREEN_CLASS* screenMVK;
+@property(nonatomic, readonly) UIScreen* screenMVK;
+#endif
+
+#if MVK_MACOS && !MVK_MACCAT
+/** Returns the screen on which this layer is rendering. */
+@property(nonatomic, readonly) NSScreen* screenMVK;
+
+@property(nonatomic, readonly) NSScreen* privateScreenMVKImpl;
 #endif
 
 @end
diff --git a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.mm
similarity index 92%
rename from MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m
rename to MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.mm
index 9a8b10d8..380a9150 100644
--- a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m
+++ b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.mm
@@ -18,6 +18,7 @@
 
 #include "CAMetalLayer+MoltenVK.h"
+#include "MVKOSExtensions.h"
 
 #if MVK_MACOS && !MVK_MACCAT
 #	include
@@ -88,6 +89,13 @@
 #if MVK_MACOS && !MVK_MACCAT
 
 -(NSScreen*) screenMVK {
+	__block NSScreen* screen;
+	mvkDispatchToMainAndWait(^{ screen = self.privateScreenMVKImpl; });
+	return screen;
+}
+
+// Search for the screen currently displaying the layer, and default to the main screen if it can't be found.
+-(NSScreen*) privateScreenMVKImpl {
 	// If this layer has a delegate that is an NSView, and the view is in a window, retrieve the screen from the window.
 	if ([self.delegate isKindOfClass: NSView.class]) {
 		NSWindow* window = ((NSView*)self.delegate).window;
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index c08c5b3a..293826a3 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -3869,6 +3869,26 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkSetHdrMetadataEXT(
 }
 
 
+#pragma mark -
+#pragma mark VK_EXT_headless_surface extension
+
+MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreateHeadlessSurfaceEXT(
+	VkInstance                                  instance,
+	const VkHeadlessSurfaceCreateInfoEXT*       pCreateInfo,
+	const VkAllocationCallbacks*                pAllocator,
+	VkSurfaceKHR*                               pSurface) {
+
+	MVKTraceVulkanCallStart();
+	MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance);
+	MVKSurface* mvkSrfc = mvkInst->createSurface(pCreateInfo, pAllocator);
+	*pSurface = (VkSurfaceKHR)mvkSrfc;
+	VkResult rslt = mvkSrfc->getConfigurationResult();
+	if (rslt < 0) { *pSurface = VK_NULL_HANDLE; mvkInst->destroySurface(mvkSrfc, pAllocator); }
+	MVKTraceVulkanCallEnd();
+	return rslt;
+}
+
+
 #pragma mark -
 #pragma mark VK_EXT_host_query_reset extension
 
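The vkCreateHeadlessSurfaceEXT entry point added above is driven by the application like any other surface-creation call. A minimal usage sketch (illustrative only, not part of this patch; `createHeadlessSurface` is a hypothetical helper, and VK_EXT_headless_surface must be enabled on the VkInstance):

#include <vulkan/vulkan.h>

// Hypothetical helper: create a headless surface. Depending on the loader in use,
// the entry point may need to be fetched with vkGetInstanceProcAddr, as shown here,
// rather than linked directly.
static VkResult createHeadlessSurface(VkInstance instance, VkSurfaceKHR* pSurface) {
	VkHeadlessSurfaceCreateInfoEXT createInfo = {};
	createInfo.sType = VK_STRUCTURE_TYPE_HEADLESS_SURFACE_CREATE_INFO_EXT;
	createInfo.pNext = nullptr;
	createInfo.flags = 0;

	auto pfn = (PFN_vkCreateHeadlessSurfaceEXT)
			vkGetInstanceProcAddr(instance, "vkCreateHeadlessSurfaceEXT");
	return pfn ? pfn(instance, &createInfo, nullptr, pSurface) : VK_ERROR_EXTENSION_NOT_PRESENT;
}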
diff --git a/README.md b/README.md
index 5c205b08..2fa36526 100644
--- a/README.md
+++ b/README.md
@@ -149,21 +149,14 @@ for which to build the external libraries. The platform choices include:
 	--maccat
 	--tvos
 	--tvossim
-	--visionos
-	--visionossim
-
-The `visionos` and `visionossim` selections require Xcode 15+.
 
 You can specify multiple of these selections. The result is a single `XCFramework`
 for each external dependency library, with each `XCFramework` containing binaries for
 each of the requested platforms.
 
-The `--all` selection is the same as entering all of the other platform choices, except
-`--visionos` and `--visionossim`, and will result in a single `XCFramework` for each
-external dependency library, with each `XCFramework` containing binaries for all supported
-platforms and simulators. The `--visionos` and `--visionossim` selections must be invoked
-with a separate invocation of `fetchDependencies`, because those selections require
-Xcode 15+, and will cause a multi-platform build on older versions of Xcode to abort.
+The `--all` selection is the same as entering all of the other platform choices,
+and will result in a single `XCFramework` for each external dependency library,
+with each `XCFramework` containing binaries for all supported platforms and simulators.
 
 Running `fetchDependencies` repeatedly with different platforms will accumulate targets
 in the `XCFramework`, if the `--keep-cache` option is used on each invocation.
@@ -263,8 +256,6 @@ from the command line. The following `make` targets are provided:
 	make maccat
 	make tvos
 	make tvossim
-	make visionos
-	make visionossim
 
 	make all-debug
 	make macos-debug
@@ -273,15 +264,12 @@ from the command line. The following `make` targets are provided:
 	make maccat-debug
 	make tvos-debug
 	make tvossim-debug
-	make visionos-debug
-	make visionossim-debug
 
 	make clean
 	make install
 
 - Running `make` repeatedly with different targets will accumulate binaries for these different targets.
-- The `all` target executes all platform targets, except `visionos` and `visionossim`, as these require
-  Xcode 15+, and will abort a multi-platform build on older versions of Xcode.
+- The `all` target executes all platform targets.
 - The `all` target is the default target. Running `make` with no arguments is the same as running `make all`.
 - The `*-debug` targets build the binaries using the **_Debug_** configuration.
 - The `install` target will copy the most recently built `MoltenVK.xcframework` into the