From 13e8103651e5a32c7c607cf95a734cbf88c4e764 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Fri, 14 Apr 2023 17:30:32 -0400 Subject: [PATCH] Ensure shaders using PhysicalStorageBufferAddresses encode the associated MTLBuffer. - MVKDevice track VkBuffers marked with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. - Add SPIRVToMSLConversionResultInfo::usesPhysicalStorageBufferAddressesCapability to detect and track shaders that use PhysicalStorageBufferAddresses capability, and track such shader stages within pipeline. - MVKResourcesCommandEncoderState encode usage of VkBuffers marked with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT when pipeline uses PhysicalStorageBufferAddresses capability. - Rename MVKResourcesCommandEncoderState::encodeArgumentBufferResourceUsage() to encodeResourceUsage(). - MVKDevice move some functions to public scope and remove friend classes. - MVKDeviceMemory ensure _vkMemAllocFlags is always initialized (unrelated). - Rename MVKFoundation template method contains() to mvkContains() (unrelated). --- Docs/Whats_New.md | 1 + .../Commands/MVKCommandEncoderState.h | 26 ++--- .../Commands/MVKCommandEncoderState.mm | 28 +++-- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 20 ++-- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 34 ++++-- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 104 +++++++++++------- .../MoltenVK/GPUObjects/MVKDeviceMemory.mm | 5 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 11 ++ MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 24 +++- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 2 +- .../SPIRVToMSLConverter.cpp | 13 +++ .../SPIRVToMSLConverter.h | 2 + 13 files changed, 180 insertions(+), 92 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 01d6be49..f22472d7 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -22,6 +22,7 @@ Released TBD - `VK_KHR_map_memory2` - Support BC compression on iOS/tvOS where available (iOS/tvOS 16.4 and above and supported by the GPU). - Fix memory leak when waiting on timeline semaphores. +- Ensure shaders that use `PhysicalStorageBufferAddresses` encode the use of the associated `MTLBuffer`. - Add `MVK_ENABLE_EXPLICIT_LOD_WORKAROUND` environment variable to selectively disable recent fixes to handling LOD for arrayed depth images in shaders, on Apple Silicon, when those fixes cause regression in rendering behavior. diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 8667aabf..9caca16c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -355,11 +355,11 @@ public: MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex); - /** Encodes the Metal resource to the Metal command encoder. */ - virtual void encodeArgumentBufferResourceUsage(MVKShaderStage stage, - id mtlResource, - MTLResourceUsage mtlUsage, - MTLRenderStages mtlStages) = 0; + /** Encodes the indirect use of the Metal resource to the Metal command encoder. 
*/ + virtual void encodeResourceUsage(MVKShaderStage stage, + id mtlResource, + MTLResourceUsage mtlUsage, + MTLRenderStages mtlStages) = 0; void markDirty() override; @@ -548,10 +548,10 @@ public: std::function bindTexture, std::function bindSampler); - void encodeArgumentBufferResourceUsage(MVKShaderStage stage, - id mtlResource, - MTLResourceUsage mtlUsage, - MTLRenderStages mtlStages) override; + void encodeResourceUsage(MVKShaderStage stage, + id mtlResource, + MTLResourceUsage mtlUsage, + MTLRenderStages mtlStages) override; /** Offset all buffers for vertex attribute bindings with zero divisors by the given number of strides. */ void offsetZeroDivisorVertexBuffers(MVKGraphicsStage stage, MVKGraphicsPipeline* pipeline, uint32_t firstInstance); @@ -609,10 +609,10 @@ public: /** Sets the current dynamic offset buffer state. */ void bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding, bool needDynamicOffsetBuffer); - void encodeArgumentBufferResourceUsage(MVKShaderStage stage, - id mtlResource, - MTLResourceUsage mtlUsage, - MTLRenderStages mtlStages) override; + void encodeResourceUsage(MVKShaderStage stage, + id mtlResource, + MTLResourceUsage mtlUsage, + MTLRenderStages mtlStages) override; /** * Marks the buffer binding using the index as having been overridden, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index d8fc5d21..c1f7b4c0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -693,6 +693,11 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag encodeMetalArgumentBuffer(stage); + MVKPipeline* pipeline = getPipeline(); + if (pipeline && pipeline->usesPhysicalStorageBufferAddressesCapability(stage)) { + getDevice()->encodeGPUAddressableBuffers(this, stage); + } + auto& shaderStage = _shaderStageResourceBindings[stage]; if (shaderStage.swizzleBufferBinding.isDirty) { @@ -963,10 +968,10 @@ void MVKGraphicsResourcesCommandEncoderState::bindMetalArgumentBuffer(MVKShaderS bindBuffer(stage, buffBind); } -void MVKGraphicsResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(MVKShaderStage stage, - id mtlResource, - MTLResourceUsage mtlUsage, - MTLRenderStages mtlStages) { +void MVKGraphicsResourcesCommandEncoderState::encodeResourceUsage(MVKShaderStage stage, + id mtlResource, + MTLResourceUsage mtlUsage, + MTLRenderStages mtlStages) { if (mtlResource && mtlStages) { if (stage == kMVKShaderStageTessCtl) { auto* mtlCompEnc = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); @@ -1039,8 +1044,10 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) { encodeMetalArgumentBuffer(kMVKShaderStageCompute); - MVKPipeline* pipeline = getPipeline(); - bool fullImageViewSwizzle = pipeline ? pipeline->fullImageViewSwizzle() : false; + MVKPipeline* pipeline = getPipeline(); + if (pipeline && pipeline->usesPhysicalStorageBufferAddressesCapability(kMVKShaderStageCompute)) { + getDevice()->encodeGPUAddressableBuffers(this, kMVKShaderStageCompute); + } if (_resourceBindings.swizzleBufferBinding.isDirty) { for (auto& b : _resourceBindings.textureBindings) { @@ -1053,6 +1060,7 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) { _resourceBindings.swizzleBufferBinding.index); } else { + bool fullImageViewSwizzle = pipeline ? 
pipeline->fullImageViewSwizzle() : false; assertMissingSwizzles(_resourceBindings.needsSwizzle && !fullImageViewSwizzle, "compute", _resourceBindings.textureBindings.contents()); } @@ -1116,10 +1124,10 @@ void MVKComputeResourcesCommandEncoderState::bindMetalArgumentBuffer(MVKShaderSt bindBuffer(buffBind); } -void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(MVKShaderStage stage, - id mtlResource, - MTLResourceUsage mtlUsage, - MTLRenderStages mtlStages) { +void MVKComputeResourcesCommandEncoderState::encodeResourceUsage(MVKShaderStage stage, + id mtlResource, + MTLResourceUsage mtlUsage, + MTLRenderStages mtlStages) { if (mtlResource) { auto* mtlCompEnc = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch); [mtlCompEnc useResource: mtlResource usage: mtlUsage]; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index 172fb036..c13296d2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -786,10 +786,8 @@ void MVKBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoder atIndex: argIdx]; } if (encodeUsage) { - rezEncState->encodeArgumentBufferResourceUsage(stage, - _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil, - getMTLResourceUsage(), - mvkDSLBind->getMTLRenderStages()); + id mtlBuffer = _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil; + rezEncState->encodeResourceUsage(stage, mtlBuffer, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } } @@ -876,10 +874,8 @@ void MVKInlineUniformBlockDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCo atIndex: argIdx]; } if (encodeUsage) { - rezEncState->encodeArgumentBufferResourceUsage(stage, - _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil, - getMTLResourceUsage(), - mvkDSLBind->getMTLRenderStages()); + id mtlBuffer = _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil; + rezEncState->encodeResourceUsage(stage, mtlBuffer, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } } @@ -994,7 +990,7 @@ void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderS [mtlArgEncoder setTexture: mtlTexture atIndex: argIdx]; } if (encodeUsage) { - rezEncState->encodeArgumentBufferResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { id mtlTex = mtlTexture.parentTexture ? 
mtlTexture.parentTexture : mtlTexture; @@ -1005,7 +1001,7 @@ void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderS [mtlArgEncoder setBuffer: mtlBuff offset: mtlTex.bufferOffset atIndex: argIdx]; } if (encodeUsage) { - rezEncState->encodeArgumentBufferResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } } } @@ -1294,7 +1290,7 @@ void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEn [mtlArgEncoder setTexture: mtlTexture atIndex: argIdx]; } if (encodeUsage) { - rezEncState->encodeArgumentBufferResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { @@ -1305,7 +1301,7 @@ void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEn [mtlArgEncoder setBuffer: mtlBuff offset: mtlTexture.bufferOffset atIndex: argIdx]; } if (encodeUsage) { - rezEncState->encodeArgumentBufferResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); + rezEncState->encodeResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 22f8649e..5eddac11 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -63,6 +63,7 @@ class MVKSamplerYcbcrConversion; class MVKDescriptorSetLayout; class MVKDescriptorPool; class MVKDescriptorUpdateTemplate; +class MVKResourcesCommandEncoderState; class MVKFramebuffer; class MVKRenderPass; class MVKCommandPool; @@ -660,6 +661,22 @@ public: #pragma mark Operations + /** Tell the GPU to be ready to use any of the GPU-addressable buffers. */ + void encodeGPUAddressableBuffers(MVKResourcesCommandEncoderState* rezEncState, + MVKShaderStage stage); + + /** Adds the specified host semaphore to be woken upon device loss. */ + void addSemaphore(MVKSemaphoreImpl* sem4); + + /** Removes the specified host semaphore. */ + void removeSemaphore(MVKSemaphoreImpl* sem4); + + /** Adds the specified timeline semaphore to be woken at the specified value upon device loss. */ + void addTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); + + /** Removes the specified timeline semaphore. */ + void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); + /** Applies the specified global memory barrier to all resource issued by this device. 
*/ void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -855,19 +872,11 @@ public: } protected: - friend class MVKSemaphoreEmulated; - friend class MVKTimelineSemaphoreMTLEvent; - friend class MVKTimelineSemaphoreEmulated; - friend class MVKFence; - friend class MVKEventEmulated; - void propagateDebugName() override {} - MVKResource* addResource(MVKResource* rez); - MVKResource* removeResource(MVKResource* rez); - void addSemaphore(MVKSemaphoreImpl* sem4); - void removeSemaphore(MVKSemaphoreImpl* sem4); - void addTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); - void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); + MVKBuffer* addBuffer(MVKBuffer* mvkBuff); + MVKBuffer* removeBuffer(MVKBuffer* mvkBuff); + MVKImage* addImage(MVKImage* mvkImg); + MVKImage* removeImage(MVKImage* mvkImg); void initPerformanceTracking(); void initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo); void initQueues(const VkDeviceCreateInfo* pCreateInfo); @@ -887,6 +896,7 @@ protected: MVKCommandResourceFactory* _commandResourceFactory = nullptr; MVKSmallVector, kMVKQueueFamilyCount> _queuesByQueueFamilyIndex; MVKSmallVector _resources; + MVKSmallVector _gpuAddressableBuffers; MVKSmallVector _privateDataSlots; MVKSmallVector _privateDataSlotsAvailability; MVKSmallVector _awaitingSemaphores; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 6f2dbe78..3251449c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -3545,15 +3545,14 @@ uint32_t MVKDevice::getVulkanMemoryTypeIndex(MTLStorageMode mtlStorageMode) { MVKBuffer* MVKDevice::createBuffer(const VkBufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator) { - return (MVKBuffer*)addResource(new MVKBuffer(this, pCreateInfo)); + return addBuffer(new MVKBuffer(this, pCreateInfo)); } void MVKDevice::destroyBuffer(MVKBuffer* mvkBuff, const VkAllocationCallbacks* pAllocator) { - if (mvkBuff) { - removeResource(mvkBuff); - mvkBuff->destroy(); - } + if ( !mvkBuff ) { return; } + removeBuffer(mvkBuff); + mvkBuff->destroy(); } MVKBufferView* MVKDevice::createBufferView(const VkBufferViewCreateInfo* pCreateInfo, @@ -3582,20 +3581,14 @@ MVKImage* MVKDevice::createImage(const VkImageCreateInfo* pCreateInfo, MVKImage* mvkImg = (swapchainInfo) ? 
new MVKPeerSwapchainImage(this, pCreateInfo, (MVKSwapchain*)swapchainInfo->swapchain, uint32_t(-1)) : new MVKImage(this, pCreateInfo); - for (auto& memoryBinding : mvkImg->_memoryBindings) { - addResource(memoryBinding); - } - return mvkImg; + return addImage(mvkImg); } void MVKDevice::destroyImage(MVKImage* mvkImg, const VkAllocationCallbacks* pAllocator) { - if (mvkImg) { - for (auto& memoryBinding : mvkImg->_memoryBindings) { - removeResource(memoryBinding); - } - mvkImg->destroy(); - } + if ( !mvkImg ) { return; } + removeImage(mvkImg); + mvkImg->destroy(); } MVKImageView* MVKDevice::createImageView(const VkImageViewCreateInfo* pCreateInfo, @@ -3636,22 +3629,16 @@ MVKPresentableSwapchainImage* MVKDevice::createPresentableSwapchainImage(const V MVKSwapchain* swapchain, uint32_t swapchainIndex, const VkAllocationCallbacks* pAllocator) { - MVKPresentableSwapchainImage* mvkImg = new MVKPresentableSwapchainImage(this, pCreateInfo, - swapchain, swapchainIndex); - for (auto& memoryBinding : mvkImg->_memoryBindings) { - addResource(memoryBinding); - } - return mvkImg; + auto* pImg = new MVKPresentableSwapchainImage(this, pCreateInfo, swapchain, swapchainIndex); + addImage(pImg); + return pImg; } void MVKDevice::destroyPresentableSwapchainImage(MVKPresentableSwapchainImage* mvkImg, const VkAllocationCallbacks* pAllocator) { - if (mvkImg) { - for (auto& memoryBinding : mvkImg->_memoryBindings) { - removeResource(memoryBinding); - } - mvkImg->destroy(); - } + if ( !mvkImg ) { return; } + removeImage(mvkImg); + mvkImg->destroy(); } MVKFence* MVKDevice::createFence(const VkFenceCreateInfo* pCreateInfo, @@ -3987,42 +3974,79 @@ void MVKDevice::destroyPrivateDataSlot(VkPrivateDataSlotEXT privateDataSlot, mvkPDS->destroy(); } - #pragma mark Operations -// Adds the specified resource for tracking, and returns the added resource. -MVKResource* MVKDevice::addResource(MVKResource* rez) { +// If the underlying MTLBuffer is referenced in a shader only via its gpuAddress, +// the GPU might not be aware that the MTLBuffer needs to be made resident. +// Track the buffer as needing to be made resident if a shader is bound that uses +// PhysicalStorageBufferAddresses to access the contents of the underlying MTLBuffer. +MVKBuffer* MVKDevice::addBuffer(MVKBuffer* mvkBuff) { + if ( !mvkBuff ) { return mvkBuff; } + lock_guard lock(_rezLock); - _resources.push_back(rez); - return rez; + _resources.push_back(mvkBuff); + if (mvkIsAnyFlagEnabled(mvkBuff->getUsage(), VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)) { + _gpuAddressableBuffers.push_back(mvkBuff); + } + return mvkBuff; } -// Removes the specified resource for tracking and returns the removed resource. -MVKResource* MVKDevice::removeResource(MVKResource* rez) { +MVKBuffer* MVKDevice::removeBuffer(MVKBuffer* mvkBuff) { + if ( !mvkBuff ) { return mvkBuff; } + lock_guard lock(_rezLock); - mvkRemoveFirstOccurance(_resources, rez); - return rez; + mvkRemoveFirstOccurance(_resources, mvkBuff); + if (mvkIsAnyFlagEnabled(mvkBuff->getUsage(), VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)) { + mvkRemoveFirstOccurance(_gpuAddressableBuffers, mvkBuff); + } + return mvkBuff; +} + +void MVKDevice::encodeGPUAddressableBuffers(MVKResourcesCommandEncoderState* rezEncState, MVKShaderStage stage) { + MTLResourceUsage mtlUsage = MTLResourceUsageRead | MTLResourceUsageWrite; + MTLRenderStages mtlRendStage = (stage == kMVKShaderStageFragment) ? 
MTLRenderStageFragment : MTLRenderStageVertex; + + lock_guard lock(_rezLock); + for (auto& buff : _gpuAddressableBuffers) { + rezEncState->encodeResourceUsage(stage, buff->getMTLBuffer(), mtlUsage, mtlRendStage); + } +} + +MVKImage* MVKDevice::addImage(MVKImage* mvkImg) { + if ( !mvkImg ) { return mvkImg; } + + lock_guard lock(_rezLock); + for (auto& mb : mvkImg->_memoryBindings) { + _resources.push_back(mb); + } + return mvkImg; +} + +MVKImage* MVKDevice::removeImage(MVKImage* mvkImg) { + if ( !mvkImg ) { return mvkImg; } + + lock_guard lock(_rezLock); + for (auto& mb : mvkImg->_memoryBindings) { + mvkRemoveFirstOccurance(_resources, mb); + } + return mvkImg; } -// Adds the specified host semaphore to be woken upon device loss. void MVKDevice::addSemaphore(MVKSemaphoreImpl* sem4) { lock_guard lock(_sem4Lock); _awaitingSemaphores.push_back(sem4); } -// Removes the specified host semaphore. void MVKDevice::removeSemaphore(MVKSemaphoreImpl* sem4) { lock_guard lock(_sem4Lock); mvkRemoveFirstOccurance(_awaitingSemaphores, sem4); } -// Adds the specified timeline semaphore to be woken at the specified value upon device loss. void MVKDevice::addTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value) { lock_guard lock(_sem4Lock); _awaitingTimelineSem4s.emplace_back(sem4, value); } -// Removes the specified timeline semaphore. void MVKDevice::removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value) { lock_guard lock(_sem4Lock); mvkRemoveFirstOccurance(_awaitingTimelineSem4s, make_pair(sem4, value)); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm index e7e97ea9..76bc37c5 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm @@ -160,7 +160,7 @@ VkResult MVKDeviceMemory::addImageMemoryBinding(MVKImageMemoryBinding* mvkImg) { // If a dedicated alloc, ensure this image is the one and only image // I am dedicated to. If my image is aliasable, though, allow other aliasable // images to bind to me. - if (_isDedicated && (_imageMemoryBindings.empty() || !(contains(_imageMemoryBindings, mvkImg) || (_imageMemoryBindings[0]->_image->getIsAliasable() && mvkImg->_image->getIsAliasable()))) ) { + if (_isDedicated && (_imageMemoryBindings.empty() || !(mvkContains(_imageMemoryBindings, mvkImg) || (_imageMemoryBindings[0]->_image->getIsAliasable() && mvkImg->_image->getIsAliasable()))) ) { return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind VkImage %p to a VkDeviceMemory dedicated to resource %p. A dedicated allocation may only be used with the resource it was dedicated to.", mvkImg, getDedicatedResource() ); } @@ -180,7 +180,7 @@ bool MVKDeviceMemory::ensureMTLHeap() { if (_mtlHeap) { return true; } - // Can't create a MTLHeap on a imported memory + // Can't create a MTLHeap on imported memory if (_isHostMemImported) { return true; } // Don't bother if we don't have placement heaps. 
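For context, the residency problem this tracking solves can be shown in isolation. The sketch below is illustrative, not MoltenVK source (the helper name and buffer list are assumed); it mirrors what encodeGPUAddressableBuffers() arranges through MVKResourcesCommandEncoderState::encodeResourceUsage(). A buffer that a shader reaches only through its raw 64-bit GPU address is invisible to Metal's automatic residency tracking, so its usage must be declared to the encoder explicitly.

#import <Metal/Metal.h>
#include <vector>

// Illustrative sketch: declare residency for buffers that are reachable
// only via a raw GPU address, so they are mapped before the pass runs.
static void markGPUAddressableResident(id<MTLRenderCommandEncoder> mtlRendEnc,
                                       const std::vector<id<MTLBuffer>>& gpuAddressableBuffers,
                                       MTLRenderStages mtlStages) {
    for (id<MTLBuffer> mtlBuff : gpuAddressableBuffers) {
        // Read|write is the conservative choice; the shader-side pointer
        // may be used for either kind of access.
        [mtlRendEnc useResource: mtlBuff
                          usage: MTLResourceUsageRead | MTLResourceUsageWrite
                         stages: mtlStages];
    }
}

A compute encoder takes the same useResource:usage: call without the stages: argument, which is why encodeResourceUsage() is overridden separately by the graphics and compute encoder states above.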
@@ -284,6 +284,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator) : MVKVulkanAPIDeviceObject(device) { // Set Metal memory parameters + _vkMemAllocFlags = 0; _vkMemPropFlags = _device->_pMemoryProperties->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags; _mtlStorageMode = mvkMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags); _mtlCPUCacheMode = mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(_vkMemPropFlags); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 19822de6..3fa4dce7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -63,7 +63,7 @@ id MVKImagePlane::getMTLTexture() { offset: memoryBinding->getDeviceMemoryOffset() + _subresources[0].layout.offset]; if (_image->_isAliasable) { [_mtlTexture makeAliasable]; } } else if (_image->_isAliasable && dvcMem && dvcMem->isDedicatedAllocation() && - !contains(dvcMem->_imageMemoryBindings, memoryBinding)) { + !mvkContains(dvcMem->_imageMemoryBindings, memoryBinding)) { // This is a dedicated allocation, but it belongs to another aliasable image. // In this case, use the MTLTexture from the memory's dedicated image. // We know the other image must be aliasable, or I couldn't have been bound diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index caffd5c2..8d09b87e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -164,6 +164,9 @@ public: mvkIsAnyFlagEnabled(_flags, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)); } + /** Returns whether the shader for the stage uses physical storage buffer addresses. */ + virtual bool usesPhysicalStorageBufferAddressesCapability(MVKShaderStage stage) = 0; + /** Constructs an instance for the device. layout, and parent (which may be NULL). */ MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVKPipelineLayout* layout, VkPipelineCreateFlags flags, MVKPipeline* parent); @@ -270,6 +273,8 @@ public: /** Returns whether this pipeline has custom sample positions enabled. */ bool isUsingCustomSamplePositions() { return _isUsingCustomSamplePositions; } + bool usesPhysicalStorageBufferAddressesCapability(MVKShaderStage stage) override; + /** * Returns whether the MTLBuffer vertex shader buffer index is valid for a stage of this pipeline. * It is if it is a descriptor binding within the descriptor binding range, @@ -338,6 +343,8 @@ protected: MVKMTLFunction getMTLFunction(SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pShaderStage, const char* pStageName); + void markIfUsingPhysicalStorageBufferAddressesCapability(SPIRVToMSLConversionResultInfo& resultsInfo, + MVKShaderStage stage); const VkPipelineShaderStageCreateInfo* _pVertexSS = nullptr; const VkPipelineShaderStageCreateInfo* _pTessCtlSS = nullptr; @@ -356,6 +363,7 @@ protected: MVKSmallVector _zeroDivisorVertexBindings; MVKSmallVector _mtlArgumentEncoders; MVKSmallVector _descriptorBindingUse; + MVKSmallVector _stagesUsingPhysicalStorageBufferAddressesCapability; MTLComputePipelineDescriptor* _mtlTessVertexStageDesc = nil; id _mtlTessVertexFunctions[3] = {nil, nil, nil}; @@ -425,6 +433,8 @@ public: /** Returns the array of descriptor binding use for the descriptor set. 
*/ MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) override { return _descriptorBindingUse[descSetIndex]; } + bool usesPhysicalStorageBufferAddressesCapability(MVKShaderStage stage) override; + /** Constructs an instance for the device and parent (which may be NULL). */ MVKComputePipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, @@ -446,6 +456,7 @@ protected: bool _needsDynamicOffsetBuffer = false; bool _needsDispatchBaseBuffer = false; bool _allowsDispatchBase = false; + bool _usesPhysicalStorageBufferAddressesCapability = false; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 652699ac..e24e24e8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -929,6 +929,7 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor* _needsVertexDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer; _needsVertexViewRangeBuffer = funcRslts.needsViewRangeBuffer; _needsVertexOutputBuffer = funcRslts.needsOutputBuffer; + markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageVertex); addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageVertex); @@ -998,6 +999,7 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLComputePipelineDescriptor _needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer; _needsVertexDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer; _needsVertexOutputBuffer = funcRslts.needsOutputBuffer; + markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageVertex); } addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageVertex); @@ -1057,6 +1059,7 @@ bool MVKGraphicsPipeline::addTessCtlShaderToPipeline(MTLComputePipelineDescripto _needsTessCtlOutputBuffer = funcRslts.needsOutputBuffer; _needsTessCtlPatchOutputBuffer = funcRslts.needsPatchOutputBuffer; _needsTessCtlInputBuffer = funcRslts.needsInputThreadgroupMem; + markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageTessCtl); addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageTessCtl); @@ -1113,6 +1116,7 @@ bool MVKGraphicsPipeline::addTessEvalShaderToPipeline(MTLRenderPipelineDescripto _needsTessEvalSwizzleBuffer = funcRslts.needsSwizzleBuffer; _needsTessEvalBufferSizeBuffer = funcRslts.needsBufferSizeBuffer; _needsTessEvalDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer; + markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageTessEval); addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageTessEval); @@ -1170,6 +1174,7 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto _needsFragmentBufferSizeBuffer = funcRslts.needsBufferSizeBuffer; _needsFragmentDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer; _needsFragmentViewRangeBuffer = funcRslts.needsViewRangeBuffer; + markIfUsingPhysicalStorageBufferAddressesCapability(funcRslts, kMVKShaderStageFragment); addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageFragment); @@ -1804,6 +1809,17 @@ MVKMTLFunction MVKGraphicsPipeline::getMTLFunction(SPIRVToMSLConversionConfigura return func; } +void MVKGraphicsPipeline::markIfUsingPhysicalStorageBufferAddressesCapability(SPIRVToMSLConversionResultInfo& resultsInfo, + MVKShaderStage stage) { + if (resultsInfo.usesPhysicalStorageBufferAddressesCapability) { + 
_stagesUsingPhysicalStorageBufferAddressesCapability.push_back(stage); + } +} + +bool MVKGraphicsPipeline::usesPhysicalStorageBufferAddressesCapability(MVKShaderStage stage) { + return mvkContains(_stagesUsingPhysicalStorageBufferAddressesCapability, stage); +} + MVKGraphicsPipeline::~MVKGraphicsPipeline() { @synchronized (getMTLDevice()) { [_mtlTessVertexStageDesc release]; @@ -1952,6 +1968,7 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI _needsBufferSizeBuffer = funcRslts.needsBufferSizeBuffer; _needsDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer; _needsDispatchBaseBuffer = funcRslts.needsDispatchBaseBuffer; + _usesPhysicalStorageBufferAddressesCapability = funcRslts.usesPhysicalStorageBufferAddressesCapability; addMTLArgumentEncoders(func, pCreateInfo, shaderConfig, kMVKShaderStageCompute); @@ -1962,6 +1979,10 @@ uint32_t MVKComputePipeline::getImplicitBufferIndex(uint32_t bufferIndexOffset) return _device->_pMetalFeatures->maxPerStageBufferCount - (bufferIndexOffset + 1); } +bool MVKComputePipeline::usesPhysicalStorageBufferAddressesCapability(MVKShaderStage stage) { + return _usesPhysicalStorageBufferAddressesCapability; +} + MVKComputePipeline::~MVKComputePipeline() { @synchronized (getMTLDevice()) { [_mtlPipelineState release]; @@ -2428,7 +2449,8 @@ namespace mvk { scr.needsDynamicOffsetBuffer, scr.needsInputThreadgroupMem, scr.needsDispatchBaseBuffer, - scr.needsViewRangeBuffer); + scr.needsViewRangeBuffer, + scr.usesPhysicalStorageBufferAddressesCapability); } } diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 58b85d15..a0c96742 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -484,7 +484,7 @@ void mvkReleaseContainerContents(C& container) { /** Returns whether the container contains an item equal to the value. */ template -bool contains(C& container, const T& val) { +bool mvkContains(C& container, const T& val) { for (const T& cVal : container) { if (cVal == val) { return true; } } return false; } diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp index f1672e2b..b95b704d 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp @@ -366,6 +366,7 @@ MVK_PUBLIC_SYMBOL bool SPIRVToMSLConverter::convert(SPIRVToMSLConversionConfigur conversionResult.resultInfo.needsInputThreadgroupMem = pMSLCompiler && pMSLCompiler->needs_input_threadgroup_mem(); conversionResult.resultInfo.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer(); conversionResult.resultInfo.needsViewRangeBuffer = pMSLCompiler && pMSLCompiler->needs_view_mask_buffer(); + conversionResult.resultInfo.usesPhysicalStorageBufferAddressesCapability = usesPhysicalStorageBufferAddressesCapability(pMSLCompiler); // When using Metal argument buffers, if the shader is provided with dynamic buffer offsets, // then it needs a buffer to hold these dynamic offsets. 
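For reference, a buffer lands on the new _gpuAddressableBuffers list only if the app created it for device-address use. The hedged sketch below shows that standard Vulkan 1.2 / VK_KHR_buffer_device_address pattern; findMemoryType() is a hypothetical helper and all error handling is elided.

#include <vulkan/vulkan.h>

// Hypothetical helper; a real app searches VkPhysicalDeviceMemoryProperties.
extern uint32_t findMemoryType(VkDevice device, const VkMemoryRequirements* pMemReqs);

VkDeviceAddress makeAddressableBuffer(VkDevice device, VkDeviceSize size, VkBuffer* pBuffer) {
    // This usage flag is what MVKDevice::addBuffer() keys on.
    VkBufferCreateInfo buffInfo = {};
    buffInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    buffInfo.size = size;
    buffInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
    buffInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    vkCreateBuffer(device, &buffInfo, NULL, pBuffer);

    VkMemoryRequirements memReqs;
    vkGetBufferMemoryRequirements(device, *pBuffer, &memReqs);

    // The backing memory must also be allocated with the device-address flag.
    VkMemoryAllocateFlagsInfo flagsInfo = {};
    flagsInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
    flagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;

    VkMemoryAllocateInfo allocInfo = {};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.pNext = &flagsInfo;
    allocInfo.allocationSize = memReqs.size;
    allocInfo.memoryTypeIndex = findMemoryType(device, &memReqs);

    VkDeviceMemory mem;
    vkAllocateMemory(device, &allocInfo, NULL, &mem);
    vkBindBufferMemory(device, *pBuffer, mem, 0);

    // The raw address the shader will later dereference.
    VkBufferDeviceAddressInfo addrInfo = {};
    addrInfo.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
    addrInfo.buffer = *pBuffer;
    return vkGetBufferDeviceAddress(device, &addrInfo);
}

Once that address reaches a shader (typically via a push constant or another buffer), no descriptor references the buffer at draw time; the shader instead declares the PhysicalStorageBufferAddresses capability, which the converter change below detects and reports back to the pipeline.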
@@ -533,3 +534,15 @@ void SPIRVToMSLConverter::populateEntryPoint(Compiler* pCompiler, populateWorkgroupDimension(wgSize.height, spvEP.workgroup_size.y, heightSC); populateWorkgroupDimension(wgSize.depth, spvEP.workgroup_size.z, depthSC); } + +bool SPIRVToMSLConverter::usesPhysicalStorageBufferAddressesCapability(Compiler* pCompiler) { + if (pCompiler) { + auto& declaredCapabilities = pCompiler->get_declared_capabilities(); + for(auto dc: declaredCapabilities) { + if (dc == CapabilityPhysicalStorageBufferAddresses) { + return true; + } + } + } + return false; +} diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h index 765e110f..1789ee93 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h @@ -244,6 +244,7 @@ namespace mvk { bool needsInputThreadgroupMem = false; bool needsDispatchBaseBuffer = false; bool needsViewRangeBuffer = false; + bool usesPhysicalStorageBufferAddressesCapability = false; } SPIRVToMSLConversionResultInfo; @@ -300,6 +301,7 @@ namespace mvk { void writeSPIRVToFile(std::string spvFilepath, std::string& log); void populateWorkgroupDimension(SPIRVWorkgroupSizeDimension& wgDim, uint32_t size, SPIRV_CROSS_NAMESPACE::SpecializationConstant& spvSpecConst); void populateEntryPoint(SPIRV_CROSS_NAMESPACE::Compiler* pCompiler, SPIRVToMSLConversionOptions& options, SPIRVEntryPoint& entryPoint); + bool usesPhysicalStorageBufferAddressesCapability(SPIRV_CROSS_NAMESPACE::Compiler* pCompiler); std::vector _spirv; };
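As a usage note, the capability scan added to the converter can also be exercised standalone against a SPIR-V binary. A minimal sketch, assuming a SPIRV-Cross version that provides Compiler::get_declared_capabilities() (the same API the new member function relies on):

#include <spirv_msl.hpp>
#include <cstdint>
#include <vector>

// Returns whether the module declares OpCapability PhysicalStorageBufferAddresses,
// mirroring SPIRVToMSLConverter::usesPhysicalStorageBufferAddressesCapability().
static bool moduleUsesPhysicalStorageBufferAddresses(std::vector<uint32_t> spirv) {
    SPIRV_CROSS_NAMESPACE::CompilerMSL compiler(std::move(spirv));
    for (auto cap : compiler.get_declared_capabilities()) {
        if (cap == spv::CapabilityPhysicalStorageBufferAddresses) { return true; }
    }
    return false;
}

The older PhysicalStorageBufferAddressesEXT token aliases the same capability value, so this one comparison covers shaders compiled against either the KHR or the EXT form of the extension.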