diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 30b85f2f..c8e4524a 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -19,6 +19,7 @@ MoltenVK 1.2.8 Released TBD - Fix potential crash when using multi-planar images. +- Ensure buffers available for buffer addresses in push constants. diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm index 3e2d6d7f..a0eb0379 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm @@ -46,7 +46,7 @@ void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) { MTLRegion mtlThreadgroupCount = MTLRegionMake3D(_baseGroupX, _baseGroupY, _baseGroupZ, _groupCountX, _groupCountY, _groupCountZ); cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal id mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch); - auto* pipeline = cmdEncoder->_computePipelineState.getComputePipeline(); + auto* pipeline = cmdEncoder->getComputePipeline(); if (pipeline->allowsDispatchBase()) { if ([mtlEncoder respondsToSelector: @selector(setStageInRegion:)]) { // We'll use the stage-input region to pass the base along to the shader. diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index 9bc2e6c1..714bddbe 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -148,7 +148,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->restartMetalRenderPassIfNeeded(); - auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + auto* pipeline = cmdEncoder->getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -372,7 +372,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->restartMetalRenderPassIfNeeded(); - auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + auto* pipeline = cmdEncoder->getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -649,7 +649,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->restartMetalRenderPassIfNeeded(); - auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + auto* pipeline = cmdEncoder->getGraphicsPipeline(); // Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -1000,7 +1000,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI MVKIndexMTLBufferBinding ibb = ibbOrig; MVKIndexMTLBufferBinding ibbTriFan = ibb; - auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + auto* pipeline = cmdEncoder->getGraphicsPipeline(); MVKVertexAdjustments vtxAdjmts; vtxAdjmts.mtlIndexType = ibb.mtlIndexType; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index a618d0b8..6bcea53f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -357,6 +357,12 @@ public: */ id getMTLEncoder(); + /** Returns the graphics pipeline. */ + MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)_graphicsPipelineState.getPipeline(); } + + /** Returns the compute pipeline. */ + MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)_computePipelineState.getPipeline(); } + /** Returns the push constants associated with the specified shader stage. */ MVKPushConstantsCommandEncoderState* getPushConstants(VkShaderStageFlagBits shaderStage); @@ -448,6 +454,9 @@ public: /** Tracks the current compute resources state of the encoder. */ MVKComputeResourcesCommandEncoderState _computeResourcesState; + /** Tracks whether the GPU-addressable buffers need to be used. */ + MVKGPUAddressableBuffersCommandEncoderState _gpuAddressableBuffersState; + /** Tracks the current depth stencil state of the encoder. */ MVKDepthStencilCommandEncoderState _depthStencilState; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 9ae225ff..031830aa 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -709,14 +709,15 @@ void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) { // Must happen before switching encoders. encodeStoreActions(true); } - _graphicsPipelineState.encode(stage); // Must do first..it sets others - _graphicsResourcesState.encode(stage); // Before push constants, to allow them to override. + _graphicsPipelineState.encode(stage); // Must do first..it sets others _depthStencilState.encode(stage); - _renderingState.encode(stage); + _graphicsResourcesState.encode(stage); // Before push constants, to allow them to override. _vertexPushConstants.encode(stage); _tessCtlPushConstants.encode(stage); _tessEvalPushConstants.encode(stage); _fragmentPushConstants.encode(stage); + _gpuAddressableBuffersState.encode(stage); // After resources and push constants + _renderingState.encode(stage); _occlusionQueryState.encode(stage); } @@ -771,9 +772,10 @@ void MVKCommandEncoder::beginMetalComputeEncoding(MVKCommandUse cmdUse) { } void MVKCommandEncoder::finalizeDispatchState() { - _computePipelineState.encode(); // Must do first..it sets others - _computeResourcesState.encode(); // Before push constants, to allow them to override. + _computePipelineState.encode(); // Must do first..it sets others + _computeResourcesState.encode(); // Before push constants, to allow them to override. _computePushConstants.encode(); + _gpuAddressableBuffersState.encode(); // After resources and push constants } void MVKCommandEncoder::endRendering() { @@ -1142,6 +1144,7 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer, _graphicsResourcesState(this), _computePipelineState(this), _computeResourcesState(this), + _gpuAddressableBuffersState(this), _depthStencilState(this), _renderingState(this), _occlusionQueryState(this), diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index d5e5aefe..4ab4eb98 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -129,8 +129,6 @@ public: void bindPipeline(MVKPipeline* pipeline); MVKPipeline* getPipeline(); - MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)getPipeline(); } - MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)getPipeline(); } MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} @@ -641,6 +639,26 @@ protected: }; +#pragma mark - +#pragma mark MVKGPUAddressableBuffersCommandEncoderState + +/** Tracks whether the GPU-addressable buffers need to be used. */ +class MVKGPUAddressableBuffersCommandEncoderState : public MVKCommandEncoderState { + +public: + + /** Marks that GPU addressable buffers may be needed in the specified shader stage. */ + void useGPUAddressableBuffersInStage(MVKShaderStage shaderStage); + + MVKGPUAddressableBuffersCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {} + +protected: + void encodeImpl(uint32_t stage) override; + + bool _usageStages[kMVKShaderStageCount] = {}; +}; + + #pragma mark - #pragma mark MVKOcclusionQueryCommandEncoderState diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index fa4d4b8c..d7fa40c0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -36,7 +36,7 @@ MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEn MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); } bool MVKCommandEncoderState::isDynamicState(MVKRenderStateType state) { - auto* gpl = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + auto* gpl = _cmdEncoder->getGraphicsPipeline(); return !gpl || gpl->isDynamicState(state); } @@ -100,12 +100,14 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { _pushConstants.data(), _pushConstants.size(), _mtlBufferIndex, true); + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageVertex); _isDirty = false; // Okay, I changed the encoder } else if (!isTessellating() && stage == kMVKGraphicsStageRasterization) { _cmdEncoder->setVertexBytes(_cmdEncoder->_mtlRenderEncoder, _pushConstants.data(), _pushConstants.size(), _mtlBufferIndex, true); + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageVertex); _isDirty = false; // Okay, I changed the encoder } break; @@ -115,6 +117,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { _pushConstants.data(), _pushConstants.size(), _mtlBufferIndex, true); + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageTessCtl); _isDirty = false; // Okay, I changed the encoder } break; @@ -124,6 +127,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { _pushConstants.data(), _pushConstants.size(), _mtlBufferIndex, true); + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageTessEval); _isDirty = false; // Okay, I changed the encoder } break; @@ -133,6 +137,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { _pushConstants.data(), _pushConstants.size(), _mtlBufferIndex, true); + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageFragment); _isDirty = false; // Okay, I changed the encoder } break; @@ -141,6 +146,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { _pushConstants.data(), _pushConstants.size(), _mtlBufferIndex, true); + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageCompute); _isDirty = false; // Okay, I changed the encoder break; default: @@ -150,7 +156,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) { } bool MVKPushConstantsCommandEncoderState::isTessellating() { - auto* gp = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + auto* gp = _cmdEncoder->getGraphicsPipeline(); return gp ? gp->isTessellationPipeline() : false; } @@ -835,11 +841,6 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag encodeMetalArgumentBuffer(stage); - MVKPipeline* pipeline = getPipeline(); - if (pipeline && pipeline->usesPhysicalStorageBufferAddressesCapability(stage)) { - getDevice()->encodeGPUAddressableBuffers(this, stage); - } - auto& shaderStage = _shaderStageResourceBindings[stage]; if (shaderStage.swizzleBufferBinding.isDirty) { @@ -873,9 +874,15 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag bindImplicitBuffer(_cmdEncoder, shaderStage.viewRangeBufferBinding, viewRange.contents()); } + bool wereBufferBindingsDirty = shaderStage.areBufferBindingsDirty; encodeBinding(shaderStage.bufferBindings, shaderStage.areBufferBindingsDirty, bindBuffer); encodeBinding(shaderStage.textureBindings, shaderStage.areTextureBindingsDirty, bindTexture); encodeBinding(shaderStage.samplerStateBindings, shaderStage.areSamplerStateBindingsDirty, bindSampler); + + // If any buffers have been bound, mark the GPU addressable buffers as needed. + if (wereBufferBindingsDirty && !shaderStage.areBufferBindingsDirty ) { + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(MVKShaderStage(stage)); + } } void MVKGraphicsResourcesCommandEncoderState::offsetZeroDivisorVertexBuffers(MVKGraphicsStage stage, @@ -923,7 +930,7 @@ static const NSUInteger MTLAttributeStrideStatic = NSUIntegerMax; void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { - auto* pipeline = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline(); + auto* pipeline = _cmdEncoder->getGraphicsPipeline(); bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle; bool forTessellation = pipeline->isTessellationPipeline(); bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride); @@ -1181,11 +1188,6 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) { encodeMetalArgumentBuffer(kMVKShaderStageCompute); - MVKPipeline* pipeline = getPipeline(); - if (pipeline && pipeline->usesPhysicalStorageBufferAddressesCapability(kMVKShaderStageCompute)) { - getDevice()->encodeGPUAddressableBuffers(this, kMVKShaderStageCompute); - } - if (_resourceBindings.swizzleBufferBinding.isDirty) { for (auto& b : _resourceBindings.textureBindings) { if (b.isDirty) { updateImplicitBuffer(_resourceBindings.swizzleConstants, b.index, b.swizzle); } @@ -1197,6 +1199,7 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) { _resourceBindings.swizzleBufferBinding.index); } else { + MVKPipeline* pipeline = getPipeline(); bool fullImageViewSwizzle = pipeline ? pipeline->fullImageViewSwizzle() : false; assertMissingSwizzles(_resourceBindings.needsSwizzle && !fullImageViewSwizzle, "compute", _resourceBindings.textureBindings.contents()); } @@ -1221,6 +1224,7 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) { } + bool wereBufferBindingsDirty = _resourceBindings.areBufferBindingsDirty; encodeBinding(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void { if (b.isInline) { @@ -1251,6 +1255,11 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) { [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) setSamplerState: b.mtlSamplerState atIndex: b.index]; }); + + // If any buffers have been bound, mark the GPU addressable buffers as needed. + if (wereBufferBindingsDirty && !_resourceBindings.areBufferBindingsDirty ) { + _cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageCompute); + } } MVKPipeline* MVKComputeResourcesCommandEncoderState::getPipeline() { @@ -1280,6 +1289,34 @@ void MVKComputeResourcesCommandEncoderState::markOverriddenBufferIndexesDirty() } +#pragma mark - +#pragma mark MVKGPUAddressableBuffersCommandEncoderState + +void MVKGPUAddressableBuffersCommandEncoderState::useGPUAddressableBuffersInStage(MVKShaderStage shaderStage) { + MVKPipeline* pipeline = (shaderStage == kMVKShaderStageCompute + ? (MVKPipeline*)_cmdEncoder->getComputePipeline() + : (MVKPipeline*)_cmdEncoder->getGraphicsPipeline()); + if (pipeline && pipeline->usesPhysicalStorageBufferAddressesCapability(shaderStage)) { + _usageStages[shaderStage] = true; + markDirty(); + } +} + +void MVKGPUAddressableBuffersCommandEncoderState::encodeImpl(uint32_t stage) { + auto* mvkDev = getDevice(); + for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) { + MVKShaderStage shaderStage = MVKShaderStage(i); + if (_usageStages[shaderStage]) { + MVKResourcesCommandEncoderState* rezEncState = (shaderStage == kMVKShaderStageCompute + ? (MVKResourcesCommandEncoderState*)&_cmdEncoder->_computeResourcesState + : (MVKResourcesCommandEncoderState*)&_cmdEncoder->_graphicsResourcesState); + mvkDev->encodeGPUAddressableBuffers(rezEncState, shaderStage); + } + } + mvkClear(_usageStages); +} + + #pragma mark - #pragma mark MVKOcclusionQueryCommandEncoderState diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 275d0f55..6747bfa4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -908,8 +908,8 @@ protected: MVKPhysicalDevice* _physicalDevice = nullptr; MVKCommandResourceFactory* _commandResourceFactory = nullptr; MVKSmallVector, kMVKQueueFamilyCount> _queuesByQueueFamilyIndex; - MVKSmallVector _resources; - MVKSmallVector _gpuAddressableBuffers; + MVKSmallVector _resources; + MVKSmallVector _gpuAddressableBuffers; MVKSmallVector _privateDataSlots; MVKSmallVector _privateDataSlotsAvailability; MVKSmallVector _awaitingSemaphores; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 60ba4540..9bcac0d8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -153,7 +153,7 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { .separateDepthStencilLayouts = true, .hostQueryReset = true, .timelineSemaphore = true, - .bufferDeviceAddress = mvkOSVersionIsAtLeast(12.05, 16.0, 1.0), + .bufferDeviceAddress = mvkOSVersionIsAtLeast(13.0, 16.0, 1.0), .bufferDeviceAddressCaptureReplay = false, .bufferDeviceAddressMultiDevice = false, .vulkanMemoryModel = false,