diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 8f9530a1..b49e17b3 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -29,6 +29,7 @@ Released TBD - Fix incorrect translation of clear color values on Apple Silicon. - Fix swizzle of depth and stencil values into RGBA (`float4`) variable in shaders. - Fix pipeline barriers not working inside self-dependent subpasses on Apple GPUs. +- Fix GPU race condition when clearing a renderpass input attachment on Apple GPUs. - Disable `VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT` for `VK_FORMAT_E5B9G9R9_UFLOAT_PACK32` on macOS Apple Silicon. - Support alpha-to-coverage without a color attachment. diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index a8888c93..baf9ce64 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -213,7 +213,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { } // Running this stage prematurely ended the render pass, so we have to start it up again. // TODO: On iOS, maybe we could use a tile shader to avoid this. - cmdEncoder->beginMetalRenderPass(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); break; } case kMVKGraphicsStageRasterization: @@ -416,7 +416,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { } // Running this stage prematurely ended the render pass, so we have to start it up again. // TODO: On iOS, maybe we could use a tile shader to avoid this. - cmdEncoder->beginMetalRenderPass(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); break; } case kMVKGraphicsStageRasterization: @@ -686,7 +686,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; } // Switch back to rendering now, since we don't have compute stages to run anyway. 
- cmdEncoder->beginMetalRenderPass(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); } cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal @@ -749,7 +749,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { mtlIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); // Running this stage prematurely ended the render pass, so we have to start it up again. // TODO: On iOS, maybe we could use a tile shader to avoid this. - cmdEncoder->beginMetalRenderPass(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); break; case kMVKGraphicsStageRasterization: if (pipeline->isTessellationPipeline()) { @@ -1019,7 +1019,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; } // Switch back to rendering now, since we don't have compute stages to run anyway. - cmdEncoder->beginMetalRenderPass(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); } cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal @@ -1084,7 +1084,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { mtlTempIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); // Running this stage prematurely ended the render pass, so we have to start it up again. // TODO: On iOS, maybe we could use a tile shader to avoid this. 
- cmdEncoder->beginMetalRenderPass(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); break; case kMVKGraphicsStageRasterization: if (pipeline->isTessellationPipeline()) { diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm index a927d008..3a82e28d 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm @@ -130,7 +130,7 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { } if (needsRenderpassRestart) { cmdEncoder->encodeStoreActions(true); - cmdEncoder->beginMetalRenderPass(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); } } #endif diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 7cf349de..a4d81da8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -1310,6 +1310,23 @@ void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { [mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangle vertexStart: 0 vertexCount: vtxCnt]; [mtlRendEnc popDebugGroup]; +#if MVK_APPLE_SILICON + // Apple GPUs do not support rendering/writing to an attachment and then reading from + // that attachment within a single Metal renderpass. So, if any of the attachments just + // cleared is an input attachment, we need to restart into separate Metal renderpasses. 
+ bool needsRenderpassRestart = false; + for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { + if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) { + needsRenderpassRestart = true; + break; + } + } + if (needsRenderpassRestart) { + cmdEncoder->encodeStoreActions(true); + cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); + } +#endif + // Return to the previous rendering state on the next render activity cmdEncoder->_graphicsPipelineState.markDirty(); cmdEncoder->_depthStencilState.markDirty(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 7bf8ebfb..32aef4e0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -296,7 +296,7 @@ public: void beginNextMultiviewPass(); /** Begins a Metal render pass for the current render subpass. */ - void beginMetalRenderPass(bool loadOverride = false); + void beginMetalRenderPass(MVKCommandUse cmdUse); /** If a render encoder is active, encodes store actions for all attachments to it. 
*/ void encodeStoreActions(bool storeOverride = false); @@ -498,7 +498,7 @@ protected: void finishQueries(); void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex); void clearRenderArea(); - NSString* getMTLRenderCommandEncoderName(); + NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse); void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints); void encodeTimestampStageCounterSamples(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 411e17cb..c29db0a5 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -326,22 +326,23 @@ void MVKCommandEncoder::setSubpass(MVKCommand* subpassCmd, (_device->_pMetalFeatures->multisampleLayeredRendering || (getSubpass()->getSampleCount() == VK_SAMPLE_COUNT_1_BIT))); - beginMetalRenderPass(); + beginMetalRenderPass(_renderSubpassIndex == 0 ? kMVKCommandUseBeginRenderPass : kMVKCommandUseNextSubpass); } void MVKCommandEncoder::beginNextMultiviewPass() { encodeStoreActions(); _multiviewPassIndex++; - beginMetalRenderPass(); + beginMetalRenderPass(kMVKCommandUseNextSubpass); } uint32_t MVKCommandEncoder::getMultiviewPassIndex() { return _multiviewPassIndex; } // Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder. 
-void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) { +void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { endCurrentMetalEncoding(); + bool isRestart = cmdUse == kMVKCommandUseRestartSubpass; MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor]; getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, @@ -350,7 +351,7 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) { _attachments.contents(), _clearValues.contents(), _isRenderingEntireAttachment, - loadOverride); + isRestart); if (_cmdBuffer->_needsVisibilityResultMTLBuffer) { if ( !_pEncodingContext->visibilityResultBuffer ) { _pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true); @@ -389,9 +390,12 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) { } _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; // not retained - setLabelIfNotNil(_mtlRenderEncoder, getMTLRenderCommandEncoderName()); + setLabelIfNotNil(_mtlRenderEncoder, getMTLRenderCommandEncoderName(cmdUse)); - if ( !_isRenderingEntireAttachment ) { clearRenderArea(); } + // We shouldn't clear the render area if we are restarting the Metal renderpass + // separately from a Vulkan subpass, and we otherwise only need to clear render + // area if we're not rendering to the entire attachment. 
+ if ( !isRestart && !_isRenderingEntireAttachment ) { clearRenderArea(); } _graphicsPipelineState.beginMetalRenderPass(); _graphicsResourcesState.beginMetalRenderPass(); @@ -418,7 +422,7 @@ void MVKCommandEncoder::encodeStoreActions(bool storeOverride) { MVKRenderSubpass* MVKCommandEncoder::getSubpass() { return _renderPass->getSubpass(_renderSubpassIndex); } // Returns a name for use as a MTLRenderCommandEncoder label -NSString* MVKCommandEncoder::getMTLRenderCommandEncoderName() { +NSString* MVKCommandEncoder::getMTLRenderCommandEncoderName(MVKCommandUse cmdUse) { NSString* rpName; rpName = _renderPass->getDebugName(); @@ -427,7 +431,6 @@ NSString* MVKCommandEncoder::getMTLRenderCommandEncoderName() { rpName = _cmdBuffer->getDebugName(); if (rpName) { return rpName; } - MVKCommandUse cmdUse = (_renderSubpassIndex == 0) ? kMVKCommandUseBeginRenderPass : kMVKCommandUseNextSubpass; return mvkMTLRenderCommandEncoderLabel(cmdUse); } @@ -901,6 +904,7 @@ NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse) { switch (cmdUse) { case kMVKCommandUseBeginRenderPass: return @"vkCmdBeginRenderPass RenderEncoder"; case kMVKCommandUseNextSubpass: return @"vkCmdNextSubpass RenderEncoder"; + case kMVKCommandUseRestartSubpass: return @"Metal renderpass restart RenderEncoder"; case kMVKCommandUseBlitImage: return @"vkCmdBlitImage RenderEncoder"; case kMVKCommandUseResolveImage: return @"vkCmdResolveImage (resolve stage) RenderEncoder"; case kMVKCommandUseResolveExpandImage: return @"vkCmdResolveImage (expand stage) RenderEncoder"; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index 31f9b770..93695ba4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -67,6 +67,9 @@ public: /** Returns whether or not the color attachment at the specified index is being used. 
*/ bool isColorAttachmentUsed(uint32_t colorAttIdx); + /** Returns whether or not the color attachment is used as both a color attachment and an input attachment. */ + bool isColorAttachmentAlsoInputAttachment(uint32_t colorAttIdx); + /** Returns the format of the depth/stencil attachment. */ VkFormat getDepthStencilFormat(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 72393992..47dc9c36 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -59,6 +59,19 @@ bool MVKRenderSubpass::isColorAttachmentUsed(uint32_t colorAttIdx) { return _colorAttachments[colorAttIdx].attachment != VK_ATTACHMENT_UNUSED; } + +bool MVKRenderSubpass::isColorAttachmentAlsoInputAttachment(uint32_t colorAttIdx) { + if (colorAttIdx >= _colorAttachments.size()) { return false; } + + uint32_t rspAttIdx = _colorAttachments[colorAttIdx].attachment; + if (rspAttIdx == VK_ATTACHMENT_UNUSED) { return false; } + + for (auto& inAtt : _inputAttachments) { + if (inAtt.attachment == rspAttIdx) { return true; } + } + return false; +} + VkFormat MVKRenderSubpass::getDepthStencilFormat() { uint32_t rpAttIdx = _depthStencilAttachment.attachment; if (rpAttIdx == VK_ATTACHMENT_UNUSED) { return VK_FORMAT_UNDEFINED; } diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 1fd226c1..5a3f0376 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -63,7 +63,7 @@ typedef struct { /** Tracks the Vulkan command currently being used. */ typedef enum : uint8_t { - kMVKCommandUseNone, /**< No use defined. */ + kMVKCommandUseNone = 0, /**< No use defined. */ kMVKCommandUseEndCommandBuffer, /**< vkEndCommandBuffer (prefilled VkCommandBuffer). */ kMVKCommandUseQueueSubmit, /**< vkQueueSubmit. */ kMVKCommandUseAcquireNextImage, /**< vkAcquireNextImageKHR. 
*/ @@ -73,6 +73,8 @@ typedef enum : uint8_t { kMVKCommandUseInvalidateMappedMemoryRanges, /**< vkInvalidateMappedMemoryRanges. */ kMVKCommandUseBeginRenderPass, /**< vkCmdBeginRenderPass. */ kMVKCommandUseNextSubpass, /**< vkCmdNextSubpass. */ + kMVKCommandUseBeginMultiViewPass, /**< vkCmdBeginRenderPass (multiview pass). */ + kMVKCommandUseRestartSubpass, /**< Restart a subpass because of an explicit or implicit barrier. */ kMVKCommandUsePipelineBarrier, /**< vkCmdPipelineBarrier. */ kMVKCommandUseBlitImage, /**< vkCmdBlitImage. */ kMVKCommandUseCopyImage, /**< vkCmdCopyImage. */