Ensure compute encoding state is marked dirty for non-dispatch commands.

The same compute encoder is used across dispatches and other commands, which may override compute state, and end up breaking subsequent dispatches. - Mark compute encoding state dirty when following commands, which use Metal compute encoders, are issued: - vkCmdCopyBuffer() - vkCmdClearColorImage() - vkCmdClearDepthStencilImage() - vkCmdFillBuffer() - vkCmdCopyQueryPoolResults() - MVKCommandEncoder move marking compute state dirty from endCurrentMetalEncoding() to getMTLComputeEncoder(). - For efficiency, don't prematurely force end of query copy compute encoder used on renderpass end, in case compute dispatches follow. - Update MoltenVK to 1.2.5 (unrelated).
2023-05-26 00:06:40 -04:00 · 2023-05-26 00:06:40 -04:00 · 83a1811230
commit 83a1811230
parent 4c6bfbebd1
7 changed files with 30 additions and 18 deletions
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@ -13,6 +13,15 @@ Copyright (c) 2015-2023 [The Brenwill Workshop Ltd.](http://www.brenwill.com)
 MoltenVK 1.2.5
 --------------
 Released TBD
 - Ensure non-dispatch compute commands don't interfere with compute encoding state used by dispatch commands.
 MoltenVK 1.2.4
 --------------
--- a/MoltenVK/MoltenVK/API/mvk_config.h
+++ b/MoltenVK/MoltenVK/API/mvk_config.h
@ -45,7 +45,7 @@ extern "C" {
 */
 #define MVK_VERSION_MAJOR   1
 #define MVK_VERSION_MINOR   2
-#define MVK_VERSION_PATCH   4
+#define MVK_VERSION_PATCH   5
 #define MVK_MAKE_VERSION(major, minor, patch)    (((major) * 10000) + ((minor) * 100) + (patch))
 #define MVK_VERSION     MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@ -955,7 +955,7 @@ void MVKCmdCopyBuffer<N>::encode(MVKCommandEncoder* cmdEncoder) {
 			copyInfo.dstOffset = (uint32_t)cpyRgn.dstOffset;
 			copyInfo.size = (uint32_t)cpyRgn.size;
-			id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyBuffer);
+			id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyBuffer, true);
 			[mtlComputeEnc pushDebugGroup: @"vkCmdCopyBuffer"];
 			[mtlComputeEnc setComputePipelineState: cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferBytesMTLComputePipelineState()];
 			[mtlComputeEnc setBuffer:srcMTLBuff offset: srcMTLBuffOffset atIndex: 0];
@ -1141,7 +1141,7 @@ void MVKCmdBufferImageCopy<N>::encode(MVKCommandEncoder* cmdEncoder) {
            info.offset = cpyRgn.imageOffset;
            info.extent = cpyRgn.imageExtent;
            bool needsTempBuff = mipLevel != 0;
-            id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse);
+			id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse, false);  // Compute state will be marked dirty on next compute encoder after Blit encoder below.
            id<MTLComputePipelineState> mtlComputeState = cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff);
            [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBufferToImage"];
            [mtlComputeEnc setComputePipelineState: mtlComputeState];
@ -1580,7 +1580,7 @@ void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
            // Luckily for us, linear images only have one mip and one array layer under Metal.
            assert( !isDS );
            id<MTLComputePipelineState> mtlClearState = cmdEncoder->getCommandEncodingPool()->getCmdClearColorImageMTLComputePipelineState(pixFmts->getFormatType(_image->getVkFormat()));
-            id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseClearColorImage);
+            id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseClearColorImage, true);
            [mtlComputeEnc pushDebugGroup: @"vkCmdClearColorImage"];
            [mtlComputeEnc setComputePipelineState: mtlClearState];
            [mtlComputeEnc setTexture: imgMTLTex atIndex: 0];
@ -1747,7 +1747,7 @@ void MVKCmdFillBuffer::encode(MVKCommandEncoder* cmdEncoder) {
 	NSUInteger tgWidth = std::min(cps.maxTotalThreadsPerThreadgroup, cmdEncoder->getMTLDevice().maxThreadsPerThreadgroup.width);
 	NSUInteger tgCount = _wordCount / tgWidth;
-	id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseFillBuffer);
+	id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseFillBuffer, true);
 	[mtlComputeEnc pushDebugGroup: @"vkCmdFillBuffer"];
 	[mtlComputeEnc setComputePipelineState: cps];
 	[mtlComputeEnc setBytes: &_dataValue length: sizeof(_dataValue) atIndex: 1];
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@ -337,10 +337,12 @@ public:
 	 * Returns the current Metal compute encoder for the specified use,
 	 * which determines the label assigned to the returned encoder.
 	 *
-	 * If the current encoder is not a compute encoder, this function ends current before 
+	 * If the current encoder is a compute encoder, the compute state being tracked can
-	 * beginning compute encoding.
+	 * optionally be marked dirty. Otherwise, if the current encoder is not a compute
 	 * encoder, this function ends the current encoder before beginning compute encoding.
 	 */
-	id<MTLComputeCommandEncoder> getMTLComputeEncoder(MVKCommandUse cmdUse);
+	id<MTLComputeCommandEncoder> getMTLComputeEncoder(MVKCommandUse cmdUse,
 													  bool markCurrentComputeStateDirty = false);
 	/**
 	 * Returns the current Metal BLIT encoder for the specified use,
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@ -839,10 +839,6 @@ void MVKCommandEncoder::endMetalRenderEncoding() {
 void MVKCommandEncoder::endCurrentMetalEncoding() {
 	endMetalRenderEncoding();
 	_computePipelineState.markDirty();
 	_computeResourcesState.markDirty();
 	_computePushConstants.markDirty();
 	if (_mtlComputeEncoder && _cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlComputeEncoder updateFence: getStageCountersMTLFence()]; }
 	endMetalEncoding(_mtlComputeEncoder);
 	_mtlComputeEncoderUse = kMVKCommandUseNone;
@ -854,12 +850,18 @@ void MVKCommandEncoder::endCurrentMetalEncoding() {
 	encodeTimestampStageCounterSamples();
 }
-id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) {
+id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse, bool markCurrentComputeStateDirty) {
 	if ( !_mtlComputeEncoder ) {
 		endCurrentMetalEncoding();
 		_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder];
 		retainIfImmediatelyEncoding(_mtlComputeEncoder);
 		beginMetalComputeEncoding(cmdUse);
 		markCurrentComputeStateDirty = true;	// Always mark current compute state dirty for new encoder
 	}
 	if(markCurrentComputeStateDirty) {
 		_computePipelineState.markDirty();
 		_computePushConstants.markDirty();
 		_computeResourcesState.markDirty();
 	}
 	if (_mtlComputeEncoderUse != cmdUse) {
 		_mtlComputeEncoderUse = cmdUse;
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@ -1160,7 +1160,7 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
    if ( !_hasRasterized || !vizBuff || _mtlRenderPassQueries.empty() ) { return; }  // Nothing to do.
 	id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
-    id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
+    id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery, true);
    [mtlAccumEncoder setComputePipelineState: mtlAccumState];
    for (auto& qryLoc : _mtlRenderPassQueries) {
        // Accumulate the current results to the query pool's buffer.
@ -1173,7 +1173,6 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
        [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
                        threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
    }
    _cmdEncoder->endCurrentMetalEncoding();
    _mtlRenderPassQueries.clear();
 	_hasRasterized = false;
 }
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
@ -284,7 +284,7 @@ id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_
 }
 id<MTLComputeCommandEncoder> MVKOcclusionQueryPool::encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t, uint32_t index) {
-	id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
+	id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults, true);
 	[mtlCmdEnc setBuffer: getVisibilityResultMTLBuffer() offset: getVisibilityResultOffset(firstQuery) atIndex: index];
 	return mtlCmdEnc;
 }
@ -434,12 +434,12 @@ id<MTLComputeCommandEncoder> MVKTimestampQueryPool::encodeComputeCopyResults(MVK
 					 destinationBuffer: tempBuff->_mtlBuffer
 					 destinationOffset: tempBuff->_offset];
-		id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
+		id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults, true);
 		[mtlCmdEnc setBuffer: tempBuff->_mtlBuffer offset: tempBuff->_offset atIndex: index];
 		return mtlCmdEnc;
 	} else {
 		// We can set the timestamp bytes into the compute encoder.
-		id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
+		id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults, true);
 		cmdEncoder->setComputeBytes(mtlCmdEnc, &_timestamps[firstQuery], queryCount * _queryElementCount * sizeof(uint64_t), index);
 		return mtlCmdEnc;
 	}