Ensure compute encoding state is marked dirty for non-dispatch commands.

The same compute encoder is used across dispatches and other commands,
which may override compute state, and end up breaking subsequent dispatches.

- Mark compute encoding state dirty when following commands,
  which use Metal compute encoders, are issued:
  - vkCmdCopyBuffer()
  - vkCmdClearColorImage()
  - vkCmdClearDepthStencilImage()
  - vkCmdFillBuffer()
  - vkCmdCopyQueryPoolResults()

- MVKCommandEncoder move marking compute state dirty from
  endCurrentMetalEncoding() to getMTLComputeEncoder().
- For efficiency, don't prematurely force end of query copy compute encoder
  used on renderpass end, in case compute dispatches follow.

- Update MoltenVK to 1.2.5 (unrelated).
This commit is contained in:
Bill Hollings 2023-05-26 00:06:40 -04:00
parent 4c6bfbebd1
commit 83a1811230
7 changed files with 30 additions and 18 deletions

View File

@ -13,6 +13,15 @@ Copyright (c) 2015-2023 [The Brenwill Workshop Ltd.](http://www.brenwill.com)
MoltenVK 1.2.5
--------------
Released TBD
- Ensure non-dispatch compute commands don't interfere with compute encoding state used by dispatch commands.
MoltenVK 1.2.4 MoltenVK 1.2.4
-------------- --------------

View File

@ -45,7 +45,7 @@ extern "C" {
*/ */
#define MVK_VERSION_MAJOR 1 #define MVK_VERSION_MAJOR 1
#define MVK_VERSION_MINOR 2 #define MVK_VERSION_MINOR 2
#define MVK_VERSION_PATCH 4 #define MVK_VERSION_PATCH 5
#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch)) #define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch))
#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) #define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)

View File

@ -955,7 +955,7 @@ void MVKCmdCopyBuffer<N>::encode(MVKCommandEncoder* cmdEncoder) {
copyInfo.dstOffset = (uint32_t)cpyRgn.dstOffset; copyInfo.dstOffset = (uint32_t)cpyRgn.dstOffset;
copyInfo.size = (uint32_t)cpyRgn.size; copyInfo.size = (uint32_t)cpyRgn.size;
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyBuffer); id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyBuffer, true);
[mtlComputeEnc pushDebugGroup: @"vkCmdCopyBuffer"]; [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBuffer"];
[mtlComputeEnc setComputePipelineState: cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferBytesMTLComputePipelineState()]; [mtlComputeEnc setComputePipelineState: cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferBytesMTLComputePipelineState()];
[mtlComputeEnc setBuffer:srcMTLBuff offset: srcMTLBuffOffset atIndex: 0]; [mtlComputeEnc setBuffer:srcMTLBuff offset: srcMTLBuffOffset atIndex: 0];
@ -1141,7 +1141,7 @@ void MVKCmdBufferImageCopy<N>::encode(MVKCommandEncoder* cmdEncoder) {
info.offset = cpyRgn.imageOffset; info.offset = cpyRgn.imageOffset;
info.extent = cpyRgn.imageExtent; info.extent = cpyRgn.imageExtent;
bool needsTempBuff = mipLevel != 0; bool needsTempBuff = mipLevel != 0;
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse); id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse, false); // Compute state will be marked dirty on next compute encoder after Blit encoder below.
id<MTLComputePipelineState> mtlComputeState = cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff); id<MTLComputePipelineState> mtlComputeState = cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff);
[mtlComputeEnc pushDebugGroup: @"vkCmdCopyBufferToImage"]; [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBufferToImage"];
[mtlComputeEnc setComputePipelineState: mtlComputeState]; [mtlComputeEnc setComputePipelineState: mtlComputeState];
@ -1580,7 +1580,7 @@ void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
// Luckily for us, linear images only have one mip and one array layer under Metal. // Luckily for us, linear images only have one mip and one array layer under Metal.
assert( !isDS ); assert( !isDS );
id<MTLComputePipelineState> mtlClearState = cmdEncoder->getCommandEncodingPool()->getCmdClearColorImageMTLComputePipelineState(pixFmts->getFormatType(_image->getVkFormat())); id<MTLComputePipelineState> mtlClearState = cmdEncoder->getCommandEncodingPool()->getCmdClearColorImageMTLComputePipelineState(pixFmts->getFormatType(_image->getVkFormat()));
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseClearColorImage); id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseClearColorImage, true);
[mtlComputeEnc pushDebugGroup: @"vkCmdClearColorImage"]; [mtlComputeEnc pushDebugGroup: @"vkCmdClearColorImage"];
[mtlComputeEnc setComputePipelineState: mtlClearState]; [mtlComputeEnc setComputePipelineState: mtlClearState];
[mtlComputeEnc setTexture: imgMTLTex atIndex: 0]; [mtlComputeEnc setTexture: imgMTLTex atIndex: 0];
@ -1747,7 +1747,7 @@ void MVKCmdFillBuffer::encode(MVKCommandEncoder* cmdEncoder) {
NSUInteger tgWidth = std::min(cps.maxTotalThreadsPerThreadgroup, cmdEncoder->getMTLDevice().maxThreadsPerThreadgroup.width); NSUInteger tgWidth = std::min(cps.maxTotalThreadsPerThreadgroup, cmdEncoder->getMTLDevice().maxThreadsPerThreadgroup.width);
NSUInteger tgCount = _wordCount / tgWidth; NSUInteger tgCount = _wordCount / tgWidth;
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseFillBuffer); id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseFillBuffer, true);
[mtlComputeEnc pushDebugGroup: @"vkCmdFillBuffer"]; [mtlComputeEnc pushDebugGroup: @"vkCmdFillBuffer"];
[mtlComputeEnc setComputePipelineState: cps]; [mtlComputeEnc setComputePipelineState: cps];
[mtlComputeEnc setBytes: &_dataValue length: sizeof(_dataValue) atIndex: 1]; [mtlComputeEnc setBytes: &_dataValue length: sizeof(_dataValue) atIndex: 1];

View File

@ -337,10 +337,12 @@ public:
* Returns the current Metal compute encoder for the specified use, * Returns the current Metal compute encoder for the specified use,
* which determines the label assigned to the returned encoder. * which determines the label assigned to the returned encoder.
* *
* If the current encoder is not a compute encoder, this function ends current before * If the current encoder is a compute encoder, the compute state being tracked can
* beginning compute encoding. * optionally be marked dirty. Otherwise, if the current encoder is not a compute
* encoder, this function ends the current encoder before beginning compute encoding.
*/ */
id<MTLComputeCommandEncoder> getMTLComputeEncoder(MVKCommandUse cmdUse); id<MTLComputeCommandEncoder> getMTLComputeEncoder(MVKCommandUse cmdUse,
bool markCurrentComputeStateDirty = false);
/** /**
* Returns the current Metal BLIT encoder for the specified use, * Returns the current Metal BLIT encoder for the specified use,

View File

@ -839,10 +839,6 @@ void MVKCommandEncoder::endMetalRenderEncoding() {
void MVKCommandEncoder::endCurrentMetalEncoding() { void MVKCommandEncoder::endCurrentMetalEncoding() {
endMetalRenderEncoding(); endMetalRenderEncoding();
_computePipelineState.markDirty();
_computeResourcesState.markDirty();
_computePushConstants.markDirty();
if (_mtlComputeEncoder && _cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlComputeEncoder updateFence: getStageCountersMTLFence()]; } if (_mtlComputeEncoder && _cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlComputeEncoder updateFence: getStageCountersMTLFence()]; }
endMetalEncoding(_mtlComputeEncoder); endMetalEncoding(_mtlComputeEncoder);
_mtlComputeEncoderUse = kMVKCommandUseNone; _mtlComputeEncoderUse = kMVKCommandUseNone;
@ -854,12 +850,18 @@ void MVKCommandEncoder::endCurrentMetalEncoding() {
encodeTimestampStageCounterSamples(); encodeTimestampStageCounterSamples();
} }
id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) { id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse, bool markCurrentComputeStateDirty) {
if ( !_mtlComputeEncoder ) { if ( !_mtlComputeEncoder ) {
endCurrentMetalEncoding(); endCurrentMetalEncoding();
_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder]; _mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder];
retainIfImmediatelyEncoding(_mtlComputeEncoder); retainIfImmediatelyEncoding(_mtlComputeEncoder);
beginMetalComputeEncoding(cmdUse); beginMetalComputeEncoding(cmdUse);
markCurrentComputeStateDirty = true; // Always mark current compute state dirty for new encoder
}
if(markCurrentComputeStateDirty) {
_computePipelineState.markDirty();
_computePushConstants.markDirty();
_computeResourcesState.markDirty();
} }
if (_mtlComputeEncoderUse != cmdUse) { if (_mtlComputeEncoderUse != cmdUse) {
_mtlComputeEncoderUse = cmdUse; _mtlComputeEncoderUse = cmdUse;

View File

@ -1160,7 +1160,7 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
if ( !_hasRasterized || !vizBuff || _mtlRenderPassQueries.empty() ) { return; } // Nothing to do. if ( !_hasRasterized || !vizBuff || _mtlRenderPassQueries.empty() ) { return; } // Nothing to do.
id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState(); id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery); id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery, true);
[mtlAccumEncoder setComputePipelineState: mtlAccumState]; [mtlAccumEncoder setComputePipelineState: mtlAccumState];
for (auto& qryLoc : _mtlRenderPassQueries) { for (auto& qryLoc : _mtlRenderPassQueries) {
// Accumulate the current results to the query pool's buffer. // Accumulate the current results to the query pool's buffer.
@ -1173,7 +1173,6 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
[mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1) [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
threadsPerThreadgroup: MTLSizeMake(1, 1, 1)]; threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
} }
_cmdEncoder->endCurrentMetalEncoding();
_mtlRenderPassQueries.clear(); _mtlRenderPassQueries.clear();
_hasRasterized = false; _hasRasterized = false;
} }

View File

@ -284,7 +284,7 @@ id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_
} }
id<MTLComputeCommandEncoder> MVKOcclusionQueryPool::encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t, uint32_t index) { id<MTLComputeCommandEncoder> MVKOcclusionQueryPool::encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t, uint32_t index) {
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults); id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults, true);
[mtlCmdEnc setBuffer: getVisibilityResultMTLBuffer() offset: getVisibilityResultOffset(firstQuery) atIndex: index]; [mtlCmdEnc setBuffer: getVisibilityResultMTLBuffer() offset: getVisibilityResultOffset(firstQuery) atIndex: index];
return mtlCmdEnc; return mtlCmdEnc;
} }
@ -434,12 +434,12 @@ id<MTLComputeCommandEncoder> MVKTimestampQueryPool::encodeComputeCopyResults(MVK
destinationBuffer: tempBuff->_mtlBuffer destinationBuffer: tempBuff->_mtlBuffer
destinationOffset: tempBuff->_offset]; destinationOffset: tempBuff->_offset];
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults); id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults, true);
[mtlCmdEnc setBuffer: tempBuff->_mtlBuffer offset: tempBuff->_offset atIndex: index]; [mtlCmdEnc setBuffer: tempBuff->_mtlBuffer offset: tempBuff->_offset atIndex: index];
return mtlCmdEnc; return mtlCmdEnc;
} else { } else {
// We can set the timestamp bytes into the compute encoder. // We can set the timestamp bytes into the compute encoder.
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults); id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults, true);
cmdEncoder->setComputeBytes(mtlCmdEnc, &_timestamps[firstQuery], queryCount * _queryElementCount * sizeof(uint64_t), index); cmdEncoder->setComputeBytes(mtlCmdEnc, &_timestamps[firstQuery], queryCount * _queryElementCount * sizeof(uint64_t), index);
return mtlCmdEnc; return mtlCmdEnc;
} }