Merge pull request #1387 from billhollings/occlusion-query-fixes-for-M1

Occlusion query fixes for M1
This commit is contained in:
Bill Hollings 2021-06-28 08:10:17 -04:00 committed by GitHub
commit e3cf071ace
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 76 additions and 41 deletions

View File

@ -33,6 +33,8 @@ Released TBD
- Fix synchronization issue with locking `MTLArgumentEncoder` for Metal Argument Buffers. - Fix synchronization issue with locking `MTLArgumentEncoder` for Metal Argument Buffers.
- Fix race condition on submission fence during device loss. - Fix race condition on submission fence during device loss.
- Fix crash using memoryless storage for input attachments on Apple Silicon. - Fix crash using memoryless storage for input attachments on Apple Silicon.
- Fix issue where M1 GPU does not support reusing Metal visibility buffer offsets
across separate render encoders within a single Metal command buffer (Vulkan submit).
- On command buffer submission failure, if `MVKConfiguration::resumeLostDevice` enabled, do not release - On command buffer submission failure, if `MVKConfiguration::resumeLostDevice` enabled, do not release
waits on `VkDevice`, and do not return `VK_ERROR_DEVICE_LOST`, unless `VkPhysicalDevice` is also lost. waits on `VkDevice`, and do not return `VK_ERROR_DEVICE_LOST`, unless `VkPhysicalDevice` is also lost.
- Fix inconsistent handling of linear attachment decisions on Apple Silicon. - Fix inconsistent handling of linear attachment decisions on Apple Silicon.

View File

@ -46,6 +46,16 @@ class MVKComputePipeline;
typedef uint64_t MVKMTLCommandBufferID; typedef uint64_t MVKMTLCommandBufferID;
#pragma mark -
#pragma mark MVKCommandEncodingContext
/** Context for tracking information across multiple encodings. */
typedef struct MVKCommandEncodingContext {
NSUInteger mtlVisibilityResultOffset = 0;
const MVKMTLBufferAllocation* visibilityResultBuffer = nullptr;
} MVKCommandEncodingContext;
#pragma mark - #pragma mark -
#pragma mark MVKCommandBuffer #pragma mark MVKCommandBuffer
@ -83,7 +93,7 @@ public:
inline MVKCommandPool* getCommandPool() { return _commandPool; } inline MVKCommandPool* getCommandPool() { return _commandPool; }
/** Submit the commands in this buffer as part of the queue submission. */ /** Submit the commands in this buffer as part of the queue submission. */
void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit); void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit, MVKCommandEncodingContext* pEncodingContext);
/** Returns whether this command buffer can be submitted to a queue more than once. */ /** Returns whether this command buffer can be submitted to a queue more than once. */
inline bool getIsReusable() { return _isReusable; } inline bool getIsReusable() { return _isReusable; }
@ -264,7 +274,7 @@ public:
MVKVulkanAPIObject* getVulkanAPIObject() override { return _cmdBuffer->getVulkanAPIObject(); }; MVKVulkanAPIObject* getVulkanAPIObject() override { return _cmdBuffer->getVulkanAPIObject(); };
/** Encode commands from the command buffer onto the Metal command buffer. */ /** Encode commands from the command buffer onto the Metal command buffer. */
void encode(id<MTLCommandBuffer> mtlCmdBuff); void encode(id<MTLCommandBuffer> mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext);
/** Encode commands from the specified secondary command buffer onto the Metal command buffer. */ /** Encode commands from the specified secondary command buffer onto the Metal command buffer. */
void encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer); void encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer);
@ -407,6 +417,9 @@ public:
#pragma mark Dynamic encoding state accessed directly #pragma mark Dynamic encoding state accessed directly
/** Context for tracking information across multiple encodings. */
MVKCommandEncodingContext* _pEncodingContext;
/** A reference to the Metal features supported by the device. */ /** A reference to the Metal features supported by the device. */
const MVKPhysicalDeviceMetalFeatures* _pDeviceMetalFeatures; const MVKPhysicalDeviceMetalFeatures* _pDeviceMetalFeatures;
@ -428,9 +441,6 @@ public:
/** The current Metal render encoder. */ /** The current Metal render encoder. */
id<MTLRenderCommandEncoder> _mtlRenderEncoder; id<MTLRenderCommandEncoder> _mtlRenderEncoder;
/** The buffer used to hold occlusion query results in a render pass. */
const MVKMTLBufferAllocation* _visibilityResultMTLBuffer;
/** Tracks the current graphics pipeline bound to the encoder. */ /** Tracks the current graphics pipeline bound to the encoder. */
MVKPipelineCommandEncoderState _graphicsPipelineState; MVKPipelineCommandEncoderState _graphicsPipelineState;

View File

@ -105,7 +105,8 @@ void MVKCommandBuffer::addCommand(MVKCommand* command) {
_commandCount++; _commandCount++;
} }
void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit) { void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit,
MVKCommandEncodingContext* pEncodingContext) {
if ( !canExecute() ) { return; } if ( !canExecute() ) { return; }
if (_prefilledMTLCmdBuffer) { if (_prefilledMTLCmdBuffer) {
@ -113,7 +114,7 @@ void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit) {
clearPrefilledMTLCommandBuffer(); clearPrefilledMTLCommandBuffer();
} else { } else {
MVKCommandEncoder encoder(this); MVKCommandEncoder encoder(this);
encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer()); encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer(), pEncodingContext);
} }
if ( !_supportsConcurrentExecution ) { _isExecutingNonConcurrently.clear(); } if ( !_supportsConcurrentExecution ) { _isExecutingNonConcurrently.clear(); }
@ -150,8 +151,9 @@ void MVKCommandBuffer::prefill() {
uint32_t qIdx = 0; uint32_t qIdx = 0;
_prefilledMTLCmdBuffer = _commandPool->newMTLCommandBuffer(qIdx); // retain _prefilledMTLCmdBuffer = _commandPool->newMTLCommandBuffer(qIdx); // retain
MVKCommandEncodingContext encodingContext;
MVKCommandEncoder encoder(this); MVKCommandEncoder encoder(this);
encoder.encode(_prefilledMTLCmdBuffer); encoder.encode(_prefilledMTLCmdBuffer, &encodingContext);
// Once encoded onto Metal, if this command buffer is not reusable, we don't need the // Once encoded onto Metal, if this command buffer is not reusable, we don't need the
// MVKCommand instances anymore, so release them in order to reduce memory pressure. // MVKCommand instances anymore, so release them in order to reduce memory pressure.
@ -246,13 +248,15 @@ MVKRenderSubpass* MVKCommandBuffer::getLastMultiviewSubpass() {
#pragma mark - #pragma mark -
#pragma mark MVKCommandEncoder #pragma mark MVKCommandEncoder
void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff) { void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff,
MVKCommandEncodingContext* pEncodingContext) {
_renderPass = nullptr; _renderPass = nullptr;
_subpassContents = VK_SUBPASS_CONTENTS_INLINE; _subpassContents = VK_SUBPASS_CONTENTS_INLINE;
_renderSubpassIndex = 0; _renderSubpassIndex = 0;
_multiviewPassIndex = 0; _multiviewPassIndex = 0;
_canUseLayeredRendering = false; _canUseLayeredRendering = false;
_pEncodingContext = pEncodingContext;
_mtlCmdBuffer = mtlCmdBuff; // not retained _mtlCmdBuffer = mtlCmdBuff; // not retained
setLabelIfNotNil(_mtlCmdBuffer, _cmdBuffer->_debugName); setLabelIfNotNil(_mtlCmdBuffer, _cmdBuffer->_debugName);
@ -345,12 +349,12 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {
_clearValues.contents(), _clearValues.contents(),
_isRenderingEntireAttachment, _isRenderingEntireAttachment,
loadOverride); loadOverride);
if (_cmdBuffer->_needsVisibilityResultMTLBuffer) { if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
if (!_visibilityResultMTLBuffer) { if ( !_pEncodingContext->visibilityResultBuffer ) {
_visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true); _pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
} }
mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer; mtlRPDesc.visibilityResultBuffer = _pEncodingContext->visibilityResultBuffer->_mtlBuffer;
} }
VkExtent2D fbExtent = _framebufferExtent; VkExtent2D fbExtent = _framebufferExtent;
mtlRPDesc.renderTargetWidthMVK = max(min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width), 1u); mtlRPDesc.renderTargetWidthMVK = max(min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width), 1u);
@ -770,7 +774,6 @@ void MVKCommandEncoder::finishQueries() {
MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()), MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
_cmdBuffer(cmdBuffer), _cmdBuffer(cmdBuffer),
_visibilityResultMTLBuffer(nil),
_graphicsPipelineState(this), _graphicsPipelineState(this),
_computePipelineState(this), _computePipelineState(this),
_viewportState(this), _viewportState(this),
@ -799,6 +802,7 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDevic
_mtlComputeEncoderUse = kMVKCommandUseNone; _mtlComputeEncoderUse = kMVKCommandUseNone;
_mtlBlitEncoder = nil; _mtlBlitEncoder = nil;
_mtlBlitEncoderUse = kMVKCommandUseNone; _mtlBlitEncoderUse = kMVKCommandUseNone;
_pEncodingContext = nullptr;
} }

View File

@ -614,9 +614,18 @@ public:
protected: protected:
void encodeImpl(uint32_t) override; void encodeImpl(uint32_t) override;
typedef struct OcclusionQueryLocation {
MVKOcclusionQueryPool* queryPool = nullptr;
uint32_t query = 0;
NSUInteger visibilityBufferOffset = 0;
OcclusionQueryLocation(MVKOcclusionQueryPool* qPool, uint32_t qIdx, NSUInteger vbOfst)
: queryPool(qPool), query(qIdx), visibilityBufferOffset(vbOfst) {}
} OcclusionQueryLocation;
MVKSmallVector<OcclusionQueryLocation> _mtlRenderPassQueries;
MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled; MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
NSUInteger _mtlVisibilityResultOffset = 0;
MVKSmallVector<std::pair<MVKQuerySpec, NSUInteger>> _mtlRenderPassQueries;
}; };

View File

@ -1073,20 +1073,19 @@ void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(M
#pragma mark MVKOcclusionQueryCommandEncoderState #pragma mark MVKOcclusionQueryCommandEncoderState
void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() { void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
const MVKMTLBufferAllocation* vizBuff = _cmdEncoder->_pEncodingContext->visibilityResultBuffer;
if ( !vizBuff || _mtlRenderPassQueries.empty() ) { return; } // Nothing to do.
if (_mtlRenderPassQueries.empty()) { return; } // Nothing to do. id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery); id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
[mtlAccumEncoder setComputePipelineState: mtlAccumState]; [mtlAccumEncoder setComputePipelineState: mtlAccumState];
for (auto& query : _mtlRenderPassQueries) { for (auto& qryLoc : _mtlRenderPassQueries) {
// Accumulate the current results to the query pool's buffer. // Accumulate the current results to the query pool's buffer.
auto* pQueryPool = (MVKOcclusionQueryPool*)query.first.queryPool; [mtlAccumEncoder setBuffer: qryLoc.queryPool->getVisibilityResultMTLBuffer()
[mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer() offset: qryLoc.queryPool->getVisibilityResultOffset(qryLoc.query)
offset: pQueryPool->getVisibilityResultOffset(query.first.query)
atIndex: 0]; atIndex: 0];
[mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer->_mtlBuffer [mtlAccumEncoder setBuffer: vizBuff->_mtlBuffer
offset: query.second offset: vizBuff->_offset + qryLoc.visibilityBufferOffset
atIndex: 1]; atIndex: 1];
[mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1) [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
threadsPerThreadgroup: MTLSizeMake(1, 1, 1)]; threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
@ -1095,24 +1094,28 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
_mtlRenderPassQueries.clear(); _mtlRenderPassQueries.clear();
} }
// The Metal visibility buffer has a finite size, and on some Metal platforms (looking at you M1),
// query offsets cannnot be reused with the same MTLCommandBuffer. If enough occlusion queries are
// begun within a single MTLCommandBuffer, it may exhaust the visibility buffer. If that occurs,
// report an error and disable further visibility tracking for the remainder of the MTLCommandBuffer.
// In most cases, a MTLCommandBuffer corresponds to a Vulkan command submit (VkSubmitInfo),
// and so the error text is framed in terms of the Vulkan submit.
void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) { void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) {
MVKQuerySpec querySpec; bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
querySpec.set(pQueryPool, query); _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
NSUInteger offset = _mtlRenderPassQueries.empty() ? 0 : _mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes; _mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset);
NSUInteger maxOffset = _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes; } else {
offset = min(offset, maxOffset); reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize / kMVKQuerySlotSizeInBytes);
_mtlRenderPassQueries.push_back(make_pair(querySpec, offset)); _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset -= kMVKQuerySlotSizeInBytes;
bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT); }
_mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
_mtlVisibilityResultOffset = offset;
markDirty(); markDirty();
} }
void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) { void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled; _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset += kMVKQuerySlotSizeInBytes;
markDirty(); markDirty();
} }
@ -1120,5 +1123,5 @@ void MVKOcclusionQueryCommandEncoderState::encodeImpl(uint32_t stage) {
if (stage != kMVKGraphicsStageRasterization) { return; } if (stage != kMVKGraphicsStageRasterization) { return; }
[_cmdEncoder->_mtlRenderEncoder setVisibilityResultMode: _mtlVisibilityResultMode [_cmdEncoder->_mtlRenderEncoder setVisibilityResultMode: _mtlVisibilityResultMode
offset: _mtlVisibilityResultOffset]; offset: _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset];
} }

View File

@ -237,7 +237,7 @@ public:
} }
protected: protected:
void submitCommandBuffers() override { for (auto& cb : _cmdBuffers) { cb->submit(this); } } void submitCommandBuffers() override;
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers; MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
}; };

View File

@ -463,6 +463,13 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
} }
template <size_t N>
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
MVKCommandEncodingContext encodingContext;
for (auto& cb : _cmdBuffers) { cb->submit(this, &encodingContext); }
}
#pragma mark - #pragma mark -
#pragma mark MVKQueuePresentSurfaceSubmission #pragma mark MVKQueuePresentSurfaceSubmission