Merge pull request #1387 from billhollings/occlusion-query-fixes-for-M1
Occlusion query fixes for M1
This commit is contained in:
commit
e3cf071ace
@ -33,6 +33,8 @@ Released TBD
|
||||
- Fix synchronization issue with locking `MTLArgumentEncoder` for Metal Argument Buffers.
|
||||
- Fix race condition on submission fence during device loss.
|
||||
- Fix crash using memoryless storage for input attachments on Apple Silicon.
|
||||
- Fix issue where M1 GPU does not support reusing Metal visibility buffer offsets
|
||||
across separate render encoders within a single Metal command buffer (Vulkan submit).
|
||||
- On command buffer submission failure, if `MVKConfiguration::resumeLostDevice` enabled, do not release
|
||||
waits on `VkDevice`, and do not return `VK_ERROR_DEVICE_LOST`, unless `VkPhysicalDevice` is also lost.
|
||||
- Fix inconsistent handling of linear attachment decisions on Apple Silicon.
|
||||
|
@ -46,6 +46,16 @@ class MVKComputePipeline;
|
||||
typedef uint64_t MVKMTLCommandBufferID;
|
||||
|
||||
|
||||
#pragma mark -
|
||||
#pragma mark MVKCommandEncodingContext
|
||||
|
||||
/** Context for tracking information across multiple encodings. */
|
||||
typedef struct MVKCommandEncodingContext {
|
||||
NSUInteger mtlVisibilityResultOffset = 0;
|
||||
const MVKMTLBufferAllocation* visibilityResultBuffer = nullptr;
|
||||
} MVKCommandEncodingContext;
|
||||
|
||||
|
||||
#pragma mark -
|
||||
#pragma mark MVKCommandBuffer
|
||||
|
||||
@ -83,7 +93,7 @@ public:
|
||||
inline MVKCommandPool* getCommandPool() { return _commandPool; }
|
||||
|
||||
/** Submit the commands in this buffer as part of the queue submission. */
|
||||
void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit);
|
||||
void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit, MVKCommandEncodingContext* pEncodingContext);
|
||||
|
||||
/** Returns whether this command buffer can be submitted to a queue more than once. */
|
||||
inline bool getIsReusable() { return _isReusable; }
|
||||
@ -264,7 +274,7 @@ public:
|
||||
MVKVulkanAPIObject* getVulkanAPIObject() override { return _cmdBuffer->getVulkanAPIObject(); };
|
||||
|
||||
/** Encode commands from the command buffer onto the Metal command buffer. */
|
||||
void encode(id<MTLCommandBuffer> mtlCmdBuff);
|
||||
void encode(id<MTLCommandBuffer> mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext);
|
||||
|
||||
/** Encode commands from the specified secondary command buffer onto the Metal command buffer. */
|
||||
void encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer);
|
||||
@ -407,6 +417,9 @@ public:
|
||||
|
||||
#pragma mark Dynamic encoding state accessed directly
|
||||
|
||||
/** Context for tracking information across multiple encodings. */
|
||||
MVKCommandEncodingContext* _pEncodingContext;
|
||||
|
||||
/** A reference to the Metal features supported by the device. */
|
||||
const MVKPhysicalDeviceMetalFeatures* _pDeviceMetalFeatures;
|
||||
|
||||
@ -428,9 +441,6 @@ public:
|
||||
/** The current Metal render encoder. */
|
||||
id<MTLRenderCommandEncoder> _mtlRenderEncoder;
|
||||
|
||||
/** The buffer used to hold occlusion query results in a render pass. */
|
||||
const MVKMTLBufferAllocation* _visibilityResultMTLBuffer;
|
||||
|
||||
/** Tracks the current graphics pipeline bound to the encoder. */
|
||||
MVKPipelineCommandEncoderState _graphicsPipelineState;
|
||||
|
||||
|
@ -105,7 +105,8 @@ void MVKCommandBuffer::addCommand(MVKCommand* command) {
|
||||
_commandCount++;
|
||||
}
|
||||
|
||||
void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit) {
|
||||
void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit,
|
||||
MVKCommandEncodingContext* pEncodingContext) {
|
||||
if ( !canExecute() ) { return; }
|
||||
|
||||
if (_prefilledMTLCmdBuffer) {
|
||||
@ -113,7 +114,7 @@ void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit) {
|
||||
clearPrefilledMTLCommandBuffer();
|
||||
} else {
|
||||
MVKCommandEncoder encoder(this);
|
||||
encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer());
|
||||
encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer(), pEncodingContext);
|
||||
}
|
||||
|
||||
if ( !_supportsConcurrentExecution ) { _isExecutingNonConcurrently.clear(); }
|
||||
@ -150,8 +151,9 @@ void MVKCommandBuffer::prefill() {
|
||||
uint32_t qIdx = 0;
|
||||
_prefilledMTLCmdBuffer = _commandPool->newMTLCommandBuffer(qIdx); // retain
|
||||
|
||||
MVKCommandEncodingContext encodingContext;
|
||||
MVKCommandEncoder encoder(this);
|
||||
encoder.encode(_prefilledMTLCmdBuffer);
|
||||
encoder.encode(_prefilledMTLCmdBuffer, &encodingContext);
|
||||
|
||||
// Once encoded onto Metal, if this command buffer is not reusable, we don't need the
|
||||
// MVKCommand instances anymore, so release them in order to reduce memory pressure.
|
||||
@ -246,13 +248,15 @@ MVKRenderSubpass* MVKCommandBuffer::getLastMultiviewSubpass() {
|
||||
#pragma mark -
|
||||
#pragma mark MVKCommandEncoder
|
||||
|
||||
void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff) {
|
||||
void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff,
|
||||
MVKCommandEncodingContext* pEncodingContext) {
|
||||
_renderPass = nullptr;
|
||||
_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
|
||||
_renderSubpassIndex = 0;
|
||||
_multiviewPassIndex = 0;
|
||||
_canUseLayeredRendering = false;
|
||||
|
||||
_pEncodingContext = pEncodingContext;
|
||||
_mtlCmdBuffer = mtlCmdBuff; // not retained
|
||||
|
||||
setLabelIfNotNil(_mtlCmdBuffer, _cmdBuffer->_debugName);
|
||||
@ -345,12 +349,12 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {
|
||||
_clearValues.contents(),
|
||||
_isRenderingEntireAttachment,
|
||||
loadOverride);
|
||||
if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
|
||||
if (!_visibilityResultMTLBuffer) {
|
||||
_visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
|
||||
}
|
||||
mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
|
||||
}
|
||||
if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
|
||||
if ( !_pEncodingContext->visibilityResultBuffer ) {
|
||||
_pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
|
||||
}
|
||||
mtlRPDesc.visibilityResultBuffer = _pEncodingContext->visibilityResultBuffer->_mtlBuffer;
|
||||
}
|
||||
|
||||
VkExtent2D fbExtent = _framebufferExtent;
|
||||
mtlRPDesc.renderTargetWidthMVK = max(min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width), 1u);
|
||||
@ -770,7 +774,6 @@ void MVKCommandEncoder::finishQueries() {
|
||||
|
||||
MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
|
||||
_cmdBuffer(cmdBuffer),
|
||||
_visibilityResultMTLBuffer(nil),
|
||||
_graphicsPipelineState(this),
|
||||
_computePipelineState(this),
|
||||
_viewportState(this),
|
||||
@ -799,6 +802,7 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDevic
|
||||
_mtlComputeEncoderUse = kMVKCommandUseNone;
|
||||
_mtlBlitEncoder = nil;
|
||||
_mtlBlitEncoderUse = kMVKCommandUseNone;
|
||||
_pEncodingContext = nullptr;
|
||||
}
|
||||
|
||||
|
||||
|
@ -614,9 +614,18 @@ public:
|
||||
protected:
|
||||
void encodeImpl(uint32_t) override;
|
||||
|
||||
typedef struct OcclusionQueryLocation {
|
||||
MVKOcclusionQueryPool* queryPool = nullptr;
|
||||
uint32_t query = 0;
|
||||
NSUInteger visibilityBufferOffset = 0;
|
||||
|
||||
OcclusionQueryLocation(MVKOcclusionQueryPool* qPool, uint32_t qIdx, NSUInteger vbOfst)
|
||||
: queryPool(qPool), query(qIdx), visibilityBufferOffset(vbOfst) {}
|
||||
|
||||
} OcclusionQueryLocation;
|
||||
|
||||
MVKSmallVector<OcclusionQueryLocation> _mtlRenderPassQueries;
|
||||
MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
|
||||
NSUInteger _mtlVisibilityResultOffset = 0;
|
||||
MVKSmallVector<std::pair<MVKQuerySpec, NSUInteger>> _mtlRenderPassQueries;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1073,20 +1073,19 @@ void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(M
|
||||
#pragma mark MVKOcclusionQueryCommandEncoderState
|
||||
|
||||
void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
|
||||
const MVKMTLBufferAllocation* vizBuff = _cmdEncoder->_pEncodingContext->visibilityResultBuffer;
|
||||
if ( !vizBuff || _mtlRenderPassQueries.empty() ) { return; } // Nothing to do.
|
||||
|
||||
if (_mtlRenderPassQueries.empty()) { return; } // Nothing to do.
|
||||
|
||||
id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
|
||||
id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
|
||||
id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
|
||||
[mtlAccumEncoder setComputePipelineState: mtlAccumState];
|
||||
for (auto& query : _mtlRenderPassQueries) {
|
||||
for (auto& qryLoc : _mtlRenderPassQueries) {
|
||||
// Accumulate the current results to the query pool's buffer.
|
||||
auto* pQueryPool = (MVKOcclusionQueryPool*)query.first.queryPool;
|
||||
[mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
|
||||
offset: pQueryPool->getVisibilityResultOffset(query.first.query)
|
||||
[mtlAccumEncoder setBuffer: qryLoc.queryPool->getVisibilityResultMTLBuffer()
|
||||
offset: qryLoc.queryPool->getVisibilityResultOffset(qryLoc.query)
|
||||
atIndex: 0];
|
||||
[mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer->_mtlBuffer
|
||||
offset: query.second
|
||||
[mtlAccumEncoder setBuffer: vizBuff->_mtlBuffer
|
||||
offset: vizBuff->_offset + qryLoc.visibilityBufferOffset
|
||||
atIndex: 1];
|
||||
[mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
|
||||
threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
|
||||
@ -1095,24 +1094,28 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
|
||||
_mtlRenderPassQueries.clear();
|
||||
}
|
||||
|
||||
// The Metal visibility buffer has a finite size, and on some Metal platforms (looking at you M1),
|
||||
// query offsets cannnot be reused with the same MTLCommandBuffer. If enough occlusion queries are
|
||||
// begun within a single MTLCommandBuffer, it may exhaust the visibility buffer. If that occurs,
|
||||
// report an error and disable further visibility tracking for the remainder of the MTLCommandBuffer.
|
||||
// In most cases, a MTLCommandBuffer corresponds to a Vulkan command submit (VkSubmitInfo),
|
||||
// and so the error text is framed in terms of the Vulkan submit.
|
||||
void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
|
||||
|
||||
MVKQuerySpec querySpec;
|
||||
querySpec.set(pQueryPool, query);
|
||||
NSUInteger offset = _mtlRenderPassQueries.empty() ? 0 : _mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes;
|
||||
NSUInteger maxOffset = _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes;
|
||||
offset = min(offset, maxOffset);
|
||||
_mtlRenderPassQueries.push_back(make_pair(querySpec, offset));
|
||||
|
||||
bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
|
||||
_mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
|
||||
_mtlVisibilityResultOffset = offset;
|
||||
|
||||
if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) {
|
||||
bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
|
||||
_mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
|
||||
_mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset);
|
||||
} else {
|
||||
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize / kMVKQuerySlotSizeInBytes);
|
||||
_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
|
||||
_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset -= kMVKQuerySlotSizeInBytes;
|
||||
}
|
||||
markDirty();
|
||||
}
|
||||
|
||||
void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
|
||||
_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
|
||||
_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset += kMVKQuerySlotSizeInBytes;
|
||||
markDirty();
|
||||
}
|
||||
|
||||
@ -1120,5 +1123,5 @@ void MVKOcclusionQueryCommandEncoderState::encodeImpl(uint32_t stage) {
|
||||
if (stage != kMVKGraphicsStageRasterization) { return; }
|
||||
|
||||
[_cmdEncoder->_mtlRenderEncoder setVisibilityResultMode: _mtlVisibilityResultMode
|
||||
offset: _mtlVisibilityResultOffset];
|
||||
offset: _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset];
|
||||
}
|
||||
|
@ -237,7 +237,7 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
void submitCommandBuffers() override { for (auto& cb : _cmdBuffers) { cb->submit(this); } }
|
||||
void submitCommandBuffers() override;
|
||||
|
||||
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
|
||||
};
|
||||
|
@ -463,6 +463,13 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
|
||||
}
|
||||
|
||||
|
||||
template <size_t N>
|
||||
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
|
||||
MVKCommandEncodingContext encodingContext;
|
||||
for (auto& cb : _cmdBuffers) { cb->submit(this, &encodingContext); }
|
||||
}
|
||||
|
||||
|
||||
#pragma mark -
|
||||
#pragma mark MVKQueuePresentSurfaceSubmission
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user