From 450feb0d30f1c0072cd5e2ff1503099cdce6aa84 Mon Sep 17 00:00:00 2001
From: Bill Hollings <bill.hollings@brenwill.com>
Date: Fri, 18 Jun 2021 17:59:30 -0400
Subject: [PATCH 1/4] Fix issue where M1 GPU does not support reusing Metal
 visibility buffer offsets across separate render encoders within a single
 Vulkan command buffer.

Monotonically increase the offset into the temporary visibility buffer
after each query in a MVKCommandEncoder (ie- Vulkan command buffer).
---
 MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 96ea826f..60a34fbc 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -1099,20 +1099,18 @@ void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQuery
 
     MVKQuerySpec querySpec;
     querySpec.set(pQueryPool, query);
-    NSUInteger offset = _mtlRenderPassQueries.empty() ? 0 : _mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes;
-    NSUInteger maxOffset = _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes;
-    offset = min(offset, maxOffset);
-    _mtlRenderPassQueries.push_back(make_pair(querySpec, offset));
+    _mtlRenderPassQueries.push_back(make_pair(querySpec, _mtlVisibilityResultOffset));
 
     bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
     _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
-    _mtlVisibilityResultOffset = offset;
 
     markDirty();
 }
 
 void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
 	_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
+	_mtlVisibilityResultOffset = min<NSUInteger>(_mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes,
+												 _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes);
 	markDirty();
 }
 

From e72cd614e16c31e6b1351c7421da02ac099d9b32 Mon Sep 17 00:00:00 2001
From: Bill Hollings <bill.hollings@brenwill.com>
Date: Mon, 21 Jun 2021 11:17:40 -0400
Subject: [PATCH 2/4] Fix issue where M1 GPU does not support reusing Metal
 visibility buffer offsets across separate render encoders within a single
 Metal command buffer (Vulkan submit).

Add MVKCommandEncodingContext to track information across multiple
MVKCommandEncoders, and use it to track temporary visibility buffer and offset.
Add support for more than one temporary visibility buffer per MTLCommandBuffer
when current temporary visibility buffer is exhausted.
---
 Docs/Whats_New.md                             |  2 +
 MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 26 ++++++++--
 .../MoltenVK/Commands/MVKCommandBuffer.mm     | 49 ++++++++++++++-----
 .../Commands/MVKCommandEncoderState.h         |  3 +-
 .../Commands/MVKCommandEncoderState.mm        | 14 +++---
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.h       |  2 +-
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm      |  7 +++
 7 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 6ae284d2..d956204a 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -33,6 +33,8 @@ Released TBD
 - Fix synchronization issue with locking `MTLArgumentEncoder` for Metal Argument Buffers.
 - Fix race condition on submission fence during device loss.
 - Fix crash using memoryless storage for input attachments on Apple Silicon.
+- Fix issue where M1 GPU does not support reusing Metal visibility buffer offsets
+  across separate render encoders within a single Metal command buffer (Vulkan submit).
 - On command buffer submission failure, if `MVKConfiguration::resumeLostDevice` enabled,  do not release 
   waits on `VkDevice`, and do not return `VK_ERROR_DEVICE_LOST`, unless `VkPhysicalDevice` is also lost.
 - Fix inconsistent handling of linear attachment decisions on Apple Silicon.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 952c7ec1..fa3bb7f6 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -46,6 +46,22 @@ class MVKComputePipeline;
 typedef uint64_t MVKMTLCommandBufferID;
 
 
+#pragma mark -
+#pragma mark MVKCommandEncodingContext
+
+/** Context for tracking information across multiple encodings. */
+typedef struct MVKCommandEncodingContext {
+	NSUInteger mtlVisibilityResultOffset = 0;
+
+	void incrementMTLVisibilityResultOffset(MVKCommandEncoder* cmdEncoder);
+	const MVKMTLBufferAllocation* getVisibilityResultBuffer(MVKCommandEncoder* cmdEncoder);
+
+private:
+	const MVKMTLBufferAllocation* _visibilityResultBuffer = nullptr;
+
+} MVKCommandEncodingContext;
+
+
 #pragma mark -
 #pragma mark MVKCommandBuffer
 
@@ -83,7 +99,7 @@ public:
 	inline MVKCommandPool* getCommandPool() { return _commandPool; }
 
 	/** Submit the commands in this buffer as part of the queue submission. */
-	void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit);
+	void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit, MVKCommandEncodingContext* pEncodingContext);
 
     /** Returns whether this command buffer can be submitted to a queue more than once. */
     inline bool getIsReusable() { return _isReusable; }
@@ -264,7 +280,7 @@ public:
 	MVKVulkanAPIObject* getVulkanAPIObject() override { return _cmdBuffer->getVulkanAPIObject(); };
 
 	/** Encode commands from the command buffer onto the Metal command buffer. */
-	void encode(id<MTLCommandBuffer> mtlCmdBuff);
+	void encode(id<MTLCommandBuffer> mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext);
 
 	/** Encode commands from the specified secondary command buffer onto the Metal command buffer. */
 	void encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer);
@@ -407,6 +423,9 @@ public:
 
 #pragma mark Dynamic encoding state accessed directly
 
+	/** Context for tracking information across multiple encodings. */
+	MVKCommandEncodingContext* _pEncodingContext;
+
     /** A reference to the Metal features supported by the device. */
     const MVKPhysicalDeviceMetalFeatures* _pDeviceMetalFeatures;
 
@@ -428,9 +447,6 @@ public:
 	/** The current Metal render encoder. */
 	id<MTLRenderCommandEncoder> _mtlRenderEncoder;
 
-	/** The buffer used to hold occlusion query results in a render pass. */
-	const MVKMTLBufferAllocation* _visibilityResultMTLBuffer;
-
     /** Tracks the current graphics pipeline bound to the encoder. */
     MVKPipelineCommandEncoderState _graphicsPipelineState;
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 85f312b2..7225d938 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -105,7 +105,8 @@ void MVKCommandBuffer::addCommand(MVKCommand* command) {
     _commandCount++;
 }
 
-void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit) {
+void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit,
+							  MVKCommandEncodingContext* pEncodingContext) {
 	if ( !canExecute() ) { return; }
 
 	if (_prefilledMTLCmdBuffer) {
@@ -113,7 +114,7 @@ void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit) {
 		clearPrefilledMTLCommandBuffer();
 	} else {
 		MVKCommandEncoder encoder(this);
-		encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer());
+		encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer(), pEncodingContext);
 	}
 
 	if ( !_supportsConcurrentExecution ) { _isExecutingNonConcurrently.clear(); }
@@ -150,8 +151,9 @@ void MVKCommandBuffer::prefill() {
 		uint32_t qIdx = 0;
 		_prefilledMTLCmdBuffer = _commandPool->newMTLCommandBuffer(qIdx);	// retain
 
+		MVKCommandEncodingContext encodingContext;
 		MVKCommandEncoder encoder(this);
-		encoder.encode(_prefilledMTLCmdBuffer);
+		encoder.encode(_prefilledMTLCmdBuffer, &encodingContext);
 
 		// Once encoded onto Metal, if this command buffer is not reusable, we don't need the
 		// MVKCommand instances anymore, so release them in order to reduce memory pressure.
@@ -246,13 +248,15 @@ MVKRenderSubpass* MVKCommandBuffer::getLastMultiviewSubpass() {
 #pragma mark -
 #pragma mark MVKCommandEncoder
 
-void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff,
+							   MVKCommandEncodingContext* pEncodingContext) {
 	_renderPass = nullptr;
 	_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
 	_renderSubpassIndex = 0;
 	_multiviewPassIndex = 0;
 	_canUseLayeredRendering = false;
 
+	_pEncodingContext = pEncodingContext;
 	_mtlCmdBuffer = mtlCmdBuff;		// not retained
 
 	setLabelIfNotNil(_mtlCmdBuffer, _cmdBuffer->_debugName);
@@ -345,12 +349,9 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {
 												  _clearValues.contents(),
 												  _isRenderingEntireAttachment,
 												  loadOverride);
-    if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
-        if (!_visibilityResultMTLBuffer) {
-            _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
-        }
-        mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
-    }
+	if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
+		mtlRPDesc.visibilityResultBuffer = _pEncodingContext->getVisibilityResultBuffer(this)->_mtlBuffer;
+	}
 
 	VkExtent2D fbExtent = _framebufferExtent;
     mtlRPDesc.renderTargetWidthMVK = max(min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width), 1u);
@@ -770,7 +771,6 @@ void MVKCommandEncoder::finishQueries() {
 
 MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
         _cmdBuffer(cmdBuffer),
-        _visibilityResultMTLBuffer(nil),
         _graphicsPipelineState(this),
         _computePipelineState(this),
         _viewportState(this),
@@ -799,6 +799,33 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDevic
 			_mtlComputeEncoderUse = kMVKCommandUseNone;
             _mtlBlitEncoder = nil;
             _mtlBlitEncoderUse = kMVKCommandUseNone;
+			_pEncodingContext = nullptr;
+}
+
+
+#pragma mark -
+#pragma mark MVKCommandEncodingContext
+
+// Increment to the next query slot offset. If we reach the size of the visibility buffer,
+// reset to retrieve and start filling another visibility buffer. This approach may still
+// cause Metal validation errors if the platform does not permit offsets to be reused
+// witin a MTLCommandBuffer, even when a different visibility buffer is used.
+// We don't test against the size of the visibility buffer itself, because this call may
+// arrive before the visibiltiy buffer in the case of a query that ends before the renderpass.
+void MVKCommandEncodingContext::incrementMTLVisibilityResultOffset(MVKCommandEncoder* cmdEncoder) {
+	mtlVisibilityResultOffset += kMVKQuerySlotSizeInBytes;
+
+	if (mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes > cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) {
+		_visibilityResultBuffer = nullptr;
+		mtlVisibilityResultOffset = 0;
+	}
+}
+
+const MVKMTLBufferAllocation* MVKCommandEncodingContext::getVisibilityResultBuffer(MVKCommandEncoder* cmdEncoder) {
+	if ( !_visibilityResultBuffer ) {
+		_visibilityResultBuffer = cmdEncoder->getTempMTLBuffer(cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
+	}
+	return _visibilityResultBuffer;
 }
 
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index d18fa87f..0608f367 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -614,9 +614,8 @@ public:
 protected:
     void encodeImpl(uint32_t) override;
 
-    MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
-    NSUInteger _mtlVisibilityResultOffset = 0;
 	MVKSmallVector<std::pair<MVKQuerySpec, NSUInteger>> _mtlRenderPassQueries;
+    MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
 };
 
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 60a34fbc..8f9ed367 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -1076,7 +1076,8 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
 
     if (_mtlRenderPassQueries.empty()) { return; }  // Nothing to do.
 
-    id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
+	const MVKMTLBufferAllocation* vizResultBuffer = _cmdEncoder->_pEncodingContext->getVisibilityResultBuffer(_cmdEncoder);
+	id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
     id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
     [mtlAccumEncoder setComputePipelineState: mtlAccumState];
     for (auto& query : _mtlRenderPassQueries) {
@@ -1085,8 +1086,8 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
         [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
                             offset: pQueryPool->getVisibilityResultOffset(query.first.query)
                            atIndex: 0];
-        [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer->_mtlBuffer
-                            offset: query.second
+        [mtlAccumEncoder setBuffer: vizResultBuffer->_mtlBuffer
+                            offset: vizResultBuffer->_offset + query.second
                            atIndex: 1];
         [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
                         threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
@@ -1099,7 +1100,7 @@ void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQuery
 
     MVKQuerySpec querySpec;
     querySpec.set(pQueryPool, query);
-    _mtlRenderPassQueries.push_back(make_pair(querySpec, _mtlVisibilityResultOffset));
+    _mtlRenderPassQueries.push_back(make_pair(querySpec, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset));
 
     bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
     _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
@@ -1109,8 +1110,7 @@ void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQuery
 
 void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
 	_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
-	_mtlVisibilityResultOffset = min<NSUInteger>(_mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes,
-												 _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes);
+	_cmdEncoder->_pEncodingContext->incrementMTLVisibilityResultOffset(_cmdEncoder);
 	markDirty();
 }
 
@@ -1118,5 +1118,5 @@ void MVKOcclusionQueryCommandEncoderState::encodeImpl(uint32_t stage) {
 	if (stage != kMVKGraphicsStageRasterization) { return; }
 
 	[_cmdEncoder->_mtlRenderEncoder setVisibilityResultMode: _mtlVisibilityResultMode
-													 offset: _mtlVisibilityResultOffset];
+													 offset: _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset];
 }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index e9c92ee1..13796872 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -237,7 +237,7 @@ public:
 		}
 
 protected:
-	void submitCommandBuffers() override { for (auto& cb : _cmdBuffers) { cb->submit(this); } }
+	void submitCommandBuffers() override;
 
 	MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
 };
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 22bd1b22..286946a3 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -463,6 +463,13 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
 }
 
 
+template <size_t N>
+void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
+	MVKCommandEncodingContext encodingContext;
+	for (auto& cb : _cmdBuffers) { cb->submit(this, &encodingContext); }
+}
+
+
 #pragma mark -
 #pragma mark MVKQueuePresentSurfaceSubmission
 

From 53dde5718a38fa7d780736dcaa67a3db54f03f1f Mon Sep 17 00:00:00 2001
From: Bill Hollings <bill.hollings@brenwill.com>
Date: Mon, 21 Jun 2021 12:37:46 -0400
Subject: [PATCH 3/4] Syntactically simplify tracking of outstanding occlusion
 queries.

Add MVKOcclusionQueryCommandEncoderState::OcclusionQueryLocation to
more directly track occlusion query locations in temp visibility buffer.
---
 .../MoltenVK/Commands/MVKCommandEncoderState.h   | 12 +++++++++++-
 .../MoltenVK/Commands/MVKCommandEncoderState.mm  | 16 +++++-----------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index 0608f367..572a732d 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -614,7 +614,17 @@ public:
 protected:
     void encodeImpl(uint32_t) override;
 
-	MVKSmallVector<std::pair<MVKQuerySpec, NSUInteger>> _mtlRenderPassQueries;
+	typedef struct OcclusionQueryLocation {
+		MVKOcclusionQueryPool* queryPool = nullptr;
+		uint32_t query = 0;
+		NSUInteger visibilityBufferOffset = 0;
+
+		OcclusionQueryLocation(MVKOcclusionQueryPool* qPool, uint32_t qIdx, NSUInteger vbOfst)
+		: queryPool(qPool), query(qIdx), visibilityBufferOffset(vbOfst) {}
+
+	} OcclusionQueryLocation;
+
+	MVKSmallVector<OcclusionQueryLocation> _mtlRenderPassQueries;
     MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
 };
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 8f9ed367..2f9312b8 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -1080,14 +1080,13 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
 	id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
     id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
     [mtlAccumEncoder setComputePipelineState: mtlAccumState];
-    for (auto& query : _mtlRenderPassQueries) {
+    for (auto& qryLoc : _mtlRenderPassQueries) {
         // Accumulate the current results to the query pool's buffer.
-        auto* pQueryPool = (MVKOcclusionQueryPool*)query.first.queryPool;
-        [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
-                            offset: pQueryPool->getVisibilityResultOffset(query.first.query)
+        [mtlAccumEncoder setBuffer: qryLoc.queryPool->getVisibilityResultMTLBuffer()
+                            offset: qryLoc.queryPool->getVisibilityResultOffset(qryLoc.query)
                            atIndex: 0];
         [mtlAccumEncoder setBuffer: vizResultBuffer->_mtlBuffer
-                            offset: vizResultBuffer->_offset + query.second
+                            offset: vizResultBuffer->_offset + qryLoc.visibilityBufferOffset
                            atIndex: 1];
         [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
                         threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
@@ -1097,14 +1096,9 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
 }
 
 void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
-
-    MVKQuerySpec querySpec;
-    querySpec.set(pQueryPool, query);
-    _mtlRenderPassQueries.push_back(make_pair(querySpec, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset));
-
     bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
     _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
-
+	_mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset);
     markDirty();
 }
 

From 45ffcc9564d71fce38b04d74acfa713ef927947e Mon Sep 17 00:00:00 2001
From: Bill Hollings <bill.hollings@brenwill.com>
Date: Fri, 25 Jun 2021 08:46:52 -0400
Subject: [PATCH 4/4] Occlusion query fixes from PR review.

MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery() check for visibility
buffer exhaustion, and if needed, log an error and disable further visibility
tracking for the remainder of the current MTLCommandBuffer.
Create visibility buffer if needed during MVKCommandEncoder::beginMetalRenderPass.
Simplify MVKCommandEncodingContext to PODS.
---
 MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h |  8 +----
 .../MoltenVK/Commands/MVKCommandBuffer.mm     | 31 +++----------------
 .../Commands/MVKCommandEncoderState.mm        | 29 +++++++++++------
 3 files changed, 25 insertions(+), 43 deletions(-)

diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index fa3bb7f6..7d9c92f9 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -52,13 +52,7 @@ typedef uint64_t MVKMTLCommandBufferID;
 /** Context for tracking information across multiple encodings. */
 typedef struct MVKCommandEncodingContext {
 	NSUInteger mtlVisibilityResultOffset = 0;
-
-	void incrementMTLVisibilityResultOffset(MVKCommandEncoder* cmdEncoder);
-	const MVKMTLBufferAllocation* getVisibilityResultBuffer(MVKCommandEncoder* cmdEncoder);
-
-private:
-	const MVKMTLBufferAllocation* _visibilityResultBuffer = nullptr;
-
+	const MVKMTLBufferAllocation* visibilityResultBuffer = nullptr;
 } MVKCommandEncodingContext;
 
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 7225d938..18d24418 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -350,7 +350,10 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {
 												  _isRenderingEntireAttachment,
 												  loadOverride);
 	if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
-		mtlRPDesc.visibilityResultBuffer = _pEncodingContext->getVisibilityResultBuffer(this)->_mtlBuffer;
+		if ( !_pEncodingContext->visibilityResultBuffer ) {
+			_pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
+		}
+		mtlRPDesc.visibilityResultBuffer = _pEncodingContext->visibilityResultBuffer->_mtlBuffer;
 	}
 
 	VkExtent2D fbExtent = _framebufferExtent;
@@ -803,32 +806,6 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDevic
 }
 
 
-#pragma mark -
-#pragma mark MVKCommandEncodingContext
-
-// Increment to the next query slot offset. If we reach the size of the visibility buffer,
-// reset to retrieve and start filling another visibility buffer. This approach may still
-// cause Metal validation errors if the platform does not permit offsets to be reused
-// witin a MTLCommandBuffer, even when a different visibility buffer is used.
-// We don't test against the size of the visibility buffer itself, because this call may
-// arrive before the visibiltiy buffer in the case of a query that ends before the renderpass.
-void MVKCommandEncodingContext::incrementMTLVisibilityResultOffset(MVKCommandEncoder* cmdEncoder) {
-	mtlVisibilityResultOffset += kMVKQuerySlotSizeInBytes;
-
-	if (mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes > cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) {
-		_visibilityResultBuffer = nullptr;
-		mtlVisibilityResultOffset = 0;
-	}
-}
-
-const MVKMTLBufferAllocation* MVKCommandEncodingContext::getVisibilityResultBuffer(MVKCommandEncoder* cmdEncoder) {
-	if ( !_visibilityResultBuffer ) {
-		_visibilityResultBuffer = cmdEncoder->getTempMTLBuffer(cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
-	}
-	return _visibilityResultBuffer;
-}
-
-
 #pragma mark -
 #pragma mark Support functions
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 2f9312b8..8093cab2 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -1073,10 +1073,9 @@ void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(M
 #pragma mark MVKOcclusionQueryCommandEncoderState
 
 void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
+	const MVKMTLBufferAllocation* vizBuff = _cmdEncoder->_pEncodingContext->visibilityResultBuffer;
+    if ( !vizBuff || _mtlRenderPassQueries.empty() ) { return; }  // Nothing to do.
 
-    if (_mtlRenderPassQueries.empty()) { return; }  // Nothing to do.
-
-	const MVKMTLBufferAllocation* vizResultBuffer = _cmdEncoder->_pEncodingContext->getVisibilityResultBuffer(_cmdEncoder);
 	id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
     id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
     [mtlAccumEncoder setComputePipelineState: mtlAccumState];
@@ -1085,8 +1084,8 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
         [mtlAccumEncoder setBuffer: qryLoc.queryPool->getVisibilityResultMTLBuffer()
                             offset: qryLoc.queryPool->getVisibilityResultOffset(qryLoc.query)
                            atIndex: 0];
-        [mtlAccumEncoder setBuffer: vizResultBuffer->_mtlBuffer
-                            offset: vizResultBuffer->_offset + qryLoc.visibilityBufferOffset
+        [mtlAccumEncoder setBuffer: vizBuff->_mtlBuffer
+                            offset: vizBuff->_offset + qryLoc.visibilityBufferOffset
                            atIndex: 1];
         [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
                         threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
@@ -1095,16 +1094,28 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
     _mtlRenderPassQueries.clear();
 }
 
+// The Metal visibility buffer has a finite size, and on some Metal platforms (looking at you M1),
+// query offsets cannnot be reused with the same MTLCommandBuffer. If enough occlusion queries are
+// begun within a single MTLCommandBuffer, it may exhaust the visibility buffer. If that occurs,
+// report an error and disable further visibility tracking for the remainder of the MTLCommandBuffer.
+// In most cases, a MTLCommandBuffer corresponds to a Vulkan command submit (VkSubmitInfo),
+// and so the error text is framed in terms of the Vulkan submit.
 void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
-    bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
-    _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
-	_mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset);
+	if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) {
+		bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
+		_mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
+		_mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset);
+	} else {
+		reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize / kMVKQuerySlotSizeInBytes);
+		_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
+		_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset -= kMVKQuerySlotSizeInBytes;
+	}
     markDirty();
 }
 
 void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
 	_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
-	_cmdEncoder->_pEncodingContext->incrementMTLVisibilityResultOffset(_cmdEncoder);
+	_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset += kMVKQuerySlotSizeInBytes;
 	markDirty();
 }