From 55d206e058bb17f2c6fab718c939119e1f1a30f3 Mon Sep 17 00:00:00 2001
From: Chip Davis <cdavis@codeweavers.com>
Date: Sun, 31 Jan 2021 20:47:32 -0600
Subject: [PATCH] MVKCommandEncoder: Use the temp buffer mechanism for
 visibility buffers.

Add support for "dedicated" temp buffers, where instead of allocating a
big buffer and carving regions out of it, a unique buffer is returned
for each allocation request. This is necessary for visibility buffers,
because the offset passed to `-[MTLRenderCommandEncoder
setVisibilityResultMode:offset:]` cannot exceed an
implementation-defined value, currently 256k less 8 bytes for Mac family
2 on Catalina and up, and on Apple family 7; and 64k less 8 bytes
otherwise.
---
 MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h   |  6 +++---
 MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm  | 17 ++++-------------
 .../MoltenVK/Commands/MVKCommandEncoderState.mm |  2 +-
 .../MoltenVK/Commands/MVKCommandEncodingPool.h  |  3 ++-
 .../MoltenVK/Commands/MVKCommandEncodingPool.mm |  8 ++++++--
 .../MoltenVK/Commands/MVKMTLBufferAllocation.h  |  4 ++--
 .../MoltenVK/Commands/MVKMTLBufferAllocation.mm |  8 ++++----
 7 files changed, 22 insertions(+), 26 deletions(-)
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index dd7df60a..8fef07aa 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -370,7 +370,7 @@ public:
     void setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder, const void* bytes, NSUInteger length, uint32_t mtlBuffIndex);
 
     /** Get a temporary MTLBuffer that will be returned to a pool after the command buffer is finished. */
-    const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length);
+    const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool dedicated = false);
 
     /** Returns the command encoding pool. */
     MVKCommandEncodingPool* getCommandEncodingPool();
@@ -415,8 +415,8 @@ public:
 	/** The current Metal render encoder. */
 	id<MTLRenderCommandEncoder> _mtlRenderEncoder;
 
-	/** The buffer used to hold occlusion query results in this render pass. */
-	id<MTLBuffer> _visibilityResultMTLBuffer;
+	/** The buffer used to hold occlusion query results in a render pass. */
+	const MVKMTLBufferAllocation* _visibilityResultMTLBuffer;
 
     /** Tracks the current graphics pipeline bound to the encoder. */
     MVKPipelineCommandEncoderState _graphicsPipelineState;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 6b41c3ad..392dd74f 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -336,18 +336,9 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {
     getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
     if (_occlusionQueryState.getNeedsVisibilityResultMTLBuffer()) {
         if (!_visibilityResultMTLBuffer) {
-            // Unfortunately, the temp buffer mechanism tends to allocate large buffers and return offsets into them.
-            // This won't work with visibility buffers, particularly if the offset is greater than the maximum supported
-            // by the device. So we can't use that.
-            // Use a local variable to make sure it gets copied.
-            id<MTLBuffer> visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: _pDeviceMetalFeatures->maxQueryBufferSize options: MTLResourceStorageModePrivate];    // not retained
-            [visibilityResultMTLBuffer setPurgeableState: MTLPurgeableStateVolatile];
-            [_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
-                [visibilityResultMTLBuffer release];
-            }];
-            _visibilityResultMTLBuffer = visibilityResultMTLBuffer;
+            _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true);
         }
-        mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer;
+        mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
     }
 
     VkExtent2D fbExtent = _framebuffer->getExtent2D();
@@ -655,8 +646,8 @@ void MVKCommandEncoder::setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder,
     }
 }
 
-const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length) {
-    const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length);
+const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isDedicated) {
+    const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isDedicated);
 	MVKMTLBufferAllocationPool* pool = mtlBuffAlloc->getPool();
 
     // Return the MTLBuffer allocation to the pool once the command buffer is done with it
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index df213b72..f69b03fe 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -929,7 +929,7 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
         [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
                             offset: pQueryPool->getVisibilityResultOffset(query.first.query)
                            atIndex: 0];
-        [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer
+        [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer->_mtlBuffer
                             offset: query.second
                            atIndex: 1];
         [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
index 3a255f01..adaa9e0a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -66,7 +66,7 @@ public:
      * To return the returned allocation back to the pool to be reused,
      * call the returnToPool() function on the returned allocation.
      */
-    const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length);
+    const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isDedicated = false);
 
 	/**
 	 * Returns a MTLRenderPipelineState dedicated to rendering to several attachments
@@ -153,6 +153,7 @@ protected:
     std::unordered_map<MVKBufferDescriptorData, MVKBuffer*> _transferBuffers;
     std::unordered_map<MVKBufferDescriptorData, MVKDeviceMemory*> _transferBufferMemory;
     MVKMTLBufferAllocator _mtlBufferAllocator;
+    MVKMTLBufferAllocator _dedicatedMtlBufferAllocator;
     id<MTLDepthStencilState> _cmdClearDepthOnlyDepthStencilState = nil;
     id<MTLDepthStencilState> _cmdClearStencilOnlyDepthStencilState = nil;
     id<MTLDepthStencilState> _cmdClearDepthAndStencilDepthStencilState = nil;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
index 03c25951..39efae65 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -77,7 +77,10 @@ id<MTLDepthStencilState> MVKCommandEncodingPool::getMTLDepthStencilState(bool us
 	MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil));
 }
 
-const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length) {
+const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isDedicated) {
+    if (isDedicated) {
+        return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length);
+    }
     return _mtlBufferAllocator.acquireMTLBufferRegion(length);
 }
 
@@ -159,7 +162,8 @@ void MVKCommandEncodingPool::clear() {
 #pragma mark Construction
 
 MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool),
-    _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true) {
+    _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true),
+    _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true) {
 }
 
 MVKCommandEncodingPool::~MVKCommandEncodingPool() {
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
index 310db4a6..84ca1216 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
@@ -86,7 +86,7 @@ public:
     MVKMTLBufferAllocation* newObject() override;
 
     /** Configures this instance to dispense MVKMTLBufferAllocation instances of the specified size. */
-    MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength);
+    MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated);
 
     ~MVKMTLBufferAllocationPool() override;
 
@@ -137,7 +137,7 @@ public:
      * next power-of-two value that is at least as big as the specified maximum size.
 	 * If makeThreadSafe is true, a lock will be applied when an allocation is acquired.
      */
-    MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false);
+    MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false);
 
     ~MVKMTLBufferAllocator() override;
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
index e1e9f7c2..b60b6409 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
@@ -50,11 +50,11 @@ void MVKMTLBufferAllocationPool::addMTLBuffer() {
 }
 
 
-MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength)
+MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated)
         : MVKObjectPool<MVKMTLBufferAllocation>(true) {
     _device = device;
     _allocationLength = allocationLength;
-    _mtlBufferLength = _allocationLength * calcMTLBufferAllocationCount();
+    _mtlBufferLength = _allocationLength * (isDedicated ? 1 : calcMTLBufferAllocationCount());
     _nextOffset = _mtlBufferLength;     // Force a MTLBuffer to be added on first access
 }
 
@@ -85,7 +85,7 @@ const MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUI
 	return _makeThreadSafe ? pRP->acquireObjectSafely() : pRP->acquireObject();
 }
 
-MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe) : MVKBaseDeviceObject(device) {
+MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated) : MVKBaseDeviceObject(device) {
     _maxAllocationLength = maxRegionLength;
 	_makeThreadSafe = makeThreadSafe;
 
@@ -96,7 +96,7 @@ MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRe
     _regionPools.reserve(maxP2Exp + 1);
     NSUInteger allocLen = 1;
     for (uint32_t p2Exp = 0; p2Exp <= maxP2Exp; p2Exp++) {
-        _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen));
+        _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, isDedicated));
         allocLen <<= 1;
     }
 }