From 55d206e058bb17f2c6fab718c939119e1f1a30f3 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Sun, 31 Jan 2021 20:47:32 -0600 Subject: [PATCH] MVKCommandEncoder: Use the temp buffer mechanism for visibility buffers. Add support for "dedicated" temp buffers, where instead of allocating a big buffer and carving regions out of it, a unique buffer is returned for each allocation request. This is necessary for visibility buffers, because the offset passed to `-[MTLRenderCommandEncoder setVisibilityResultMode:offset:]` cannot exceed an implementation-defined value, currently 256k less 8 bytes for Mac family 2 on Catalina and up, and on Apple family 7; and 64k less 8 bytes otherwise. --- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 6 +++--- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm | 17 ++++------------- .../MoltenVK/Commands/MVKCommandEncoderState.mm | 2 +- .../MoltenVK/Commands/MVKCommandEncodingPool.h | 3 ++- .../MoltenVK/Commands/MVKCommandEncodingPool.mm | 8 ++++++-- .../MoltenVK/Commands/MVKMTLBufferAllocation.h | 4 ++-- .../MoltenVK/Commands/MVKMTLBufferAllocation.mm | 8 ++++---- 7 files changed, 22 insertions(+), 26 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index dd7df60a..8fef07aa 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -370,7 +370,7 @@ public: void setComputeBytes(id mtlEncoder, const void* bytes, NSUInteger length, uint32_t mtlBuffIndex); /** Get a temporary MTLBuffer that will be returned to a pool after the command buffer is finished. */ - const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length); + const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool dedicated = false); /** Returns the command encoding pool. */ MVKCommandEncodingPool* getCommandEncodingPool(); @@ -415,8 +415,8 @@ public: /** The current Metal render encoder. */ id _mtlRenderEncoder; - /** The buffer used to hold occlusion query results in this render pass. */ - id _visibilityResultMTLBuffer; + /** The buffer used to hold occlusion query results in a render pass. */ + const MVKMTLBufferAllocation* _visibilityResultMTLBuffer; /** Tracks the current graphics pipeline bound to the encoder. */ MVKPipelineCommandEncoderState _graphicsPipelineState; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 6b41c3ad..392dd74f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -336,18 +336,9 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) { getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride); if (_occlusionQueryState.getNeedsVisibilityResultMTLBuffer()) { if (!_visibilityResultMTLBuffer) { - // Unfortunately, the temp buffer mechanism tends to allocate large buffers and return offsets into them. - // This won't work with visibility buffers, particularly if the offset is greater than the maximum supported - // by the device. So we can't use that. - // Use a local variable to make sure it gets copied. - id visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: _pDeviceMetalFeatures->maxQueryBufferSize options: MTLResourceStorageModePrivate]; // not retained - [visibilityResultMTLBuffer setPurgeableState: MTLPurgeableStateVolatile]; - [_mtlCmdBuffer addCompletedHandler: ^(id) { - [visibilityResultMTLBuffer release]; - }]; - _visibilityResultMTLBuffer = visibilityResultMTLBuffer; + _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true); } - mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer; + mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer; } VkExtent2D fbExtent = _framebuffer->getExtent2D(); @@ -655,8 +646,8 @@ void MVKCommandEncoder::setComputeBytes(id mtlEncoder, } } -const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length) { - const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length); +const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isDedicated) { + const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isDedicated); MVKMTLBufferAllocationPool* pool = mtlBuffAlloc->getPool(); // Return the MTLBuffer allocation to the pool once the command buffer is done with it diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index df213b72..f69b03fe 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -929,7 +929,7 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() { [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer() offset: pQueryPool->getVisibilityResultOffset(query.first.query) atIndex: 0]; - [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer + [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer->_mtlBuffer offset: query.second atIndex: 1]; [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h index 3a255f01..adaa9e0a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h @@ -66,7 +66,7 @@ public: * To return the returned allocation back to the pool to be reused, * call the returnToPool() function on the returned allocation. */ - const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length); + const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isDedicated = false); /** * Returns a MTLRenderPipelineState dedicated to rendering to several attachments @@ -153,6 +153,7 @@ protected: std::unordered_map _transferBuffers; std::unordered_map _transferBufferMemory; MVKMTLBufferAllocator _mtlBufferAllocator; + MVKMTLBufferAllocator _dedicatedMtlBufferAllocator; id _cmdClearDepthOnlyDepthStencilState = nil; id _cmdClearStencilOnlyDepthStencilState = nil; id _cmdClearDepthAndStencilDepthStencilState = nil; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm index 03c25951..39efae65 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm @@ -77,7 +77,10 @@ id MVKCommandEncodingPool::getMTLDepthStencilState(bool us MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil)); } -const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length) { +const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isDedicated) { + if (isDedicated) { + return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length); + } return _mtlBufferAllocator.acquireMTLBufferRegion(length); } @@ -159,7 +162,8 @@ void MVKCommandEncodingPool::clear() { #pragma mark Construction MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool), - _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true) { + _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true), + _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true) { } MVKCommandEncodingPool::~MVKCommandEncodingPool() { diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h index 310db4a6..84ca1216 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h @@ -86,7 +86,7 @@ public: MVKMTLBufferAllocation* newObject() override; /** Configures this instance to dispense MVKMTLBufferAllocation instances of the specified size. */ - MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength); + MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated); ~MVKMTLBufferAllocationPool() override; @@ -137,7 +137,7 @@ public: * next power-of-two value that is at least as big as the specified maximum size. * If makeThreadSafe is true, a lock will be applied when an allocation is acquired. */ - MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false); + MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false); ~MVKMTLBufferAllocator() override; diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm index e1e9f7c2..b60b6409 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm @@ -50,11 +50,11 @@ void MVKMTLBufferAllocationPool::addMTLBuffer() { } -MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength) +MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated) : MVKObjectPool(true) { _device = device; _allocationLength = allocationLength; - _mtlBufferLength = _allocationLength * calcMTLBufferAllocationCount(); + _mtlBufferLength = _allocationLength * (isDedicated ? 1 : calcMTLBufferAllocationCount()); _nextOffset = _mtlBufferLength; // Force a MTLBuffer to be added on first access } @@ -85,7 +85,7 @@ const MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUI return _makeThreadSafe ? pRP->acquireObjectSafely() : pRP->acquireObject(); } -MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe) : MVKBaseDeviceObject(device) { +MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated) : MVKBaseDeviceObject(device) { _maxAllocationLength = maxRegionLength; _makeThreadSafe = makeThreadSafe; @@ -96,7 +96,7 @@ MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRe _regionPools.reserve(maxP2Exp + 1); NSUInteger allocLen = 1; for (uint32_t p2Exp = 0; p2Exp <= maxP2Exp; p2Exp++) { - _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen)); + _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, isDedicated)); allocLen <<= 1; } }