MVKMTLBufferAllocation: Support private temp buffers.

Some buffers, particularly those used by tessellation, indirect
multiview, or occlusion queries, are never written or read from the
host. Keeping the relevant data in VRAM should improve performance by
reducing the need for the GPU to make expensive accesses over the PCI
bus.

VRAM is a scarce resource on discrete GPUs, but this is part of why we
marked the buffers volatile.
This commit is contained in:
Chip Davis 2021-02-01 13:46:03 -06:00
parent 3058a130d1
commit c225c423ba
7 changed files with 44 additions and 36 deletions

View File

@ -141,7 +141,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
@ -171,18 +171,18 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
@ -340,7 +340,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
@ -373,18 +373,18 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
@ -554,21 +554,21 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlIndBuffOfst = tempIndirectBuff->_offset;
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
mtlParmBuffOfst = tcParamsBuff->_offset;
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
}
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth;
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
@ -580,7 +580,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
} else if (needsInstanceAdjustment) {
// In this case, we need to adjust the instance count for the views being drawn.
VkDeviceSize indirectSize = sizeof(MTLDrawPrimitivesIndirectArguments) * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlIndBuffOfst = tempIndirectBuff->_offset;
}
@ -869,22 +869,22 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlTempIndBuffOfst = tempIndirectBuff->_offset;
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
mtlParmBuffOfst = tcParamsBuff->_offset;
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
}
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length);
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true);
id<MTLComputePipelineState> vtxState;
vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
@ -899,7 +899,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
} else if (needsInstanceAdjustment) {
// In this case, we need to adjust the instance count for the views being drawn.
VkDeviceSize indirectSize = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlTempIndBuffOfst = tempIndirectBuff->_offset;
}

View File

@ -370,7 +370,7 @@ public:
void setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder, const void* bytes, NSUInteger length, uint32_t mtlBuffIndex);
/** Get a temporary MTLBuffer that will be returned to a pool after the command buffer is finished. */
const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool dedicated = false);
const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool isPrivate = false, bool isDedicated = false);
/** Returns the command encoding pool. */
MVKCommandEncodingPool* getCommandEncodingPool();

View File

@ -337,7 +337,7 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {
getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
if (!_visibilityResultMTLBuffer) {
_visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true);
_visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
}
mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
}
@ -647,8 +647,8 @@ void MVKCommandEncoder::setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder,
}
}
const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isDedicated) {
const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isDedicated);
const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isPrivate, bool isDedicated) {
const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isPrivate, isDedicated);
MVKMTLBufferAllocationPool* pool = mtlBuffAlloc->getPool();
// Return the MTLBuffer allocation to the pool once the command buffer is done with it

View File

@ -66,7 +66,7 @@ public:
* To return the returned allocation back to the pool to be reused,
* call the returnToPool() function on the returned allocation.
*/
const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isDedicated = false);
const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isPrivate = false, bool isDedicated = false);
/**
* Returns a MTLRenderPipelineState dedicated to rendering to several attachments
@ -153,6 +153,7 @@ protected:
std::unordered_map<MVKBufferDescriptorData, MVKBuffer*> _transferBuffers;
std::unordered_map<MVKBufferDescriptorData, MVKDeviceMemory*> _transferBufferMemory;
MVKMTLBufferAllocator _mtlBufferAllocator;
MVKMTLBufferAllocator _privateMtlBufferAllocator;
MVKMTLBufferAllocator _dedicatedMtlBufferAllocator;
id<MTLDepthStencilState> _cmdClearDepthOnlyDepthStencilState = nil;
id<MTLDepthStencilState> _cmdClearStencilOnlyDepthStencilState = nil;

View File

@ -77,10 +77,14 @@ id<MTLDepthStencilState> MVKCommandEncodingPool::getMTLDepthStencilState(bool us
MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil));
}
const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isDedicated) {
const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isPrivate, bool isDedicated) {
MVKAssert(isPrivate || !isDedicated, "Dedicated, host-shared temporary buffers are not supported.");
if (isDedicated) {
return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length);
}
if (isPrivate) {
return _privateMtlBufferAllocator.acquireMTLBufferRegion(length);
}
return _mtlBufferAllocator.acquireMTLBufferRegion(length);
}
@ -163,7 +167,8 @@ void MVKCommandEncodingPool::clear() {
MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool),
_mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true),
_dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true) {
_privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true, false, MTLStorageModePrivate),
_dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true, MTLStorageModePrivate) {
}
MVKCommandEncodingPool::~MVKCommandEncodingPool() {

View File

@ -86,7 +86,7 @@ public:
MVKMTLBufferAllocation* newObject() override;
/** Configures this instance to dispense MVKMTLBufferAllocation instances of the specified size. */
MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated);
MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated);
~MVKMTLBufferAllocationPool() override;
@ -97,6 +97,7 @@ protected:
NSUInteger _nextOffset;
NSUInteger _allocationLength;
NSUInteger _mtlBufferLength;
MTLStorageMode _mtlStorageMode;
MVKSmallVector<id<MTLBuffer>, 64> _mtlBuffers;
MVKDevice* _device;
};
@ -137,7 +138,7 @@ public:
* next power-of-two value that is at least as big as the specified maximum size.
* If makeThreadSafe is true, a lock will be applied when an allocation is acquired.
*/
MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false);
MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false, MTLStorageMode mtlStorageMode = MTLStorageModeShared);
~MVKMTLBufferAllocator() override;

View File

@ -44,17 +44,18 @@ MVKMTLBufferAllocation* MVKMTLBufferAllocationPool::newObject() {
// Adds a new MTLBuffer to the buffer pool and resets the next offset to the start of it
void MVKMTLBufferAllocationPool::addMTLBuffer() {
MTLResourceOptions mbOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
MTLResourceOptions mbOpts = (_mtlStorageMode << MTLResourceStorageModeShift) | MTLResourceCPUCacheModeDefaultCache;
_mtlBuffers.push_back([_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts]);
_nextOffset = 0;
}
MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated)
MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, MTLStorageMode mtlStorageMode, bool isDedicated)
: MVKObjectPool<MVKMTLBufferAllocation>(true) {
_device = device;
_allocationLength = allocationLength;
_mtlBufferLength = _allocationLength * (isDedicated ? 1 : calcMTLBufferAllocationCount());
_mtlStorageMode = mtlStorageMode;
_nextOffset = _mtlBufferLength; // Force a MTLBuffer to be added on first access
}
@ -89,7 +90,7 @@ const MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUI
return region;
}
MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated) : MVKBaseDeviceObject(device) {
MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated, MTLStorageMode mtlStorageMode) : MVKBaseDeviceObject(device) {
_maxAllocationLength = maxRegionLength;
_makeThreadSafe = makeThreadSafe;
@ -100,7 +101,7 @@ MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRe
_regionPools.reserve(maxP2Exp + 1);
NSUInteger allocLen = 1;
for (uint32_t p2Exp = 0; p2Exp <= maxP2Exp; p2Exp++) {
_regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, isDedicated));
_regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, mtlStorageMode, isDedicated));
allocLen <<= 1;
}
}