Merge pull request #711 from cdavis5e/metal3-placement-heaps
Use placement heaps for VkDeviceMemory when possible.
This commit is contained in:
commit
69b238f95e
@ -542,6 +542,7 @@ typedef struct {
|
||||
VkBool32 postDepthCoverage; /**< If true, coverage masks in fragment shaders post-depth-test are supported. */
|
||||
VkBool32 native3DCompressedTextures; /**< If true, 3D compressed images are supported natively, without manual decompression. */
|
||||
VkBool32 nativeTextureSwizzle; /**< If true, component swizzle is supported natively, without manual swizzling in shaders. */
|
||||
VkBool32 placementHeaps; /**< If true, MTLHeap objects support placement of resources. */
|
||||
} MVKPhysicalDeviceMetalFeatures;
|
||||
|
||||
/**
|
||||
|
@ -69,10 +69,10 @@ public:
|
||||
#pragma mark Metal
|
||||
|
||||
/** Returns the Metal buffer underlying this memory allocation. */
|
||||
inline id<MTLBuffer> getMTLBuffer() { return _deviceMemory ? _deviceMemory->getMTLBuffer() : nullptr; }
|
||||
id<MTLBuffer> getMTLBuffer();
|
||||
|
||||
/** Returns the offset at which the contents of this instance starts within the underlying Metal buffer. */
|
||||
inline NSUInteger getMTLBufferOffset() { return _deviceMemoryOffset; }
|
||||
inline NSUInteger getMTLBufferOffset() { return _deviceMemory && _deviceMemory->getMTLHeap() ? 0 : _deviceMemoryOffset; }
|
||||
|
||||
|
||||
#pragma mark Construction
|
||||
@ -90,6 +90,7 @@ protected:
|
||||
VkBufferMemoryBarrier* pBufferMemoryBarrier);
|
||||
|
||||
VkBufferUsageFlags _usage;
|
||||
id<MTLBuffer> _mtlBuffer = nil;
|
||||
};
|
||||
|
||||
|
||||
|
@ -29,21 +29,28 @@ using namespace std;
|
||||
#pragma mark MVKBuffer
|
||||
|
||||
void MVKBuffer::propogateDebugName() {
|
||||
if (_debugName &&
|
||||
_deviceMemory &&
|
||||
if (!_debugName) { return; }
|
||||
if (_deviceMemory &&
|
||||
_deviceMemory->isDedicatedAllocation() &&
|
||||
_deviceMemory->_debugName.length == 0) {
|
||||
|
||||
_deviceMemory->setDebugName(_debugName.UTF8String);
|
||||
}
|
||||
setLabelIfNotNil(_mtlBuffer, _debugName);
|
||||
}
|
||||
|
||||
|
||||
#pragma mark Resource memory
|
||||
|
||||
VkResult MVKBuffer::getMemoryRequirements(VkMemoryRequirements* pMemoryRequirements) {
|
||||
pMemoryRequirements->size = getByteCount();
|
||||
pMemoryRequirements->alignment = _byteAlignment;
|
||||
if (_device->_pMetalFeatures->placementHeaps) {
|
||||
MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: getByteCount() options: MTLResourceStorageModePrivate];
|
||||
pMemoryRequirements->size = sizeAndAlign.size;
|
||||
pMemoryRequirements->alignment = sizeAndAlign.align;
|
||||
} else {
|
||||
pMemoryRequirements->size = getByteCount();
|
||||
pMemoryRequirements->alignment = _byteAlignment;
|
||||
}
|
||||
pMemoryRequirements->memoryTypeBits = _device->getPhysicalDevice()->getAllMemoryTypes();
|
||||
#if MVK_MACOS
|
||||
// Textures must not use shared memory
|
||||
@ -61,21 +68,15 @@ VkResult MVKBuffer::getMemoryRequirements(VkMemoryRequirements* pMemoryRequireme
|
||||
VkResult MVKBuffer::getMemoryRequirements(const void*, VkMemoryRequirements2* pMemoryRequirements) {
|
||||
pMemoryRequirements->sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
|
||||
getMemoryRequirements(&pMemoryRequirements->memoryRequirements);
|
||||
auto* next = (VkStructureType*)pMemoryRequirements->pNext;
|
||||
while (next) {
|
||||
switch (*next) {
|
||||
for (auto* next = (VkBaseOutStructure*)pMemoryRequirements->pNext; next; next = next->pNext) {
|
||||
switch (next->sType) {
|
||||
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
|
||||
auto* dedicatedReqs = (VkMemoryDedicatedRequirements*)next;
|
||||
// TODO: Maybe someday we could do something with MTLHeaps
|
||||
// and allocate non-dedicated memory from them. For now, we
|
||||
// always prefer dedicated allocations.
|
||||
dedicatedReqs->prefersDedicatedAllocation = VK_TRUE;
|
||||
dedicatedReqs->prefersDedicatedAllocation = VK_FALSE;
|
||||
dedicatedReqs->requiresDedicatedAllocation = VK_FALSE;
|
||||
next = (VkStructureType*)dedicatedReqs->pNext;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
next = (VkStructureType*)((VkMemoryRequirements2*)next)->pNext;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -134,6 +135,25 @@ bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask,
|
||||
}
|
||||
|
||||
|
||||
#pragma mark Metal
|
||||
|
||||
// Returns the MTLBuffer that backs this buffer, or nil if no device memory is bound.
//
// When the bound device memory exposes a MTLHeap, a MTLBuffer is created from that
// heap at this buffer's memory offset on first use, and is cached in _mtlBuffer for
// later calls. Otherwise, the MTLBuffer of the device memory itself is returned.
id<MTLBuffer> MVKBuffer::getMTLBuffer() {

	// A buffer created by an earlier call is reused as-is.
	if (_mtlBuffer != nil) { return _mtlBuffer; }

	// Nothing to return until device memory has been bound.
	if ( !_deviceMemory ) { return nil; }

	// Memory without a heap hands out its own MTLBuffer.
	id<MTLHeap> mtlHeap = _deviceMemory->getMTLHeap();
	if (mtlHeap == nil) { return _deviceMemory->getMTLBuffer(); }

	// Heap-backed memory: create the buffer inside the heap, at this resource's offset.
	_mtlBuffer = [mtlHeap newBufferWithLength: getByteCount()
									  options: _deviceMemory->getMTLResourceOptions()
									   offset: _deviceMemoryOffset];	// retained

	// Apply any debug name that was set before the MTLBuffer existed.
	propogateDebugName();

	return _mtlBuffer;
}
|
||||
|
||||
|
||||
#pragma mark Construction
|
||||
|
||||
MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) : MVKResource(device), _usage(pCreateInfo->usage) {
|
||||
@ -143,6 +163,7 @@ MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) :
|
||||
|
||||
// Detaches this buffer from its device memory, if any,
// and releases the MTLBuffer retained by this instance.
MVKBuffer::~MVKBuffer() {
	if (_deviceMemory != nullptr) {
		_deviceMemory->removeBuffer(this);
	}

	// Sending release to nil is a no-op, so no nil check is needed here.
	[_mtlBuffer release];
}
|
||||
|
||||
|
||||
|
@ -840,6 +840,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
|
||||
if ( mvkOSVersion() >= 13.0 ) {
|
||||
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
|
||||
_metalFeatures.placementHeaps = true;
|
||||
if ( getSupportsGPUFamily(MTLGPUFamilyApple4) ) {
|
||||
_metalFeatures.nativeTextureSwizzle = true;
|
||||
}
|
||||
@ -894,6 +895,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
_metalFeatures.native3DCompressedTextures = true;
|
||||
if ( getSupportsGPUFamily(MTLGPUFamilyMac2) ) {
|
||||
_metalFeatures.nativeTextureSwizzle = true;
|
||||
_metalFeatures.placementHeaps = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,6 +94,9 @@ public:
|
||||
/** Returns the Metal buffer underlying this memory allocation. */
|
||||
inline id<MTLBuffer> getMTLBuffer() { return _mtlBuffer; }
|
||||
|
||||
/** Returns the Metal heap underlying this memory allocation. */
|
||||
inline id<MTLHeap> getMTLHeap() { return _mtlHeap; }
|
||||
|
||||
/** Returns the Metal storage mode used by this memory allocation. */
|
||||
inline MTLStorageMode getMTLStorageMode() { return _mtlStorageMode; }
|
||||
|
||||
@ -123,6 +126,7 @@ protected:
|
||||
void removeBuffer(MVKBuffer* mvkBuff);
|
||||
VkResult addImage(MVKImage* mvkImg);
|
||||
void removeImage(MVKImage* mvkImg);
|
||||
bool ensureMTLHeap();
|
||||
bool ensureMTLBuffer();
|
||||
bool ensureHostMemory();
|
||||
void freeHostMemory();
|
||||
@ -135,6 +139,7 @@ protected:
|
||||
VkDeviceSize _mapOffset = 0;
|
||||
VkDeviceSize _mapSize = 0;
|
||||
id<MTLBuffer> _mtlBuffer = nil;
|
||||
id<MTLHeap> _mtlHeap = nil;
|
||||
void* _pMemory = nullptr;
|
||||
void* _pHostMemory = nullptr;
|
||||
bool _isMapped = false;
|
||||
|
@ -31,7 +31,10 @@ using namespace std;
|
||||
|
||||
#pragma mark MVKDeviceMemory
|
||||
|
||||
void MVKDeviceMemory::propogateDebugName() { setLabelIfNotNil(_mtlBuffer, _debugName); }
|
||||
// Applies the debug name of this instance as the label of each
// Metal object it holds. Objects that are nil are left alone.
void MVKDeviceMemory::propogateDebugName() {
	// The two labels are independent of each other.
	setLabelIfNotNil(_mtlBuffer, _debugName);
	setLabelIfNotNil(_mtlHeap, _debugName);
}
|
||||
|
||||
VkResult MVKDeviceMemory::map(VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData) {
|
||||
|
||||
@ -85,8 +88,11 @@ VkResult MVKDeviceMemory::flushToDevice(VkDeviceSize offset, VkDeviceSize size,
|
||||
}
|
||||
#endif
|
||||
|
||||
lock_guard<mutex> lock(_rezLock);
|
||||
for (auto& img : _images) { img->flushToDevice(offset, memSize); }
|
||||
// If we have an MTLHeap object, there's no need to sync memory manually between images and the buffer.
|
||||
if (!_mtlHeap) {
|
||||
lock_guard<mutex> lock(_rezLock);
|
||||
for (auto& img : _images) { img->flushToDevice(offset, memSize); }
|
||||
}
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
@ -94,7 +100,7 @@ VkResult MVKDeviceMemory::flushToDevice(VkDeviceSize offset, VkDeviceSize size,
|
||||
VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent) {
|
||||
// Coherent memory is flushed on unmap(), so it is only flushed if forced
|
||||
VkDeviceSize memSize = adjustMemorySize(size, offset);
|
||||
if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) ) {
|
||||
if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) && !_mtlHeap) {
|
||||
lock_guard<mutex> lock(_rezLock);
|
||||
for (auto& img : _images) { img->pullFromDevice(offset, memSize); }
|
||||
}
|
||||
@ -140,8 +146,7 @@ VkResult MVKDeviceMemory::addImage(MVKImage* mvkImg) {
|
||||
return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind VkImage %p to a VkDeviceMemory dedicated to resource %p. A dedicated allocation may only be used with the resource it was dedicated to.", mvkImg, getDedicatedResource() );
|
||||
}
|
||||
|
||||
if (!_isDedicated)
|
||||
_images.push_back(mvkImg);
|
||||
if (!_isDedicated) { _images.push_back(mvkImg); }
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
@ -151,6 +156,36 @@ void MVKDeviceMemory::removeImage(MVKImage* mvkImg) {
|
||||
mvkRemoveAllOccurances(_images, mvkImg);
|
||||
}
|
||||
|
||||
// Ensures that this instance is backed by a MTLHeap object,
// creating the MTLHeap if needed, and returns whether it was successful.
//
// Returns true without creating a heap when a heap is not applicable:
// placement heaps are unavailable, the allocation is empty, or (on macOS)
// the storage mode is not private. In those cases _mtlHeap remains nil.
// Returns false only when Metal fails to create the requested heap.
bool MVKDeviceMemory::ensureMTLHeap() {

	if (_mtlHeap) { return true; }

	// Don't bother if we don't have placement heaps.
	if (!getDevice()->_pMetalFeatures->placementHeaps) { return true; }

	// A zero-length heap cannot hold any resource. Rather than asking Metal for one
	// and reporting the resulting nil as an allocation failure, treat an empty
	// allocation like the other cases where a heap is simply not used.
	if (_allocationSize == 0) { return true; }

#if MVK_MACOS
	// MTLHeaps on Mac must use private storage for now.
	if (_mtlStorageMode != MTLStorageModePrivate) { return true; }
#endif

	MTLHeapDescriptor* heapDesc = [MTLHeapDescriptor new];		// temp retain
	heapDesc.type = MTLHeapTypePlacement;
	heapDesc.resourceOptions = getMTLResourceOptions();
	// For now, use tracked resources. Later, we should probably default
	// to untracked, since Vulkan uses explicit barriers anyway.
	heapDesc.hazardTrackingMode = MTLHazardTrackingModeTracked;
	heapDesc.size = _allocationSize;
	_mtlHeap = [_device->getMTLDevice() newHeapWithDescriptor: heapDesc];	// retained
	[heapDesc release];
	if (!_mtlHeap) { return false; }

	// Label the new heap with any debug name already set on this instance.
	propogateDebugName();

	return true;
}
|
||||
|
||||
// Ensures that this instance is backed by a MTLBuffer object,
|
||||
// creating the MTLBuffer if needed, and returns whether it was successful.
|
||||
bool MVKDeviceMemory::ensureMTLBuffer() {
|
||||
@ -162,12 +197,20 @@ bool MVKDeviceMemory::ensureMTLBuffer() {
|
||||
if (memLen > _device->_pMetalFeatures->maxMTLBufferSize) { return false; }
|
||||
|
||||
// If host memory was already allocated, it is copied into the new MTLBuffer, and then released.
|
||||
if (_pHostMemory) {
|
||||
if (_mtlHeap) {
|
||||
_mtlBuffer = [_mtlHeap newBufferWithLength: memLen options: getMTLResourceOptions() offset: 0]; // retained
|
||||
if (_pHostMemory) {
|
||||
memcpy(_mtlBuffer.contents, _pHostMemory, memLen);
|
||||
freeHostMemory();
|
||||
}
|
||||
[_mtlBuffer makeAliasable];
|
||||
} else if (_pHostMemory) {
|
||||
_mtlBuffer = [getMTLDevice() newBufferWithBytes: _pHostMemory length: memLen options: getMTLResourceOptions()]; // retained
|
||||
freeHostMemory();
|
||||
} else {
|
||||
_mtlBuffer = [getMTLDevice() newBufferWithLength: memLen options: getMTLResourceOptions()]; // retained
|
||||
}
|
||||
if (!_mtlBuffer) { return false; }
|
||||
_pMemory = isMemoryHostAccessible() ? _mtlBuffer.contents : nullptr;
|
||||
|
||||
propogateDebugName();
|
||||
@ -254,6 +297,15 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device,
|
||||
return;
|
||||
}
|
||||
|
||||
// If we can, create a MTLHeap. This should happen before creating the buffer
|
||||
// allowing us to map its contents.
|
||||
if (!dedicatedImage && !dedicatedBuffer) {
|
||||
if (!ensureMTLHeap()) {
|
||||
setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not allocate VkDeviceMemory of size %llu bytes.", _allocationSize));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If memory needs to be coherent it must reside in an MTLBuffer, since an open-ended map() must work.
|
||||
if (isMemoryHostCoherent() && !ensureMTLBuffer() ) {
|
||||
setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not allocate a host-coherent VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize));
|
||||
|
@ -275,6 +275,7 @@ protected:
|
||||
bool _usesTexelBuffer;
|
||||
bool _isLinear;
|
||||
bool _is3DCompressed;
|
||||
bool _isAliasable;
|
||||
};
|
||||
|
||||
|
||||
|
@ -192,21 +192,16 @@ VkResult MVKImage::getMemoryRequirements(VkMemoryRequirements* pMemoryRequiremen
|
||||
VkResult MVKImage::getMemoryRequirements(const void*, VkMemoryRequirements2* pMemoryRequirements) {
|
||||
pMemoryRequirements->sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
|
||||
getMemoryRequirements(&pMemoryRequirements->memoryRequirements);
|
||||
auto* next = (VkStructureType*)pMemoryRequirements->pNext;
|
||||
while (next) {
|
||||
switch (*next) {
|
||||
for (auto* next = (VkBaseOutStructure*)pMemoryRequirements->pNext; next; next = next->pNext) {
|
||||
switch (next->sType) {
|
||||
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
|
||||
auto* dedicatedReqs = (VkMemoryDedicatedRequirements*)next;
|
||||
// TODO: Maybe someday we could do something with MTLHeaps
|
||||
// and allocate non-dedicated memory from them. For now, we
|
||||
// always prefer dedicated allocations.
|
||||
dedicatedReqs->prefersDedicatedAllocation = VK_TRUE;
|
||||
bool writable = mvkIsAnyFlagEnabled(_usage, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
|
||||
dedicatedReqs->prefersDedicatedAllocation = !_usesTexelBuffer && (writable || !_device->_pMetalFeatures->placementHeaps);
|
||||
dedicatedReqs->requiresDedicatedAllocation = VK_FALSE;
|
||||
next = (VkStructureType*)dedicatedReqs->pNext;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
next = (VkStructureType*)((VkMemoryRequirements2*)next)->pNext;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -231,7 +226,7 @@ bool MVKImage::validateUseTexelBuffer() {
|
||||
bool isUncompressed = blockExt.width == 1 && blockExt.height == 1;
|
||||
|
||||
bool useTexelBuffer = _device->_pMetalFeatures->texelBuffers; // Texel buffers available
|
||||
useTexelBuffer = useTexelBuffer && isMemoryHostAccessible() && _isLinear && isUncompressed; // Applicable memory layout
|
||||
useTexelBuffer = useTexelBuffer && (isMemoryHostAccessible() || _device->_pMetalFeatures->placementHeaps) && _isLinear && isUncompressed; // Applicable memory layout
|
||||
useTexelBuffer = useTexelBuffer && _deviceMemory && _deviceMemory->_mtlBuffer; // Buffer is available to overlay
|
||||
|
||||
#if MVK_MACOS
|
||||
@ -352,6 +347,10 @@ id<MTLTexture> MVKImage::newMTLTexture() {
|
||||
mtlTex = [_deviceMemory->_mtlBuffer newTextureWithDescriptor: mtlTexDesc
|
||||
offset: getDeviceMemoryOffset()
|
||||
bytesPerRow: _subresources[0].layout.rowPitch];
|
||||
} else if (_deviceMemory->_mtlHeap) {
|
||||
mtlTex = [_deviceMemory->_mtlHeap newTextureWithDescriptor: mtlTexDesc
|
||||
offset: getDeviceMemoryOffset()];
|
||||
if (_isAliasable) [mtlTex makeAliasable];
|
||||
} else {
|
||||
mtlTex = [getMTLDevice() newTextureWithDescriptor: mtlTexDesc];
|
||||
}
|
||||
@ -628,11 +627,20 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV
|
||||
_canSupportMTLTextureView = !_isDepthStencilAttachment || _device->_pMetalFeatures->stencilViews;
|
||||
_hasExpectedTexelSize = (mvkMTLPixelFormatBytesPerBlock(_mtlPixelFormat) == mvkVkFormatBytesPerBlock(pCreateInfo->format));
|
||||
|
||||
// Calc _byteCount after _byteAlignment
|
||||
_byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
|
||||
for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
|
||||
_byteCount += getBytesPerLayer(mipLvl) * _extent.depth * _arrayLayers;
|
||||
}
|
||||
if (!_isLinear && _device->_pMetalFeatures->placementHeaps) {
|
||||
MTLTextureDescriptor *mtlTexDesc = newMTLTextureDescriptor(); // temp retain
|
||||
MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapTextureSizeAndAlignWithDescriptor: mtlTexDesc];
|
||||
[mtlTexDesc release];
|
||||
_byteCount = sizeAndAlign.size;
|
||||
_byteAlignment = sizeAndAlign.align;
|
||||
_isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
|
||||
} else {
|
||||
// Calc _byteCount after _byteAlignment
|
||||
_byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
|
||||
for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
|
||||
_byteCount += getBytesPerLayer(mipLvl) * _extent.depth * _arrayLayers;
|
||||
}
|
||||
}
|
||||
|
||||
initSubresources(pCreateInfo);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user