Merge pull request #2217 from billhollings/avoid-managed-mem-on-apple-silicon

On macOS Apple Silicon, avoid managed-memory textures, and resource syncs.
This commit is contained in:
Bill Hollings 2024-04-30 20:00:26 -04:00 committed by GitHub
commit 0d62a427d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 84 additions and 74 deletions

View File

@ -482,9 +482,6 @@ static inline VkExtent3D mvkVkExtent3DFromMTLSize(MTLSize mtlSize) {
/** Macro indicating the Vulkan memory type bits corresponding to Metal memoryless memory (not host visible and lazily allocated). */
#define MVK_VK_MEMORY_TYPE_METAL_MEMORYLESS (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)
/** Returns the Metal storage mode corresponding to the specified Vulkan memory flags. */
MTLStorageMode mvkMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
/** Returns the Metal CPU cache mode corresponding to the specified Vulkan memory flags. */
MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);

View File

@ -81,7 +81,11 @@ VkResult MVKBuffer::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOf
#if MVK_MACOS
if (_deviceMemory) {
_isHostCoherentTexelBuffer = !_device->_pMetalFeatures->sharedLinearTextures && _deviceMemory->isMemoryHostCoherent() && mvkIsAnyFlagEnabled(_usage, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT);
_isHostCoherentTexelBuffer = (!isUnifiedMemoryGPU() &&
!_device->_pMetalFeatures->sharedLinearTextures &&
_deviceMemory->isMemoryHostCoherent() &&
mvkIsAnyFlagEnabled(_usage, (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)));
}
#endif
@ -118,7 +122,8 @@ void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier,
// buffer and host memory for the purpose of the host reading texture memory.
bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) {
#if MVK_MACOS
return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
return (!isUnifiedMemoryGPU() &&
mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
#else
@ -138,9 +143,7 @@ bool MVKBuffer::overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &o
return false;
}
#if MVK_MACOS
bool MVKBuffer::shouldFlushHostMemory() { return _isHostCoherentTexelBuffer; }
#endif
bool MVKBuffer::shouldFlushHostMemory() { return !isUnifiedMemoryGPU() && _isHostCoherentTexelBuffer; }
// Flushes the device memory at the specified memory range into the MTLBuffer.
VkResult MVKBuffer::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {

View File

@ -331,9 +331,6 @@ public:
*/
uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
/** Returns whether this is a unified memory device. */
bool getHasUnifiedMemory();
/** Returns the external memory properties supported for buffers for the handle type. */
VkExternalMemoryProperties& getExternalBufferProperties(VkExternalMemoryHandleTypeFlagBits handleType);
@ -363,6 +360,9 @@ public:
/** Returns whether native texture atomics are supported and should be used. */
bool useNativeTextureAtomics() { return _metalFeatures.nativeTextureAtomics; }
/** Returns the MTLStorageMode that matches the Vulkan memory property flags. */
MTLStorageMode getMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
#pragma mark Construction
@ -388,6 +388,7 @@ public:
protected:
friend class MVKDevice;
friend class MVKDeviceTrackingMixin;
void propagateDebugName() override {}
MTLFeatureSet getMaximalMTLFeatureSet();
@ -443,6 +444,8 @@ protected:
uint32_t _hostCoherentMemoryTypes;
uint32_t _privateMemoryTypes;
uint32_t _lazilyAllocatedMemoryTypes;
bool _hasUnifiedMemory = true;
bool _isAppleGPU = true;
};
@ -887,6 +890,8 @@ public:
}
protected:
friend class MVKDeviceTrackingMixin;
void propagateDebugName() override {}
MVKBuffer* addBuffer(MVKBuffer* mvkBuff);
MVKBuffer* removeBuffer(MVKBuffer* mvkBuff);
@ -956,6 +961,12 @@ public:
/** Returns the underlying Metal device. */
id<MTLDevice> getMTLDevice() { return _device->getMTLDevice(); }
/** Returns whether the GPU is a unified memory device. */
bool isUnifiedMemoryGPU() { return getPhysicalDevice()->_hasUnifiedMemory; }
/** Returns whether the GPU is Apple Silicon. */
bool isAppleGPU() { return getPhysicalDevice()->_isAppleGPU; }
/** Returns info about the pixel format supported by the physical device. */
MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }

View File

@ -1765,9 +1765,7 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
// wild temporary changes, particularly during initial queries before much GPU activity has occurred.
// On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes.
void MVKPhysicalDevice::updateTimestampPeriod() {
if (_properties.vendorID != kAppleVendorId &&
[_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) {
if ( !_isAppleGPU && [_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) {
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
@ -1804,7 +1802,7 @@ VkResult MVKPhysicalDevice::getMemoryProperties(VkPhysicalDeviceMemoryProperties
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
mvkClear(budgetProps->heapBudget, VK_MAX_MEMORY_HEAPS);
mvkClear(budgetProps->heapUsage, VK_MAX_MEMORY_HEAPS);
if (!getHasUnifiedMemory()) {
if ( !_hasUnifiedMemory ) {
budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
}
@ -1833,11 +1831,11 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
_supportedExtensions(this, true),
_pixelFormats(this) { // Set after _mtlDevice
initMTLDevice();
initProperties(); // Call first.
initMetalFeatures(); // Call second.
initFeatures(); // Call third.
initLimits(); // Call fourth.
initMTLDevice(); // Call first.
initProperties(); // Call second.
initMetalFeatures(); // Call third.
initFeatures(); // Call fourth.
initLimits(); // Call fifth.
initExtensions();
initMemoryProperties();
initExternalMemoryProperties();
@ -1847,12 +1845,21 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
}
void MVKPhysicalDevice::initMTLDevice() {
#if MVK_XCODE_14_3 && MVK_MACOS && !MVK_MACCAT
#if MVK_MACOS
_isAppleGPU = supportsMTLGPUFamily(Apple1);
// Apple Silicon would return false for isLowPower, but it always responds to
// hasUnifiedMemory, so the isLowPower fallback is never reached on Apple Silicon.
_hasUnifiedMemory = ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]
? _mtlDevice.hasUnifiedMemory : _mtlDevice.isLowPower);
#if MVK_XCODE_14_3 && !MVK_MACCAT
if ([_mtlDevice respondsToSelector: @selector(setShouldMaximizeConcurrentCompilation:)]) {
[_mtlDevice setShouldMaximizeConcurrentCompilation: getMVKConfig().shouldMaximizeConcurrentCompilation];
MVKLogInfoIf(getMVKConfig().debugMode, "maximumConcurrentCompilationTaskCount %lu", _mtlDevice.maximumConcurrentCompilationTaskCount);
}
#endif
#endif // MVK_MACOS
}
// Initializes the physical device properties (except limits).
@ -2968,16 +2975,14 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope
}
void MVKPhysicalDevice::initGPUInfoProperties() {
bool isIntegrated = getHasUnifiedMemory();
_properties.deviceType = isIntegrated ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
_properties.deviceType = _hasUnifiedMemory ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
// For Apple Silicon, the Device ID is determined by the highest
// GPU capability, which is a combination of OS version and GPU type.
// We determine Apple Silicon directly from the GPU, instead
// of from the build, in case we are running Rosetta2.
if (supportsMTLGPUFamily(Apple1)) {
if (_isAppleGPU) {
_properties.vendorID = kAppleVendorId;
_properties.deviceID = getHighestGPUCapability();
return;
@ -3012,9 +3017,9 @@ void MVKPhysicalDevice::initGPUInfoProperties() {
if (mvkGetEntryProperty(entry, CFSTR("class-code")) == 0x30000) { // 0x30000 : DISPLAY_VGA
// The Intel GPU will always be marked as integrated.
// Return on a match of either Intel && low power, or non-Intel and non-low-power.
// Return on a match of either Intel && unified memory, or non-Intel and non-unified memory.
uint32_t vendorID = mvkGetEntryProperty(entry, CFSTR("vendor-id"));
if ( (vendorID == kIntelVendorId) == isIntegrated) {
if ( (vendorID == kIntelVendorId) == _hasUnifiedMemory) {
isFound = true;
_properties.vendorID = vendorID;
_properties.deviceID = mvkGetEntryProperty(entry, CFSTR("device-id"));
@ -3168,7 +3173,7 @@ void MVKPhysicalDevice::initMemoryProperties() {
// Optional second heap for shared memory
uint32_t sharedHeapIdx;
VkMemoryPropertyFlags sharedTypePropFlags;
if (getHasUnifiedMemory()) {
if (_hasUnifiedMemory) {
// Shared memory goes in the single main heap in unified memory, and per Vulkan spec must be marked local
sharedHeapIdx = mainHeapIdx;
sharedTypePropFlags = MVK_VK_MEMORY_TYPE_METAL_SHARED | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
@ -3194,12 +3199,14 @@ void MVKPhysicalDevice::initMemoryProperties() {
setMemoryType(typeIdx, sharedHeapIdx, sharedTypePropFlags);
typeIdx++;
// Managed storage
// Managed storage. Not exposed as a memory type on Apple Silicon GPUs, which use Shared storage instead.
uint32_t managedBit = 0;
#if MVK_MACOS
if ( !_isAppleGPU ) {
managedBit = 1 << typeIdx;
setMemoryType(typeIdx, mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MANAGED);
typeIdx++;
}
#endif
// Memoryless storage
@ -3235,17 +3242,33 @@ void MVKPhysicalDevice::initMemoryProperties() {
_allMemoryTypes = privateBit | sharedBit | managedBit | memlessBit;
}
bool MVKPhysicalDevice::getHasUnifiedMemory() {
MVK_PUBLIC_SYMBOL MTLStorageMode MVKPhysicalDevice::getMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
// If not visible to the host, use Private, or Memoryless if available and lazily allocated.
if ( !mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ) {
#if MVK_APPLE_SILICON
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
return MTLStorageModeMemoryless;
}
#endif
return MTLStorageModePrivate;
}
// If visible to the host and coherent: Shared
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
return MTLStorageModeShared;
}
// If visible to the host, but not coherent: Managed on macOS with a non-Apple GPU,
// otherwise Shared (Apple Silicon on macOS, and all other platforms).
#if MVK_MACOS
return ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]
? _mtlDevice.hasUnifiedMemory : _mtlDevice.isLowPower);
return _isAppleGPU ? MTLStorageModeShared : MTLStorageModeManaged;
#else
return true;
return MTLStorageModeShared;
#endif
}
uint64_t MVKPhysicalDevice::getVRAMSize() {
if (getHasUnifiedMemory()) {
if (_hasUnifiedMemory) {
return mvkGetSystemMemorySize();
} else {
// There's actually no way to query the total physical VRAM on the device in Metal.
@ -3408,7 +3431,7 @@ void MVKPhysicalDevice::initVkSemaphoreStyle() {
switch (getMVKConfig().semaphoreSupportStyle) {
case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE: {
bool isNVIDIA = _properties.vendorID == kNVVendorId;
bool isRosetta2 = _properties.vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
bool isRosetta2 = _isAppleGPU && !MVK_APPLE_SILICON;
if (_metalFeatures.events && !(isRosetta2 || isNVIDIA)) { _vkSemaphoreStyle = MVKSemaphoreStyleUseMTLEvent; }
break;
}

View File

@ -84,7 +84,7 @@ VkResult MVKDeviceMemory::flushToDevice(VkDeviceSize offset, VkDeviceSize size)
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
#if MVK_MACOS
if (_mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
if ( !isUnifiedMemoryGPU() && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
[_mtlBuffer didModifyRange: NSMakeRange(offset, memSize)];
}
#endif
@ -106,7 +106,7 @@ VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset,
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
#if MVK_MACOS
if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
if ( !isUnifiedMemoryGPU() && pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseInvalidateMappedMemoryRanges); }
if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; }
[pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer];
@ -285,7 +285,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device,
// Set Metal memory parameters
_vkMemAllocFlags = 0;
_vkMemPropFlags = _device->_pMemoryProperties->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;
_mtlStorageMode = mvkMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
_mtlStorageMode = getPhysicalDevice()->getMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
_mtlCPUCacheMode = mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
_allocationSize = pAllocateInfo->allocationSize;

View File

@ -469,7 +469,7 @@ void MVKImageMemoryBinding::propagateDebugName() {
// texture and host memory for the purpose of the host reading texture memory.
bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) {
#if MVK_MACOS
return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
return ( !isUnifiedMemoryGPU() && (barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
isMemoryHostAccessible() && (!_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent()));
#else
@ -479,8 +479,8 @@ bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) {
bool MVKImageMemoryBinding::shouldFlushHostMemory() { return isMemoryHostAccessible() && (!_mtlTexelBuffer || _ownsTexelBuffer); }
// Flushes the device memory at the specified memory range into the MTLTexture. Updates
// all subresources that overlap the specified range and are in an updatable layout state.
// Flushes the memory at the specified memory range into the MTLTexture.
// Updates all subresources that overlap the specified range and are in an updatable layout state.
VkResult MVKImageMemoryBinding::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
if (shouldFlushHostMemory()) {
for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) {
@ -501,7 +501,7 @@ VkResult MVKImageMemoryBinding::flushToDevice(VkDeviceSize offset, VkDeviceSize
return VK_SUCCESS;
}
// Pulls content from the MTLTexture into the device memory at the specified memory range.
// Pulls content from the MTLTexture into memory at the specified memory range.
// Pulls from all subresources that overlap the specified range and are in an updatable layout state.
VkResult MVKImageMemoryBinding::pullFromDevice(VkDeviceSize offset, VkDeviceSize size) {
if (shouldFlushHostMemory()) {
@ -715,7 +715,7 @@ VkResult MVKImage::copyImageToMemory(const VkCopyImageToMemoryInfoEXT* pCopyImag
#if MVK_MACOS
// On macOS, if the device doesn't have unified memory, and the texture is using managed memory, we need
// to sync the managed memory from the GPU, so the texture content is accessible to be copied by the CPU.
if ( !getPhysicalDevice()->getHasUnifiedMemory() && getMTLStorageMode() == MTLStorageModeManaged ) {
if ( !isUnifiedMemoryGPU() && getMTLStorageMode() == MTLStorageModeManaged ) {
@autoreleasepool {
id<MTLCommandBuffer> mtlCmdBuff = getDevice()->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseCopyImageToMemory);
id<MTLBlitCommandEncoder> mtlBlitEnc = [mtlCmdBuff blitCommandEncoder];
@ -858,9 +858,9 @@ VkResult MVKImage::getMemoryRequirements(VkMemoryRequirements* pMemoryRequiremen
pMemoryRequirements->memoryTypeBits = (_isDepthStencilAttachment)
? mvkPD->getPrivateMemoryTypes()
: mvkPD->getAllMemoryTypes();
// Metal on non-Apple GPUs does not provide native support for host-coherent memory, but Vulkan requires it for Linear images
#if MVK_MACOS
// Metal on macOS does not provide native support for host-coherent memory, but Vulkan requires it for Linear images
if ( !_isLinear ) {
if ( !isAppleGPU() && !_isLinear ) {
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getHostCoherentMemoryTypes());
}
#endif
@ -1052,6 +1052,7 @@ MTLStorageMode MVKImage::getMTLStorageMode() {
#if MVK_MACOS
// For macOS prior to 10.15.5, textures cannot use Shared storage mode, so change to Managed storage mode.
// All Apple GPUs support shared linear textures, so this only applies to other GPUs.
if (stgMode == MTLStorageModeShared && !_device->_pMetalFeatures->sharedLinearTextures) {
stgMode = MTLStorageModeManaged;
}

View File

@ -882,31 +882,6 @@ MVK_PUBLIC_SYMBOL CGRect mvkCGRectFromVkRectLayerKHR(VkRectLayerKHR vkRect) {
#pragma mark -
#pragma mark Memory options
MVK_PUBLIC_SYMBOL MTLStorageMode mvkMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
// If not visible to the host, use Private, or Memoryless if available and lazily allocated.
if ( !mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ) {
#if MVK_APPLE_SILICON
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
return MTLStorageModeMemoryless;
}
#endif
return MTLStorageModePrivate;
}
// If visible to the host and coherent: Shared
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
return MTLStorageModeShared;
}
// If visible to the host, and not coherent: Managed on macOS, Shared on iOS
#if MVK_MACOS
return MTLStorageModeManaged;
#else
return MTLStorageModeShared;
#endif
}
MVK_PUBLIC_SYMBOL MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
return MTLCPUCacheModeDefaultCache;
}