On macOS Apple Silicon, avoid managed-memory textures and resource syncs.

Like their iOS/tvOS counterparts, macOS Apple Silicon GPUs support
using Shared memory for textures, and do not require resource
synchronization, even with Managed memory. This change treats
macOS Apple Silicon the same as iOS & tvOS.

- MVKPhysicalDevice: add _hasUnifiedMemory & _isAppleGPU flags.
- MVKDeviceTrackingMixin: add isUnifiedMemoryGPU() & isAppleGPU().
- Do not advertise host-visible-but-not-host-coherent
  Vulkan memory type on macOS Apple Silicon.
- Replace mvkMTLStorageModeFromVkMemoryPropertyFlags() with
  MVKPhysicalDevice::getMTLStorageModeFromVkMemoryPropertyFlags(),
  and return Shared instead of Managed for Apple Silicon,
  even if coherency is not requested.
- On unified memory devices, avoid needless calls to didModifyRange:,
  synchronizeResource:, and synchronizeTexture:slice:level:.
This commit is contained in:
Bill Hollings 2024-04-25 10:31:25 -04:00
parent 6c68ba1e0c
commit 607aaff4c1
7 changed files with 84 additions and 74 deletions

View File

@ -482,9 +482,6 @@ static inline VkExtent3D mvkVkExtent3DFromMTLSize(MTLSize mtlSize) {
/** Macro indicating the Vulkan memory type bits corresponding to Metal memoryless memory (not host visible and lazily allocated). */
#define MVK_VK_MEMORY_TYPE_METAL_MEMORYLESS (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)
/** Returns the Metal storage mode corresponding to the specified Vulkan memory flags. */
MTLStorageMode mvkMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
/** Returns the Metal CPU cache mode corresponding to the specified Vulkan memory flags. */
MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);

View File

@ -81,7 +81,11 @@ VkResult MVKBuffer::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOf
#if MVK_MACOS
if (_deviceMemory) {
_isHostCoherentTexelBuffer = !_device->_pMetalFeatures->sharedLinearTextures && _deviceMemory->isMemoryHostCoherent() && mvkIsAnyFlagEnabled(_usage, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT);
_isHostCoherentTexelBuffer = (!isUnifiedMemoryGPU() &&
!_device->_pMetalFeatures->sharedLinearTextures &&
_deviceMemory->isMemoryHostCoherent() &&
mvkIsAnyFlagEnabled(_usage, (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)));
}
#endif
@ -118,7 +122,8 @@ void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier,
// buffer and host memory for the purpose of the host reading texture memory.
bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) {
#if MVK_MACOS
return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
return (!isUnifiedMemoryGPU() &&
mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
#else
@ -138,9 +143,7 @@ bool MVKBuffer::overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &o
return false;
}
#if MVK_MACOS
bool MVKBuffer::shouldFlushHostMemory() { return _isHostCoherentTexelBuffer; }
#endif
bool MVKBuffer::shouldFlushHostMemory() { return !isUnifiedMemoryGPU() && _isHostCoherentTexelBuffer; }
// Flushes the device memory at the specified memory range into the MTLBuffer.
VkResult MVKBuffer::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {

View File

@ -331,9 +331,6 @@ public:
*/
uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
/** Returns whether this is a unified memory device. */
bool getHasUnifiedMemory();
/** Returns the external memory properties supported for buffers for the handle type. */
VkExternalMemoryProperties& getExternalBufferProperties(VkExternalMemoryHandleTypeFlagBits handleType);
@ -363,6 +360,9 @@ public:
/** Returns whether native texture atomics are supported and should be used. */
bool useNativeTextureAtomics() { return _metalFeatures.nativeTextureAtomics; }
/** Returns the MTLStorageMode that matches the Vulkan memory property flags. */
MTLStorageMode getMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
#pragma mark Construction
@ -388,6 +388,7 @@ public:
protected:
friend class MVKDevice;
friend class MVKDeviceTrackingMixin;
void propagateDebugName() override {}
MTLFeatureSet getMaximalMTLFeatureSet();
@ -443,6 +444,8 @@ protected:
uint32_t _hostCoherentMemoryTypes;
uint32_t _privateMemoryTypes;
uint32_t _lazilyAllocatedMemoryTypes;
bool _hasUnifiedMemory = true;
bool _isAppleGPU = true;
};
@ -887,6 +890,8 @@ public:
}
protected:
friend class MVKDeviceTrackingMixin;
void propagateDebugName() override {}
MVKBuffer* addBuffer(MVKBuffer* mvkBuff);
MVKBuffer* removeBuffer(MVKBuffer* mvkBuff);
@ -956,6 +961,12 @@ public:
/** Returns the underlying Metal device. */
id<MTLDevice> getMTLDevice() { return _device->getMTLDevice(); }
/** Returns whether the GPU is a unified memory device. */
bool isUnifiedMemoryGPU() { return getPhysicalDevice()->_hasUnifiedMemory; }
/** Returns whether the GPU is Apple Silicon. */
bool isAppleGPU() { return getPhysicalDevice()->_isAppleGPU; }
/** Returns info about the pixel format supported by the physical device. */
MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }

View File

@ -1765,9 +1765,7 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
// wild temporary changes, particularly during initial queries before much GPU activity has occurred.
// On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes.
void MVKPhysicalDevice::updateTimestampPeriod() {
if (_properties.vendorID != kAppleVendorId &&
[_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) {
if ( !_isAppleGPU && [_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) {
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
@ -1804,7 +1802,7 @@ VkResult MVKPhysicalDevice::getMemoryProperties(VkPhysicalDeviceMemoryProperties
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
mvkClear(budgetProps->heapBudget, VK_MAX_MEMORY_HEAPS);
mvkClear(budgetProps->heapUsage, VK_MAX_MEMORY_HEAPS);
if (!getHasUnifiedMemory()) {
if ( !_hasUnifiedMemory ) {
budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
}
@ -1833,11 +1831,11 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
_supportedExtensions(this, true),
_pixelFormats(this) { // Set after _mtlDevice
initMTLDevice();
initProperties(); // Call first.
initMetalFeatures(); // Call second.
initFeatures(); // Call third.
initLimits(); // Call fourth.
initMTLDevice(); // Call first.
initProperties(); // Call second.
initMetalFeatures(); // Call third.
initFeatures(); // Call fourth.
initLimits(); // Call fifth.
initExtensions();
initMemoryProperties();
initExternalMemoryProperties();
@ -1847,12 +1845,21 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
}
// Caches GPU traits queried from the MTLDevice (_isAppleGPU, _hasUnifiedMemory),
// and applies device-level configuration. The platform-specific work is guarded by MVK_MACOS.
void MVKPhysicalDevice::initMTLDevice() {
#if MVK_XCODE_14_3 && MVK_MACOS && !MVK_MACCAT
#if MVK_MACOS
_isAppleGPU = supportsMTLGPUFamily(Apple1);
// Prefer hasUnifiedMemory when the device responds to it, and fall back to isLowPower otherwise.
// Apple Silicon would report false for isLowPower, but is not expected to reach that fallback.
_hasUnifiedMemory = ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]
? _mtlDevice.hasUnifiedMemory : _mtlDevice.isLowPower);
#if MVK_XCODE_14_3 && !MVK_MACCAT
// If the device supports it, apply the configured concurrent-compilation setting,
// and log the resulting task count when debug mode is enabled.
if ([_mtlDevice respondsToSelector: @selector(setShouldMaximizeConcurrentCompilation:)]) {
[_mtlDevice setShouldMaximizeConcurrentCompilation: getMVKConfig().shouldMaximizeConcurrentCompilation];
MVKLogInfoIf(getMVKConfig().debugMode, "maximumConcurrentCompilationTaskCount %lu", _mtlDevice.maximumConcurrentCompilationTaskCount);
}
#endif
#endif // MVK_MACOS
}
// Initializes the physical device properties (except limits).
@ -2968,16 +2975,14 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope
}
void MVKPhysicalDevice::initGPUInfoProperties() {
bool isIntegrated = getHasUnifiedMemory();
_properties.deviceType = isIntegrated ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
_properties.deviceType = _hasUnifiedMemory ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
// For Apple Silicon, the Device ID is determined by the highest
// GPU capability, which is a combination of OS version and GPU type.
// We determine Apple Silicon directly from the GPU, instead
// of from the build, in case we are running Rosetta2.
if (supportsMTLGPUFamily(Apple1)) {
if (_isAppleGPU) {
_properties.vendorID = kAppleVendorId;
_properties.deviceID = getHighestGPUCapability();
return;
@ -3012,9 +3017,9 @@ void MVKPhysicalDevice::initGPUInfoProperties() {
if (mvkGetEntryProperty(entry, CFSTR("class-code")) == 0x30000) { // 0x30000 : DISPLAY_VGA
// The Intel GPU will always be marked as integrated.
// Return on a match of either Intel && low power, or non-Intel and non-low-power.
// Return on a match of either Intel && unified memory, or non-Intel and non-unified memory.
uint32_t vendorID = mvkGetEntryProperty(entry, CFSTR("vendor-id"));
if ( (vendorID == kIntelVendorId) == isIntegrated) {
if ( (vendorID == kIntelVendorId) == _hasUnifiedMemory) {
isFound = true;
_properties.vendorID = vendorID;
_properties.deviceID = mvkGetEntryProperty(entry, CFSTR("device-id"));
@ -3168,7 +3173,7 @@ void MVKPhysicalDevice::initMemoryProperties() {
// Optional second heap for shared memory
uint32_t sharedHeapIdx;
VkMemoryPropertyFlags sharedTypePropFlags;
if (getHasUnifiedMemory()) {
if (_hasUnifiedMemory) {
// Shared memory goes in the single main heap in unified memory, and per Vulkan spec must be marked local
sharedHeapIdx = mainHeapIdx;
sharedTypePropFlags = MVK_VK_MEMORY_TYPE_METAL_SHARED | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
@ -3194,12 +3199,14 @@ void MVKPhysicalDevice::initMemoryProperties() {
setMemoryType(typeIdx, sharedHeapIdx, sharedTypePropFlags);
typeIdx++;
// Managed storage
// Managed storage. On all Apple Silicon, use Shared instead.
uint32_t managedBit = 0;
#if MVK_MACOS
managedBit = 1 << typeIdx;
setMemoryType(typeIdx, mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MANAGED);
typeIdx++;
if ( !_isAppleGPU ) {
managedBit = 1 << typeIdx;
setMemoryType(typeIdx, mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MANAGED);
typeIdx++;
}
#endif
// Memoryless storage
@ -3235,17 +3242,33 @@ void MVKPhysicalDevice::initMemoryProperties() {
_allMemoryTypes = privateBit | sharedBit | managedBit | memlessBit;
}
bool MVKPhysicalDevice::getHasUnifiedMemory() {
MVK_PUBLIC_SYMBOL MTLStorageMode MVKPhysicalDevice::getMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
// If not visible to the host, use Private, or Memoryless if available and lazily allocated.
if ( !mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ) {
#if MVK_APPLE_SILICON
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
return MTLStorageModeMemoryless;
}
#endif
return MTLStorageModePrivate;
}
// If visible to the host and coherent: Shared
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
return MTLStorageModeShared;
}
// If visible to the host, but not coherent: Shared on Apple Silicon, Managed on other GPUs.
#if MVK_MACOS
return ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]
? _mtlDevice.hasUnifiedMemory : _mtlDevice.isLowPower);
return _isAppleGPU ? MTLStorageModeShared : MTLStorageModeManaged;
#else
return true;
return MTLStorageModeShared;
#endif
}
uint64_t MVKPhysicalDevice::getVRAMSize() {
if (getHasUnifiedMemory()) {
if (_hasUnifiedMemory) {
return mvkGetSystemMemorySize();
} else {
// There's actually no way to query the total physical VRAM on the device in Metal.
@ -3408,7 +3431,7 @@ void MVKPhysicalDevice::initVkSemaphoreStyle() {
switch (getMVKConfig().semaphoreSupportStyle) {
case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE: {
bool isNVIDIA = _properties.vendorID == kNVVendorId;
bool isRosetta2 = _properties.vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
bool isRosetta2 = _isAppleGPU && !MVK_APPLE_SILICON;
if (_metalFeatures.events && !(isRosetta2 || isNVIDIA)) { _vkSemaphoreStyle = MVKSemaphoreStyleUseMTLEvent; }
break;
}

View File

@ -84,7 +84,7 @@ VkResult MVKDeviceMemory::flushToDevice(VkDeviceSize offset, VkDeviceSize size)
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
#if MVK_MACOS
if (_mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
if ( !isUnifiedMemoryGPU() && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
[_mtlBuffer didModifyRange: NSMakeRange(offset, memSize)];
}
#endif
@ -106,7 +106,7 @@ VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset,
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
#if MVK_MACOS
if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
if ( !isUnifiedMemoryGPU() && pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseInvalidateMappedMemoryRanges); }
if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; }
[pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer];
@ -285,7 +285,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device,
// Set Metal memory parameters
_vkMemAllocFlags = 0;
_vkMemPropFlags = _device->_pMemoryProperties->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;
_mtlStorageMode = mvkMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
_mtlStorageMode = getPhysicalDevice()->getMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
_mtlCPUCacheMode = mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
_allocationSize = pAllocateInfo->allocationSize;

View File

@ -469,18 +469,18 @@ void MVKImageMemoryBinding::propagateDebugName() {
// texture and host memory for the purpose of the host reading texture memory.
bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) {
#if MVK_MACOS
return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
return ( !isUnifiedMemoryGPU() && (barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
isMemoryHostAccessible() && (!_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent()));
#else
return false;
return false;
#endif
}
// Returns whether host memory should be flushed for this binding: the memory must be
// host accessible, and either no texel buffer is in use, or this binding owns the texel buffer.
bool MVKImageMemoryBinding::shouldFlushHostMemory() { return isMemoryHostAccessible() && (!_mtlTexelBuffer || _ownsTexelBuffer); }
// Flushes the device memory at the specified memory range into the MTLTexture. Updates
// all subresources that overlap the specified range and are in an updatable layout state.
// Flushes the memory at the specified memory range into the MTLTexture.
// Updates all subresources that overlap the specified range and are in an updatable layout state.
VkResult MVKImageMemoryBinding::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
if (shouldFlushHostMemory()) {
for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) {
@ -501,7 +501,7 @@ VkResult MVKImageMemoryBinding::flushToDevice(VkDeviceSize offset, VkDeviceSize
return VK_SUCCESS;
}
// Pulls content from the MTLTexture into the device memory at the specified memory range.
// Pulls content from the MTLTexture into memory at the specified memory range.
// Pulls from all subresources that overlap the specified range and are in an updatable layout state.
VkResult MVKImageMemoryBinding::pullFromDevice(VkDeviceSize offset, VkDeviceSize size) {
if (shouldFlushHostMemory()) {
@ -715,7 +715,7 @@ VkResult MVKImage::copyImageToMemory(const VkCopyImageToMemoryInfoEXT* pCopyImag
#if MVK_MACOS
// On macOS, if the device doesn't have unified memory, and the texture is using managed memory, we need
// to sync the managed memory from the GPU, so the texture content is accessible to be copied by the CPU.
if ( !getPhysicalDevice()->getHasUnifiedMemory() && getMTLStorageMode() == MTLStorageModeManaged ) {
if ( !isUnifiedMemoryGPU() && getMTLStorageMode() == MTLStorageModeManaged ) {
@autoreleasepool {
id<MTLCommandBuffer> mtlCmdBuff = getDevice()->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseCopyImageToMemory);
id<MTLBlitCommandEncoder> mtlBlitEnc = [mtlCmdBuff blitCommandEncoder];
@ -858,9 +858,9 @@ VkResult MVKImage::getMemoryRequirements(VkMemoryRequirements* pMemoryRequiremen
pMemoryRequirements->memoryTypeBits = (_isDepthStencilAttachment)
? mvkPD->getPrivateMemoryTypes()
: mvkPD->getAllMemoryTypes();
// Metal on non-Apple GPUs does not provide native support for host-coherent memory, but Vulkan requires it for Linear images
#if MVK_MACOS
// Metal on macOS does not provide native support for host-coherent memory, but Vulkan requires it for Linear images
if ( !_isLinear ) {
if ( !isAppleGPU() && !_isLinear ) {
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getHostCoherentMemoryTypes());
}
#endif
@ -1052,6 +1052,7 @@ MTLStorageMode MVKImage::getMTLStorageMode() {
#if MVK_MACOS
// For macOS prior to 10.15.5, textures cannot use Shared storage mode, so change to Managed storage mode.
// All Apple GPUs support shared linear textures, so this only applies to other GPUs.
if (stgMode == MTLStorageModeShared && !_device->_pMetalFeatures->sharedLinearTextures) {
stgMode = MTLStorageModeManaged;
}

View File

@ -882,31 +882,6 @@ MVK_PUBLIC_SYMBOL CGRect mvkCGRectFromVkRectLayerKHR(VkRectLayerKHR vkRect) {
#pragma mark -
#pragma mark Memory options
// Maps Vulkan memory property flags to a Metal storage mode.
// The choice depends only on the flags and on compile-time platform macros
// (MVK_APPLE_SILICON, MVK_MACOS); no property of the actual GPU is consulted.
MVK_PUBLIC_SYMBOL MTLStorageMode mvkMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
// If not visible to the host, use Private, or Memoryless if available and lazily allocated.
if ( !mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ) {
#if MVK_APPLE_SILICON
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
return MTLStorageModeMemoryless;
}
#endif
return MTLStorageModePrivate;
}
// If visible to the host and coherent: Shared
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
return MTLStorageModeShared;
}
// If visible to the host, and not coherent: Managed on macOS, Shared on iOS
#if MVK_MACOS
return MTLStorageModeManaged;
#else
return MTLStorageModeShared;
#endif
}
// Returns the Metal CPU cache mode for the given Vulkan memory property flags.
// The flags are currently ignored: the default cache mode is always returned.
MVK_PUBLIC_SYMBOL MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
return MTLCPUCacheModeDefaultCache;
}