On macOS Apple Silicon, avoid managed-memory textures and resource syncs.
Like their iOS/tvOS counterparts, macOS Apple Silicon GPUs support using Shared memory for textures and do not require resource synchronization, even with Managed memory. This change treats macOS Apple Silicon the same as iOS & tvOS.

- MVKPhysicalDevice: add _hasUnifiedMemory & _isAppleGPU flags.
- MVKDeviceTrackingMixin: add isUnifiedMemoryGPU() & isAppleGPU().
- Do not advertise the host-visible-but-not-host-coherent Vulkan memory type on macOS Apple Silicon.
- Replace mvkMTLStorageModeFromVkMemoryPropertyFlags() with MVKPhysicalDevice::getMTLStorageModeFromVkMemoryPropertyFlags(), and return Shared instead of Managed for Apple Silicon, even if coherency is not requested.
- On unified memory devices, avoid needless calls to didModifyRange:, synchronizeResource:, and synchronizeTexture:slice:level:.
This commit is contained in:
parent
6c68ba1e0c
commit
607aaff4c1
@ -482,9 +482,6 @@ static inline VkExtent3D mvkVkExtent3DFromMTLSize(MTLSize mtlSize) {
|
|||||||
/** Macro indicating the Vulkan memory type bits corresponding to Metal memoryless memory (not host visible and lazily allocated). */
|
/** Macro indicating the Vulkan memory type bits corresponding to Metal memoryless memory (not host visible and lazily allocated). */
|
||||||
#define MVK_VK_MEMORY_TYPE_METAL_MEMORYLESS (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)
|
#define MVK_VK_MEMORY_TYPE_METAL_MEMORYLESS (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)
|
||||||
|
|
||||||
/** Returns the Metal storage mode corresponding to the specified Vulkan memory flags. */
|
|
||||||
MTLStorageMode mvkMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
|
|
||||||
|
|
||||||
/** Returns the Metal CPU cache mode corresponding to the specified Vulkan memory flags. */
|
/** Returns the Metal CPU cache mode corresponding to the specified Vulkan memory flags. */
|
||||||
MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
|
MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
|
||||||
|
|
||||||
|
@ -81,7 +81,11 @@ VkResult MVKBuffer::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOf
|
|||||||
|
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
if (_deviceMemory) {
|
if (_deviceMemory) {
|
||||||
_isHostCoherentTexelBuffer = !_device->_pMetalFeatures->sharedLinearTextures && _deviceMemory->isMemoryHostCoherent() && mvkIsAnyFlagEnabled(_usage, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT);
|
_isHostCoherentTexelBuffer = (!isUnifiedMemoryGPU() &&
|
||||||
|
!_device->_pMetalFeatures->sharedLinearTextures &&
|
||||||
|
_deviceMemory->isMemoryHostCoherent() &&
|
||||||
|
mvkIsAnyFlagEnabled(_usage, (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
|
||||||
|
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -118,7 +122,8 @@ void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier,
|
|||||||
// buffer and host memory for the purpose of the host reading texture memory.
|
// buffer and host memory for the purpose of the host reading texture memory.
|
||||||
bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) {
|
bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) {
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
|
return (!isUnifiedMemoryGPU() &&
|
||||||
|
mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
|
||||||
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
|
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
|
||||||
isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
|
isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
|
||||||
#else
|
#else
|
||||||
@ -138,9 +143,7 @@ bool MVKBuffer::overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &o
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if MVK_MACOS
|
bool MVKBuffer::shouldFlushHostMemory() { return !isUnifiedMemoryGPU() && _isHostCoherentTexelBuffer; }
|
||||||
bool MVKBuffer::shouldFlushHostMemory() { return _isHostCoherentTexelBuffer; }
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Flushes the device memory at the specified memory range into the MTLBuffer.
|
// Flushes the device memory at the specified memory range into the MTLBuffer.
|
||||||
VkResult MVKBuffer::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
|
VkResult MVKBuffer::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
|
||||||
|
@ -331,9 +331,6 @@ public:
|
|||||||
*/
|
*/
|
||||||
uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
|
uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
|
||||||
|
|
||||||
/** Returns whether this is a unified memory device. */
|
|
||||||
bool getHasUnifiedMemory();
|
|
||||||
|
|
||||||
/** Returns the external memory properties supported for buffers for the handle type. */
|
/** Returns the external memory properties supported for buffers for the handle type. */
|
||||||
VkExternalMemoryProperties& getExternalBufferProperties(VkExternalMemoryHandleTypeFlagBits handleType);
|
VkExternalMemoryProperties& getExternalBufferProperties(VkExternalMemoryHandleTypeFlagBits handleType);
|
||||||
|
|
||||||
@ -363,6 +360,9 @@ public:
|
|||||||
/** Returns whether native texture atomics are supported and should be used. */
|
/** Returns whether native texture atomics are supported and should be used. */
|
||||||
bool useNativeTextureAtomics() { return _metalFeatures.nativeTextureAtomics; }
|
bool useNativeTextureAtomics() { return _metalFeatures.nativeTextureAtomics; }
|
||||||
|
|
||||||
|
/** Returns the MTLStorageMode that matches the Vulkan memory property flags. */
|
||||||
|
MTLStorageMode getMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags);
|
||||||
|
|
||||||
|
|
||||||
#pragma mark Construction
|
#pragma mark Construction
|
||||||
|
|
||||||
@ -388,6 +388,7 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
friend class MVKDevice;
|
friend class MVKDevice;
|
||||||
|
friend class MVKDeviceTrackingMixin;
|
||||||
|
|
||||||
void propagateDebugName() override {}
|
void propagateDebugName() override {}
|
||||||
MTLFeatureSet getMaximalMTLFeatureSet();
|
MTLFeatureSet getMaximalMTLFeatureSet();
|
||||||
@ -443,6 +444,8 @@ protected:
|
|||||||
uint32_t _hostCoherentMemoryTypes;
|
uint32_t _hostCoherentMemoryTypes;
|
||||||
uint32_t _privateMemoryTypes;
|
uint32_t _privateMemoryTypes;
|
||||||
uint32_t _lazilyAllocatedMemoryTypes;
|
uint32_t _lazilyAllocatedMemoryTypes;
|
||||||
|
bool _hasUnifiedMemory = true;
|
||||||
|
bool _isAppleGPU = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -887,6 +890,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
friend class MVKDeviceTrackingMixin;
|
||||||
|
|
||||||
void propagateDebugName() override {}
|
void propagateDebugName() override {}
|
||||||
MVKBuffer* addBuffer(MVKBuffer* mvkBuff);
|
MVKBuffer* addBuffer(MVKBuffer* mvkBuff);
|
||||||
MVKBuffer* removeBuffer(MVKBuffer* mvkBuff);
|
MVKBuffer* removeBuffer(MVKBuffer* mvkBuff);
|
||||||
@ -956,6 +961,12 @@ public:
|
|||||||
/** Returns the underlying Metal device. */
|
/** Returns the underlying Metal device. */
|
||||||
id<MTLDevice> getMTLDevice() { return _device->getMTLDevice(); }
|
id<MTLDevice> getMTLDevice() { return _device->getMTLDevice(); }
|
||||||
|
|
||||||
|
/** Returns whether the GPU is a unified memory device. */
|
||||||
|
bool isUnifiedMemoryGPU() { return getPhysicalDevice()->_hasUnifiedMemory; }
|
||||||
|
|
||||||
|
/** Returns whether the GPU is Apple Silicon. */
|
||||||
|
bool isAppleGPU() { return getPhysicalDevice()->_isAppleGPU; }
|
||||||
|
|
||||||
/** Returns info about the pixel format supported by the physical device. */
|
/** Returns info about the pixel format supported by the physical device. */
|
||||||
MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }
|
MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }
|
||||||
|
|
||||||
|
@ -1765,9 +1765,7 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
|
|||||||
// wild temporary changes, particularly during initial queries before much GPU activity has occurred.
|
// wild temporary changes, particularly during initial queries before much GPU activity has occurred.
|
||||||
// On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes.
|
// On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes.
|
||||||
void MVKPhysicalDevice::updateTimestampPeriod() {
|
void MVKPhysicalDevice::updateTimestampPeriod() {
|
||||||
if (_properties.vendorID != kAppleVendorId &&
|
if ( !_isAppleGPU && [_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) {
|
||||||
[_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) {
|
|
||||||
|
|
||||||
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
|
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
|
||||||
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
|
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
|
||||||
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
|
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
|
||||||
@ -1804,7 +1802,7 @@ VkResult MVKPhysicalDevice::getMemoryProperties(VkPhysicalDeviceMemoryProperties
|
|||||||
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
|
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
|
||||||
mvkClear(budgetProps->heapBudget, VK_MAX_MEMORY_HEAPS);
|
mvkClear(budgetProps->heapBudget, VK_MAX_MEMORY_HEAPS);
|
||||||
mvkClear(budgetProps->heapUsage, VK_MAX_MEMORY_HEAPS);
|
mvkClear(budgetProps->heapUsage, VK_MAX_MEMORY_HEAPS);
|
||||||
if (!getHasUnifiedMemory()) {
|
if ( !_hasUnifiedMemory ) {
|
||||||
budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
|
budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
|
||||||
budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
|
budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
|
||||||
}
|
}
|
||||||
@ -1833,11 +1831,11 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
|
|||||||
_supportedExtensions(this, true),
|
_supportedExtensions(this, true),
|
||||||
_pixelFormats(this) { // Set after _mtlDevice
|
_pixelFormats(this) { // Set after _mtlDevice
|
||||||
|
|
||||||
initMTLDevice();
|
initMTLDevice(); // Call first.
|
||||||
initProperties(); // Call first.
|
initProperties(); // Call second.
|
||||||
initMetalFeatures(); // Call second.
|
initMetalFeatures(); // Call third.
|
||||||
initFeatures(); // Call third.
|
initFeatures(); // Call fourth.
|
||||||
initLimits(); // Call fourth.
|
initLimits(); // Call fifth.
|
||||||
initExtensions();
|
initExtensions();
|
||||||
initMemoryProperties();
|
initMemoryProperties();
|
||||||
initExternalMemoryProperties();
|
initExternalMemoryProperties();
|
||||||
@ -1847,12 +1845,21 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MVKPhysicalDevice::initMTLDevice() {
|
void MVKPhysicalDevice::initMTLDevice() {
|
||||||
#if MVK_XCODE_14_3 && MVK_MACOS && !MVK_MACCAT
|
#if MVK_MACOS
|
||||||
|
_isAppleGPU = supportsMTLGPUFamily(Apple1);
|
||||||
|
|
||||||
|
// Apple Silicon will respond false to isLowPower, but never hits it.
|
||||||
|
_hasUnifiedMemory = ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]
|
||||||
|
? _mtlDevice.hasUnifiedMemory : _mtlDevice.isLowPower);
|
||||||
|
|
||||||
|
#if MVK_XCODE_14_3 && !MVK_MACCAT
|
||||||
if ([_mtlDevice respondsToSelector: @selector(setShouldMaximizeConcurrentCompilation:)]) {
|
if ([_mtlDevice respondsToSelector: @selector(setShouldMaximizeConcurrentCompilation:)]) {
|
||||||
[_mtlDevice setShouldMaximizeConcurrentCompilation: getMVKConfig().shouldMaximizeConcurrentCompilation];
|
[_mtlDevice setShouldMaximizeConcurrentCompilation: getMVKConfig().shouldMaximizeConcurrentCompilation];
|
||||||
MVKLogInfoIf(getMVKConfig().debugMode, "maximumConcurrentCompilationTaskCount %lu", _mtlDevice.maximumConcurrentCompilationTaskCount);
|
MVKLogInfoIf(getMVKConfig().debugMode, "maximumConcurrentCompilationTaskCount %lu", _mtlDevice.maximumConcurrentCompilationTaskCount);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif // MVK_MACOS
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initializes the physical device properties (except limits).
|
// Initializes the physical device properties (except limits).
|
||||||
@ -2968,16 +2975,14 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MVKPhysicalDevice::initGPUInfoProperties() {
|
void MVKPhysicalDevice::initGPUInfoProperties() {
|
||||||
|
_properties.deviceType = _hasUnifiedMemory ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
|
||||||
bool isIntegrated = getHasUnifiedMemory();
|
|
||||||
_properties.deviceType = isIntegrated ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
|
|
||||||
strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
|
strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
|
||||||
|
|
||||||
// For Apple Silicon, the Device ID is determined by the highest
|
// For Apple Silicon, the Device ID is determined by the highest
|
||||||
// GPU capability, which is a combination of OS version and GPU type.
|
// GPU capability, which is a combination of OS version and GPU type.
|
||||||
// We determine Apple Silicon directly from the GPU, instead
|
// We determine Apple Silicon directly from the GPU, instead
|
||||||
// of from the build, in case we are running Rosetta2.
|
// of from the build, in case we are running Rosetta2.
|
||||||
if (supportsMTLGPUFamily(Apple1)) {
|
if (_isAppleGPU) {
|
||||||
_properties.vendorID = kAppleVendorId;
|
_properties.vendorID = kAppleVendorId;
|
||||||
_properties.deviceID = getHighestGPUCapability();
|
_properties.deviceID = getHighestGPUCapability();
|
||||||
return;
|
return;
|
||||||
@ -3012,9 +3017,9 @@ void MVKPhysicalDevice::initGPUInfoProperties() {
|
|||||||
if (mvkGetEntryProperty(entry, CFSTR("class-code")) == 0x30000) { // 0x30000 : DISPLAY_VGA
|
if (mvkGetEntryProperty(entry, CFSTR("class-code")) == 0x30000) { // 0x30000 : DISPLAY_VGA
|
||||||
|
|
||||||
// The Intel GPU will always be marked as integrated.
|
// The Intel GPU will always be marked as integrated.
|
||||||
// Return on a match of either Intel && low power, or non-Intel and non-low-power.
|
// Return on a match of either Intel && unified memory, or non-Intel and non-unified memory.
|
||||||
uint32_t vendorID = mvkGetEntryProperty(entry, CFSTR("vendor-id"));
|
uint32_t vendorID = mvkGetEntryProperty(entry, CFSTR("vendor-id"));
|
||||||
if ( (vendorID == kIntelVendorId) == isIntegrated) {
|
if ( (vendorID == kIntelVendorId) == _hasUnifiedMemory) {
|
||||||
isFound = true;
|
isFound = true;
|
||||||
_properties.vendorID = vendorID;
|
_properties.vendorID = vendorID;
|
||||||
_properties.deviceID = mvkGetEntryProperty(entry, CFSTR("device-id"));
|
_properties.deviceID = mvkGetEntryProperty(entry, CFSTR("device-id"));
|
||||||
@ -3168,7 +3173,7 @@ void MVKPhysicalDevice::initMemoryProperties() {
|
|||||||
// Optional second heap for shared memory
|
// Optional second heap for shared memory
|
||||||
uint32_t sharedHeapIdx;
|
uint32_t sharedHeapIdx;
|
||||||
VkMemoryPropertyFlags sharedTypePropFlags;
|
VkMemoryPropertyFlags sharedTypePropFlags;
|
||||||
if (getHasUnifiedMemory()) {
|
if (_hasUnifiedMemory) {
|
||||||
// Shared memory goes in the single main heap in unified memory, and per Vulkan spec must be marked local
|
// Shared memory goes in the single main heap in unified memory, and per Vulkan spec must be marked local
|
||||||
sharedHeapIdx = mainHeapIdx;
|
sharedHeapIdx = mainHeapIdx;
|
||||||
sharedTypePropFlags = MVK_VK_MEMORY_TYPE_METAL_SHARED | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
sharedTypePropFlags = MVK_VK_MEMORY_TYPE_METAL_SHARED | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||||
@ -3194,12 +3199,14 @@ void MVKPhysicalDevice::initMemoryProperties() {
|
|||||||
setMemoryType(typeIdx, sharedHeapIdx, sharedTypePropFlags);
|
setMemoryType(typeIdx, sharedHeapIdx, sharedTypePropFlags);
|
||||||
typeIdx++;
|
typeIdx++;
|
||||||
|
|
||||||
// Managed storage
|
// Managed storage. On all Apple Silicon, use Shared instead.
|
||||||
uint32_t managedBit = 0;
|
uint32_t managedBit = 0;
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
managedBit = 1 << typeIdx;
|
if ( !_isAppleGPU ) {
|
||||||
setMemoryType(typeIdx, mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MANAGED);
|
managedBit = 1 << typeIdx;
|
||||||
typeIdx++;
|
setMemoryType(typeIdx, mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MANAGED);
|
||||||
|
typeIdx++;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Memoryless storage
|
// Memoryless storage
|
||||||
@ -3235,17 +3242,33 @@ void MVKPhysicalDevice::initMemoryProperties() {
|
|||||||
_allMemoryTypes = privateBit | sharedBit | managedBit | memlessBit;
|
_allMemoryTypes = privateBit | sharedBit | managedBit | memlessBit;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MVKPhysicalDevice::getHasUnifiedMemory() {
|
MVK_PUBLIC_SYMBOL MTLStorageMode MVKPhysicalDevice::getMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
|
||||||
|
|
||||||
|
// If not visible to the host, use Private, or Memoryless if available and lazily allocated.
|
||||||
|
if ( !mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ) {
|
||||||
|
#if MVK_APPLE_SILICON
|
||||||
|
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
|
||||||
|
return MTLStorageModeMemoryless;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return MTLStorageModePrivate;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If visible to the host and coherent: Shared
|
||||||
|
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
|
||||||
|
return MTLStorageModeShared;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If visible to the host, but not coherent: Shared on Apple Silicon, Managed on other GPUs.
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
return ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]
|
return _isAppleGPU ? MTLStorageModeShared : MTLStorageModeManaged;
|
||||||
? _mtlDevice.hasUnifiedMemory : _mtlDevice.isLowPower);
|
|
||||||
#else
|
#else
|
||||||
return true;
|
return MTLStorageModeShared;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t MVKPhysicalDevice::getVRAMSize() {
|
uint64_t MVKPhysicalDevice::getVRAMSize() {
|
||||||
if (getHasUnifiedMemory()) {
|
if (_hasUnifiedMemory) {
|
||||||
return mvkGetSystemMemorySize();
|
return mvkGetSystemMemorySize();
|
||||||
} else {
|
} else {
|
||||||
// There's actually no way to query the total physical VRAM on the device in Metal.
|
// There's actually no way to query the total physical VRAM on the device in Metal.
|
||||||
@ -3408,7 +3431,7 @@ void MVKPhysicalDevice::initVkSemaphoreStyle() {
|
|||||||
switch (getMVKConfig().semaphoreSupportStyle) {
|
switch (getMVKConfig().semaphoreSupportStyle) {
|
||||||
case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE: {
|
case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE: {
|
||||||
bool isNVIDIA = _properties.vendorID == kNVVendorId;
|
bool isNVIDIA = _properties.vendorID == kNVVendorId;
|
||||||
bool isRosetta2 = _properties.vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
|
bool isRosetta2 = _isAppleGPU && !MVK_APPLE_SILICON;
|
||||||
if (_metalFeatures.events && !(isRosetta2 || isNVIDIA)) { _vkSemaphoreStyle = MVKSemaphoreStyleUseMTLEvent; }
|
if (_metalFeatures.events && !(isRosetta2 || isNVIDIA)) { _vkSemaphoreStyle = MVKSemaphoreStyleUseMTLEvent; }
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -84,7 +84,7 @@ VkResult MVKDeviceMemory::flushToDevice(VkDeviceSize offset, VkDeviceSize size)
|
|||||||
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
|
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
|
||||||
|
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
if (_mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
|
if ( !isUnifiedMemoryGPU() && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
|
||||||
[_mtlBuffer didModifyRange: NSMakeRange(offset, memSize)];
|
[_mtlBuffer didModifyRange: NSMakeRange(offset, memSize)];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -106,7 +106,7 @@ VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset,
|
|||||||
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
|
if (memSize == 0 || !isMemoryHostAccessible()) { return VK_SUCCESS; }
|
||||||
|
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
|
if ( !isUnifiedMemoryGPU() && pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
|
||||||
if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseInvalidateMappedMemoryRanges); }
|
if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseInvalidateMappedMemoryRanges); }
|
||||||
if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; }
|
if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; }
|
||||||
[pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer];
|
[pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer];
|
||||||
@ -285,7 +285,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device,
|
|||||||
// Set Metal memory parameters
|
// Set Metal memory parameters
|
||||||
_vkMemAllocFlags = 0;
|
_vkMemAllocFlags = 0;
|
||||||
_vkMemPropFlags = _device->_pMemoryProperties->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;
|
_vkMemPropFlags = _device->_pMemoryProperties->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;
|
||||||
_mtlStorageMode = mvkMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
|
_mtlStorageMode = getPhysicalDevice()->getMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
|
||||||
_mtlCPUCacheMode = mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
|
_mtlCPUCacheMode = mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
|
||||||
|
|
||||||
_allocationSize = pAllocateInfo->allocationSize;
|
_allocationSize = pAllocateInfo->allocationSize;
|
||||||
|
@ -469,18 +469,18 @@ void MVKImageMemoryBinding::propagateDebugName() {
|
|||||||
// texture and host memory for the purpose of the host reading texture memory.
|
// texture and host memory for the purpose of the host reading texture memory.
|
||||||
bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) {
|
bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) {
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
|
return ( !isUnifiedMemoryGPU() && (barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
|
||||||
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
|
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
|
||||||
isMemoryHostAccessible() && (!_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent()));
|
isMemoryHostAccessible() && (!_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent()));
|
||||||
#else
|
#else
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MVKImageMemoryBinding::shouldFlushHostMemory() { return isMemoryHostAccessible() && (!_mtlTexelBuffer || _ownsTexelBuffer); }
|
bool MVKImageMemoryBinding::shouldFlushHostMemory() { return isMemoryHostAccessible() && (!_mtlTexelBuffer || _ownsTexelBuffer); }
|
||||||
|
|
||||||
// Flushes the device memory at the specified memory range into the MTLTexture. Updates
|
// Flushes the memory at the specified memory range into the MTLTexture.
|
||||||
// all subresources that overlap the specified range and are in an updatable layout state.
|
// Updates all subresources that overlap the specified range and are in an updatable layout state.
|
||||||
VkResult MVKImageMemoryBinding::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
|
VkResult MVKImageMemoryBinding::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
|
||||||
if (shouldFlushHostMemory()) {
|
if (shouldFlushHostMemory()) {
|
||||||
for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) {
|
for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) {
|
||||||
@ -501,7 +501,7 @@ VkResult MVKImageMemoryBinding::flushToDevice(VkDeviceSize offset, VkDeviceSize
|
|||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pulls content from the MTLTexture into the device memory at the specified memory range.
|
// Pulls content from the MTLTexture into memory at the specified memory range.
|
||||||
// Pulls from all subresources that overlap the specified range and are in an updatable layout state.
|
// Pulls from all subresources that overlap the specified range and are in an updatable layout state.
|
||||||
VkResult MVKImageMemoryBinding::pullFromDevice(VkDeviceSize offset, VkDeviceSize size) {
|
VkResult MVKImageMemoryBinding::pullFromDevice(VkDeviceSize offset, VkDeviceSize size) {
|
||||||
if (shouldFlushHostMemory()) {
|
if (shouldFlushHostMemory()) {
|
||||||
@ -715,7 +715,7 @@ VkResult MVKImage::copyImageToMemory(const VkCopyImageToMemoryInfoEXT* pCopyImag
|
|||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
// On macOS, if the device doesn't have unified memory, and the texture is using managed memory, we need
|
// On macOS, if the device doesn't have unified memory, and the texture is using managed memory, we need
|
||||||
// to sync the managed memory from the GPU, so the texture content is accessible to be copied by the CPU.
|
// to sync the managed memory from the GPU, so the texture content is accessible to be copied by the CPU.
|
||||||
if ( !getPhysicalDevice()->getHasUnifiedMemory() && getMTLStorageMode() == MTLStorageModeManaged ) {
|
if ( !isUnifiedMemoryGPU() && getMTLStorageMode() == MTLStorageModeManaged ) {
|
||||||
@autoreleasepool {
|
@autoreleasepool {
|
||||||
id<MTLCommandBuffer> mtlCmdBuff = getDevice()->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseCopyImageToMemory);
|
id<MTLCommandBuffer> mtlCmdBuff = getDevice()->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseCopyImageToMemory);
|
||||||
id<MTLBlitCommandEncoder> mtlBlitEnc = [mtlCmdBuff blitCommandEncoder];
|
id<MTLBlitCommandEncoder> mtlBlitEnc = [mtlCmdBuff blitCommandEncoder];
|
||||||
@ -858,9 +858,9 @@ VkResult MVKImage::getMemoryRequirements(VkMemoryRequirements* pMemoryRequiremen
|
|||||||
pMemoryRequirements->memoryTypeBits = (_isDepthStencilAttachment)
|
pMemoryRequirements->memoryTypeBits = (_isDepthStencilAttachment)
|
||||||
? mvkPD->getPrivateMemoryTypes()
|
? mvkPD->getPrivateMemoryTypes()
|
||||||
: mvkPD->getAllMemoryTypes();
|
: mvkPD->getAllMemoryTypes();
|
||||||
|
// Metal on non-Apple GPUs does not provide native support for host-coherent memory, but Vulkan requires it for Linear images
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
// Metal on macOS does not provide native support for host-coherent memory, but Vulkan requires it for Linear images
|
if ( !isAppleGPU() && !_isLinear ) {
|
||||||
if ( !_isLinear ) {
|
|
||||||
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getHostCoherentMemoryTypes());
|
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getHostCoherentMemoryTypes());
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -1052,6 +1052,7 @@ MTLStorageMode MVKImage::getMTLStorageMode() {
|
|||||||
|
|
||||||
#if MVK_MACOS
|
#if MVK_MACOS
|
||||||
// For macOS prior to 10.15.5, textures cannot use Shared storage mode, so change to Managed storage mode.
|
// For macOS prior to 10.15.5, textures cannot use Shared storage mode, so change to Managed storage mode.
|
||||||
|
// All Apple GPUs support shared linear textures, so this only applies to other GPUs.
|
||||||
if (stgMode == MTLStorageModeShared && !_device->_pMetalFeatures->sharedLinearTextures) {
|
if (stgMode == MTLStorageModeShared && !_device->_pMetalFeatures->sharedLinearTextures) {
|
||||||
stgMode = MTLStorageModeManaged;
|
stgMode = MTLStorageModeManaged;
|
||||||
}
|
}
|
||||||
|
@ -882,31 +882,6 @@ MVK_PUBLIC_SYMBOL CGRect mvkCGRectFromVkRectLayerKHR(VkRectLayerKHR vkRect) {
|
|||||||
#pragma mark -
|
#pragma mark -
|
||||||
#pragma mark Memory options
|
#pragma mark Memory options
|
||||||
|
|
||||||
MVK_PUBLIC_SYMBOL MTLStorageMode mvkMTLStorageModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
|
|
||||||
|
|
||||||
// If not visible to the host, use Private, or Memoryless if available and lazily allocated.
|
|
||||||
if ( !mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ) {
|
|
||||||
#if MVK_APPLE_SILICON
|
|
||||||
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
|
|
||||||
return MTLStorageModeMemoryless;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return MTLStorageModePrivate;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If visible to the host and coherent: Shared
|
|
||||||
if (mvkAreAllFlagsEnabled(vkFlags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
|
|
||||||
return MTLStorageModeShared;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If visible to the host, and not coherent: Managed on macOS, Shared on iOS
|
|
||||||
#if MVK_MACOS
|
|
||||||
return MTLStorageModeManaged;
|
|
||||||
#else
|
|
||||||
return MTLStorageModeShared;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
MVK_PUBLIC_SYMBOL MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
|
MVK_PUBLIC_SYMBOL MTLCPUCacheMode mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(VkMemoryPropertyFlags vkFlags) {
|
||||||
return MTLCPUCacheModeDefaultCache;
|
return MTLCPUCacheModeDefaultCache;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user