MVKDevice: Report a second heap for non-UMA devices.

This is the heap representing system memory. The memory type
corresponding to `MTLStorageModeShared` is reported as belonging to this
heap.

I've tried to correct the reported size of the heaps as well while I'm
at it. UMA GPUs and the system heap for NUMA GPUs use the system memory
size as the heap size, and the amount of available memory as the budget.
On iOS 13 and later, the new `os_proc_available_memory()` API is used to obtain the available memory.
NUMA GPUs additionally use the memory used by the process as a crude
measure of system heap usage.
This commit is contained in:
Chip Davis 2019-08-08 14:53:19 -05:00
parent 69b238f95e
commit 92f0bec6e2
4 changed files with 114 additions and 19 deletions

View File

@ -111,3 +111,17 @@ bool mvkGetEnvVarBool(std::string varName, bool* pWasFound = nullptr);
int64_t val = wasFound ? ev : EV; \ int64_t val = wasFound ? ev : EV; \
cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \ cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \
} while(false) } while(false)
#pragma mark -
#pragma mark System memory
/**
 * Returns the total amount of physical RAM in the system,
 * or 0 if the host information query fails.
 */
uint64_t mvkGetSystemMemorySize();
/**
 * Returns the amount of memory available to this process.
 *
 * On iOS 13.0 and later, this is the value reported by os_proc_available_memory().
 * Otherwise, it is the host's free page count multiplied by the host page size,
 * or 0 if the host statistics query fails.
 */
uint64_t mvkGetAvailableMemorySize();
/**
 * Returns the amount of memory currently used by this process, taken from the
 * phys_footprint value of the task's VM info, or 0 if the task information query fails.
 */
uint64_t mvkGetUsedMemorySize();

View File

@ -18,7 +18,9 @@
#include "MVKOSExtensions.h" #include "MVKOSExtensions.h"
#include <mach/mach_host.h>
#include <mach/mach_time.h> #include <mach/mach_time.h>
#include <mach/task.h>
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
@ -91,3 +93,43 @@ int64_t mvkGetEnvVarInt64(string varName, bool* pWasFound) {
bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) { bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) {
return mvkGetEnvVarInt64(varName, pWasFound) != 0; return mvkGetEnvVarInt64(varName, pWasFound) != 0;
} }
#pragma mark -
#pragma mark System memory
/**
 * Queries the Mach host for its basic info and reports the max_mem field.
 * Returns 0 if the host_info() call does not succeed.
 */
uint64_t mvkGetSystemMemorySize() {
	host_basic_info_data_t basicInfo;
	mach_msg_type_number_t infoCount = HOST_BASIC_INFO_COUNT;
	kern_return_t kr = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&basicInfo, &infoCount);
	return (kr == KERN_SUCCESS) ? basicInfo.max_mem : 0;
}
/**
 * Returns an estimate of the memory available to this process.
 *
 * On iOS 13.0 and later, the result of os_proc_available_memory() is returned.
 * Otherwise, the host VM statistics are queried, and the free page count
 * multiplied by the host page size is returned.
 *
 * Returns 0 if either of the host queries fails.
 */
uint64_t mvkGetAvailableMemorySize() {
#if MVK_IOS
	if (mvkOSVersion() >= 13.0) { return os_proc_available_memory(); }
#endif
	// NOTE(review): the port right obtained from mach_host_self() is never released
	// here. Confirm whether it should be handed back with mach_port_deallocate().
	mach_port_t host_port = mach_host_self();

	// Only trust the page size if the query succeeded, so that it is never read uninitialized.
	vm_size_t pagesize = 0;
	if (host_page_size(host_port, &pagesize) != KERN_SUCCESS) { return 0; }

	vm_statistics_data_t vm_stat;
	mach_msg_type_number_t host_size = HOST_VM_INFO_COUNT;
	if (host_statistics(host_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_size) != KERN_SUCCESS) { return 0; }

	// Widen to 64 bits before multiplying, so the product cannot wrap
	// on targets where the operand types are narrower than the result.
	return (uint64_t)vm_stat.free_count * (uint64_t)pagesize;
}
/**
 * Reports the phys_footprint value from this task's TASK_VM_INFO data.
 * Returns 0 if the task_info() call does not succeed.
 */
uint64_t mvkGetUsedMemorySize() {
	task_vm_info_data_t vmInfo;
	mach_msg_type_number_t infoCount = TASK_VM_INFO_COUNT;
	kern_return_t kr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vmInfo, &infoCount);
	if (kr != KERN_SUCCESS) { return 0; }
	return vmInfo.phys_footprint;
}

View File

@ -278,6 +278,9 @@ public:
*/ */
inline uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; } inline uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
/** Returns whether this is a unified memory device. */
bool getHasUnifiedMemory();
#pragma mark Metal #pragma mark Metal
@ -330,7 +333,9 @@ protected:
void initProperties(); void initProperties();
void initGPUInfoProperties(); void initGPUInfoProperties();
void initMemoryProperties(); void initMemoryProperties();
uint64_t getVRAMSize();
uint64_t getRecommendedMaxWorkingSetSize(); uint64_t getRecommendedMaxWorkingSetSize();
uint64_t getCurrentAllocatedSize();
void initExtensions(); void initExtensions();
MVKExtensionList* getSupportedExtensions(const char* pLayerName = nullptr); MVKExtensionList* getSupportedExtensions(const char* pLayerName = nullptr);
std::vector<MVKQueueFamily*>& getQueueFamilies(); std::vector<MVKQueueFamily*>& getQueueFamilies();

View File

@ -36,7 +36,6 @@
#include "MVKOSExtensions.h" #include "MVKOSExtensions.h"
#include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h> #include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
#include "vk_mvk_moltenvk.h" #include "vk_mvk_moltenvk.h"
#include <mach/mach_host.h>
#import "CAMetalLayer+MoltenVK.h" #import "CAMetalLayer+MoltenVK.h"
@ -728,22 +727,21 @@ VkResult MVKPhysicalDevice::getPhysicalDeviceMemoryProperties(VkPhysicalDeviceMe
if (pMemoryProperties) { if (pMemoryProperties) {
pMemoryProperties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; pMemoryProperties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
pMemoryProperties->memoryProperties = _memoryProperties; pMemoryProperties->memoryProperties = _memoryProperties;
auto* next = (MVKVkAPIStructHeader*)pMemoryProperties->pNext; for (auto* next = (VkBaseOutStructure*)pMemoryProperties->pNext; next; next = next->pNext) {
while (next) { switch (next->sType) {
switch ((uint32_t)next->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next; auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
memset(budgetProps->heapBudget, 0, sizeof(budgetProps->heapBudget)); memset(budgetProps->heapBudget, 0, sizeof(budgetProps->heapBudget));
memset(budgetProps->heapUsage, 0, sizeof(budgetProps->heapUsage)); memset(budgetProps->heapUsage, 0, sizeof(budgetProps->heapUsage));
budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize(); budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) { budgetProps->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();
budgetProps->heapUsage[0] = (VkDeviceSize)_mtlDevice.currentAllocatedSize; if (!getHasUnifiedMemory()) {
budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
} }
next = (MVKVkAPIStructHeader*)budgetProps->pNext;
break; break;
} }
default: default:
next = (MVKVkAPIStructHeader*)next->pNext;
break; break;
} }
} }
@ -1758,7 +1756,7 @@ void MVKPhysicalDevice::initMemoryProperties() {
.memoryHeaps = { .memoryHeaps = {
{ {
.flags = (VK_MEMORY_HEAP_DEVICE_LOCAL_BIT), .flags = (VK_MEMORY_HEAP_DEVICE_LOCAL_BIT),
.size = (VkDeviceSize)getRecommendedMaxWorkingSetSize(), .size = (VkDeviceSize)getVRAMSize(),
}, },
}, },
// NB this list needs to stay sorted by propertyFlags (as bit sets) // NB this list needs to stay sorted by propertyFlags (as bit sets)
@ -1808,6 +1806,37 @@ void MVKPhysicalDevice::initMemoryProperties() {
_allMemoryTypes = 0x7; // Private, shared & memoryless _allMemoryTypes = 0x7; // Private, shared & memoryless
} }
#endif #endif
#if MVK_MACOS
if (!getHasUnifiedMemory()) {
// This means we really have two heaps. The second heap is system memory.
_memoryProperties.memoryHeapCount = 2;
_memoryProperties.memoryHeaps[1].size = mvkGetSystemMemorySize();
_memoryProperties.memoryHeaps[1].flags = 0;
_memoryProperties.memoryTypes[2].heapIndex = 1; // Shared memory in the shared heap
}
#endif
}
// Returns whether this is a unified memory device.
// Always true on iOS. On macOS, true only if the Metal device both
// responds to the hasUnifiedMemory selector and reports it as set.
bool MVKPhysicalDevice::getHasUnifiedMemory() {
#if MVK_IOS
	return true;
#elif MVK_MACOS
	if ( ![_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)] ) { return false; }
	return _mtlDevice.hasUnifiedMemory;
#endif
}
// Returns the size to report for the device's memory.
// Unified memory devices report the system memory size.
// Other devices report the recommended max working set size.
uint64_t MVKPhysicalDevice::getVRAMSize() {
#if MVK_IOS
	// All iOS devices are UMA, so return the system memory size.
	return mvkGetSystemMemorySize();
#elif MVK_MACOS
	// There's actually no way to query the total physical VRAM on the device in Metal,
	// so non-UMA devices default to the recommended max working set size (i.e. the budget).
	return getHasUnifiedMemory() ? mvkGetSystemMemorySize() : getRecommendedMaxWorkingSetSize();
#endif
}
uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() { uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() {
@ -1818,21 +1847,26 @@ uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() {
#endif #endif
#if MVK_IOS #if MVK_IOS
// GPU and CPU use shared memory. Estimate the current free memory in the system. // GPU and CPU use shared memory. Estimate the current free memory in the system.
mach_port_t host_port; uint64_t freeMem = mvkGetAvailableMemorySize();
mach_msg_type_number_t host_size; if (freeMem) { return freeMem; }
vm_size_t pagesize;
host_port = mach_host_self();
host_size = sizeof(vm_statistics_data_t) / sizeof(integer_t);
host_page_size(host_port, &pagesize);
vm_statistics_data_t vm_stat;
if (host_statistics(host_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_size) == KERN_SUCCESS ) {
return vm_stat.free_count * pagesize;
}
#endif #endif
return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory
} }
// Returns the Metal device's currentAllocatedSize when the device supports that query.
// Otherwise, iOS falls back to the memory used by this process, and macOS reports 0.
uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() {
	bool canQueryDevice = [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)];
	if (canQueryDevice) { return _mtlDevice.currentAllocatedSize; }
#if MVK_IOS
	// We can use the current memory used by this process as a reasonable approximation.
	return mvkGetUsedMemorySize();
#elif MVK_MACOS
	return 0;
#endif
}
void MVKPhysicalDevice::initExtensions() { void MVKPhysicalDevice::initExtensions() {
if (!_metalFeatures.postDepthCoverage) { if (!_metalFeatures.postDepthCoverage) {
_supportedExtensions.vk_EXT_post_depth_coverage.enabled = false; _supportedExtensions.vk_EXT_post_depth_coverage.enabled = false;