Cache MTLCommandQueues for reuse to handle long delays in creating new VkDevices and VkQueues.

Move MVKQueueFamily to MVKPhysicalDevice. Cache MTLCommandQueues in MVKQueueFamily and assign to MVKQueues as they are created. Refactor performance tracking. Replace vkGetShaderCompilationPerformanceMVK() with vkGetPerformanceStatisticsMVK() and associated structures. Add MTLCommandQueue access performance tracker. Update MoltenVK version to v1.0.6.
2018-05-06 23:28:21 -04:00 · 2018-05-06 23:28:21 -04:00 · 64bdbd995a
commit 64bdbd995a
parent 6088fa88f0
12 changed files with 199 additions and 169 deletions
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@ -48,7 +48,7 @@ extern "C" {
 */
 #define MVK_VERSION_MAJOR   1
 #define MVK_VERSION_MINOR   0
-#define MVK_VERSION_PATCH   5
+#define MVK_VERSION_PATCH   6

 #define MVK_MAKE_VERSION(major, minor, patch)    (((major) * 10000) + ((minor) * 100) + (patch))
 #define MVK_VERSION     MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
@ -64,7 +64,7 @@ typedef struct {
    VkBool32 supportLargeQueryPools;        /**< Metal allows only 8192 occlusion queries per MTLBuffer. If enabled, MoltenVK allocates a MTLBuffer for each query pool, allowing each query pool to support 8192 queries, which may slow performance or cause unexpected behaviour if the query pool is not established prior to a Metal renderpass, or if the query pool is changed within a Metal renderpass. If disabled, one MTLBuffer will be shared by all query pools, which improves performance, but limits the total device queries to 8192. Default is false. */
 	VkBool32 presentWithCommandBuffer;      /**< If enabled, each surface presentation is scheduled using a command buffer. Enabling this may improve rendering frame synchronization, but may result in reduced frame rates. Default value is false if the MVK_PRESENT_WITHOUT_COMMAND_BUFFER build setting is defined when MoltenVK is compiled, and true otherwise. By default the MVK_PRESENT_WITHOUT_COMMAND_BUFFER build setting is not defined and the value of this setting is true. */
    VkBool32 displayWatermark;              /**< If enabled, a MoltenVK logo watermark will be rendered on top of the scene. This can be enabled for publicity during demos. Default value is true if the MVK_DISPLAY_WATERMARK build setting is defined when MoltenVK is compiled, and false otherwise. By default the MVK_DISPLAY_WATERMARK build setting is not defined. */
-    VkBool32 performanceTracking;           /**< If enabled, per-frame performance statistics are tracked, optionally logged, and can be retrieved via the vkGetSwapchainPerformanceMVK() function, and various shader compilation performance statistics are tracked, logged, and can be retrieved via the vkGetShaderCompilationPerformanceMVK() function. Default is false. */
+    VkBool32 performanceTracking;           /**< If enabled, per-frame performance statistics are tracked, optionally logged, and can be retrieved via the vkGetSwapchainPerformanceMVK() function, and various performance statistics are tracked, logged, and can be retrieved via the vkGetPerformanceStatisticsMVK() function. Default is false. */
    uint32_t performanceLoggingFrameCount;  /**< If non-zero, performance statistics will be periodically logged to the console, on a repeating cycle of this many frames per swapchain. The performanceTracking capability must also be enabled. Default is zero, indicating no logging. */
 } MVKDeviceConfiguration;

@ -96,30 +96,47 @@ typedef struct {
    double averageFramesPerSecond;      /**< The rolling average number of frames per second. This is simply the 1000 divided by the averageFrameInterval value. */
 } MVKSwapchainPerformance;

-/** MoltenVK performance of a particular type of shader compilation event. */
+/** MoltenVK performance of a particular type of activity. */
 typedef struct {
-    uint32_t count;             /**< The number of compilation events of this type. */
-    double averageDuration;     /**< The average duration of the compilation event, in milliseconds. */
-    double minimumDuration;     /**< The minimum duration of the compilation event, in milliseconds. */
-    double maximumDuration;     /**< The maximum duration of the compilation event, in milliseconds. */
-} MVKShaderCompilationEventPerformance;
+    uint32_t count;             /**< The number of activities of this type. */
+    double averageDuration;     /**< The average duration of the activity, in milliseconds. */
+    double minimumDuration;     /**< The minimum duration of the activity, in milliseconds. */
+    double maximumDuration;     /**< The maximum duration of the activity, in milliseconds. */
+} MVKPerformanceTracker;

-/** MoltenVK performance of shader compilation events for a VkDevice. */
+/** MoltenVK performance of shader compilation activities. */
 typedef struct {
-	MVKShaderCompilationEventPerformance hashShaderCode;            /** Create a hash from the incoming shader code. */
-    MVKShaderCompilationEventPerformance spirvToMSL;                /** Convert SPIR-V to MSL source code. */
-    MVKShaderCompilationEventPerformance mslCompile;                /** Compile MSL source code into a MTLLibrary. */
-    MVKShaderCompilationEventPerformance mslLoad;                   /** Load pre-compiled MSL code into a MTLLibrary. */
-	MVKShaderCompilationEventPerformance shaderLibraryFromCache;    /** Retrieve a shader library from the cache, lazily creating it if needed. */
-    MVKShaderCompilationEventPerformance functionRetrieval;         /** Retrieve a MTLFunction from a MTLLibrary. */
-    MVKShaderCompilationEventPerformance functionSpecialization;    /** Specialize a retrieved MTLFunction. */
-    MVKShaderCompilationEventPerformance pipelineCompile;           /** Compile MTLFunctions into a pipeline. */
-	MVKShaderCompilationEventPerformance sizePipelineCache;         /** Calculate the size of cache data required to write MSL to pipeline cache data stream. */
-	MVKShaderCompilationEventPerformance writePipelineCache;        /** Write MSL to pipeline cache data stream. */
-	MVKShaderCompilationEventPerformance readPipelineCache;         /** Read MSL from pipeline cache data stream. */
+	MVKPerformanceTracker hashShaderCode;				/**< Create a hash from the incoming shader code. */
+    MVKPerformanceTracker spirvToMSL;					/**< Convert SPIR-V to MSL source code. */
+    MVKPerformanceTracker mslCompile;					/**< Compile MSL source code into a MTLLibrary. */
+    MVKPerformanceTracker mslLoad;						/**< Load pre-compiled MSL code into a MTLLibrary. */
+	MVKPerformanceTracker shaderLibraryFromCache;		/**< Retrieve a shader library from the cache, lazily creating it if needed. */
+    MVKPerformanceTracker functionRetrieval;			/**< Retrieve a MTLFunction from a MTLLibrary. */
+    MVKPerformanceTracker functionSpecialization;		/**< Specialize a retrieved MTLFunction. */
+    MVKPerformanceTracker pipelineCompile;				/**< Compile MTLFunctions into a pipeline. */
 } MVKShaderCompilationPerformance;


+/** MoltenVK performance of pipeline cache activities. */
+typedef struct {
+	MVKPerformanceTracker sizePipelineCache;			/**< Calculate the size of cache data required to write MSL to pipeline cache data stream. */
+	MVKPerformanceTracker writePipelineCache;			/**< Write MSL to pipeline cache data stream. */
+	MVKPerformanceTracker readPipelineCache;			/**< Read MSL from pipeline cache data stream. */
+} MVKPipelineCachePerformance;
+
+/** MoltenVK performance of queue activities. */
+typedef struct {
+	MVKPerformanceTracker mtlQueueAccess;          	/**< Create an MTLCommandQueue or access an existing cached instance. */
+} MVKQueuePerformance;
+
+/** MoltenVK performance. */
+typedef struct {
+	MVKShaderCompilationPerformance shaderCompilation;	/**< Shader compilation activities. */
+	MVKPipelineCachePerformance pipelineCache;			/**< Pipeline cache activities. */
+	MVKQueuePerformance queue;          				/**< Queue activities. */
+} MVKPerformanceStatistics;
+
+
 #pragma mark -
 #pragma mark Function types

@ -127,7 +144,7 @@ typedef void (VKAPI_PTR *PFN_vkGetMoltenVKDeviceConfigurationMVK)(VkDevice devic
 typedef VkResult (VKAPI_PTR *PFN_vkSetMoltenVKDeviceConfigurationMVK)(VkDevice device, MVKDeviceConfiguration* pConfiguration);
 typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMetalFeaturesMVK)(VkPhysicalDevice physicalDevice, MVKPhysicalDeviceMetalFeatures* pMetalFeatures);
 typedef void (VKAPI_PTR *PFN_vkGetSwapchainPerformanceMVK)(VkDevice device, VkSwapchainKHR swapchain, MVKSwapchainPerformance* pSwapchainPerf);
-typedef void (VKAPI_PTR *PFN_vkGetShaderCompilationPerformanceMVK)(VkDevice device, MVKShaderCompilationPerformance* pShaderCompPerf);
+typedef void (VKAPI_PTR *PFN_vkGetPerformanceStatisticsMVK)(VkDevice device, MVKPerformanceStatistics* pPerf);
 typedef void (VKAPI_PTR *PFN_vkGetVersionStringsMVK)(char* pMoltenVersionStringBuffer, uint32_t moltenVersionStringBufferLength, char* pVulkanVersionStringBuffer, uint32_t vulkanVersionStringBufferLength);

 #ifdef __OBJC__
@ -190,12 +207,12 @@ VKAPI_ATTR void VKAPI_CALL vkGetSwapchainPerformanceMVK(
    MVKSwapchainPerformance*                    pSwapchainPerf);

 /**
- * Populates the specified MVKShaderCompilationPerformance structure with the
- * current shader compilation performance statistics for the specified device.
+ * Populates the specified MVKPerformanceStatistics structure with
+ * the current performance statistics for the specified device.
 */
-VKAPI_ATTR void VKAPI_CALL vkGetShaderCompilationPerformanceMVK(
+VKAPI_ATTR void VKAPI_CALL vkGetPerformanceStatisticsMVK(
    VkDevice                                    device,
-    MVKShaderCompilationPerformance*            pShaderCompPerf);
+    MVKPerformanceStatistics*            		pPerf);

 /**
 * Returns a human readable version of the MoltenVK and Vulkan versions.
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
@ -300,7 +300,7 @@ MVKImage* MVKCommandResourceFactory::newMVKImage(MVKImageDescriptorData& imgData
 id<MTLFunction> MVKCommandResourceFactory::getFunctionNamed(const char* funcName) {
    uint64_t startTime = _device->getPerformanceTimestamp();
    id<MTLFunction> mtlFunc = [[_mtlLibrary newFunctionWithName: @(funcName)] autorelease];
-    _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.functionRetrieval, startTime);
+    _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
    return mtlFunc;
 }

@ -311,7 +311,7 @@ id<MTLFunction> MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode,
 	id<MTLLibrary> mtlLib = [[getMTLDevice() newLibraryWithSource: mslSrcCode
 														  options: shdrOpts
 															error: &err] autorelease];
-	_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.mslCompile, startTime);
+	_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
 	if (err) {
 		mvkNotifyErrorWithText(VK_ERROR_INITIALIZATION_FAILED, "Could not compile support shader from MSL source:\n%s\n %s (code %li) %s", mslSrcCode.UTF8String, err.localizedDescription.UTF8String, (long)err.code, err.localizedFailureReason.UTF8String);
 		return nil;
@ -319,7 +319,7 @@ id<MTLFunction> MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode,

 	startTime = _device->getPerformanceTimestamp();
 	id<MTLFunction> mtlFunc = [mtlLib newFunctionWithName: funcName];
-	_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.functionRetrieval, startTime);
+	_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
 	return mtlFunc;
 }

@ -328,7 +328,7 @@ id<MTLRenderPipelineState> MVKCommandResourceFactory::newMTLRenderPipelineState(
    NSError* err = nil;
    id<MTLRenderPipelineState> rps = [getMTLDevice() newRenderPipelineStateWithDescriptor: plDesc error: &err];    // retained
    MVKAssert( !err, "Could not create %s pipeline state: %s (code %li) %s", plDesc.label.UTF8String, err.localizedDescription.UTF8String, (long)err.code, err.localizedFailureReason.UTF8String);
-    _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.pipelineCompile, startTime);
+    _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.pipelineCompile, startTime);
    return rps;
 }

@ -360,7 +360,7 @@ void MVKCommandResourceFactory::initMTLLibrary() {
                                                     error: &err];    // retained
        MVKAssert( !err, "Could not compile command shaders %s (code %li) %s", err.localizedDescription.UTF8String, (long)err.code, err.localizedFailureReason.UTF8String);
    }
-    _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.mslCompile, startTime);
+    _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
 }

 MVKCommandResourceFactory::~MVKCommandResourceFactory() {
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@ -60,13 +60,9 @@ class MVKCommandEncoder;
 class MVKCommandResourceFactory;


-#define kMVKVertexContentBufferIndex	0
+/** The buffer index to use for vertex content. */
+const static uint32_t kMVKVertexContentBufferIndex = 0;

-#define MVK_MAX_QUEUE_FAMILIES					1
-#define MVK_MIN_SWAPCHAIN_SURFACE_IMAGE_COUNT	2
-#define MVK_MAX_SWAPCHAIN_SURFACE_IMAGE_COUNT	2	// Metal supports 3 concurrent drawables, but if the
-													// swapchain is destroyed and rebuilt as part of resizing,
-													// one will be held by the current display image.

 #pragma mark -
 #pragma mark MVKPhysicalDevice
@ -145,9 +141,6 @@ public:

 #pragma mark Queues

-	/** Returns the number of queue families supported by this device. */
-	inline uint32_t getQueueFamilyCount() { return _queueFamilyCount; }
-
 	/**
 	 * If properties is null, the value of pCount is updated with the number of
 	 * queue families supported by this instance.
@ -240,11 +233,10 @@ protected:
 	MVKPhysicalDeviceMetalFeatures _metalFeatures;
 	VkPhysicalDeviceProperties _properties;
 	VkPhysicalDeviceMemoryProperties _memoryProperties;
-	VkQueueFamilyProperties _queueFamilyProperties[MVK_MAX_QUEUE_FAMILIES];
+	std::vector<MVKQueueFamily*> _queueFamilies;
 	uint32_t _allMemoryTypes;
 	uint32_t _hostVisibleMemoryTypes;
 	uint32_t _privateMemoryTypes;
-	uint32_t _queueFamilyCount;
 };


@ -405,20 +397,20 @@ public:
 	}

    /**
-     * If performance is being tracked, adds a shader compilation event with a duration
+     * If performance is being tracked, adds the performance for an activity with a duration
     * interval between the start and end times, to the given performance statistics.
     *
     * If endTime is zero or not supplied, the current time is used.
     */
-    inline void addShaderCompilationEventPerformance(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
-													 uint64_t startTime, uint64_t endTime = 0) {
+    inline void addActivityPerformance(MVKPerformanceTracker& shaderCompilationEvent,
+									   uint64_t startTime, uint64_t endTime = 0) {
 		if (_mvkConfig.performanceTracking) {
-			addShaderCompilationEventPerformanceImpl(shaderCompilationEvent, startTime, endTime);
+			addActivityPerformanceImpl(shaderCompilationEvent, startTime, endTime);
 		}
 	};

-    /** Populates the specified statistics structure from the current shader performance statistics. */
-    void getShaderCompilationPerformanceStatistics(MVKShaderCompilationPerformance* pShaderCompPerf);
+    /** Populates the specified statistics structure from the current activity performance statistics. */
+    void getPerformanceStatistics(MVKPerformanceStatistics* pPerf);


 #pragma mark Metal
@ -482,8 +474,8 @@ public:
    /** The MoltenVK configuration settings for this device. */
    const MVKDeviceConfiguration _mvkConfig;

-    /** The shader compilation performance statistics. */
-    MVKShaderCompilationPerformance _shaderCompilationPerformance;
+    /** Performance statistics. */
+    MVKPerformanceStatistics _performanceStatistics;


 #pragma mark Construction
@ -511,18 +503,17 @@ protected:
 	MVKResource* addResource(MVKResource* rez);
 	MVKResource* removeResource(MVKResource* rez);
    void initPerformanceTracking();
-    const char* getShaderCompilationEventName(MVKShaderCompilationEventPerformance& shaderCompilationEvent);
+    const char* getActivityPerformanceDescription(MVKPerformanceTracker& shaderCompilationEvent);
 	uint64_t getPerformanceTimestampImpl();
-	void addShaderCompilationEventPerformanceImpl(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
-												  uint64_t startTime, uint64_t endTime);
+	void addActivityPerformanceImpl(MVKPerformanceTracker& shaderCompilationEvent,
+									uint64_t startTime, uint64_t endTime);

 	MVKPhysicalDevice* _physicalDevice;
    MVKCommandResourceFactory* _commandResourceFactory;
-	std::vector<MVKQueueFamily*> _queueFamilies;
-	std::vector<MVKQueue*> _queues;
+	std::vector<std::vector<MVKQueue*>> _queuesByQueueFamilyIndex;
 	std::vector<MVKResource*> _resources;
 	std::mutex _rezLock;
-    std::mutex _shaderCompPerfLock;
+    std::mutex _perfLock;
    id<MTLBuffer> _globalVisibilityResultMTLBuffer;
    uint32_t _globalVisibilityQueryCount;
    std::mutex _vizLock;
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@ -168,8 +168,11 @@ VkResult MVKPhysicalDevice::getSurfaceCapabilities(MVKSurface* surface,

    VkExtent2D surfExtnt = mvkVkExtent2DFromCGSize(mtlLayer.updatedDrawableSizeMVK);

-	pSurfaceCapabilities->minImageCount = MVK_MIN_SWAPCHAIN_SURFACE_IMAGE_COUNT;
-	pSurfaceCapabilities->maxImageCount = MVK_MAX_SWAPCHAIN_SURFACE_IMAGE_COUNT;
+	// Metal supports 3 concurrent drawables, but if the swapchain is destroyed and
+	// rebuilt as part of resizing, one will be held by the current display image.
+	pSurfaceCapabilities->minImageCount = 2;
+	pSurfaceCapabilities->maxImageCount = 2;
+
 	pSurfaceCapabilities->currentExtent = surfExtnt;
 	pSurfaceCapabilities->minImageExtent = surfExtnt;
 	pSurfaceCapabilities->maxImageExtent = surfExtnt;
@ -263,23 +266,25 @@ VkResult MVKPhysicalDevice::getSurfacePresentModes(MVKSurface* surface,
 VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
 													 VkQueueFamilyProperties* queueProperties) {

+	uint32_t qfCnt = uint32_t(_queueFamilies.size());
+
 	// If properties aren't actually being requested yet, simply update the returned count
 	if ( !queueProperties ) {
-		*pCount = getQueueFamilyCount();
+		*pCount = qfCnt;
 		return VK_SUCCESS;
 	}

 	// Determine how many families we'll return, and return that number
-	uint32_t qCnt = getQueueFamilyCount();
-	VkResult result = (*pCount <= qCnt) ? VK_SUCCESS : VK_INCOMPLETE;
-	*pCount = min(*pCount, qCnt);
+	*pCount = min(*pCount, qfCnt);

 	// Now populate the queue families
-	for (uint32_t qIdx = 0; qIdx < *pCount; qIdx++) {
-		queueProperties[qIdx] = _queueFamilyProperties[qIdx];
+	if (queueProperties) {
+		for (uint32_t qfIdx = 0; qfIdx < *pCount; qfIdx++) {
+			_queueFamilies[qfIdx]->getProperties(&queueProperties[qfIdx]);
+		}
 	}

-	return result;
+	return (*pCount <= qfCnt) ? VK_SUCCESS : VK_INCOMPLETE;
 }


@ -966,11 +971,16 @@ void MVKPhysicalDevice::logGPUInfo() {
 void MVKPhysicalDevice::initQueueFamilies() {

 	// TODO: determine correct values
-	_queueFamilyCount = 1;
-	_queueFamilyProperties[0].queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
-    _queueFamilyProperties[0].queueCount = 16;
-	_queueFamilyProperties[0].timestampValidBits = 64;
-	_queueFamilyProperties[0].minImageTransferGranularity = { 1, 1, 1};
+	VkQueueFamilyProperties qfProps;
+	qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
+	qfProps.queueCount = 8;
+	qfProps.timestampValidBits = 64;
+	qfProps.minImageTransferGranularity = { 1, 1, 1};
+
+	uint32_t qfCount = 1;
+	for (uint32_t qfIdx = 0; qfIdx < qfCount; qfIdx++) {
+		_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx, &qfProps));
+	}
 }

 MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtlDevice) {
@ -986,6 +996,7 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
 }

 MVKPhysicalDevice::~MVKPhysicalDevice() {
+	mvkDestroyContainerContents(_queueFamilies);
 	[_mtlDevice release];
 }

@ -998,12 +1009,16 @@ PFN_vkVoidFunction MVKDevice::getProcAddr(const char* pName) {
 }

 VkResult MVKDevice::getDeviceQueue(uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue) {
-	*pQueue = _queueFamilies[queueFamilyIndex]->getQueue(queueIndex)->getVkQueue();
+	*pQueue = _queuesByQueueFamilyIndex[queueFamilyIndex][queueIndex]->getVkQueue();
 	return VK_SUCCESS;
 }

 VkResult MVKDevice::waitIdle() {
-    for (auto& q : _queues) { q->waitIdle(kMVKCommandUseDeviceWaitIdle); }
+	for (auto& queues : _queuesByQueueFamilyIndex) {
+		for (MVKQueue* q : queues) {
+			q->waitIdle(kMVKCommandUseDeviceWaitIdle);
+		}
+	}
 	return VK_SUCCESS;
 }

@ -1327,9 +1342,9 @@ void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,

 uint64_t MVKDevice::getPerformanceTimestampImpl() { return mvkGetTimestamp(); }

-void MVKDevice::addShaderCompilationEventPerformanceImpl(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
-														 uint64_t startTime, uint64_t endTime) {
-    lock_guard<mutex> lock(_shaderCompPerfLock);
+void MVKDevice::addActivityPerformanceImpl(MVKPerformanceTracker& shaderCompilationEvent,
+										   uint64_t startTime, uint64_t endTime) {
+    lock_guard<mutex> lock(_perfLock);

 	double currInterval = mvkGetElapsedMilliseconds(startTime, endTime);
    shaderCompilationEvent.minimumDuration = min(currInterval, shaderCompilationEvent.minimumDuration);
@ -1337,8 +1352,8 @@ void MVKDevice::addShaderCompilationEventPerformanceImpl(MVKShaderCompilationEve
    double totalInterval = (shaderCompilationEvent.averageDuration * shaderCompilationEvent.count++) + currInterval;
    shaderCompilationEvent.averageDuration = totalInterval / shaderCompilationEvent.count;

-	MVKLogDebug("Shader building performance to %s curr: %.3f ms, avg: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d",
-				getShaderCompilationEventName(shaderCompilationEvent),
+	MVKLogDebug("Performance to %s curr: %.3f ms, avg: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d",
+				getActivityPerformanceDescription(shaderCompilationEvent),
 				currInterval,
 				shaderCompilationEvent.averageDuration,
 				shaderCompilationEvent.minimumDuration,
@ -1346,25 +1361,26 @@ void MVKDevice::addShaderCompilationEventPerformanceImpl(MVKShaderCompilationEve
 				shaderCompilationEvent.count);
 }

-const char* MVKDevice::getShaderCompilationEventName(MVKShaderCompilationEventPerformance& shaderCompilationEvent) {
-	if (&shaderCompilationEvent == &_shaderCompilationPerformance.hashShaderCode) { return "hash shader code"; }
-    if (&shaderCompilationEvent == &_shaderCompilationPerformance.spirvToMSL) { return "convert SPIR-V to MSL source code"; }
-    if (&shaderCompilationEvent == &_shaderCompilationPerformance.mslCompile) { return "compile MSL source code into a MTLLibrary"; }
-    if (&shaderCompilationEvent == &_shaderCompilationPerformance.mslLoad) { return "load pre-compiled MSL code into a MTLLibrary"; }
-	if (&shaderCompilationEvent == &_shaderCompilationPerformance.shaderLibraryFromCache) { return "retrieve shader library from the cache."; }
-    if (&shaderCompilationEvent == &_shaderCompilationPerformance.functionRetrieval) { return "retrieve a MTLFunction from a MTLLibrary"; }
-    if (&shaderCompilationEvent == &_shaderCompilationPerformance.functionSpecialization) { return "specialize a retrieved MTLFunction"; }
-    if (&shaderCompilationEvent == &_shaderCompilationPerformance.pipelineCompile) { return "compile MTLFunctions into a pipeline"; }
-	if (&shaderCompilationEvent == &_shaderCompilationPerformance.sizePipelineCache) { return "calculate cache size required to write MSL to pipeline cache"; }
-	if (&shaderCompilationEvent == &_shaderCompilationPerformance.writePipelineCache) { return "write MSL to pipeline cache"; }
-	if (&shaderCompilationEvent == &_shaderCompilationPerformance.readPipelineCache) { return "read MSL from pipeline cache"; }
+const char* MVKDevice::getActivityPerformanceDescription(MVKPerformanceTracker& shaderCompilationEvent) {
+	if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.hashShaderCode) { return "hash shader SPIR-V code"; }
+    if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.spirvToMSL) { return "convert SPIR-V to MSL source code"; }
+    if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.mslCompile) { return "compile MSL source code into a MTLLibrary"; }
+    if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.mslLoad) { return "load pre-compiled MSL code into a MTLLibrary"; }
+	if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.shaderLibraryFromCache) { return "retrieve shader library from the cache."; }
+    if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.functionRetrieval) { return "retrieve a MTLFunction from a MTLLibrary"; }
+    if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.functionSpecialization) { return "specialize a retrieved MTLFunction"; }
+    if (&shaderCompilationEvent == &_performanceStatistics.shaderCompilation.pipelineCompile) { return "compile MTLFunctions into a pipeline"; }
+	if (&shaderCompilationEvent == &_performanceStatistics.pipelineCache.sizePipelineCache) { return "calculate cache size required to write MSL to pipeline cache"; }
+	if (&shaderCompilationEvent == &_performanceStatistics.pipelineCache.writePipelineCache) { return "write MSL to pipeline cache"; }
+	if (&shaderCompilationEvent == &_performanceStatistics.pipelineCache.readPipelineCache) { return "read MSL from pipeline cache"; }
+	if (&shaderCompilationEvent == &_performanceStatistics.queue.mtlQueueAccess) { return "access MTLCommandQueue"; }
    return "Unknown shader compile event";
 }

-void MVKDevice::getShaderCompilationPerformanceStatistics(MVKShaderCompilationPerformance* pShaderCompPerf) {
-    lock_guard<mutex> lock(_shaderCompPerfLock);
+void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) {
+    lock_guard<mutex> lock(_perfLock);

-    if (pShaderCompPerf) { *pShaderCompPerf = _shaderCompilationPerformance; }
+    if (pPerf) { *pPerf = _performanceStatistics; }
 }


@ -1445,25 +1461,15 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
    _commandResourceFactory = new MVKCommandResourceFactory(this);

 	// Create the queues
-	uint32_t qfCnt = _physicalDevice->getQueueFamilyCount();
-	VkQueueFamilyProperties qfProperties[qfCnt];
-	_physicalDevice->getQueueFamilyProperties(&qfCnt, qfProperties);
-	_queueFamilies.assign(qfCnt, VK_NULL_HANDLE);
-
-	// For each element in the queue record count, create a queue family with the requested number of queues.
 	uint32_t qrCnt = pCreateInfo->queueCreateInfoCount;
 	for (uint32_t qrIdx = 0; qrIdx < qrCnt; qrIdx++) {
 		const VkDeviceQueueCreateInfo* pQFInfo = &pCreateInfo->pQueueCreateInfos[qrIdx];
 		uint32_t qfIdx = pQFInfo->queueFamilyIndex;
-		if (_queueFamilies[qfIdx] == VK_NULL_HANDLE) {
-			MVKQueueFamily* qFam = new MVKQueueFamily(this, pQFInfo, &qfProperties[qfIdx]);
-			_queueFamilies[qfIdx] = qFam;
-
-			// Extract the queues from the queue family into a cache
-			uint32_t qCnt = qFam->getQueueCount();
-			for (uint32_t qIdx = 0; qIdx < qCnt; qIdx++) {
-				_queues.push_back(qFam->getQueue(qIdx));
-			}
+		MVKQueueFamily* qFam = _physicalDevice->_queueFamilies[qfIdx];
+		_queuesByQueueFamilyIndex.resize(qfIdx + 1);	// Ensure an entry for this queue family exists
+		auto& queues = _queuesByQueueFamilyIndex[qfIdx];
+		for (uint32_t qIdx = 0; qIdx < pQFInfo->queueCount; qIdx++) {
+			queues.push_back(new MVKQueue(this, qFam, qIdx, pQFInfo->pQueuePriorities[qIdx]));
 		}
 	}

@ -1471,27 +1477,30 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
 }

 void MVKDevice::initPerformanceTracking() {
-    MVKShaderCompilationEventPerformance initPerf;
+    MVKPerformanceTracker initPerf;
    initPerf.count = 0;
    initPerf.averageDuration = 0.0;
    initPerf.minimumDuration = numeric_limits<double>::max();
    initPerf.maximumDuration = 0.0;

-	_shaderCompilationPerformance.hashShaderCode = initPerf;
-    _shaderCompilationPerformance.spirvToMSL = initPerf;
-    _shaderCompilationPerformance.mslCompile = initPerf;
-    _shaderCompilationPerformance.mslLoad = initPerf;
-	_shaderCompilationPerformance.shaderLibraryFromCache = initPerf;
-    _shaderCompilationPerformance.functionRetrieval = initPerf;
-    _shaderCompilationPerformance.functionSpecialization = initPerf;
-    _shaderCompilationPerformance.pipelineCompile = initPerf;
-	_shaderCompilationPerformance.sizePipelineCache = initPerf;
-	_shaderCompilationPerformance.writePipelineCache = initPerf;
-	_shaderCompilationPerformance.readPipelineCache = initPerf;
+	_performanceStatistics.shaderCompilation.hashShaderCode = initPerf;
+    _performanceStatistics.shaderCompilation.spirvToMSL = initPerf;
+    _performanceStatistics.shaderCompilation.mslCompile = initPerf;
+    _performanceStatistics.shaderCompilation.mslLoad = initPerf;
+	_performanceStatistics.shaderCompilation.shaderLibraryFromCache = initPerf;
+    _performanceStatistics.shaderCompilation.functionRetrieval = initPerf;
+    _performanceStatistics.shaderCompilation.functionSpecialization = initPerf;
+    _performanceStatistics.shaderCompilation.pipelineCompile = initPerf;
+	_performanceStatistics.pipelineCache.sizePipelineCache = initPerf;
+	_performanceStatistics.pipelineCache.writePipelineCache = initPerf;
+	_performanceStatistics.pipelineCache.readPipelineCache = initPerf;
+	_performanceStatistics.queue.mtlQueueAccess = initPerf;
 }

 MVKDevice::~MVKDevice() {
-	mvkDestroyContainerContents(_queueFamilies);
+	for (auto& queues : _queuesByQueueFamilyIndex) {
+		mvkDestroyContainerContents(queues);
+	}
    [_globalVisibilityResultMTLBuffer release];
 	_commandResourceFactory->destroy();
 }
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@ -245,7 +245,7 @@ void MVKInstance::initProcAddrs() {
    ADD_PROC_ADDR(vkSetMoltenVKDeviceConfigurationMVK);
    ADD_PROC_ADDR(vkGetPhysicalDeviceMetalFeaturesMVK);
    ADD_PROC_ADDR(vkGetSwapchainPerformanceMVK);
-    ADD_PROC_ADDR(vkGetShaderCompilationPerformanceMVK);
+    ADD_PROC_ADDR(vkGetPerformanceStatisticsMVK);
    ADD_PROC_ADDR(vkGetVersionStringsMVK);
    ADD_PROC_ADDR(vkGetMTLDeviceMVK);
    ADD_PROC_ADDR(vkSetMTLTextureMVK);
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@ -252,7 +252,7 @@ void MVKGraphicsPipeline::initMTLRenderPipelineState(const VkGraphicsPipelineCre
            if (psError) {
                setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_INITIALIZATION_FAILED, "Could not create render pipeline:\n%s.", psError.description.UTF8String));
            }
-            _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.pipelineCompile, startTime);
+            _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.pipelineCompile, startTime);
        }
    }
 }
@ -430,7 +430,7 @@ MVKComputePipeline::MVKComputePipeline(MVKDevice* device,
 			if (psError) {
 				setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_INITIALIZATION_FAILED, "Could not create compute pipeline:\n%s.", psError.description.UTF8String));
 			}
-			_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.pipelineCompile, startTime);
+			_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.pipelineCompile, startTime);
 		} else {
 			setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_INITIALIZATION_FAILED, "Compute shader function could not be compiled into pipeline. See previous error."));
 		}
@ -612,9 +612,9 @@ VkResult MVKPipelineCache::writeData(size_t* pDataSize, void* pData) {
 // Serializes the data in this cache to a stream
 void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) {

-	MVKShaderCompilationEventPerformance& shaderCompilationEvent = isCounting
-		? _device->_shaderCompilationPerformance.sizePipelineCache
-		: _device->_shaderCompilationPerformance.writePipelineCache;
+	MVKPerformanceTracker& shaderCompilationEvent = isCounting
+		? _device->_performanceStatistics.pipelineCache.sizePipelineCache
+		: _device->_performanceStatistics.pipelineCache.writePipelineCache;

 	uint32_t cacheEntryType;
 	cereal::BinaryOutputArchive writer(outstream);
@ -640,7 +640,7 @@ void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) {
 			writer(cacheIter.getShaderContext());
 			writer(cacheIter.getEntryPoint());
 			writer(cacheIter.getMSL());
-			_device->addShaderCompilationEventPerformance(shaderCompilationEvent, startTime);
+			_device->addActivityPerformance(shaderCompilationEvent, startTime);
 		}
 	}

@ -705,7 +705,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) {

 					// Add the shader library to the staging cache.
 					MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey);
-					_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.readPipelineCache, startTime);
+					_device->addActivityPerformance(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime);
 					slCache->addShaderLibrary(&shaderContext, msl, entryPoint);

 					break;
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@ -30,37 +30,39 @@

 class MVKQueue;
 class MVKQueueSubmission;
+class MVKPhysicalDevice;


 #pragma mark -
 #pragma mark MVKQueueFamily

 /** Represents a Vulkan queue family. */
-class MVKQueueFamily : public MVKBaseDeviceObject {
+class MVKQueueFamily : public MVKConfigurableObject {	// holds an MVKPhysicalDevice reference instead of deriving from the device-bound base class

 public:

 	/** Returns the index of this queue family. */
 	inline uint32_t getIndex() { return _queueFamilyIndex; }

-	/** Returns the number of queues allocated for this family. */
-	inline uint32_t getQueueCount() { return uint32_t(_queues.size()); }
+	/** Populates the specified properties structure with the properties of this queue family. Does nothing if queueProperties is null. */
+	void getProperties(VkQueueFamilyProperties* queueProperties) {
+		if (queueProperties) { *queueProperties = _properties; }
+	}

-	/** Returns the queue at the specified index. */
-	inline MVKQueue* getQueue(uint32_t queueIndex) { return _queues[queueIndex]; }
+	/** Returns the MTLCommandQueue at the specified index, creating and caching it on the first request so that later requests for the same index reuse it. */
+	id<MTLCommandQueue> getMTLCommandQueue(uint32_t queueIndex);

-	/** Constructs an instance with the specified number of queues for the specified device. */
-	MVKQueueFamily(MVKDevice* device,
-				   const VkDeviceQueueCreateInfo* pCreateInfo,
-				   const VkQueueFamilyProperties* pProperties);
+	/** Constructs an instance with the specified index and properties, for the specified physical device. */
+	MVKQueueFamily(MVKPhysicalDevice* physicalDevice, uint32_t queueFamilyIndex, const VkQueueFamilyProperties* pProperties);

 	~MVKQueueFamily() override;

 protected:
+	MVKPhysicalDevice* _physicalDevice;	// supplies the MTLDevice used to create command queues
    uint32_t _queueFamilyIndex;
 	VkQueueFamilyProperties _properties;
-	std::vector<MVKQueue*> _queues;
-	std::mutex _lock;
+	std::vector<id<MTLCommandQueue>> _mtlQueues;	// cached command queues, one slot per queue in the family; a slot stays nil until first requested
+	std::mutex _qLock;	// guards access to _mtlQueues
 };


--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@ -29,21 +29,28 @@ using namespace std;
 #pragma mark -
 #pragma mark MVKQueueFamily

-MVKQueueFamily::MVKQueueFamily(MVKDevice* device,
-							   const VkDeviceQueueCreateInfo* pCreateInfo,
-							   const VkQueueFamilyProperties* pProperties) : MVKBaseDeviceObject(device) {
-	_properties = *pProperties;
-
-	// Create the queues
-	uint32_t qCnt = pCreateInfo->queueCount;
-	_queues.reserve(qCnt);
-	for (uint32_t qIdx = 0; qIdx < qCnt; qIdx++) {
-        _queues.push_back(new MVKQueue(_device, this, qIdx, pCreateInfo->pQueuePriorities[qIdx]));
+// Returns the MTLCommandQueue cached at queueIndex, creating and caching it on first request. MTLCommandQueues are
+// cached in MVKQueueFamily/MVKPhysicalDevice because they are very limited in number. An app that creates multiple
+// VkDevices over time (such as a test suite) will soon find 15 second delays when creating subsequent MTLCommandQueues.
+id<MTLCommandQueue> MVKQueueFamily::getMTLCommandQueue(uint32_t queueIndex) {
+	lock_guard<mutex> lock(_qLock);	// held for the whole lookup-or-create sequence
+	id<MTLCommandQueue> mtlQ = _mtlQueues[queueIndex];	// NOTE(review): queueIndex is not range-checked here; presumably callers guarantee queueIndex < queueCount — confirm
+	if ( !mtlQ ) {	// nothing cached in this slot yet
+		mtlQ = [_physicalDevice->getMTLDevice() newCommandQueue];	// retained
+		_mtlQueues[queueIndex] = mtlQ;	// cache it for later requests
 	}
+	return mtlQ;
+}
+
+MVKQueueFamily::MVKQueueFamily(MVKPhysicalDevice* physicalDevice, uint32_t queueFamilyIndex, const VkQueueFamilyProperties* pProperties) {	// stores the arguments and sets up an empty command queue cache
+	_physicalDevice = physicalDevice;
+	_queueFamilyIndex = queueFamilyIndex;
+	_properties = *pProperties;	// copied by value; pProperties is dereferenced without a null check
+	_mtlQueues.assign(_properties.queueCount, nil);	// one nil slot per queue in the family; getMTLCommandQueue() fills slots on demand
 }

 MVKQueueFamily::~MVKQueueFamily() {
-	mvkDestroyContainerContents(_queues);
+	mvkReleaseContainerContents(_mtlQueues);	// presumably releases each cached MTLCommandQueue retained in getMTLCommandQueue() — helper is defined elsewhere, confirm
 }


@ -123,7 +130,7 @@ VkResult MVKQueue::waitIdle(MVKCommandUse cmdBuffUse) {
 // from a single thread.
 id<MTLCommandBuffer> MVKQueue::makeMTLCommandBuffer(NSString* mtlCmdBuffLabel) {

-	// Retrieve a MTLCommandBuffer from the MTLQueue.
+	// Retrieve a MTLCommandBuffer from the MTLCommandQueue.
 	id<MTLCommandBuffer> mtlCmdBuffer = [_mtlQueue commandBufferWithUnretainedReferences];
    mtlCmdBuffer.label = mtlCmdBuffLabel;

@ -208,7 +215,9 @@ void MVKQueue::initExecQueue() {

 /** Creates and initializes the Metal queue. */
 void MVKQueue::initMTLCommandQueue() {
-	_mtlQueue = [_device->getMTLDevice() newCommandQueue];	// retained
+	uint64_t startTime = _device->getPerformanceTimestamp();	// timestamp taken before the queue access that is being measured
+	_mtlQueue = _queueFamily->getMTLCommandQueue(_index);	// not retained (cached in queue family, which creates it on first access)
+	_device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, startTime);	// attribute this access to the queue.mtlQueueAccess tracker
    [_mtlQueue insertDebugCaptureBoundary];                 // Allow Xcode to capture the first frame if desired.
 }

@ -223,7 +232,6 @@ MVKQueue::~MVKQueue() {
    // in the destructor of the lock created in registerMTLCommandBufferCountdown().
    lock_guard<mutex> lock(_completionLock);
 	destroyExecQueue();
-	[_mtlQueue release];
 }

 /** Destroys the execution dispatch queue. */
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
@ -50,7 +50,7 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe

    uint64_t startTime = _device->getPerformanceTimestamp();
    id<MTLFunction> mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease];
-    _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.functionRetrieval, startTime);
+    _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);

    if (mtlFunc) {
        // If the Metal device supports shader specialization, and the Metal function expects to be
@ -83,7 +83,7 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe
                NSError* err = nil;
                mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName constantValues: mtlFCVals error: &err] autorelease];
                handleCompilationError(err, "Shader function specialization");
-                _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.functionSpecialization, startTimeSpec);
+                _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionSpecialization, startTimeSpec);
            }
        }
    } else {
@ -130,7 +130,7 @@ MVKShaderLibrary::MVKShaderLibrary(MVKDevice* device, const string& mslSourceCod
 													 error: &err];        // retained
 		handleCompilationError(err, "Shader module compilation");
 	}
-	_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.mslCompile, startTime);
+	_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);

 	_entryPoint = entryPoint;
 	_msl = mslSourceCode;
@ -150,7 +150,7 @@ MVKShaderLibrary::MVKShaderLibrary(MVKDevice* device,
        handleCompilationError(err, "Compiled shader module creation");
        [shdrData release];
    }
-    _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.mslLoad, startTime);
+    _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslLoad, startTime);
 }

 // If err object is nil, the compilation succeeded without any warnings.
@ -246,7 +246,7 @@ MVKMTLFunction MVKShaderModule::getMTLFunction(SPIRVToMSLConverterContext* pCont
 		} else {
 			mvkLib = _shaderLibraryCache.getShaderLibrary(pContext, this);
 		}
-		_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.shaderLibraryFromCache, startTime);
+		_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime);
 	}
 	return mvkLib ? mvkLib->getMTLFunction(pSpecializationInfo) : MVKMTLFunctionNull;
 }
@ -256,7 +256,7 @@ bool MVKShaderModule::convert(SPIRVToMSLConverterContext* pContext) {

 	uint64_t startTime = _device->getPerformanceTimestamp();
 	bool wasConverted = _converter.convert(*pContext, shouldLogCode, shouldLogCode, shouldLogCode);
-	_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.spirvToMSL, startTime);
+	_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime);

 	if (wasConverted) {
 		if (shouldLogCode) { MVKLogInfo("%s", _converter.getResultLog().data()); }
@ -293,7 +293,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,

 			uint64_t startTime = _device->getPerformanceTimestamp();
 			codeHash = mvkHash(pCreateInfo->pCode, spvCount);
-			_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.hashShaderCode, startTime);
+			_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);

 			_converter.setSPIRV(pCreateInfo->pCode, spvCount);

@ -307,7 +307,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
 			uint64_t startTime = _device->getPerformanceTimestamp();
 			codeHash = mvkHash(&magicNum);
 			codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash);
-			_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.hashShaderCode, startTime);
+			_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);

 			_converter.setMSL(pMSLCode, nullptr);
 			_defaultLibrary = new MVKShaderLibrary(_device, _converter.getMSL().c_str(), _converter.getEntryPoint());
@ -322,7 +322,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
 			uint64_t startTime = _device->getPerformanceTimestamp();
 			codeHash = mvkHash(&magicNum);
 			codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash);
-			_device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.hashShaderCode, startTime);
+			_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);

 			_defaultLibrary = new MVKShaderLibrary(_device, (void*)(pMSLCode), mslCodeLen);

--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@ -230,8 +230,12 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo
        .flags = 0,
    };

-    uint32_t imgCnt = MVK_MAX_SWAPCHAIN_SURFACE_IMAGE_COUNT;
-    _surfaceImages.reserve(imgCnt);
+	VkSurfaceCapabilitiesKHR srfcProps;
+	MVKSurface* mvkSrfc = (MVKSurface*)pCreateInfo->surface;
+	_device->getPhysicalDevice()->getSurfaceCapabilities(mvkSrfc, &srfcProps);
+
+	uint32_t imgCnt = srfcProps.maxImageCount;
+	_surfaceImages.reserve(imgCnt);
    for (uint32_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) {
        _surfaceImages.push_back(_device->createSwapchainImage(&imgInfo, this, NULL));
    }
--- a/MoltenVK/MoltenVK/Vulkan/vk_mvk_moltenvk.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vk_mvk_moltenvk.mm
@ -60,12 +60,11 @@ MVK_PUBLIC_SYMBOL void vkGetSwapchainPerformanceMVK(
    mvkSwapchain->getPerformanceStatistics(pSwapchainPerf);
 }

-MVK_PUBLIC_SYMBOL void vkGetShaderCompilationPerformanceMVK(
+MVK_PUBLIC_SYMBOL void vkGetPerformanceStatisticsMVK(	// replaces vkGetShaderCompilationPerformanceMVK()
    VkDevice                                    device,
-    MVKShaderCompilationPerformance*            pShaderCompPerf) {
+    MVKPerformanceStatistics*            		pPerf) {	// passed through unchanged to MVKDevice::getPerformanceStatistics()

-    MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
-    mvkDev->getShaderCompilationPerformanceStatistics(pShaderCompPerf);
+    MVKDevice::getMVKDevice(device)->getPerformanceStatistics(pPerf);	// resolve the MVKDevice for the VkDevice handle and delegate to it
 }

 MVK_PUBLIC_SYMBOL void vkGetVersionStringsMVK(
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@ -112,11 +112,11 @@ MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceProperties(

 MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceQueueFamilyProperties(
 	VkPhysicalDevice                            physicalDevice,
-	uint32_t*                                   pCount,
+	uint32_t*                                   pQueueFamilyPropertyCount,	// renamed from pCount; matches the parameter name used by the Vulkan API
 	VkQueueFamilyProperties*                    pQueueFamilyProperties) {
 	
 	MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice);
-	mvkPD->getQueueFamilyProperties(pCount, pQueueFamilyProperties);
+	mvkPD->getQueueFamilyProperties(pQueueFamilyPropertyCount, pQueueFamilyProperties);	// delegate to the MVKPhysicalDevice resolved from the handle
 }

 MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceMemoryProperties(