From 11bd581c8deb28cabebcd2e2ce7be94e50c4d1fe Mon Sep 17 00:00:00 2001
From: SRSaunders <82544213+SRSaunders@users.noreply.github.com>
Date: Wed, 13 Mar 2024 09:55:08 -0400
Subject: [PATCH 1/6] Add "previous" to MVKPerformanceTracker and save value
 before capturing "latest"

---
 MoltenVK/MoltenVK/API/mvk_private_api.h   |  5 +----
 MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 11 +++++++----
 2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h
index 0d2d9ae9..6f8f2e87 100644
--- a/MoltenVK/MoltenVK/API/mvk_private_api.h
+++ b/MoltenVK/MoltenVK/API/mvk_private_api.h
@@ -376,6 +376,7 @@ typedef struct {
 typedef struct {
     uint32_t count;       /**< The number of activities of this type. */
 	double latest;        /**< The latest (most recent) value of the activity. */
+	double previous;      /**< The previous (second most recent) value of the activity. */
     double average;       /**< The average value of the activity. */
     double minimum;       /**< The minimum value of the activity. */
     double maximum;       /**< The maximum value of the activity. */
@@ -427,10 +428,6 @@ typedef struct {
  * than your app was, the size of this structure in your app may be larger or smaller than the
  * struct in MoltenVK. See the description of the vkGetPerformanceStatisticsMVK() function for
  * information about how to handle this.
- *
- * TO SUPPORT DYNAMIC LINKING TO THIS STRUCTURE AS DESCRIBED ABOVE, THIS STRUCTURE SHOULD NOT
- * BE CHANGED EXCEPT TO ADD ADDITIONAL MEMBERS ON THE END. EXISTING MEMBERS, AND THEIR ORDER,
- * SHOULD NOT BE CHANGED.
  */
 typedef struct {
 	MVKShaderCompilationPerformance shaderCompilation;	/** Shader compilations activities. */
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 61a441de..cb6b659d 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -4342,6 +4342,7 @@ void MVKDevice::applyMemoryBarrier(MVKPipelineBarrier& barrier,
 void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue) {
 	lock_guard<mutex> lock(_perfLock);
 
+	activity.previous = activity.latest;
 	activity.latest = currentValue;
 	activity.minimum = ((activity.minimum == 0.0)
 								? currentValue :
@@ -4364,12 +4365,13 @@ void MVKDevice::logActivityInline(MVKPerformanceTracker& activity, MVKPerformanc
 }
 void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) {
 	const char* fmt = (isInline
-					   ? "%s performance avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d"
-					   : "  %-45s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d");
+					   ? "%s performance avg: %.3f ms, latest: %.3f ms, prev: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d"
+					   : "  %-45s avg: %.3f ms, latest: %.3f ms, prev: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d");
 	MVKLogInfo(fmt,
 			   getActivityPerformanceDescription(activity, perfStats),
 			   activity.average,
 			   activity.latest,
+			   activity.previous,
 			   activity.minimum,
 			   activity.maximum,
 			   activity.count);
@@ -4377,12 +4379,13 @@ void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerforma
 
 void MVKDevice::logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) {
 	const char* fmt = (isInline
-					   ? "%s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d"
-					   : "  %-45s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d");
+					   ? "%s avg: %5llu MB, latest: %5llu MB, prev: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d"
+					   : "  %-45s avg: %5llu MB, latest: %5llu MB, prev: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d");
 	MVKLogInfo(fmt,
 			   getActivityPerformanceDescription(activity, perfStats),
 			   uint64_t(activity.average) / KIBI,
 			   uint64_t(activity.latest) / KIBI,
+			   uint64_t(activity.previous) / KIBI,
 			   uint64_t(activity.minimum) / KIBI,
 			   uint64_t(activity.maximum) / KIBI,
 			   activity.count);

From 3ec155a3b6f3285482cfc28a74f5ec0a1f3dd2e9 Mon Sep 17 00:00:00 2001
From: SRSaunders <82544213+SRSaunders@users.noreply.github.com>
Date: Sun, 17 Mar 2024 11:34:14 -0400
Subject: [PATCH 2/6] Fix mvkCopyGrowingStruct() to return success only if
 struct and buffer sizes match

---
 MoltenVK/MoltenVK/Vulkan/mvk_api.mm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_api.mm b/MoltenVK/MoltenVK/Vulkan/mvk_api.mm
index cade5025..1f7bc04e 100644
--- a/MoltenVK/MoltenVK/Vulkan/mvk_api.mm
+++ b/MoltenVK/MoltenVK/Vulkan/mvk_api.mm
@@ -43,7 +43,7 @@ VkResult mvkCopyGrowingStruct(S* pDst, const S* pSrc, size_t* pCopySize) {
 		size_t origSize = *pCopySize;
 		*pCopySize = std::min(origSize, sizeof(S));
 		memcpy(pDst, pSrc, *pCopySize);
-		return (*pCopySize == origSize) ? VK_SUCCESS : VK_INCOMPLETE;
+		return (sizeof(S) == origSize) ? VK_SUCCESS : VK_INCOMPLETE;
 	} else {
 		*pCopySize = sizeof(S);
 		return VK_SUCCESS;

From 08c1ad705a9224d8d8b2c272b512cc8684fc759e Mon Sep 17 00:00:00 2001
From: SRSaunders <82544213+SRSaunders@users.noreply.github.com>
Date: Sun, 17 Mar 2024 11:35:08 -0400
Subject: [PATCH 3/6] Increment MVK_PRIVATE_API_VERSION to 41

---
 MoltenVK/MoltenVK/API/mvk_private_api.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h
index 6f8f2e87..5f63019f 100644
--- a/MoltenVK/MoltenVK/API/mvk_private_api.h
+++ b/MoltenVK/MoltenVK/API/mvk_private_api.h
@@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier;
  */
 
 
-#define MVK_PRIVATE_API_VERSION   40
+#define MVK_PRIVATE_API_VERSION   41
 
 
 #pragma mark -

From 10810f41d061362e87d8a4b9d0ab30e7bf97a390 Mon Sep 17 00:00:00 2001
From: SRSaunders <82544213+SRSaunders@users.noreply.github.com>
Date: Tue, 19 Mar 2024 00:09:11 -0400
Subject: [PATCH 4/6] Add two new counters in MVKQueuePerformance for async
 queue submit wait times

---
 MoltenVK/MoltenVK/API/mvk_private_api.h  |  2 ++
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.h  |  6 +++---
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 22 ++++++++++++++++------
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h
index 5f63019f..a2183e56 100644
--- a/MoltenVK/MoltenVK/API/mvk_private_api.h
+++ b/MoltenVK/MoltenVK/API/mvk_private_api.h
@@ -408,9 +408,11 @@ typedef struct {
 typedef struct {
 	MVKPerformanceTracker retrieveMTLCommandBuffer;     /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */
 	MVKPerformanceTracker commandBufferEncoding;        /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. */
+	MVKPerformanceTracker waitSubmitCommandBuffers;		/** Wait time from initial call to starting the submit and encoding of all VkCommandBuffers in an asynchronous vkQueueSubmit() operation,  in milliseconds. */
 	MVKPerformanceTracker submitCommandBuffers;         /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */
 	MVKPerformanceTracker mtlCommandBufferExecution;    /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. */
 	MVKPerformanceTracker retrieveCAMetalDrawable;      /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */
+	MVKPerformanceTracker waitPresentSwapchains;		/** Wait time from initial call to starting presentation of the swapchains in an asynchronous vkQueuePresentKHR() operation,  in milliseconds. */
 	MVKPerformanceTracker presentSwapchains;            /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */
 	MVKPerformanceTracker frameInterval;                /** Frame presentation interval (1000/FPS), in milliseconds. */
 } MVKQueuePerformance;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index 0adc557f..6459ebca 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -195,7 +195,7 @@ public:
 	 *
 	 * Upon completion of this function, no further calls should be made to this instance.
 	 */
-	virtual VkResult execute() = 0;
+	virtual VkResult execute(uint64_t startTime) = 0;
 
 	MVKQueueSubmission(MVKQueue* queue,
 					   uint32_t waitSemaphoreInfoCount,
@@ -238,7 +238,7 @@ typedef struct MVKCommandBufferSubmitInfo {
 class MVKQueueCommandBufferSubmission : public MVKQueueSubmission {
 
 public:
-	VkResult execute() override;
+	VkResult execute(uint64_t startTime) override;
 
 	MVKQueueCommandBufferSubmission(MVKQueue* queue, 
 									const VkSubmitInfo2* pSubmit,
@@ -302,7 +302,7 @@ protected:
 class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission {
 
 public:
-	VkResult execute() override;
+	VkResult execute(uint64_t startTime) override;
 
 	MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
 									 const VkPresentInfoKHR* pPresentInfo);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 40301695..e190e905 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -69,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); }
 
 // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased.
 // This is critical for apps that don't use standard OS autoreleasing runloop threading.
-static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } }
+static inline VkResult execute(MVKQueueSubmission* qSubmit, uint64_t startTime) { @autoreleasepool { return qSubmit->execute(startTime); } }
 
 // Executes the submmission, either immediately, or by dispatching to an execution queue.
 // Submissions to the execution queue are wrapped in a dedicated autoreleasepool.
@@ -83,11 +83,13 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
 	// Extract result before submission to avoid race condition with early destruction
 	// Submit regardless of config result, to ensure submission semaphores and fences are signalled.
 	// The submissions will ensure a misconfiguration will be safe to execute.
+	MVKDevice* mvkDev = getDevice();
+	uint64_t startTime = mvkDev->getPerformanceTimestamp();
 	VkResult rslt = qSubmit->getConfigurationResult();
 	if (_execQueue) {
-		dispatch_async(_execQueue, ^{ execute(qSubmit); } );
+		dispatch_async(_execQueue, ^{ execute(qSubmit, startTime); } );
 	} else {
-		rslt = execute(qSubmit);
+		rslt = execute(qSubmit, startTime);
 	}
 	return rslt;
 }
@@ -442,13 +444,17 @@ MVKQueueSubmission::~MVKQueueSubmission() {
 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission
 
-VkResult MVKQueueCommandBufferSubmission::execute() {
+VkResult MVKQueueCommandBufferSubmission::execute(uint64_t startTime) {
 
 	_queue->_submissionCaptureScope->beginScope();
 
 	// If using encoded semaphore waiting, do so now.
 	for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); }
 
+	// Wait time from an async vkQueueSubmit() call to starting submit and encoding of the command buffers
+	MVKDevice* mvkDev = getDevice();
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, startTime);
+
 	// Submit each command buffer.
 	submitCommandBuffers();
 
@@ -678,8 +684,8 @@ MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQ
 // If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting.
 // If the semaphores are not encodable, wait on them inline after presenting.
 // The semaphores know what to do.
-VkResult MVKQueuePresentSurfaceSubmission::execute() {
-	// MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early. 
+VkResult MVKQueuePresentSurfaceSubmission::execute(uint64_t startTime) {
+	// MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early.
 	// Although testing could not determine which objects were being lost, queue present MTLCommandBuffers
 	// are used only once per frame, and retain so few objects, that blanket retention is still performant.
 	id<MTLCommandBuffer> mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true);
@@ -689,6 +695,10 @@ VkResult MVKQueuePresentSurfaceSubmission::execute() {
 		ws.encodeWait(nil);			// Inline semaphore waits
 	}
 
+	// Wait time from an async vkQueuePresentKHR() call to starting presentation of the swapchains
+	MVKDevice* mvkDev = getDevice();
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, startTime);
+
 	for (int i = 0; i < _presentInfo.size(); i++ ) {
 		setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]));
 	}

From 1d3fe52db9a860264eb3874a2bedaeb644098ef7 Mon Sep 17 00:00:00 2001
From: SRSaunders <82544213+SRSaunders@users.noreply.github.com>
Date: Tue, 19 Mar 2024 23:19:01 -0400
Subject: [PATCH 5/6] Update documentation for the new waitSubmitCommandBuffers
 and waitPresentSwapchains perf counters

---
 MoltenVK/MoltenVK/API/mvk_private_api.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h
index a2183e56..6110ae12 100644
--- a/MoltenVK/MoltenVK/API/mvk_private_api.h
+++ b/MoltenVK/MoltenVK/API/mvk_private_api.h
@@ -408,11 +408,11 @@ typedef struct {
 typedef struct {
 	MVKPerformanceTracker retrieveMTLCommandBuffer;     /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */
 	MVKPerformanceTracker commandBufferEncoding;        /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. */
-	MVKPerformanceTracker waitSubmitCommandBuffers;		/** Wait time from initial call to starting the submit and encoding of all VkCommandBuffers in an asynchronous vkQueueSubmit() operation,  in milliseconds. */
+	MVKPerformanceTracker waitSubmitCommandBuffers;		/** Wait time from vkQueueSubmit() call to starting the encoding of the command buffers to the GPU, in milliseconds. Useful when MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS is disabled. */
 	MVKPerformanceTracker submitCommandBuffers;         /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */
 	MVKPerformanceTracker mtlCommandBufferExecution;    /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. */
 	MVKPerformanceTracker retrieveCAMetalDrawable;      /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */
-	MVKPerformanceTracker waitPresentSwapchains;		/** Wait time from initial call to starting presentation of the swapchains in an asynchronous vkQueuePresentKHR() operation,  in milliseconds. */
+	MVKPerformanceTracker waitPresentSwapchains;		/** Wait time from vkQueuePresentKHR() call to starting the encoding of the swapchains to the GPU, in milliseconds. Useful when MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS is disabled. */
 	MVKPerformanceTracker presentSwapchains;            /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */
 	MVKPerformanceTracker frameInterval;                /** Frame presentation interval (1000/FPS), in milliseconds. */
 } MVKQueuePerformance;
@@ -525,11 +525,12 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceMetalFeaturesMVK(
  * to tell MoltenVK the limit of the size of your MVKPerformanceStatistics structure. Upon return
  * from this function, the value of *pPerfSize will hold the actual number of bytes copied into
  * your passed MVKPerformanceStatistics structure, which will be the smaller of what your app
- * thinks is the size of MVKPerformanceStatistics, and what MoltenVK thinks it is. This
- * represents the safe access area within the structure for both MoltenVK and your app.
+ * thinks is the size of MVKPerformanceStatistics, and what MoltenVK thinks it is.
  *
  * If the size that MoltenVK expects for MVKPerformanceStatistics is different than the value passed
  * in *pPerfSize, this function will return VK_INCOMPLETE, otherwise it will return VK_SUCCESS.
+ * A return value of VK_INCOMPLETE indicates that the data returned from this function will likely be
+ * incorrect, as the structures nested under MVKPerformanceStatistics may be different.
  *
  * Although it is not necessary, you can use this function to determine in advance the value
  * that MoltenVK expects the size of MVKPerformanceStatistics to be by setting the value of

From 0cf9f7f24b7f814474013f49c9a826a2ba530c76 Mon Sep 17 00:00:00 2001
From: SRSaunders <82544213+SRSaunders@users.noreply.github.com>
Date: Tue, 19 Mar 2024 23:29:23 -0400
Subject: [PATCH 6/6] Capture perf start times in MVKQueueSubmission
 constructors vs. arguments of execute() function

---
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.h  |  7 ++++---
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 20 +++++++++++---------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index 6459ebca..752fcf80 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -195,7 +195,7 @@ public:
 	 *
 	 * Upon completion of this function, no further calls should be made to this instance.
 	 */
-	virtual VkResult execute(uint64_t startTime) = 0;
+	virtual VkResult execute() = 0;
 
 	MVKQueueSubmission(MVKQueue* queue,
 					   uint32_t waitSemaphoreInfoCount,
@@ -216,6 +216,7 @@ protected:
 
 	MVKQueue* _queue;
 	MVKSmallVector<MVKSemaphoreSubmitInfo> _waitSemaphores;
+	uint64_t _creationTime;
 };
 
 
@@ -238,7 +239,7 @@ typedef struct MVKCommandBufferSubmitInfo {
 class MVKQueueCommandBufferSubmission : public MVKQueueSubmission {
 
 public:
-	VkResult execute(uint64_t startTime) override;
+	VkResult execute() override;
 
 	MVKQueueCommandBufferSubmission(MVKQueue* queue, 
 									const VkSubmitInfo2* pSubmit,
@@ -302,7 +303,7 @@ protected:
 class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission {
 
 public:
-	VkResult execute(uint64_t startTime) override;
+	VkResult execute() override;
 
 	MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
 									 const VkPresentInfoKHR* pPresentInfo);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index e190e905..38b50b16 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -69,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); }
 
 // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased.
 // This is critical for apps that don't use standard OS autoreleasing runloop threading.
-static inline VkResult execute(MVKQueueSubmission* qSubmit, uint64_t startTime) { @autoreleasepool { return qSubmit->execute(startTime); } }
+static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } }
 
 // Executes the submmission, either immediately, or by dispatching to an execution queue.
 // Submissions to the execution queue are wrapped in a dedicated autoreleasepool.
@@ -83,13 +83,11 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
 	// Extract result before submission to avoid race condition with early destruction
 	// Submit regardless of config result, to ensure submission semaphores and fences are signalled.
 	// The submissions will ensure a misconfiguration will be safe to execute.
-	MVKDevice* mvkDev = getDevice();
-	uint64_t startTime = mvkDev->getPerformanceTimestamp();
 	VkResult rslt = qSubmit->getConfigurationResult();
 	if (_execQueue) {
-		dispatch_async(_execQueue, ^{ execute(qSubmit, startTime); } );
+		dispatch_async(_execQueue, ^{ execute(qSubmit); } );
 	} else {
-		rslt = execute(qSubmit, startTime);
+		rslt = execute(qSubmit);
 	}
 	return rslt;
 }
@@ -417,6 +415,8 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
 	_queue = queue;
 	_queue->retain();	// Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
 
+	_creationTime = getDevice()->getPerformanceTimestamp();		// call getDevice() only after _queue is defined
+
 	_waitSemaphores.reserve(waitSemaphoreInfoCount);
 	for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) {
 		_waitSemaphores.emplace_back(pWaitSemaphoreSubmitInfos[i]);
@@ -430,6 +430,8 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
 	_queue = queue;
 	_queue->retain();	// Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
 
+	_creationTime = getDevice()->getPerformanceTimestamp();		// call getDevice() only after _queue is defined
+
 	_waitSemaphores.reserve(waitSemaphoreCount);
 	for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
 		_waitSemaphores.emplace_back(pWaitSemaphores[i], pWaitDstStageMask ? pWaitDstStageMask[i] : 0);
@@ -444,7 +446,7 @@ MVKQueueSubmission::~MVKQueueSubmission() {
 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission
 
-VkResult MVKQueueCommandBufferSubmission::execute(uint64_t startTime) {
+VkResult MVKQueueCommandBufferSubmission::execute() {
 
 	_queue->_submissionCaptureScope->beginScope();
 
@@ -453,7 +455,7 @@ VkResult MVKQueueCommandBufferSubmission::execute(uint64_t startTime) {
 
 	// Wait time from an async vkQueueSubmit() call to starting submit and encoding of the command buffers
 	MVKDevice* mvkDev = getDevice();
-	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, startTime);
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, _creationTime);
 
 	// Submit each command buffer.
 	submitCommandBuffers();
@@ -684,7 +686,7 @@ MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQ
 // If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting.
 // If the semaphores are not encodable, wait on them inline after presenting.
 // The semaphores know what to do.
-VkResult MVKQueuePresentSurfaceSubmission::execute(uint64_t startTime) {
+VkResult MVKQueuePresentSurfaceSubmission::execute() {
 	// MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early.
 	// Although testing could not determine which objects were being lost, queue present MTLCommandBuffers
 	// are used only once per frame, and retain so few objects, that blanket retention is still performant.
@@ -697,7 +699,7 @@ VkResult MVKQueuePresentSurfaceSubmission::execute(uint64_t startTime) {
 
 	// Wait time from an async vkQueuePresentKHR() call to starting presentation of the swapchains
 	MVKDevice* mvkDev = getDevice();
-	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, startTime);
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, _creationTime);
 
 	for (int i = 0; i < _presentInfo.size(); i++ ) {
 		setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]));