Capture perf start times in the MVKQueueSubmission constructors instead of passing them as arguments to the execute() function

2024-03-19 23:29:23 -04:00 · 2024-03-19 23:29:23 -04:00 · 0cf9f7f24b
commit 0cf9f7f24b
parent 1d3fe52db9
2 changed files with 15 additions and 12 deletions
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@ -195,7 +195,7 @@ public:
 	 *
 	 * Upon completion of this function, no further calls should be made to this instance.
 	 */
-	virtual VkResult execute(uint64_t startTime) = 0;
+	virtual VkResult execute() = 0;

 	MVKQueueSubmission(MVKQueue* queue,
 					   uint32_t waitSemaphoreInfoCount,
@ -216,6 +216,7 @@ protected:

 	MVKQueue* _queue;
 	MVKSmallVector<MVKSemaphoreSubmitInfo> _waitSemaphores;
+	uint64_t _creationTime;
 };


@ -238,7 +239,7 @@ typedef struct MVKCommandBufferSubmitInfo {
 class MVKQueueCommandBufferSubmission : public MVKQueueSubmission {

 public:
-	VkResult execute(uint64_t startTime) override;
+	VkResult execute() override;

 	MVKQueueCommandBufferSubmission(MVKQueue* queue, 
 									const VkSubmitInfo2* pSubmit,
@ -302,7 +303,7 @@ protected:
 class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission {

 public:
-	VkResult execute(uint64_t startTime) override;
+	VkResult execute() override;

 	MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
 									 const VkPresentInfoKHR* pPresentInfo);
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@ -69,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); }

 // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased.
 // This is critical for apps that don't use standard OS autoreleasing runloop threading.
-static inline VkResult execute(MVKQueueSubmission* qSubmit, uint64_t startTime) { @autoreleasepool { return qSubmit->execute(startTime); } }
+static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } }

 // Executes the submmission, either immediately, or by dispatching to an execution queue.
 // Submissions to the execution queue are wrapped in a dedicated autoreleasepool.
@ -83,13 +83,11 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
 	// Extract result before submission to avoid race condition with early destruction
 	// Submit regardless of config result, to ensure submission semaphores and fences are signalled.
 	// The submissions will ensure a misconfiguration will be safe to execute.
-	MVKDevice* mvkDev = getDevice();
-	uint64_t startTime = mvkDev->getPerformanceTimestamp();
 	VkResult rslt = qSubmit->getConfigurationResult();
 	if (_execQueue) {
-		dispatch_async(_execQueue, ^{ execute(qSubmit, startTime); } );
+		dispatch_async(_execQueue, ^{ execute(qSubmit); } );
 	} else {
-		rslt = execute(qSubmit, startTime);
+		rslt = execute(qSubmit);
 	}
 	return rslt;
 }
@ -417,6 +415,8 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
 	_queue = queue;
 	_queue->retain();	// Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().

+	_creationTime = getDevice()->getPerformanceTimestamp();		// call getDevice() only after _queue is defined
+
 	_waitSemaphores.reserve(waitSemaphoreInfoCount);
 	for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) {
 		_waitSemaphores.emplace_back(pWaitSemaphoreSubmitInfos[i]);
@ -430,6 +430,8 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
 	_queue = queue;
 	_queue->retain();	// Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().

+	_creationTime = getDevice()->getPerformanceTimestamp();		// call getDevice() only after _queue is defined
+
 	_waitSemaphores.reserve(waitSemaphoreCount);
 	for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
 		_waitSemaphores.emplace_back(pWaitSemaphores[i], pWaitDstStageMask ? pWaitDstStageMask[i] : 0);
@ -444,7 +446,7 @@ MVKQueueSubmission::~MVKQueueSubmission() {
 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission

-VkResult MVKQueueCommandBufferSubmission::execute(uint64_t startTime) {
+VkResult MVKQueueCommandBufferSubmission::execute() {

 	_queue->_submissionCaptureScope->beginScope();

@ -453,7 +455,7 @@ VkResult MVKQueueCommandBufferSubmission::execute(uint64_t startTime) {

 	// Wait time from an async vkQueueSubmit() call to starting submit and encoding of the command buffers
 	MVKDevice* mvkDev = getDevice();
-	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, startTime);
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, _creationTime);

 	// Submit each command buffer.
 	submitCommandBuffers();
@ -684,7 +686,7 @@ MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQ
 // If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting.
 // If the semaphores are not encodable, wait on them inline after presenting.
 // The semaphores know what to do.
-VkResult MVKQueuePresentSurfaceSubmission::execute(uint64_t startTime) {
+VkResult MVKQueuePresentSurfaceSubmission::execute() {
 	// MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early.
 	// Although testing could not determine which objects were being lost, queue present MTLCommandBuffers
 	// are used only once per frame, and retain so few objects, that blanket retention is still performant.
@ -697,7 +699,7 @@ VkResult MVKQueuePresentSurfaceSubmission::execute(uint64_t startTime) {

 	// Wait time from an async vkQueuePresentKHR() call to starting presentation of the swapchains
 	MVKDevice* mvkDev = getDevice();
-	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, startTime);
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, _creationTime);

 	for (int i = 0; i < _presentInfo.size(); i++ ) {
 		setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]));