From 2b7f9aee43ebd5d8cddf53a158c5bb987d1adfa4 Mon Sep 17 00:00:00 2001
From: Chip Davis <cdavis@codeweavers.com>
Date: Tue, 27 Oct 2020 16:10:57 -0500
Subject: [PATCH] Support the VK_KHR_timeline_semaphore extension.

This implementation uses `MTLSharedEvent` where possible, and emulates
it on the host otherwise. Unlike binary semaphores, `MTLSharedEvent`s
map well to timeline semaphores; there should be no problems using them
when they're available.

I'm extremely confident in the `MTLSharedEvent`-based implementation. It
passes nearly all the synchronization tests. I'm less confident in the
emulated implementation.
---
 MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm   |  38 ++++-
 MoltenVK/MoltenVK/GPUObjects/MVKImage.mm    |   4 +-
 MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm |   3 +
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.h     |   4 +-
 MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm    |  35 ++++-
 MoltenVK/MoltenVK/GPUObjects/MVKSync.h      | 135 ++++++++++++++--
 MoltenVK/MoltenVK/GPUObjects/MVKSync.mm     | 163 +++++++++++++++++++-
 MoltenVK/MoltenVK/Layers/MVKExtensions.def  |   1 +
 MoltenVK/MoltenVK/Vulkan/vulkan.mm          |  39 +++++
 9 files changed, 383 insertions(+), 39 deletions(-)

diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 372d3bd4..411348cd 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -118,6 +118,11 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
 				shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.subgroupSize != 0;
 				break;
 			}
+			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
+				auto* timelineSem4Features = (VkPhysicalDeviceTimelineSemaphoreFeatures*)next;
+				timelineSem4Features->timelineSemaphore = true;
+				break;
+			}
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
 				auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*)next;
 				uboLayoutFeatures->uniformBufferStandardLayout = true;
@@ -305,6 +310,11 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
                 }
 				break;
 #endif
+            case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
+                auto* timelineSem4Props = (VkPhysicalDeviceTimelineSemaphoreProperties*)next;
+                timelineSem4Props->maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max();
+                break;
+            }
             case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
 				auto* inlineUniformBlockProps = (VkPhysicalDeviceInlineUniformBlockPropertiesEXT*)next;
 				inlineUniformBlockProps->maxInlineUniformBlockSize = _metalFeatures.dynamicMTLBufferSize;
@@ -2668,12 +2678,30 @@ void MVKDevice::destroyFence(MVKFence* mvkFence,
 
 MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInfo,
 										 const VkAllocationCallbacks* pAllocator) {
-	if (_useMTLFenceForSemaphores) {
-		return new MVKSemaphoreMTLFence(this, pCreateInfo);
-	} else if (_useMTLEventForSemaphores) {
-		return new MVKSemaphoreMTLEvent(this, pCreateInfo);
+	const VkSemaphoreTypeCreateInfo* pTypeCreateInfo = nullptr;
+	for (auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
+		switch (next->sType) {
+			case VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO:
+				pTypeCreateInfo = (VkSemaphoreTypeCreateInfo*)next;
+				break;
+			default:
+				break;
+		}
+	}
+	if (pTypeCreateInfo && pTypeCreateInfo->semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE) {
+		if (_pMetalFeatures->events) {
+			return new MVKTimelineSemaphoreMTLEvent(this, pCreateInfo, pTypeCreateInfo);
+		} else {
+			return new MVKTimelineSemaphoreEmulated(this, pCreateInfo, pTypeCreateInfo);
+		}
 	} else {
-		return new MVKSemaphoreEmulated(this, pCreateInfo);
+		if (_useMTLFenceForSemaphores) {
+			return new MVKSemaphoreMTLFence(this, pCreateInfo);
+		} else if (_useMTLEventForSemaphores) {
+			return new MVKSemaphoreMTLEvent(this, pCreateInfo);
+		} else {
+			return new MVKSemaphoreEmulated(this, pCreateInfo);
+		}
 	}
 }
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index ce164b96..48a6ae43 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -1263,13 +1263,13 @@ void MVKPresentableSwapchainImage::signalPresentationSemaphore(id<MTLCommandBuff
 
 	if ( !_availabilitySignalers.empty() ) {
 		MVKSemaphore* mvkSem = _availabilitySignalers.front().first;
-		if (mvkSem) { mvkSem->encodeSignal(mtlCmdBuff); }
+		if (mvkSem) { mvkSem->encodeSignal(mtlCmdBuff, 0); }
 	}
 }
 
 // Signal either or both of the semaphore and fence in the specified tracker pair.
 void MVKPresentableSwapchainImage::signal(MVKSwapchainSignaler& signaler, id<MTLCommandBuffer> mtlCmdBuff) {
-	if (signaler.first) { signaler.first->encodeSignal(mtlCmdBuff); }
+	if (signaler.first) { signaler.first->encodeSignal(mtlCmdBuff, 0); }
 	if (signaler.second) { signaler.second->signal(); }
 }
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index f27496f5..76193070 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -660,6 +660,9 @@ void MVKInstance::initProcAddrs() {
 	ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
 	ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
 	ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
+	ADD_DVC_EXT_ENTRY_POINT(vkGetSemaphoreCounterValueKHR, KHR_TIMELINE_SEMAPHORE);
+	ADD_DVC_EXT_ENTRY_POINT(vkSignalSemaphoreKHR, KHR_TIMELINE_SEMAPHORE);
+	ADD_DVC_EXT_ENTRY_POINT(vkWaitSemaphoresKHR, KHR_TIMELINE_SEMAPHORE);
 	ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectTagEXT, EXT_DEBUG_MARKER);
 	ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectNameEXT, EXT_DEBUG_MARKER);
 	ADD_DVC_EXT_ENTRY_POINT(vkCmdDebugMarkerBeginEXT, EXT_DEBUG_MARKER);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index dde55fb2..5df84015 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -178,7 +178,7 @@ protected:
 	friend class MVKQueue;
 
 	MVKQueue* _queue;
-	MVKSmallVector<MVKSemaphore*> _waitSemaphores;
+	MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _waitSemaphores;
 	bool _trackPerformance;
 };
 
@@ -203,7 +203,7 @@ protected:
 	void finish();
 	virtual void submitCommandBuffers() {}
 
-	MVKSmallVector<MVKSemaphore*> _signalSemaphores;
+	MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _signalSemaphores;
 	MVKFence* _fence;
 	id<MTLCommandBuffer> _activeMTLCommandBuffer;
 };
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 595ea60b..9233c9c0 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -226,7 +226,7 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
 
 	_waitSemaphores.reserve(waitSemaphoreCount);
 	for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
-		_waitSemaphores.push_back((MVKSemaphore*)pWaitSemaphores[i]);
+		_waitSemaphores.push_back(make_pair((MVKSemaphore*)pWaitSemaphores[i], (uint64_t)0));
 	}
 }
 
@@ -241,13 +241,13 @@ void MVKQueueCommandBufferSubmission::execute() {
 	_queue->_submissionCaptureScope->beginScope();
 
 	// If using encoded semaphore waiting, do so now.
-	for (auto* ws : _waitSemaphores) { ws->encodeWait(getActiveMTLCommandBuffer()); }
+	for (auto& ws : _waitSemaphores) { ws.first->encodeWait(getActiveMTLCommandBuffer(), ws.second); }
 
 	// Submit each command buffer.
 	submitCommandBuffers();
 
 	// If using encoded semaphore signaling, do so now.
-	for (auto* ss : _signalSemaphores) { ss->encodeSignal(getActiveMTLCommandBuffer()); }
+	for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(getActiveMTLCommandBuffer(), ss.second); }
 
 	// Commit the last MTLCommandBuffer.
 	// Nothing after this because callback might destroy this instance before this function ends.
@@ -278,7 +278,7 @@ void MVKQueueCommandBufferSubmission::setActiveMTLCommandBuffer(id<MTLCommandBuf
 void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) {
 
 	// If using inline semaphore waiting, do so now.
-	for (auto& ws : _waitSemaphores) { ws->encodeWait(nil); }
+	for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, ws.second); }
 
 	MVKDevice* mkvDev = _queue->_device;
 	uint64_t startTime = mkvDev->getPerformanceTimestamp();
@@ -308,7 +308,7 @@ void MVKQueueCommandBufferSubmission::finish() {
 	_queue->_submissionCaptureScope->endScope();
 
 	// If using inline semaphore signaling, do so now.
-	for (auto& ss : _signalSemaphores) { ss->encodeSignal(nil); }
+	for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(nil, ss.second); }
 
 	// If a fence exists, signal it.
 	if (_fence) { _fence->signal(); }
@@ -325,10 +325,29 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue
 
     // pSubmit can be null if just tracking the fence alone
     if (pSubmit) {
+        VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr;
+        for (const auto* next = (const VkBaseInStructure*)pSubmit->pNext; next; next = next->pNext) {
+            switch (next->sType) {
+                case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
+                    pTimelineSubmit = (VkTimelineSemaphoreSubmitInfo*)next;
+                    break;
+                default:
+                    break;
+            }
+        }
+        if (pTimelineSubmit) {
+            // Presentation doesn't support timeline semaphores, so handle wait values here.
+            // Clamp to the entries held in _waitSemaphores so an oversized value count cannot index past them.
+            for (uint32_t i = 0; i < pTimelineSubmit->waitSemaphoreValueCount && i < _waitSemaphores.size(); i++) {
+                _waitSemaphores[i].second = pTimelineSubmit->pWaitSemaphoreValues[i];
+            }
+        }
         uint32_t ssCnt = pSubmit->signalSemaphoreCount;
         _signalSemaphores.reserve(ssCnt);
         for (uint32_t i = 0; i < ssCnt; i++) {
-            _signalSemaphores.push_back((MVKSemaphore*)pSubmit->pSignalSemaphores[i]);
+            // The value array may be shorter than the semaphore list (e.g. count 0 when only binary semaphores are signaled).
+            bool hasValue = pTimelineSubmit && i < pTimelineSubmit->signalSemaphoreValueCount;
+            _signalSemaphores.push_back(make_pair((MVKSemaphore*)pSubmit->pSignalSemaphores[i], hasValue ? pTimelineSubmit->pSignalSemaphoreValues[i] : (uint64_t)0));
         }
     }
 
@@ -348,12 +367,12 @@ void MVKQueuePresentSurfaceSubmission::execute() {
 	// If the semaphores are not encodable, wait on them inline after presenting.
 	// The semaphores know what to do.
 	id<MTLCommandBuffer> mtlCmdBuff = getMTLCommandBuffer();
-	for (auto& ws : _waitSemaphores) { ws->encodeWait(mtlCmdBuff); }
+	for (auto& ws : _waitSemaphores) { ws.first->encodeWait(mtlCmdBuff, 0); }
 	for (int i = 0; i < _presentInfo.size(); i++ ) {
 		MVKPresentableSwapchainImage *img = _presentInfo[i].presentableImage;
 		img->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]);
 	}
-	for (auto& ws : _waitSemaphores) { ws->encodeWait(nil); }
+	for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, 0); }
 	[mtlCmdBuff commit];
 
 	// Let Xcode know the current frame is done, then start a new frame
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
index 917255c3..8dd79451 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
@@ -120,6 +120,9 @@ public:
 	/** Returns the debug report object type of this object. */
 	VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT; }
 
+	/** Returns the type of this semaphore. */
+	virtual VkSemaphoreType getSemaphoreType() { return VK_SEMAPHORE_TYPE_BINARY; }
+
 	/**
 	 * Wait for this semaphore to be signaled.
 	 *
@@ -132,8 +135,11 @@ public:
 	 * code path, once with a mtlCmdBuff to support encoding a wait on the command buffer if the
 	 * subclass supports command encoding, and once without a mtlCmdBuff, at the point in the
 	 * code path where the code should block if the subclass does not support command encoding.
+	 *
+	 * 'value' only applies if this semaphore is a timeline semaphore. It is the value to wait for the
+	 * semaphore to have.
 	 */
-	virtual void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) = 0;
+	virtual void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) = 0;
 
 	/**
 	 * Signals this semaphore.
@@ -147,8 +153,11 @@ public:
 	 * code path, once with a mtlCmdBuff to support encoding a wait on the command buffer if the
 	 * subclass supports command encoding, and once without a mtlCmdBuff, at the point in the
 	 * code path where the code should block if the subclass does not support command encoding.
+	 *
+	 * 'value' only applies if this semaphore is a timeline semaphore. It is the value to assign the semaphore
+	 * upon completion.
 	 */
-	virtual void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) = 0;
+	virtual void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) = 0;
 
 	/** Returns whether this semaphore uses command encoding. */
 	virtual bool isUsingCommandEncoding() = 0;
@@ -171,8 +180,8 @@ protected:
 class MVKSemaphoreMTLFence : public MVKSemaphore {
 
 public:
-	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) override;
-	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) override;
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
 	bool isUsingCommandEncoding() override { return true; }
 
 	MVKSemaphoreMTLFence(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
@@ -191,8 +200,8 @@ protected:
 class MVKSemaphoreMTLEvent : public MVKSemaphore {
 
 public:
-	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) override;
-	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) override;
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
 	bool isUsingCommandEncoding() override { return true; }
 
 	MVKSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
@@ -212,8 +221,8 @@ protected:
 class MVKSemaphoreEmulated : public MVKSemaphore {
 
 public:
-	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) override;
-	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) override;
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
 	bool isUsingCommandEncoding() override { return false; }
 
 	MVKSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
@@ -223,6 +232,91 @@ protected:
 };
 
 
+#pragma mark -
+#pragma mark MVKTimelineSemaphore
+
+/** Abstract class that represents a Vulkan timeline semaphore. */
+class MVKTimelineSemaphore : public MVKSemaphore {
+
+public:
+
+	VkSemaphoreType getSemaphoreType() override { return VK_SEMAPHORE_TYPE_TIMELINE; }
+
+	/** Returns the current value of the semaphore counter. */
+	virtual uint64_t getCounterValue() = 0;
+
+	/** Signals this semaphore on the host. */
+	virtual void signal(const VkSemaphoreSignalInfo* pSignalInfo) = 0;
+
+	/** Registers a wait for this semaphore on the host. Returns true if the semaphore is already signaled. */
+	virtual bool registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) = 0;
+
+	/** Stops waiting for this semaphore. */
+	virtual void unregisterWait(MVKFenceSitter* sitter) = 0;
+
+#pragma mark Construction
+
+    MVKTimelineSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo)
+        : MVKSemaphore(device, pCreateInfo) {}
+
+};
+
+
+#pragma mark -
+#pragma mark MVKTimelineSemaphoreMTLEvent
+
+/** An MVKTimelineSemaphore that uses MTLSharedEvent to provide synchronization. */
+class MVKTimelineSemaphoreMTLEvent : public MVKTimelineSemaphore {
+
+public:
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) override;
+	bool isUsingCommandEncoding() override { return true; }
+
+	uint64_t getCounterValue() override { return _mtlEvent.signaledValue; }
+	void signal(const VkSemaphoreSignalInfo* pSignalInfo) override;
+	bool registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) override;
+	void unregisterWait(MVKFenceSitter* sitter) override;
+
+	MVKTimelineSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo);
+
+	~MVKTimelineSemaphoreMTLEvent() override;
+
+protected:
+	id<MTLSharedEvent> _mtlEvent;
+	std::mutex _lock;
+	std::unordered_set<MVKFenceSitter*> _sitters;
+};
+
+
+#pragma mark -
+#pragma mark MVKTimelineSemaphoreEmulated
+
+/** An MVKTimelineSemaphore that uses CPU synchronization to provide synchronization functionality. */
+class MVKTimelineSemaphoreEmulated : public MVKTimelineSemaphore {
+
+public:
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) override;
+	bool isUsingCommandEncoding() override { return false; }	// Waits and signals act on the host, only when called with a nil command buffer.
+
+	uint64_t getCounterValue() override { return _value; }
+	void signal(const VkSemaphoreSignalInfo* pSignalInfo) override;
+	bool registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) override;
+	void unregisterWait(MVKFenceSitter* sitter) override;
+
+	MVKTimelineSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo);
+
+protected:
+	void signalImpl(uint64_t value);	// Raises the counter and notifies waiters. Both callers, encodeSignal() and signal(), hold _lock.
+
+	std::atomic<uint64_t> _value;	// Current counter value, initialized from VkSemaphoreTypeCreateInfo::initialValue.
+	std::mutex _lock;	// Held around counter updates, host waits, and wait registration.
+	std::condition_variable _blocker;	// Blocks encodeWait() until the counter reaches the awaited value.
+	std::unordered_map<uint64_t, std::unordered_set<MVKFenceSitter*>> _sitters;	// Host waiters, keyed by the counter value they are waiting for.
+};
+
+
 #pragma mark -
 #pragma mark MVKFence
 
@@ -240,11 +334,10 @@ public:
 	/**
 	 * If this fence has not been signaled yet, adds the specified fence sitter to the
 	 * internal list of fence sitters that will be notified when this fence is signaled,
-	 * and then calls addUnsignaledFence() on the fence sitter so it is aware that it
-	 * will be signaled.
+	 * and then calls await() on the fence sitter so it is aware that it will be signaled.
 	 *
 	 * Does nothing if this fence has already been signaled, and does not call 
-	 * addUnsignaledFence() on the fence sitter.
+	 * await() on the fence sitter.
 	 *
 	 * Each fence sitter should only listen once for each fence. Adding the same fence sitter
 	 * more than once in between each fence reset and signal results in undefined behaviour.
@@ -282,7 +375,7 @@ protected:
 #pragma mark -
 #pragma mark MVKFenceSitter
 
-/** An object that responds to signals from MVKFences. */
+/** An object that responds to signals from MVKFences and MVKTimelineSemaphores. */
 class MVKFenceSitter : public MVKBaseObject {
 
 public:
@@ -309,11 +402,16 @@ public:
 
 private:
 	friend class MVKFence;
+	friend class MVKTimelineSemaphoreMTLEvent;
+	friend class MVKTimelineSemaphoreEmulated;
 
-	void awaitFence(MVKFence* fence) { _blocker.reserve(); }
-	void fenceSignaled(MVKFence* fence) { _blocker.release(); }
+	MTLSharedEventListener* getMTLSharedEventListener();
+
+	void await() { _blocker.reserve(); }
+	void signaled() { _blocker.release(); }
 
 	MVKSemaphoreImpl _blocker;
+	MTLSharedEventListener* _listener = nil;
 };
 
 
@@ -411,6 +509,15 @@ VkResult mvkWaitForFences(MVKDevice* device,
 						  VkBool32 waitAll,
 						  uint64_t timeout = UINT64_MAX);
 
+/** 
+ * Blocks the current thread until any or all of the specified 
+ * semaphores have been signaled at the specified values, or the
+ * specified timeout occurs.
+ */
+VkResult mvkWaitSemaphores(MVKDevice* device,
+						   const VkSemaphoreWaitInfo* pWaitInfo,
+						   uint64_t timeout = UINT64_MAX);
+
 
 #pragma mark -
 #pragma mark MVKMetalCompiler
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
index c63a16e5..e5ea2c56 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
@@ -82,7 +82,7 @@ MVKSemaphoreImpl::~MVKSemaphoreImpl() {
 
 // Could use any encoder. Assume BLIT is fastest and lightest.
 // Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLFence::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKSemaphoreMTLFence::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
 	id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
 	[mtlCmdEnc waitForFence: _mtlFence];
 	[mtlCmdEnc endEncoding];
@@ -90,7 +90,7 @@ void MVKSemaphoreMTLFence::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
 
 // Could use any encoder. Assume BLIT is fastest and lightest.
 // Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLFence::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKSemaphoreMTLFence::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
 	id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
 	[mtlCmdEnc updateFence: _mtlFence];
 	[mtlCmdEnc endEncoding];
@@ -109,12 +109,12 @@ MVKSemaphoreMTLFence::~MVKSemaphoreMTLFence() {
 #pragma mark MVKSemaphoreMTLEvent
 
 // Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLEvent::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKSemaphoreMTLEvent::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
 	[mtlCmdBuff encodeWaitForEvent: _mtlEvent value: _mtlEventValue++];
 }
 
 // Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLEvent::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKSemaphoreMTLEvent::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
 	[mtlCmdBuff encodeSignalEvent: _mtlEvent value: _mtlEventValue];
 }
 
@@ -131,11 +131,11 @@ MVKSemaphoreMTLEvent::~MVKSemaphoreMTLEvent() {
 #pragma mark -
 #pragma mark MVKSemaphoreEmulated
 
-void MVKSemaphoreEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKSemaphoreEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
 	if ( !mtlCmdBuff ) { _blocker.wait(UINT64_MAX, true); }
 }
 
-void MVKSemaphoreEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKSemaphoreEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
 	if ( !mtlCmdBuff ) { _blocker.release(); }
 }
 
@@ -144,6 +144,116 @@ MVKSemaphoreEmulated::MVKSemaphoreEmulated(MVKDevice* device, const VkSemaphoreC
 	_blocker(false, 1) {}
 
 
+#pragma mark -
+#pragma mark MVKTimelineSemaphoreMTLEvent
+
+// Nil mtlCmdBuff will do nothing.
+void MVKTimelineSemaphoreMTLEvent::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
+	[mtlCmdBuff encodeWaitForEvent: _mtlEvent value: value];
+}
+
+// Nil mtlCmdBuff will do nothing.
+void MVKTimelineSemaphoreMTLEvent::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
+	[mtlCmdBuff encodeSignalEvent: _mtlEvent value: value];
+}
+
+void MVKTimelineSemaphoreMTLEvent::signal(const VkSemaphoreSignalInfo* pSignalInfo) {
+	_mtlEvent.signaledValue = pSignalInfo->value;
+}
+
+bool MVKTimelineSemaphoreMTLEvent::registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) {
+	if (_mtlEvent.signaledValue >= pWaitInfo->pValues[index]) { return true; }	// Value already reached; no wait is registered.
+	lock_guard<mutex> lock(_lock);
+	auto addRslt = _sitters.insert(sitter);	// pair with second element true if was added
+	if (addRslt.second) {
+		sitter->await();	// Reserve only for a newly added sitter, so the single signaled() in the block balances it.
+		retain();			// Balanced by release() at the end of the notification block.
+		[_mtlEvent notifyListener: sitter->getMTLSharedEventListener()
+						  atValue: pWaitInfo->pValues[index]
+							block: ^(id<MTLSharedEvent>, uint64_t) {
+			lock_guard<mutex> blockLock(_lock);
+			if (_sitters.count(sitter)) { sitter->signaled(); }	// Skip sitters already removed by unregisterWait().
+			release();
+		}];
+	}
+	return false;
+}
+
+void MVKTimelineSemaphoreMTLEvent::unregisterWait(MVKFenceSitter* sitter) {
+	lock_guard<mutex> lock(_lock);
+	_sitters.erase(sitter);
+}
+
+MVKTimelineSemaphoreMTLEvent::MVKTimelineSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo) :
+	MVKTimelineSemaphore(device, pCreateInfo, pTypeCreateInfo),
+	_mtlEvent([device->getMTLDevice() newSharedEvent]) { 	//retained
+
+	_mtlEvent.signaledValue = pTypeCreateInfo->initialValue;
+}
+
+MVKTimelineSemaphoreMTLEvent::~MVKTimelineSemaphoreMTLEvent() {
+    [_mtlEvent release];
+}
+
+
+#pragma mark -
+#pragma mark MVKTimelineSemaphoreEmulated
+
+void MVKTimelineSemaphoreEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {	// Host-side wait for the counter to reach 'value'.
+	unique_lock<mutex> lock(_lock);	// condition_variable::wait() needs the mutex held on entry; it is released while blocked.
+	if ( !mtlCmdBuff ) { _blocker.wait(lock, [=]() { return _value >= value; }); }	// Only the nil-command-buffer call blocks; a non-nil call does nothing.
+}
+
+void MVKTimelineSemaphoreEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
+	lock_guard<mutex> lock(_lock);
+	if ( !mtlCmdBuff ) { signalImpl(value); }
+}
+
+void MVKTimelineSemaphoreEmulated::signal(const VkSemaphoreSignalInfo* pSignalInfo) {
+	lock_guard<mutex> lock(_lock);
+	signalImpl(pSignalInfo->value);
+}
+
+void MVKTimelineSemaphoreEmulated::signalImpl(uint64_t value) {
+	// Both callers, encodeSignal() and signal(), hold _lock. Values that do not raise the counter are ignored.
+	if (value <= _value) { return; }
+	_value = value;
+	_blocker.notify_all();	// Wake threads blocked in encodeWait().
+	// A sitter registered for value V is satisfied once the counter reaches V, i.e. when V <= value.
+	for (auto iter = _sitters.begin(); iter != _sitters.end(); ) {
+		if (iter->first > value) { ++iter; continue; }
+		for (auto* sitter : iter->second) { sitter->signaled(); }
+		iter = _sitters.erase(iter);	// Drop notified sitters, as MVKFence::signal() does, so a later signal cannot notify them again.
+	}
+}
+
+bool MVKTimelineSemaphoreEmulated::registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) {
+	lock_guard<mutex> lock(_lock);
+	uint64_t value = pWaitInfo->pValues[index];
+	if (_value >= value) { return true; }	// Counter has already reached the requested value; no wait is registered.
+	// operator[] creates the sitter set for this value on first use. Reserve only for a newly added sitter.
+	auto addRslt = _sitters[value].insert(sitter);
+	if (addRslt.second) { sitter->await(); }
+	return false;
+}
+
+void MVKTimelineSemaphoreEmulated::unregisterWait(MVKFenceSitter* sitter) {
+	// Removes the sitter from every value it was registered for, and discards value entries left empty.
+	// registerWait() and the callers of signalImpl() access _sitters under _lock, so it must be held here too.
+	lock_guard<mutex> lock(_lock);
+	for (auto iter = _sitters.begin(); iter != _sitters.end(); ) {
+		iter->second.erase(sitter);
+		// unordered_map::erase() returns the following iterator, so emptied sets can be dropped in place.
+		if (iter->second.empty()) { iter = _sitters.erase(iter); }
+		else { ++iter; }
+	}
+}
+
+MVKTimelineSemaphoreEmulated::MVKTimelineSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo) :
+	MVKTimelineSemaphore(device, pCreateInfo, pTypeCreateInfo),
+	_value(pTypeCreateInfo->initialValue) {}
+
+
 #pragma mark -
 #pragma mark MVKFence
 
@@ -156,7 +266,7 @@ void MVKFence::addSitter(MVKFenceSitter* fenceSitter) {
 
 	// Ensure each fence only added once to each fence sitter
 	auto addRslt = _fenceSitters.insert(fenceSitter);	// pair with second element true if was added
-	if (addRslt.second) { fenceSitter->awaitFence(this); }
+	if (addRslt.second) { fenceSitter->await(); }
 }
 
 void MVKFence::removeSitter(MVKFenceSitter* fenceSitter) {
@@ -173,7 +283,7 @@ void MVKFence::signal() {
 
 	// Notify all the fence sitters, and clear them from this instance.
     for (auto& fs : _fenceSitters) {
-        fs->fenceSignaled(this);
+        fs->signaled();
     }
 	_fenceSitters.clear();
 }
@@ -192,6 +302,16 @@ bool MVKFence::getIsSignaled() {
 }
 
 
+#pragma mark -
+#pragma mark MVKFenceSitter
+
+MTLSharedEventListener* MVKFenceSitter::getMTLSharedEventListener() {	// Returns this sitter's listener, creating it on first use.
+	// TODO: Use dispatch queue from device?
+	if (!_listener) { _listener = [MTLSharedEventListener new]; }	// NOTE(review): +new returns an owned object and no matching release is visible in this patch - confirm the sitter releases it.
+	return _listener;
+}
+
+
 #pragma mark -
 #pragma mark MVKEventNative
 
@@ -295,6 +415,33 @@ VkResult mvkWaitForFences(MVKDevice* device,
 	return rslt;
 }
 
+// Creates a fence sitter, registers it with each semaphore at its requested value, blocks until the wait is satisfied or 'timeout' expires, then unregisters it. Returns VK_TIMEOUT if the wait expired, VK_SUCCESS otherwise.
+VkResult mvkWaitSemaphores(MVKDevice* device,
+						   const VkSemaphoreWaitInfo* pWaitInfo,
+						   uint64_t timeout) {
+
+	VkResult rslt = VK_SUCCESS;
+	bool waitAny = mvkIsAnyFlagEnabled(pWaitInfo->flags, VK_SEMAPHORE_WAIT_ANY_BIT);
+	bool alreadySignaled = false;
+	MVKFenceSitter fenceSitter(!waitAny);	// Presumably the flag selects wait-for-all behaviour - confirm against the MVKFenceSitter constructor.
+
+	for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {	// Handles are cast to MVKTimelineSemaphore without checking the semaphore type.
+		if (((MVKTimelineSemaphore*)pWaitInfo->pSemaphores[i])->registerWait(&fenceSitter, pWaitInfo, i) && waitAny) {
+			// This semaphore has already reached its value and only one is required, so skip the blocking wait.
+			alreadySignaled = true;
+			break;
+		}
+	}
+
+	if ( !alreadySignaled && !fenceSitter.wait(timeout) ) { rslt = VK_TIMEOUT; }
+
+	for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {	// Unregisters from every semaphore, including any not reached when the loop above broke early.
+		((MVKTimelineSemaphore*)pWaitInfo->pSemaphores[i])->unregisterWait(&fenceSitter);
+	}
+
+	return rslt;
+}
+
 
 #pragma mark -
 #pragma mark MVKMetalCompiler
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index c40ce89b..0364cd96 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -75,6 +75,7 @@ MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE_CLASS
 MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE)
 MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE)
 MVK_EXTENSION(KHR_swapchain_mutable_format, KHR_SWAPCHAIN_MUTABLE_FORMAT, DEVICE)
+MVK_EXTENSION(KHR_timeline_semaphore, KHR_TIMELINE_SEMAPHORE, DEVICE)
 MVK_EXTENSION(KHR_uniform_buffer_standard_layout, KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, DEVICE)
 MVK_EXTENSION(KHR_variable_pointers, KHR_VARIABLE_POINTERS, DEVICE)
 MVK_EXTENSION(EXT_debug_marker, EXT_DEBUG_MARKER, DEVICE)
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index 1ad42bfa..208c5664 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -2663,6 +2663,45 @@ MVK_PUBLIC_SYMBOL VkResult vkGetPhysicalDeviceSurfaceFormats2KHR(
 }
 
 
+#pragma mark -
+#pragma mark VK_KHR_timeline_semaphore
+
+MVK_PUBLIC_SYMBOL VkResult vkGetSemaphoreCounterValueKHR(
+	VkDevice									device,
+	VkSemaphore									semaphore,
+	uint64_t*									pValue) {
+
+	MVKTraceVulkanCallStart();
+	auto* mvkSem4 = (MVKTimelineSemaphore*)semaphore;
+	*pValue = mvkSem4->getCounterValue();
+	MVKTraceVulkanCallEnd();
+	return VK_SUCCESS;
+}
+
+MVK_PUBLIC_SYMBOL VkResult vkSignalSemaphoreKHR(
+	VkDevice									device,
+	const VkSemaphoreSignalInfoKHR*				pSignalInfo) {
+
+	MVKTraceVulkanCallStart();
+	auto* mvkSem4 = (MVKTimelineSemaphore*)pSignalInfo->semaphore;
+	mvkSem4->signal(pSignalInfo);
+	MVKTraceVulkanCallEnd();
+	return VK_SUCCESS;
+}
+
+MVK_PUBLIC_SYMBOL VkResult vkWaitSemaphoresKHR(
+	VkDevice									device,
+	const VkSemaphoreWaitInfoKHR*				pWaitInfo,
+	uint64_t									timeout) {
+
+	MVKTraceVulkanCallStart();
+	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
+	VkResult rslt = mvkWaitSemaphores(mvkDev, pWaitInfo, timeout);
+	MVKTraceVulkanCallEnd();
+	return rslt;
+}
+
+
 #pragma mark -
 #pragma mark VK_EXT_debug_report extension