From 2b7f9aee43ebd5d8cddf53a158c5bb987d1adfa4 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Tue, 27 Oct 2020 16:10:57 -0500 Subject: [PATCH] Support the VK_KHR_timeline_semaphore extension. This implementation uses `MTLSharedEvent` where possible, and emulates it on the host otherwise. Unlike binary semaphores, `MTLSharedEvent`s map well to timeline semaphores; there should be no problems using them when they're available. I'm extremely confident in the `MTLSharedEvent`-based implementation. It passes nearly all the synchronization tests. I'm less confident in the emulated implementation. --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 38 ++++- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 3 + MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 35 ++++- MoltenVK/MoltenVK/GPUObjects/MVKSync.h | 135 ++++++++++++++-- MoltenVK/MoltenVK/GPUObjects/MVKSync.mm | 163 +++++++++++++++++++- MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/Vulkan/vulkan.mm | 39 +++++ 9 files changed, 383 insertions(+), 39 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 372d3bd4..411348cd 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -118,6 +118,11 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.subgroupSize != 0; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: { + auto* timelineSem4Features = (VkPhysicalDeviceTimelineSemaphoreFeatures*)next; + timelineSem4Features->timelineSemaphore = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: { auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*)next; uboLayoutFeatures->uniformBufferStandardLayout = true; @@ 
-305,6 +310,11 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { } break; #endif + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: { + auto* timelineSem4Props = (VkPhysicalDeviceTimelineSemaphoreProperties*)next; + timelineSem4Props->maxTimelineSemaphoreValueDifference = std::numeric_limits::max(); + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: { auto* inlineUniformBlockProps = (VkPhysicalDeviceInlineUniformBlockPropertiesEXT*)next; inlineUniformBlockProps->maxInlineUniformBlockSize = _metalFeatures.dynamicMTLBufferSize; @@ -2668,12 +2678,30 @@ void MVKDevice::destroyFence(MVKFence* mvkFence, MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator) { - if (_useMTLFenceForSemaphores) { - return new MVKSemaphoreMTLFence(this, pCreateInfo); - } else if (_useMTLEventForSemaphores) { - return new MVKSemaphoreMTLEvent(this, pCreateInfo); + const VkSemaphoreTypeCreateInfo* pTypeCreateInfo = nullptr; + for (auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO: + pTypeCreateInfo = (VkSemaphoreTypeCreateInfo*)next; + break; + default: + break; + } + } + if (pTypeCreateInfo && pTypeCreateInfo->semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE) { + if (_pMetalFeatures->events) { + return new MVKTimelineSemaphoreMTLEvent(this, pCreateInfo, pTypeCreateInfo); + } else { + return new MVKTimelineSemaphoreEmulated(this, pCreateInfo, pTypeCreateInfo); + } } else { - return new MVKSemaphoreEmulated(this, pCreateInfo); + if (_useMTLFenceForSemaphores) { + return new MVKSemaphoreMTLFence(this, pCreateInfo); + } else if (_useMTLEventForSemaphores) { + return new MVKSemaphoreMTLEvent(this, pCreateInfo); + } else { + return new MVKSemaphoreEmulated(this, pCreateInfo); + } } } diff --git 
a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index ce164b96..48a6ae43 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1263,13 +1263,13 @@ void MVKPresentableSwapchainImage::signalPresentationSemaphore(idencodeSignal(mtlCmdBuff); } + if (mvkSem) { mvkSem->encodeSignal(mtlCmdBuff, 0); } } } // Signal either or both of the semaphore and fence in the specified tracker pair. void MVKPresentableSwapchainImage::signal(MVKSwapchainSignaler& signaler, id mtlCmdBuff) { - if (signaler.first) { signaler.first->encodeSignal(mtlCmdBuff); } + if (signaler.first) { signaler.first->encodeSignal(mtlCmdBuff, 0); } if (signaler.second) { signaler.second->signal(); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index f27496f5..76193070 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -660,6 +660,9 @@ void MVKInstance::initProcAddrs() { ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP); + ADD_DVC_EXT_ENTRY_POINT(vkGetSemaphoreCounterValueKHR, KHR_TIMELINE_SEMAPHORE); + ADD_DVC_EXT_ENTRY_POINT(vkSignalSemaphoreKHR, KHR_TIMELINE_SEMAPHORE); + ADD_DVC_EXT_ENTRY_POINT(vkWaitSemaphoresKHR, KHR_TIMELINE_SEMAPHORE); ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectTagEXT, EXT_DEBUG_MARKER); ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectNameEXT, EXT_DEBUG_MARKER); ADD_DVC_EXT_ENTRY_POINT(vkCmdDebugMarkerBeginEXT, EXT_DEBUG_MARKER); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index dde55fb2..5df84015 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -178,7 
+178,7 @@ protected: friend class MVKQueue; MVKQueue* _queue; - MVKSmallVector _waitSemaphores; + MVKSmallVector> _waitSemaphores; bool _trackPerformance; }; @@ -203,7 +203,7 @@ protected: void finish(); virtual void submitCommandBuffers() {} - MVKSmallVector _signalSemaphores; + MVKSmallVector> _signalSemaphores; MVKFence* _fence; id _activeMTLCommandBuffer; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 595ea60b..9233c9c0 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -226,7 +226,7 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, _waitSemaphores.reserve(waitSemaphoreCount); for (uint32_t i = 0; i < waitSemaphoreCount; i++) { - _waitSemaphores.push_back((MVKSemaphore*)pWaitSemaphores[i]); + _waitSemaphores.push_back(make_pair((MVKSemaphore*)pWaitSemaphores[i], (uint64_t)0)); } } @@ -241,13 +241,13 @@ void MVKQueueCommandBufferSubmission::execute() { _queue->_submissionCaptureScope->beginScope(); // If using encoded semaphore waiting, do so now. - for (auto* ws : _waitSemaphores) { ws->encodeWait(getActiveMTLCommandBuffer()); } + for (auto& ws : _waitSemaphores) { ws.first->encodeWait(getActiveMTLCommandBuffer(), ws.second); } // Submit each command buffer. submitCommandBuffers(); // If using encoded semaphore signaling, do so now. - for (auto* ss : _signalSemaphores) { ss->encodeSignal(getActiveMTLCommandBuffer()); } + for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(getActiveMTLCommandBuffer(), ss.second); } // Commit the last MTLCommandBuffer. // Nothing after this because callback might destroy this instance before this function ends. 
@@ -278,7 +278,7 @@ void MVKQueueCommandBufferSubmission::setActiveMTLCommandBuffer(idencodeWait(nil); } + for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, ws.second); } MVKDevice* mkvDev = _queue->_device; uint64_t startTime = mkvDev->getPerformanceTimestamp(); @@ -308,7 +308,7 @@ void MVKQueueCommandBufferSubmission::finish() { _queue->_submissionCaptureScope->endScope(); // If using inline semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss->encodeSignal(nil); } + for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(nil, ss.second); } // If a fence exists, signal it. if (_fence) { _fence->signal(); } @@ -325,10 +325,29 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue // pSubmit can be null if just tracking the fence alone if (pSubmit) { + VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; + for (const auto* next = (const VkBaseInStructure*)pSubmit->pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: + pTimelineSubmit = (VkTimelineSemaphoreSubmitInfo*)next; + break; + default: + break; + } + } + if (pTimelineSubmit) { + // Presentation doesn't support timeline semaphores, so handle wait values here. 
+			uint32_t wsCnt = pTimelineSubmit->waitSemaphoreValueCount;
+			for (uint32_t i = 0; i < wsCnt && i < _waitSemaphores.size(); i++) {	// Never index past the waits recorded by the base class.
+				_waitSemaphores[i].second = pTimelineSubmit->pWaitSemaphoreValues[i];
+			}
+		}
 		uint32_t ssCnt = pSubmit->signalSemaphoreCount;
 		_signalSemaphores.reserve(ssCnt);
 		for (uint32_t i = 0; i < ssCnt; i++) {
-			_signalSemaphores.push_back((MVKSemaphore*)pSubmit->pSignalSemaphores[i]);
+			auto ss = make_pair((MVKSemaphore*)pSubmit->pSignalSemaphores[i], (uint64_t)0);
+			if (pTimelineSubmit && i < pTimelineSubmit->signalSemaphoreValueCount) { ss.second = pTimelineSubmit->pSignalSemaphoreValues[i]; }	// Count may be 0 (null array) when only binary semaphores are signaled.
+			_signalSemaphores.push_back(ss);
 		}
 	}
 
@@ -348,12 +367,12 @@ void MVKQueuePresentSurfaceSubmission::execute() {
 	// If the semaphores are not encodable, wait on them inline after presenting.
 	// The semaphores know what to do.
 	id<MTLCommandBuffer> mtlCmdBuff = getMTLCommandBuffer();
-	for (auto& ws : _waitSemaphores) { ws->encodeWait(mtlCmdBuff); }
+	for (auto& ws : _waitSemaphores) { ws.first->encodeWait(mtlCmdBuff, 0); }
 	for (int i = 0; i < _presentInfo.size(); i++ ) {
 		MVKPresentableSwapchainImage *img = _presentInfo[i].presentableImage;
 		img->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]);
 	}
-	for (auto& ws : _waitSemaphores) { ws->encodeWait(nil); }
+	for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, 0); }
 	[mtlCmdBuff commit];
 
 	// Let Xcode know the current frame is done, then start a new frame
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
index 917255c3..8dd79451 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
@@ -120,6 +120,9 @@ public:
 	/** Returns the debug report object type of this object. */
 	VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT; }
 
+	/** Returns the type of this semaphore. */
+	virtual VkSemaphoreType getSemaphoreType() { return VK_SEMAPHORE_TYPE_BINARY; }
+
 	/**
 	 * Wait for this semaphore to be signaled.
* @@ -132,8 +135,11 @@ public: * code path, once with a mtlCmdBuff to support encoding a wait on the command buffer if the * subclass supports command encoding, and once without a mtlCmdBuff, at the point in the * code path where the code should block if the subclass does not support command encoding. + * + * 'value' only applies if this semaphore is a timeline semaphore. It is the value to wait for the + * semaphore to have. */ - virtual void encodeWait(id mtlCmdBuff) = 0; + virtual void encodeWait(id mtlCmdBuff, uint64_t value) = 0; /** * Signals this semaphore. @@ -147,8 +153,11 @@ public: * code path, once with a mtlCmdBuff to support encoding a wait on the command buffer if the * subclass supports command encoding, and once without a mtlCmdBuff, at the point in the * code path where the code should block if the subclass does not support command encoding. + * + * 'value' only applies if this semaphore is a timeline semaphore. It is the value to assign the semaphore + * upon completion. */ - virtual void encodeSignal(id mtlCmdBuff) = 0; + virtual void encodeSignal(id mtlCmdBuff, uint64_t value) = 0; /** Returns whether this semaphore uses command encoding. 
*/ virtual bool isUsingCommandEncoding() = 0; @@ -171,8 +180,8 @@ protected: class MVKSemaphoreMTLFence : public MVKSemaphore { public: - void encodeWait(id mtlCmdBuff) override; - void encodeSignal(id mtlCmdBuff) override; + void encodeWait(id mtlCmdBuff, uint64_t) override; + void encodeSignal(id mtlCmdBuff, uint64_t) override; bool isUsingCommandEncoding() override { return true; } MVKSemaphoreMTLFence(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo); @@ -191,8 +200,8 @@ protected: class MVKSemaphoreMTLEvent : public MVKSemaphore { public: - void encodeWait(id mtlCmdBuff) override; - void encodeSignal(id mtlCmdBuff) override; + void encodeWait(id mtlCmdBuff, uint64_t) override; + void encodeSignal(id mtlCmdBuff, uint64_t) override; bool isUsingCommandEncoding() override { return true; } MVKSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo); @@ -212,8 +221,8 @@ protected: class MVKSemaphoreEmulated : public MVKSemaphore { public: - void encodeWait(id mtlCmdBuff) override; - void encodeSignal(id mtlCmdBuff) override; + void encodeWait(id mtlCmdBuff, uint64_t) override; + void encodeSignal(id mtlCmdBuff, uint64_t) override; bool isUsingCommandEncoding() override { return false; } MVKSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo); @@ -223,6 +232,91 @@ protected: }; +#pragma mark - +#pragma mark MVKTimelineSemaphore + +/** Abstract class that represents a Vulkan timeline semaphore. */ +class MVKTimelineSemaphore : public MVKSemaphore { + +public: + + VkSemaphoreType getSemaphoreType() override { return VK_SEMAPHORE_TYPE_TIMELINE; } + + /** Returns the current value of the semaphore counter. */ + virtual uint64_t getCounterValue() = 0; + + /** Signals this semaphore on the host. */ + virtual void signal(const VkSemaphoreSignalInfo* pSignalInfo) = 0; + + /** Registers a wait for this semaphore on the host. Returns true if the semaphore is already signaled. 
*/ + virtual bool registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) = 0; + + /** Stops waiting for this semaphore. */ + virtual void unregisterWait(MVKFenceSitter* sitter) = 0; + +#pragma mark Construction + + MVKTimelineSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo) + : MVKSemaphore(device, pCreateInfo) {} + +}; + + +#pragma mark - +#pragma mark MVKTimelineSemaphoreMTLEvent + +/** An MVKTimelineSemaphore that uses MTLSharedEvent to provide synchronization. */ +class MVKTimelineSemaphoreMTLEvent : public MVKTimelineSemaphore { + +public: + void encodeWait(id mtlCmdBuff, uint64_t value) override; + void encodeSignal(id mtlCmdBuff, uint64_t value) override; + bool isUsingCommandEncoding() override { return true; } + + uint64_t getCounterValue() override { return _mtlEvent.signaledValue; } + void signal(const VkSemaphoreSignalInfo* pSignalInfo) override; + bool registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) override; + void unregisterWait(MVKFenceSitter* sitter) override; + + MVKTimelineSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo); + + ~MVKTimelineSemaphoreMTLEvent() override; + +protected: + id _mtlEvent; + std::mutex _lock; + std::unordered_set _sitters; +}; + + +#pragma mark - +#pragma mark MVKTimelineSemaphoreEmulated + +/** An MVKTimelineSemaphore that uses CPU synchronization to provide synchronization functionality. 
*/ +class MVKTimelineSemaphoreEmulated : public MVKTimelineSemaphore { + +public: + void encodeWait(id mtlCmdBuff, uint64_t value) override; + void encodeSignal(id mtlCmdBuff, uint64_t value) override; + bool isUsingCommandEncoding() override { return false; } + + uint64_t getCounterValue() override { return _value; } + void signal(const VkSemaphoreSignalInfo* pSignalInfo) override; + bool registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) override; + void unregisterWait(MVKFenceSitter* sitter) override; + + MVKTimelineSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo); + +protected: + void signalImpl(uint64_t value); + + std::atomic _value; + std::mutex _lock; + std::condition_variable _blocker; + std::unordered_map> _sitters; +}; + + #pragma mark - #pragma mark MVKFence @@ -240,11 +334,10 @@ public: /** * If this fence has not been signaled yet, adds the specified fence sitter to the * internal list of fence sitters that will be notified when this fence is signaled, - * and then calls addUnsignaledFence() on the fence sitter so it is aware that it - * will be signaled. + * and then calls await() on the fence sitter so it is aware that it will be signaled. * * Does nothing if this fence has already been signaled, and does not call - * addUnsignaledFence() on the fence sitter. + * await() on the fence sitter. * * Each fence sitter should only listen once for each fence. Adding the same fence sitter * more than once in between each fence reset and signal results in undefined behaviour. @@ -282,7 +375,7 @@ protected: #pragma mark - #pragma mark MVKFenceSitter -/** An object that responds to signals from MVKFences. */ +/** An object that responds to signals from MVKFences and MVKTimelineSemaphores. 
*/ class MVKFenceSitter : public MVKBaseObject { public: @@ -309,11 +402,16 @@ public: private: friend class MVKFence; + friend class MVKTimelineSemaphoreMTLEvent; + friend class MVKTimelineSemaphoreEmulated; - void awaitFence(MVKFence* fence) { _blocker.reserve(); } - void fenceSignaled(MVKFence* fence) { _blocker.release(); } + MTLSharedEventListener* getMTLSharedEventListener(); + + void await() { _blocker.reserve(); } + void signaled() { _blocker.release(); } MVKSemaphoreImpl _blocker; + MTLSharedEventListener* _listener = nil; }; @@ -411,6 +509,15 @@ VkResult mvkWaitForFences(MVKDevice* device, VkBool32 waitAll, uint64_t timeout = UINT64_MAX); +/** + * Blocks the current thread until any or all of the specified + * semaphores have been signaled at the specified values, or the + * specified timeout occurs. + */ +VkResult mvkWaitSemaphores(MVKDevice* device, + const VkSemaphoreWaitInfo* pWaitInfo, + uint64_t timeout = UINT64_MAX); + #pragma mark - #pragma mark MVKMetalCompiler diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm index c63a16e5..e5ea2c56 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm @@ -82,7 +82,7 @@ MVKSemaphoreImpl::~MVKSemaphoreImpl() { // Could use any encoder. Assume BLIT is fastest and lightest. // Nil mtlCmdBuff will do nothing. -void MVKSemaphoreMTLFence::encodeWait(id mtlCmdBuff) { +void MVKSemaphoreMTLFence::encodeWait(id mtlCmdBuff, uint64_t) { id mtlCmdEnc = mtlCmdBuff.blitCommandEncoder; [mtlCmdEnc waitForFence: _mtlFence]; [mtlCmdEnc endEncoding]; @@ -90,7 +90,7 @@ void MVKSemaphoreMTLFence::encodeWait(id mtlCmdBuff) { // Could use any encoder. Assume BLIT is fastest and lightest. // Nil mtlCmdBuff will do nothing. 
-void MVKSemaphoreMTLFence::encodeSignal(id mtlCmdBuff) { +void MVKSemaphoreMTLFence::encodeSignal(id mtlCmdBuff, uint64_t) { id mtlCmdEnc = mtlCmdBuff.blitCommandEncoder; [mtlCmdEnc updateFence: _mtlFence]; [mtlCmdEnc endEncoding]; @@ -109,12 +109,12 @@ MVKSemaphoreMTLFence::~MVKSemaphoreMTLFence() { #pragma mark MVKSemaphoreMTLEvent // Nil mtlCmdBuff will do nothing. -void MVKSemaphoreMTLEvent::encodeWait(id mtlCmdBuff) { +void MVKSemaphoreMTLEvent::encodeWait(id mtlCmdBuff, uint64_t) { [mtlCmdBuff encodeWaitForEvent: _mtlEvent value: _mtlEventValue++]; } // Nil mtlCmdBuff will do nothing. -void MVKSemaphoreMTLEvent::encodeSignal(id mtlCmdBuff) { +void MVKSemaphoreMTLEvent::encodeSignal(id mtlCmdBuff, uint64_t) { [mtlCmdBuff encodeSignalEvent: _mtlEvent value: _mtlEventValue]; } @@ -131,11 +131,11 @@ MVKSemaphoreMTLEvent::~MVKSemaphoreMTLEvent() { #pragma mark - #pragma mark MVKSemaphoreEmulated -void MVKSemaphoreEmulated::encodeWait(id mtlCmdBuff) { +void MVKSemaphoreEmulated::encodeWait(id mtlCmdBuff, uint64_t) { if ( !mtlCmdBuff ) { _blocker.wait(UINT64_MAX, true); } } -void MVKSemaphoreEmulated::encodeSignal(id mtlCmdBuff) { +void MVKSemaphoreEmulated::encodeSignal(id mtlCmdBuff, uint64_t) { if ( !mtlCmdBuff ) { _blocker.release(); } } @@ -144,6 +144,116 @@ MVKSemaphoreEmulated::MVKSemaphoreEmulated(MVKDevice* device, const VkSemaphoreC _blocker(false, 1) {} +#pragma mark - +#pragma mark MVKTimelineSemaphoreMTLEvent + +// Nil mtlCmdBuff will do nothing. +void MVKTimelineSemaphoreMTLEvent::encodeWait(id mtlCmdBuff, uint64_t value) { + [mtlCmdBuff encodeWaitForEvent: _mtlEvent value: value]; +} + +// Nil mtlCmdBuff will do nothing. 
+void MVKTimelineSemaphoreMTLEvent::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
+	[mtlCmdBuff encodeSignalEvent: _mtlEvent value: value];
+}
+
+void MVKTimelineSemaphoreMTLEvent::signal(const VkSemaphoreSignalInfo* pSignalInfo) {
+	_mtlEvent.signaledValue = pSignalInfo->value;
+}
+
+bool MVKTimelineSemaphoreMTLEvent::registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) {
+	if (_mtlEvent.signaledValue >= pWaitInfo->pValues[index]) { return true; }
+	lock_guard<mutex> lock(_lock);
+	auto addRslt = _sitters.insert(sitter);
+	if (addRslt.second) {
+		sitter->await();	// Reserve only when a notification is actually queued, so every reservation is released.
+		retain();
+		[_mtlEvent notifyListener: sitter->getMTLSharedEventListener()
+						  atValue: pWaitInfo->pValues[index]
+							block: ^(id<MTLSharedEvent>, uint64_t) {
+								lock_guard<mutex> blockLock(_lock);
+								if (_sitters.count(sitter)) { sitter->signaled(); }
+								release();
+							}];
+	}
+	return false;
+}
+
+void MVKTimelineSemaphoreMTLEvent::unregisterWait(MVKFenceSitter* sitter) {
+	lock_guard<mutex> lock(_lock);
+	_sitters.erase(sitter);
+}
+
+MVKTimelineSemaphoreMTLEvent::MVKTimelineSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo) :
+	MVKTimelineSemaphore(device, pCreateInfo, pTypeCreateInfo),
+	_mtlEvent([device->getMTLDevice() newSharedEvent]) {	//retained
+
+	_mtlEvent.signaledValue = pTypeCreateInfo->initialValue;
+}
+
+MVKTimelineSemaphoreMTLEvent::~MVKTimelineSemaphoreMTLEvent() {
+	[_mtlEvent release];
+}
+
+
+#pragma mark -
+#pragma mark MVKTimelineSemaphoreEmulated
+
+void MVKTimelineSemaphoreEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
+	unique_lock<mutex> lock(_lock);
+	if ( !mtlCmdBuff ) { _blocker.wait(lock, [=]() { return _value >= value; }); }
+}
+
+void MVKTimelineSemaphoreEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
+	lock_guard<mutex> lock(_lock);
+	if ( !mtlCmdBuff ) { signalImpl(value); }
+}
+
+void MVKTimelineSemaphoreEmulated::signal(const VkSemaphoreSignalInfo* pSignalInfo) {
+	lock_guard<mutex> lock(_lock);
+	signalImpl(pSignalInfo->value);
+}
+
+void MVKTimelineSemaphoreEmulated::signalImpl(uint64_t value) {
+	if (value > _value) {
+		_value = value;
+		_blocker.notify_all();
+		for (auto iter = _sitters.begin(); iter != _sitters.end(); ) {
+			if (iter->first > value) { ++iter; continue; }	// Target not reached yet; keep waiting.
+			for (auto* sitter : iter->second) { sitter->signaled(); }
+			// Drop the satisfied entry so a later signal cannot release these sitters a second time.
+			iter = _sitters.erase(iter);
+		}
+	}
+}
+
+bool MVKTimelineSemaphoreEmulated::registerWait(MVKFenceSitter* sitter, const VkSemaphoreWaitInfo* pWaitInfo, uint32_t index) {
+	lock_guard<mutex> lock(_lock);
+	if (_value >= pWaitInfo->pValues[index]) { return true; }
+	uint64_t value = pWaitInfo->pValues[index];
+	// operator[] creates an empty sitter set the first time a wait value is seen.
+	auto addRslt = _sitters[value].insert(sitter);
+	if (addRslt.second) { sitter->await(); }
+	return false;
+}
+
+void MVKTimelineSemaphoreEmulated::unregisterWait(MVKFenceSitter* sitter) {
+	lock_guard<mutex> lock(_lock);	// _sitters is shared with registerWait() and signalImpl().
+	for (auto iter = _sitters.begin(); iter != _sitters.end(); ) {
+		iter->second.erase(sitter);
+		if (iter->second.empty()) {
+			iter = _sitters.erase(iter);	// erase() hands back the next valid iterator.
+		} else {
+			++iter;
+		}
+	}
+}
+
+MVKTimelineSemaphoreEmulated::MVKTimelineSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo, const VkSemaphoreTypeCreateInfo* pTypeCreateInfo) :
+	MVKTimelineSemaphore(device, pCreateInfo, pTypeCreateInfo),
+	_value(pTypeCreateInfo->initialValue) {}
+
+
 #pragma mark -
 #pragma mark MVKFence
 
@@ -156,7 +266,7 @@ void MVKFence::addSitter(MVKFenceSitter* fenceSitter) {
 
 	// Ensure each fence only added once to each fence sitter
 	auto addRslt = _fenceSitters.insert(fenceSitter);	// pair with second element true if was added
-	if (addRslt.second) { fenceSitter->awaitFence(this); }
+	if (addRslt.second) { fenceSitter->await(); }
 }
 
 void MVKFence::removeSitter(MVKFenceSitter* fenceSitter) {
@@ -173,7 +283,7 @@ void MVKFence::signal() {
 
 	// Notify all the fence sitters, and clear them from this instance.
 	for (auto& fs : _fenceSitters) {
-		fs->fenceSignaled(this);
+		fs->signaled();
 	}
 	_fenceSitters.clear();
 }
@@ -192,6 +302,16 @@ bool MVKFence::getIsSignaled() {
 }
 
 
+#pragma mark -
+#pragma mark MVKFenceSitter
+
+MTLSharedEventListener* MVKFenceSitter::getMTLSharedEventListener() {
+	// TODO: Use dispatch queue from device?
+	if (!_listener) { _listener = [MTLSharedEventListener new]; }
+	return _listener;
+}
+
+
 #pragma mark -
 #pragma mark MVKEventNative
 
@@ -295,6 +415,33 @@ VkResult mvkWaitForFences(MVKDevice* device,
 	return rslt;
 }
 
+// Create a blocking fence sitter, add it to each semaphore, wait, then remove it.
+VkResult mvkWaitSemaphores(MVKDevice* device, + const VkSemaphoreWaitInfo* pWaitInfo, + uint64_t timeout) { + + VkResult rslt = VK_SUCCESS; + bool waitAny = mvkIsAnyFlagEnabled(pWaitInfo->flags, VK_SEMAPHORE_WAIT_ANY_BIT); + bool alreadySignaled = false; + MVKFenceSitter fenceSitter(!waitAny); + + for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) { + if (((MVKTimelineSemaphore*)pWaitInfo->pSemaphores[i])->registerWait(&fenceSitter, pWaitInfo, i) && waitAny) { + // In this case, we don't need to wait. + alreadySignaled = true; + break; + } + } + + if ( !alreadySignaled && !fenceSitter.wait(timeout) ) { rslt = VK_TIMEOUT; } + + for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) { + ((MVKTimelineSemaphore*)pWaitInfo->pSemaphores[i])->unregisterWait(&fenceSitter); + } + + return rslt; +} + #pragma mark - #pragma mark MVKMetalCompiler diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index c40ce89b..0364cd96 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -75,6 +75,7 @@ MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE_CLASS MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE) MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE) MVK_EXTENSION(KHR_swapchain_mutable_format, KHR_SWAPCHAIN_MUTABLE_FORMAT, DEVICE) +MVK_EXTENSION(KHR_timeline_semaphore, KHR_TIMELINE_SEMAPHORE, DEVICE) MVK_EXTENSION(KHR_uniform_buffer_standard_layout, KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, DEVICE) MVK_EXTENSION(KHR_variable_pointers, KHR_VARIABLE_POINTERS, DEVICE) MVK_EXTENSION(EXT_debug_marker, EXT_DEBUG_MARKER, DEVICE) diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index 1ad42bfa..208c5664 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2663,6 +2663,45 @@ MVK_PUBLIC_SYMBOL VkResult vkGetPhysicalDeviceSurfaceFormats2KHR( } +#pragma mark - +#pragma mark 
VK_KHR_timeline_semaphore + +MVK_PUBLIC_SYMBOL VkResult vkGetSemaphoreCounterValueKHR( + VkDevice device, + VkSemaphore semaphore, + uint64_t* pValue) { + + MVKTraceVulkanCallStart(); + auto* mvkSem4 = (MVKTimelineSemaphore*)semaphore; + *pValue = mvkSem4->getCounterValue(); + MVKTraceVulkanCallEnd(); + return VK_SUCCESS; +} + +MVK_PUBLIC_SYMBOL VkResult vkSignalSemaphoreKHR( + VkDevice device, + const VkSemaphoreSignalInfoKHR* pSignalInfo) { + + MVKTraceVulkanCallStart(); + auto* mvkSem4 = (MVKTimelineSemaphore*)pSignalInfo->semaphore; + mvkSem4->signal(pSignalInfo); + MVKTraceVulkanCallEnd(); + return VK_SUCCESS; +} + +MVK_PUBLIC_SYMBOL VkResult vkWaitSemaphoresKHR( + VkDevice device, + const VkSemaphoreWaitInfoKHR* pWaitInfo, + uint64_t timeout) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + VkResult rslt = mvkWaitSemaphores(mvkDev, pWaitInfo, timeout); + MVKTraceVulkanCallEnd(); + return rslt; +} + + #pragma mark - #pragma mark VK_EXT_debug_report extension