Support optional use of MTLFence for Vulkan semaphores via the MVK_ALLOW_METAL_FENCES environment variable.

Refactor MVKSemaphore class into separate MVKSemaphoreMTLFence, MVKSemaphoreMTLEvent, and MVKSemaphoreEmulated subclasses. Add MVK_ALLOW_METAL_FENCES environment variable to optionally enable using MTLFence for Vulkan semaphores. Add MVKPhysicalDeviceMetalFeatures::fences to track MTLFence availability. Update VK_MVK_MOLTENVK_SPEC_VERSION to version 22.
2019-08-31 22:37:33 -04:00 · 2019-08-31 22:37:33 -04:00 · fd5f0ada2c
commit fd5f0ada2c
parent 99743c8f84
8 changed files with 175 additions and 57 deletions
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@ -33,6 +33,7 @@ Released TBD
 - `vkCmdClearImage():` Set error if attempt made to clear 1D image, and fix validation of depth attachment formats.
 - `vkCreateRenderPass():` Return `VK_ERROR_FORMAT_NOT_SUPPORTED` if format not supported.
 - `vkCmdFillBuffer():` Improve performance 150x by using parallelism more effectively.
+- Support optional use of `MTLFence` for Vulkan semaphores via the `MVK_ALLOW_METAL_FENCES` environment variable.
 - Remove error logging on `VK_TIMEOUT` of `VkSemaphore` and `VkFence`.
 - Consolidate the various linkable objects into a `MVKLinkableMixin` template base class.
 - Use `MVKVector` whenever possible in MoltenVK, especially within render loop.
@ -43,6 +44,7 @@ Released TBD
  `MVKConfiguration::presentWithCommandBuffer` is now obsolete.
 - Don't use `MTLCommandBuffer push/popDebugGroup` if not available.
 - Add ability to automatically cause an *Xcode* GPU capture without developer intervention.
+- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to version 22.



--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@ -55,7 +55,7 @@ typedef unsigned long MTLLanguageVersion;
 #define MVK_MAKE_VERSION(major, minor, patch)    (((major) * 10000) + ((minor) * 100) + (patch))
 #define MVK_VERSION     MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)

-#define VK_MVK_MOLTENVK_SPEC_VERSION            21
+#define VK_MVK_MOLTENVK_SPEC_VERSION            22
 #define VK_MVK_MOLTENVK_EXTENSION_NAME          "VK_MVK_moltenvk"

 /**
@ -114,9 +114,11 @@ typedef unsigned long MTLLanguageVersion;
 * 3. Setting the MVK_CONFIG_FORCE_LOW_POWER_GPU runtime environment variable or MoltenVK compile-time
 *    build setting to 1 will force MoltenVK to use a low-power GPU, if one is availble on the device.
 *
- * 4. Setting the MVK_ALLOW_METAL_EVENTS runtime environment variable or MoltenVK compile-time build
- *    setting to 1 will cause MoltenVK to use Metal events, if they are available on the device, for
- *    for VkSemaphore sychronization behaviour. This is disabled by default.
+ * 4. Setting the MVK_ALLOW_METAL_FENCES or MVK_ALLOW_METAL_EVENTS runtime environment variable
+ *    or MoltenVK compile-time build setting to 1 will cause MoltenVK to use MTLFence or MTLEvent,
+ *    if they are available on the device, for VkSemaphore synchronization behaviour.
+ *    If both variables are set, MVK_ALLOW_METAL_FENCES takes priority over MVK_ALLOW_METAL_EVENTS.
+ *    Both options are disabled by default.
 *
 * 5. The MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE runtime environment variable or MoltenVK compile-time
 *    build setting controls whether Xcode should run an automatic GPU capture without the user
@ -206,7 +208,7 @@ typedef struct {
 	 * buffers (VK_COMMAND_BUFFER_LEVEL_SECONDARY), or for primary command buffers that are intended
 	 * to be submitted to multiple queues concurrently (VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT).
 	 *
-	 * When enabling this features, be aware that one Metal command buffer is required for each Vulkan
+	 * When enabling this feature, be aware that one Metal command buffer is required for each Vulkan
 	 * command buffer. Depending on the number of command buffers that you use, you may also need to
 	 * change the value of the maxActiveMetalCommandBuffersPerQueue setting.
 	 *
@ -538,12 +540,13 @@ typedef struct {
 	VkBool32 arrayOfSamplers;			 	  	/**< If true, arrays of texture samplers is supported. */
 	MTLLanguageVersion mslVersionEnum;			/**< The version of the Metal Shading Language available on this device, as a Metal enumeration. */
 	VkBool32 depthSampleCompare;				/**< If true, depth texture samplers support the comparison of the pixel value against a reference value. */
-	VkBool32 events;							/**< If true, Metal synchronization events are supported. */
+	VkBool32 events;							/**< If true, Metal synchronization events (MTLEvent) are supported. */
 	VkBool32 memoryBarriers;					/**< If true, full memory barriers within Metal render passes are supported. */
 	VkBool32 multisampleLayeredRendering;       /**< If true, layered rendering to multiple multi-sampled cube or texture array layers is supported. */
 	VkBool32 stencilFeedback;					/**< If true, fragment shaders that write to [[stencil]] outputs are supported. */
 	VkBool32 textureBuffers;					/**< If true, textures of type MTLTextureTypeBuffer are supported. */
 	VkBool32 postDepthCoverage;					/**< If true, coverage masks in fragment shaders post-depth-test are supported. */
+	VkBool32 fences;							/**< If true, Metal synchronization fences (MTLFence) are supported. */
 } MVKPhysicalDeviceMetalFeatures;

 /**
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@ -659,10 +659,15 @@ public:
    /** Performance statistics. */
    MVKPerformanceStatistics _performanceStatistics;

-	// Indicates whether semaphores should use MTLEvents if available.
+	// Indicates whether semaphores should use a MTLFence if available.
+	// Set by the MVK_ALLOW_METAL_FENCES environment variable if MTLFences are available.
+	// This should be a temporary fix after some repair to semaphore handling.
+	bool _useMTLFenceForSemaphores;
+
+	// Indicates whether semaphores should use a MTLEvent if available.
 	// Set by the MVK_ALLOW_METAL_EVENTS environment variable if MTLEvents are available.
 	// This should be a temporary fix after some repair to semaphore handling.
-	bool _useMTLEventsForSemaphores;
+	bool _useMTLEventForSemaphores;


 #pragma mark Construction
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@ -609,6 +609,9 @@ VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface,
 // In order to allow a Metal command buffer to be prefilled before it is formally submitted to
 // a Vulkan queue, we need to enforce that each Vulkan queue family can have only one Metal queue.
 // In order to provide parallel queue operations, we therefore provide multiple queue families.
+// In addition, Metal queues are always general purpose, so the default behaviour is for all
+// queue families to support graphics + compute + transfer, unless the app indicates it
+// requires queue family specialization.
 MVKVector<MVKQueueFamily*>& MVKPhysicalDevice::getQueueFamilies() {
 	if (_queueFamilies.empty()) {
 		VkQueueFamilyProperties qfProps;
@ -773,6 +776,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
 		_metalFeatures.mslVersionEnum = MTLLanguageVersion1_2;
        _metalFeatures.shaderSpecialization = true;
        _metalFeatures.stencilViews = true;
+		_metalFeatures.fences = true;
    }

    if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily1_v4] ) {
@ -839,6 +843,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
 		_metalFeatures.arrayOfTextures = true;
 		_metalFeatures.arrayOfSamplers = true;
 		_metalFeatures.presentModeImmediate = true;
+		_metalFeatures.fences = true;
    }

    if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_macOS_GPUFamily1_v4] ) {
@ -1906,7 +1911,13 @@ void MVKDevice::destroyFence(MVKFence* mvkFence,

 MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInfo,
 										 const VkAllocationCallbacks* pAllocator) {
-	return new MVKSemaphore(this, pCreateInfo);
+	if (_useMTLFenceForSemaphores) {
+		return new MVKSemaphoreMTLFence(this, pCreateInfo);
+	} else if (_useMTLEventForSemaphores) {
+		return new MVKSemaphoreMTLEvent(this, pCreateInfo);
+	} else {
+		return new MVKSemaphoreEmulated(this, pCreateInfo);
+	}
 }

 void MVKDevice::destroySemaphore(MVKSemaphore* mvkSem4,
@ -2355,11 +2366,15 @@ void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDe
 	_pProperties = &_physicalDevice->_properties;
 	_pMemoryProperties = &_physicalDevice->_memoryProperties;

-	_useMTLEventsForSemaphores = MVK_ALLOW_METAL_EVENTS;
-	if (_pMetalFeatures->events) {
-		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_useMTLEventsForSemaphores, MVK_ALLOW_METAL_EVENTS);
+	_useMTLFenceForSemaphores = false;
+	if (_pMetalFeatures->fences) {
+		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_useMTLFenceForSemaphores, MVK_ALLOW_METAL_FENCES);
 	}
-	MVKLogInfo("%s MTLEvent for semaphores.", _useMTLEventsForSemaphores ? "Using" : "NOT using");
+	_useMTLEventForSemaphores = false;
+	if (_pMetalFeatures->events) {
+		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_useMTLEventForSemaphores, MVK_ALLOW_METAL_EVENTS);
+	}
+	MVKLogInfo("Using %s for semaphores.", _useMTLFenceForSemaphores ? "MTLFence" : (_useMTLEventForSemaphores ? "MTLEvent" : "emulation"));

 #if MVK_MACOS
 	// If we have selected a high-power GPU and want to force the window system
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@ -169,7 +169,7 @@ void MVKSwapchain::signalWhenAvailable(uint32_t imageIndex, MVKSemaphore* semaph
 		// impose unacceptable performance costs to handle this particular case.
 		@autoreleasepool {
 			MVKSemaphore* mvkSem = signaler.first;
-			id<MTLCommandBuffer> mtlCmdBuff = (mvkSem && mvkSem->isUsingMTLEvent()
+			id<MTLCommandBuffer> mtlCmdBuff = (mvkSem && mvkSem->isUsingCommandEncoding()
 											   ? [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences]
 											   : nil);
 			signal(signaler, mtlCmdBuff);
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
@ -109,7 +109,7 @@ private:
 #pragma mark -
 #pragma mark MVKSemaphore

-/** Represents a Vulkan semaphore. */
+/** Abstract class that represents a Vulkan semaphore. */
 class MVKSemaphore : public MVKVulkanAPIDeviceObject {

 public:
@ -123,53 +123,106 @@ public:
 	/**
 	 * Wait for this semaphore to be signaled.
 	 *
-	 * If this instance is using MTLEvent AND the mtlCmdBuff is not nil, a MTLEvent wait
-	 * is encoded on the mtlCmdBuff, and this call returns immediately. Otherwise, if this
-	 * instance is NOT using MTLEvent, AND the mtlCmdBuff is nil, this call blocks
+	 * If the subclass uses command encoding AND the mtlCmdBuff is not nil, a wait
+	 * is encoded on the mtlCmdBuff, and this call returns immediately. Otherwise, if the
+	 * subclass does NOT use command encoding, AND the mtlCmdBuff is nil, this call blocks
 	 * indefinitely until this semaphore is signaled. Other combinations do nothing.
 	 *
-	 * This design allows this function to be blindly called twice, from different points in
-	 * the code path, once with a mtlCmdBuff to support encoding a wait on the command buffer
-	 * if this instance supports MTLEvents, and once without a mtlCmdBuff, at the point in
-	 * the code path where the code should block if this instance does not support MTLEvents.
+	 * This design allows this function to be blindly called twice, from different points in the
+	 * code path, once with a mtlCmdBuff to support encoding a wait on the command buffer if the
+	 * subclass supports command encoding, and once without a mtlCmdBuff, at the point in the
+	 * code path where the code should block if the subclass does not support command encoding.
 	 */
-	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff);
+	virtual void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) = 0;

 	/**
 	 * Signals this semaphore.
 	 *
-	 * If this instance is using MTLEvent AND the mtlCmdBuff is not nil, a MTLEvent signal
-	 * is encoded on the mtlCmdBuff. Otherwise, if this instance is NOT using MTLEvent,
+	 * If the subclass uses command encoding AND the mtlCmdBuff is not nil, a signal is
+	 * encoded on the mtlCmdBuff. Otherwise, if the subclass does NOT use command encoding,
 	 * AND the mtlCmdBuff is nil, this call immediately signals any waiting calls.
 	 * Either way, this call returns immediately. Other combinations do nothing.
 	 *
-	 * This design allows this function to be blindly called twice, from different points in
-	 * the code path, once with a mtlCmdBuff to support encoding a signal on the command buffer
-	 * if this instance supports MTLEvents, and once without a mtlCmdBuff, at the point in
-	 * the code path where the code should immediately signal any existing waits, if this
-	 * instance does not support MTLEvents.
+	 * This design allows this function to be blindly called twice, from different points in the
+	 * code path, once with a mtlCmdBuff to support encoding a signal on the command buffer if the
+	 * subclass supports command encoding, and once without a mtlCmdBuff, at the point where the code
+	 * should immediately signal any existing waits if the subclass does not support command encoding.
 	 */
-	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff);
+	virtual void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) = 0;

-	/** Returns whether this semaphore is using a MTLEvent. */
-	bool isUsingMTLEvent() { return _mtlEvent != nil; }
+	/** Returns whether this semaphore uses command encoding. */
+	virtual bool isUsingCommandEncoding() = 0;


 #pragma mark Construction

-    MVKSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
-
-    ~MVKSemaphore() override;
+    MVKSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {}

 protected:
 	void propogateDebugName() override {}

-	MVKSemaphoreImpl _blocker;
+};
+
+
+#pragma mark -
+#pragma mark MVKSemaphoreMTLFence
+
+/** An MVKSemaphore that uses MTLFence to provide synchronization. */
+class MVKSemaphoreMTLFence : public MVKSemaphore {
+
+public:
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) override;
+	bool isUsingCommandEncoding() override { return true; }
+
+	MVKSemaphoreMTLFence(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
+
+	~MVKSemaphoreMTLFence() override;
+
+protected:
+	id<MTLFence> _mtlFence;
+};
+
+
+#pragma mark -
+#pragma mark MVKSemaphoreMTLEvent
+
+/** An MVKSemaphore that uses MTLEvent to provide synchronization. */
+class MVKSemaphoreMTLEvent : public MVKSemaphore {
+
+public:
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) override;
+	bool isUsingCommandEncoding() override { return true; }
+
+	MVKSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
+
+	~MVKSemaphoreMTLEvent() override;
+
+protected:
 	id<MTLEvent> _mtlEvent;
 	std::atomic<uint64_t> _mtlEventValue;
 };


+#pragma mark -
+#pragma mark MVKSemaphoreEmulated
+
+/** An MVKSemaphore that emulates semaphore behaviour using CPU-side blocking instead of Metal command encoding. */
+class MVKSemaphoreEmulated : public MVKSemaphore {
+
+public:
+	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff) override;
+	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) override;
+	bool isUsingCommandEncoding() override { return false; }
+
+	MVKSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
+
+protected:
+	MVKSemaphoreImpl _blocker;
+};
+
+
 #pragma mark -
 #pragma mark MVKFence

@ -267,7 +320,7 @@ private:
 #pragma mark -
 #pragma mark MVKEvent

-/** Represents a Vulkan semaphore. */
+/** Abstract class that represents a Vulkan event. */
 class MVKEvent : public MVKVulkanAPIDeviceObject {

 public:
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
@ -78,35 +78,72 @@ MVKSemaphoreImpl::~MVKSemaphoreImpl() {


 #pragma mark -
-#pragma mark MVKSemaphore
+#pragma mark MVKSemaphoreMTLFence

-void MVKSemaphore::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
-	if (_mtlEvent && mtlCmdBuff) {
-		[mtlCmdBuff encodeWaitForEvent: _mtlEvent value: _mtlEventValue++];
-	} else if ( !_mtlEvent && !mtlCmdBuff ) {
-		_blocker.wait(UINT64_MAX, true);
-	}
+// Could use any encoder. Assume BLIT is fastest and lightest.
+// Nil mtlCmdBuff will do nothing.
+void MVKSemaphoreMTLFence::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
+	id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
+	[mtlCmdEnc waitForFence: _mtlFence];
+	[mtlCmdEnc endEncoding];
 }

-void MVKSemaphore::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
-	if (_mtlEvent && mtlCmdBuff) {
-		[mtlCmdBuff encodeSignalEvent: _mtlEvent value: _mtlEventValue];
-	} else if ( !_mtlEvent && !mtlCmdBuff ) {
-		 _blocker.release();
-	}
+// Could use any encoder. Assume BLIT is fastest and lightest.
+// Nil mtlCmdBuff will do nothing.
+void MVKSemaphoreMTLFence::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
+	id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
+	[mtlCmdEnc updateFence: _mtlFence];
+	[mtlCmdEnc endEncoding];
 }

-MVKSemaphore::MVKSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo) :
-	MVKVulkanAPIDeviceObject(device),
-	_blocker(false, 1),
-	_mtlEvent(device->_useMTLEventsForSemaphores ? [device->getMTLDevice() newEvent] : nil),
+MVKSemaphoreMTLFence::MVKSemaphoreMTLFence(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo) :
+	MVKSemaphore(device, pCreateInfo),
+	_mtlFence([device->getMTLDevice() newFence]) {}		//retained
+
+MVKSemaphoreMTLFence::~MVKSemaphoreMTLFence() {
+	[_mtlFence release];
+}
+
+
+#pragma mark -
+#pragma mark MVKSemaphoreMTLEvent
+
+// Nil mtlCmdBuff will do nothing.
+void MVKSemaphoreMTLEvent::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
+	[mtlCmdBuff encodeWaitForEvent: _mtlEvent value: _mtlEventValue++];
+}
+
+// Nil mtlCmdBuff will do nothing.
+void MVKSemaphoreMTLEvent::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
+	[mtlCmdBuff encodeSignalEvent: _mtlEvent value: _mtlEventValue];
+}
+
+MVKSemaphoreMTLEvent::MVKSemaphoreMTLEvent(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo) :
+	MVKSemaphore(device, pCreateInfo),
+	_mtlEvent([device->getMTLDevice() newEvent]),	//retained
 	_mtlEventValue(1) {}

-MVKSemaphore::~MVKSemaphore() {
+MVKSemaphoreMTLEvent::~MVKSemaphoreMTLEvent() {
    [_mtlEvent release];
 }


+#pragma mark -
+#pragma mark MVKSemaphoreEmulated
+
+void MVKSemaphoreEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
+	if ( !mtlCmdBuff ) { _blocker.wait(UINT64_MAX, true); }
+}
+
+void MVKSemaphoreEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
+	if ( !mtlCmdBuff ) { _blocker.release(); }
+}
+
+MVKSemaphoreEmulated::MVKSemaphoreEmulated(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo) :
+	MVKSemaphore(device, pCreateInfo),
+	_blocker(false, 1) {}
+
+
 #pragma mark -
 #pragma mark MVKFence

--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@ -157,7 +157,10 @@
 #   define MVK_CONFIG_FORCE_LOW_POWER_GPU    0
 #endif

-/** Allow the use of Metal events for VkSemaphore synchronization behaviour. Disabled by default. */
+/** Allow the use of MTLFence or MTLEvent for VkSemaphore synchronization behaviour. Disabled by default. */
+#ifndef MVK_ALLOW_METAL_FENCES
+#   define MVK_ALLOW_METAL_FENCES    0
+#endif
 #ifndef MVK_ALLOW_METAL_EVENTS
 #   define MVK_ALLOW_METAL_EVENTS    0
 #endif