Merge pull request #1709 from KhronosGroup/SingleQueueSemaphore

Merge SingleQueueSemaphore branch into master
2022-09-02 11:51:53 -04:00 · 2022-09-02 11:51:53 -04:00 · 5bae97a4ae
commit 5bae97a4ae
parent 8692a9df52 d7b5a36ddc
9 changed files with 150 additions and 118 deletions
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@ -20,6 +20,10 @@ Released TBD

 - Add support for extensions:
 	- `VK_KHR_shader_float_controls`
+- Vulkan semaphore functional improvements:
+	- Replace use of `MTLFence` with an option to limit to a single Vulkan queue and use Metal's implicit submission order guarantees.
+	- Support option to force use of `MTLEvents` for Vulkan semaphores on NVIDIA and Rosetta2.
+	- `MVKConfiguration`: replace booleans `semaphoreUseMTLEvent` and `semaphoreUseMTLFence` with enumerated `semaphoreSupportStyle`.
 - Support config option to automatically use Metal argument buffers when `VK_EXT_descriptor_indexing` 
  extension is enabled. `MVKConfiguration::useMetalArgumentBuffers` (`MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS`) 
  is now an enum field. The use of Metal argument buffers is still disabled by default (`MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER`).
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@ -104,6 +104,15 @@ typedef enum MVKUseMetalArgumentBuffers {
 	MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_MAX_ENUM            = 0x7FFFFFFF
 } MVKUseMetalArgumentBuffers;

+/** Identifies the Metal functionality used to support Vulkan semaphore functionality (VkSemaphore). */
+typedef enum MVKVkSemaphoreSupportStyle {
+	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK                = 0,	/**< Use CPU callbacks upon GPU submission completion. This is the slowest technique. */
+	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE = 1,	/**< Use Metal events (MTLEvent) when available on the platform, and where safe. This will revert to the same behaviour as MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE on some NVIDIA GPUs and Rosetta2, due to potential challenges with MTLEvents on those platforms. */
+	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS            = 2,	/**< Always use Metal events (MTLEvent) when available on the platform. */
+	MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE            = 3,	/**< Limit Vulkan to a single queue, with no explicit semaphore synchronization, and use Metal's implicit guarantees that all operations submitted to a queue will give the same result as if they had been run in submission order. */
+	MVK_CONFIG_VK_SEMAPHORE_MAX_ENUM                              = 0x7FFFFFFF
+} MVKVkSemaphoreSupportStyle;
+
 /**
 * MoltenVK configuration settings.
 *
@ -565,52 +574,37 @@ typedef struct {
 	 */
 	VkBool32 forceLowPowerGPU;

-	/**
-	 * Use MTLFence, if it is available on the device, for VkSemaphore synchronization behaviour.
-	 *
-	 * This parameter interacts with semaphoreUseMTLEvent. If both are enabled, on GPUs other than
-	 * NVIDIA, semaphoreUseMTLEvent takes priority and MTLEvent will be used if it is available,
-	 * otherwise MTLFence will be used if it is available. On NVIDIA GPUs, MTLEvent is disabled
-	 * for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseMTLFence
-	 * is enabled and MTLFence is available.
-	 *
-	 * In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always
-	 * use MTLSharedEvent if it is available on the platform, regardless of the values of
-	 * semaphoreUseMTLEvent or semaphoreUseMTLFence.
-	 *
-	 * The value of this parameter must be changed before creating a VkDevice,
-	 * for the change to take effect.
-	 *
-	 * The initial value or this parameter is set by the
-	 * MVK_ALLOW_METAL_FENCES
-	 * runtime environment variable or MoltenVK compile-time build setting.
-	 * If neither is set, this setting is disabled by default, and VkSemaphore will not use MTLFence.
-	 */
+	/** Deprecated. Use semaphoreSupportStyle instead. */
 	VkBool32 semaphoreUseMTLFence;

 	/**
-	 * Use MTLEvent, if it is available on the device, for VkSemaphore synchronization behaviour.
+	 * Determines the style used to implement Vulkan semaphore (VkSemaphore) functionality in Metal.
+	 * See the documentation of the MVKVkSemaphoreSupportStyle for the options.
 	 *
-	 * This parameter interacts with semaphoreUseMTLFence. If both are enabled, on GPUs other than
-	 * NVIDIA, semaphoreUseMTLEvent takes priority and MTLEvent will be used if it is available,
-	 * otherwise MTLFence will be used if it is available. On NVIDIA GPUs, MTLEvent is disabled
-	 * for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseMTLFence
-	 * is enabled and MTLFence is available.
+	 * In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always use
+	 * MTLSharedEvent if it is available on the platform, regardless of the value of this parameter.
 	 *
-	 * In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always
-	 * use MTLSharedEvent if it is available on the platform, regardless of the values of
-	 * semaphoreUseMTLEvent or semaphoreUseMTLFence.
-	 *
-	 * The value of this parameter must be changed before creating a VkDevice,
+	 * The value of this parameter must be changed before creating a VkInstance,
 	 * for the change to take effect.
 	 *
 	 * The initial value or this parameter is set by the
-	 * MVK_ALLOW_METAL_EVENTS
+	 * MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE
 	 * runtime environment variable or MoltenVK compile-time build setting.
-	 * If neither is set, this setting is enabled by default, and VkSemaphore will use MTLEvent,
-	 * if it is available, except on NVIDIA GPUs.
+	 * If neither is set, this setting is set to
+	 * MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE by default,
+	 * and MoltenVK will use MTLEvent, except on NVIDIA GPUs and in Rosetta2 environments,
+	 * where it will use a single queue with implicit synchronization
+	 * (as if this parameter was set to MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE).
+	 *
+	 * This parameter interacts with the deprecated legacy parameters semaphoreUseMTLEvent
+	 * and semaphoreUseMTLFence. If semaphoreUseMTLEvent is enabled, this parameter
+	 * will be set to MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE.
+	 * If semaphoreUseMTLEvent is disabled, and semaphoreUseMTLFence is enabled,
+	 * this parameter will be set to MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE.
+	 * Structurally, this parameter replaces, and is aliased by, semaphoreUseMTLEvent.
 	 */
-	VkBool32 semaphoreUseMTLEvent;
+	MVKVkSemaphoreSupportStyle semaphoreSupportStyle;
+#define semaphoreUseMTLEvent semaphoreSupportStyle

 	/**
 	 * Controls whether Metal should run an automatic GPU capture without the user having to
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@ -90,6 +90,12 @@ typedef NSUInteger MTLTimestamp;
 #pragma mark -
 #pragma mark MVKPhysicalDevice

+typedef enum {
+	MVKSemaphoreStyleUseMTLEvent,
+	MVKSemaphoreStyleUseEmulation,
+	MVKSemaphoreStyleSingleQueue,
+} MVKSemaphoreStyle;
+
 /** VkPhysicalDeviceVulkan12Features entries that did not originate in a prior extension. */
 typedef struct MVKPhysicalDeviceVulkan12FeaturesNoExt {
 	VkBool32 samplerMirrorClampToEdge;
@ -397,6 +403,7 @@ protected:
 	void initLimits();
 	void initGPUInfoProperties();
 	void initMemoryProperties();
+	void initVkSemaphoreStyle();
 	void setMemoryHeap(uint32_t heapIndex, VkDeviceSize heapSize, VkMemoryHeapFlags heapFlags);
 	void setMemoryType(uint32_t typeIndex, uint32_t heapIndex, VkMemoryPropertyFlags propertyFlags);
 	uint64_t getVRAMSize();
@ -427,6 +434,7 @@ protected:
 	MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies;
 	MVKPixelFormats _pixelFormats;
 	id<MTLCounterSet> _timestampMTLCounterSet;
+	MVKSemaphoreStyle _vkSemaphoreStyle;
 	uint32_t _allMemoryTypes;
 	uint32_t _hostVisibleMemoryTypes;
 	uint32_t _hostCoherentMemoryTypes;
@ -445,12 +453,6 @@ typedef struct MVKMTLBlitEncoder {
 	id<MTLCommandBuffer> mtlCmdBuffer = nil;
 } MVKMTLBlitEncoder;

-typedef enum {
-	MVKSemaphoreStyleUseMTLEvent,
-	MVKSemaphoreStyleUseMTLFence,
-	MVKSemaphoreStyleUseEmulation
-} MVKSemaphoreStyle;
-
 /** Represents a Vulkan logical GPU device, associated with a physical device. */
 class MVKDevice : public MVKDispatchableVulkanAPIObject {

@ -884,7 +886,6 @@ protected:
    id<MTLBuffer> _globalVisibilityResultMTLBuffer = nil;
 	id<MTLSamplerState> _defaultMTLSamplerState = nil;
 	id<MTLBuffer> _dummyBlitMTLBuffer = nil;
-	MVKSemaphoreStyle _vkSemaphoreStyle = MVKSemaphoreStyleUseEmulation;
    uint32_t _globalVisibilityQueryCount = 0;
 	bool _logActivityPerformanceInline = false;
 	bool _isPerformanceTracking = false;
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@ -1309,17 +1309,21 @@ MVKArrayRef<MVKQueueFamily*> MVKPhysicalDevice::getQueueFamilies() {
 		qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
 		_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));

-		// Dedicated graphics queue family...or another general-purpose queue family.
-		if (specialize) { qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT); }
-		_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+		// Single queue semaphore requires using a single queue for everything
+		// So don't allow anyone to have more than one
+		if (_vkSemaphoreStyle != MVKSemaphoreStyleSingleQueue) {
+			// Dedicated graphics queue family...or another general-purpose queue family.
+			if (specialize) { qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT); }
+			_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));

-		// Dedicated compute queue family...or another general-purpose queue family.
-		if (specialize) { qfProps.queueFlags = (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT); }
-		_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+			// Dedicated compute queue family...or another general-purpose queue family.
+			if (specialize) { qfProps.queueFlags = (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT); }
+			_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));

-		// Dedicated transfer queue family...or another general-purpose queue family.
-		if (specialize) { qfProps.queueFlags = VK_QUEUE_TRANSFER_BIT; }
-		_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+			// Dedicated transfer queue family...or another general-purpose queue family.
+			if (specialize) { qfProps.queueFlags = VK_QUEUE_TRANSFER_BIT; }
+			_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+		}

 		MVKAssert(kMVKQueueFamilyCount >= _queueFamilies.size(), "Adjust value of kMVKQueueFamilyCount.");
 	}
@ -1440,6 +1444,7 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
 	initMemoryProperties();
 	initExternalMemoryProperties();
 	initCounterSets();
+	initVkSemaphoreStyle();
 	logGPUInfo();
 }

@ -3110,6 +3115,35 @@ void MVKPhysicalDevice::initCounterSets() {
 	}
 }

+// Determine whether Vulkan semaphores should use a MTLEvent, CPU callbacks, or should limit
+// Vulkan to a single queue and use Metal's implicit guarantees that all operations submitted
+// to a queue will give the same result as if they had been run in submission order.
+// MTLEvents for semaphores can sometimes prove troublesome on some platforms,
+// and so may optionally be disabled on those platforms.
+void MVKPhysicalDevice::initVkSemaphoreStyle() {
+
+	// Default to CPU callback if other options unavailable.
+	_vkSemaphoreStyle = MVKSemaphoreStyleUseEmulation;
+
+	switch (mvkConfig().semaphoreSupportStyle) {
+		case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE: {
+			bool isNVIDIA = _properties.vendorID == kNVVendorId;
+			bool isRosetta2 = _properties.vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
+			if (_metalFeatures.events && !(isRosetta2 || isNVIDIA)) { _vkSemaphoreStyle = MVKSemaphoreStyleUseMTLEvent; }
+			break;
+		}
+		case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS:
+			if (_metalFeatures.events) { _vkSemaphoreStyle = MVKSemaphoreStyleUseMTLEvent; }
+			break;
+		case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE:
+			_vkSemaphoreStyle = MVKSemaphoreStyleSingleQueue;
+			break;
+		case MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK:
+		default:
+			break;
+	}
+}
+
 // Workaround for a bug in Intel Iris Plus Graphics driver where the counterSets array is
 // not properly retained internally, and becomes a zombie when counterSets is called more
 // than once, which occurs when an app creates more than one VkInstance. This workaround
@ -3642,10 +3676,10 @@ MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInf
 			return new MVKTimelineSemaphoreEmulated(this, pCreateInfo, pTypeCreateInfo, pExportInfo, pImportInfo);
 		}
 	} else {
-		switch (_vkSemaphoreStyle) {
+		switch (_physicalDevice->_vkSemaphoreStyle) {
 			case MVKSemaphoreStyleUseMTLEvent:  return new MVKSemaphoreMTLEvent(this, pCreateInfo, pExportInfo, pImportInfo);
-			case MVKSemaphoreStyleUseMTLFence:  return new MVKSemaphoreMTLFence(this, pCreateInfo, pExportInfo, pImportInfo);
 			case MVKSemaphoreStyleUseEmulation: return new MVKSemaphoreEmulated(this, pCreateInfo, pExportInfo, pImportInfo);
+			case MVKSemaphoreStyleSingleQueue:  return new MVKSemaphoreSingleQueue(this, pCreateInfo, pExportInfo, pImportInfo);
 		}
 	}
 }
@ -4438,25 +4472,15 @@ void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDe
 	_pProperties = &_physicalDevice->_properties;
 	_pMemoryProperties = &_physicalDevice->_memoryProperties;

-	// Decide whether Vulkan semaphores should use a MTLEvent or MTLFence if they are available.
-	// Prefer MTLEvent, because MTLEvent handles sync across MTLCommandBuffers and MTLCommandQueues.
-	// However, do not allow use of MTLEvents on Rosetta2 (x86 build on M1 runtime) or NVIDIA GPUs,
-	// which have demonstrated trouble with MTLEvents. In that case, since MTLFence use is disabled
-	// by default, unless MTLFence is deliberately enabled, CPU emulation will be used.
-	bool isNVIDIA = _pProperties->vendorID == kNVVendorId;
-	bool isRosetta2 = _pProperties->vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
-	bool canUseMTLEventForSem4 = _pMetalFeatures->events && mvkConfig().semaphoreUseMTLEvent && !(isRosetta2 || isNVIDIA);
-	bool canUseMTLFenceForSem4 = _pMetalFeatures->fences && mvkConfig().semaphoreUseMTLFence;
-	_vkSemaphoreStyle = canUseMTLEventForSem4 ? MVKSemaphoreStyleUseMTLEvent : (canUseMTLFenceForSem4 ? MVKSemaphoreStyleUseMTLFence : MVKSemaphoreStyleUseEmulation);
-	switch (_vkSemaphoreStyle) {
+	switch (_physicalDevice->_vkSemaphoreStyle) {
 		case MVKSemaphoreStyleUseMTLEvent:
-			MVKLogInfo("Using MTLEvent for Vulkan semaphores.");
-			break;
-		case MVKSemaphoreStyleUseMTLFence:
-			MVKLogInfo("Using MTLFence for Vulkan semaphores.");
+			MVKLogInfo("Vulkan semaphores using MTLEvent.");
 			break;
 		case MVKSemaphoreStyleUseEmulation:
-			MVKLogInfo("Using emulation for Vulkan semaphores.");
+			MVKLogInfo("Vulkan semaphores using CPU callbacks upon GPU submission completion.");
+			break;
+		case MVKSemaphoreStyleSingleQueue:
+			MVKLogInfo("Vulkan semaphores using Metal implicit guarantees within a single queue.");
 			break;
 	}
 }
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
@ -201,27 +201,29 @@ protected:


 #pragma mark -
-#pragma mark MVKSemaphoreMTLFence
+#pragma mark MVKSemaphoreSingleQueue

-/** An MVKSemaphore that uses MTLFence to provide synchronization. */
-class MVKSemaphoreMTLFence : public MVKSemaphore {
+/**
+ * An MVKSemaphore that uses Metal's built-in guarantees on single-queue submission to provide semaphore-like guarantees.
+ *
+ * Relies on Metal's enabled-by-default hazard tracking. If resources are ever created with
+ * MTLHazardTrackingModeUntracked, this class will need to add explicit synchronization using MTLFences.
+ */
+class MVKSemaphoreSingleQueue : public MVKSemaphore {

 public:
 	void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
 	void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
 	uint64_t deferSignal() override;
 	void encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
-	bool isUsingCommandEncoding() override { return true; }
+	bool isUsingCommandEncoding() override { return false; }

-	MVKSemaphoreMTLFence(MVKDevice* device,
-						 const VkSemaphoreCreateInfo* pCreateInfo,
-						 const VkExportMetalObjectCreateInfoEXT* pExportInfo,
-						 const VkImportMetalSharedEventInfoEXT* pImportInfo);
+	MVKSemaphoreSingleQueue(MVKDevice* device,
+	                        const VkSemaphoreCreateInfo* pCreateInfo,
+	                        const VkExportMetalObjectCreateInfoEXT* pExportInfo,
+	                        const VkImportMetalSharedEventInfoEXT* pImportInfo);

-	~MVKSemaphoreMTLFence() override;
-
-protected:
-	id<MTLFence> _mtlFence;
+	~MVKSemaphoreSingleQueue() override;
 };


--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
@ -77,47 +77,34 @@ MVKSemaphoreImpl::~MVKSemaphoreImpl() {


 #pragma mark -
-#pragma mark MVKSemaphoreMTLFence
+#pragma mark MVKSemaphoreSingleQueue

-// Could use any encoder. Assume BLIT is fastest and lightest.
-// Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLFence::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
-	id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
-	[mtlCmdEnc waitForFence: _mtlFence];
-	[mtlCmdEnc endEncoding];
+void MVKSemaphoreSingleQueue::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
+	// Metal will handle all synchronization for us automatically
 }

-// Could use any encoder. Assume BLIT is fastest and lightest.
-// Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLFence::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
-	id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
-	[mtlCmdEnc updateFence: _mtlFence];
-	[mtlCmdEnc endEncoding];
+void MVKSemaphoreSingleQueue::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
+	// Metal will handle all synchronization for us automatically
 }

-uint64_t MVKSemaphoreMTLFence::deferSignal() {
+uint64_t MVKSemaphoreSingleQueue::deferSignal() {
 	return 0;
 }

-void MVKSemaphoreMTLFence::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
+void MVKSemaphoreSingleQueue::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
 	encodeSignal(mtlCmdBuff, 0);
 }

-MVKSemaphoreMTLFence::MVKSemaphoreMTLFence(MVKDevice* device,
-										   const VkSemaphoreCreateInfo* pCreateInfo,
-										   const VkExportMetalObjectCreateInfoEXT* pExportInfo,
-										   const VkImportMetalSharedEventInfoEXT* pImportInfo) : MVKSemaphore(device, pCreateInfo) {
-
-	_mtlFence = [device->getMTLDevice() newFence];		//retained
-
+MVKSemaphoreSingleQueue::MVKSemaphoreSingleQueue(MVKDevice* device,
+                                                 const VkSemaphoreCreateInfo* pCreateInfo,
+                                                 const VkExportMetalObjectCreateInfoEXT* pExportInfo,
+                                                 const VkImportMetalSharedEventInfoEXT* pImportInfo) : MVKSemaphore(device, pCreateInfo) {
 	if ((pImportInfo && pImportInfo->mtlSharedEvent) || (pExportInfo && pExportInfo->exportObjectType == VK_EXPORT_METAL_OBJECT_TYPE_METAL_SHARED_EVENT_BIT_EXT)) {
-		setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "vkCreateEvent(): MTLSharedEvent is not available with VkSemaphores that use MTLFence."));
+		setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "vkCreateEvent(): MTLSharedEvent is not available with VkSemaphores that use implicit synchronization."));
 	}
 }

-MVKSemaphoreMTLFence::~MVKSemaphoreMTLFence() {
-	[_mtlFence release];
-}
+MVKSemaphoreSingleQueue::~MVKSemaphoreSingleQueue() = default;


 #pragma mark -
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
@ -46,12 +46,11 @@ static void mvkInitConfigFromEnvVars() {
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.defaultGPUCaptureScopeQueueFamilyIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.defaultGPUCaptureScopeQueueIndex,       MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.fastMathEnabled,                        MVK_CONFIG_FAST_MATH_ENABLED);
-
 	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.logLevel,                               MVK_CONFIG_LOG_LEVEL);
 	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.traceVulkanCalls,                       MVK_CONFIG_TRACE_VULKAN_CALLS);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.forceLowPowerGPU,                       MVK_CONFIG_FORCE_LOW_POWER_GPU);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.semaphoreUseMTLFence,                   MVK_ALLOW_METAL_FENCES);
-	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.semaphoreUseMTLEvent,                   MVK_ALLOW_METAL_EVENTS);
+	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.semaphoreSupportStyle,                  MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE);
 	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.autoGPUCaptureScope,                    MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE);
 	MVK_SET_FROM_ENV_OR_BUILD_STRING(evCfg.autoGPUCaptureOutputFilepath,           MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE, evGPUCapFileStrObj);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.texture1DAs2D,                          MVK_CONFIG_TEXTURE_1D_AS_2D);
@ -63,6 +62,18 @@ static void mvkInitConfigFromEnvVars() {
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.resumeLostDevice,                       MVK_CONFIG_RESUME_LOST_DEVICE);
 	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.useMetalArgumentBuffers,                MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);

+	// Deprecated legacy VkSemaphore MVK_ALLOW_METAL_FENCES and MVK_ALLOW_METAL_EVENTS config.
+	// Legacy MVK_ALLOW_METAL_EVENTS is covered by MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE,
+	// but for backwards compatibility, if legacy MVK_ALLOW_METAL_EVENTS is explicitly
+	// disabled, disable semaphoreUseMTLEvent (aliased as semaphoreSupportStyle value
+	// MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK), and let mvkSetConfig() further
+	// process legacy behavior based on the value of legacy semaphoreUseMTLFence.
+	bool sem4UseMTLEvent;
+	MVK_SET_FROM_ENV_OR_BUILD_BOOL(sem4UseMTLEvent, MVK_ALLOW_METAL_EVENTS);
+	if ( !sem4UseMTLEvent ) {
+		evCfg.semaphoreUseMTLEvent = (MVKVkSemaphoreSupportStyle)false;		// Disabled. Also semaphoreSupportStyle MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK.
+	}
+
 	mvkSetConfig(evCfg);
 }

@ -90,7 +101,14 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig) {
 	_mvkConfig.apiVersionToAdvertise = VK_MAKE_VERSION(VK_VERSION_MAJOR(_mvkConfig.apiVersionToAdvertise),
 													   VK_VERSION_MINOR(_mvkConfig.apiVersionToAdvertise),
 													   VK_HEADER_VERSION);
-	
+
+	// Deprecated legacy support for specific case where semaphoreUseMTLFence is enabled and legacy
+	// semaphoreUseMTLEvent (now aliased to semaphoreSupportStyle) is disabled. In this case the user
+	// had been using the legacy MTLFence, so use MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE now.
+	if (_mvkConfig.semaphoreUseMTLFence && !_mvkConfig.semaphoreUseMTLEvent) {
+		_mvkConfig.semaphoreSupportStyle = MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE;
+	}
+
 	// Set capture file path string
 	if (_mvkConfig.autoGPUCaptureOutputFilepath) {
 		_autoGPUCaptureOutputFile = _mvkConfig.autoGPUCaptureOutputFilepath;
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@ -231,15 +231,16 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig);
 #endif

 /**
- * Allow the use of MTLFence or MTLEvent for VkSemaphore synchronization behaviour.
- * By default:
- *   - MVK_ALLOW_METAL_EVENTS is enabled
- *   - MVK_ALLOW_METAL_FENCES is disabled
- * */
-#ifndef MVK_ALLOW_METAL_EVENTS
+ * Determines the style used to implement Vulkan semaphore (VkSemaphore) functionality in Metal.
+ * By default, use Metal events, if available, on most platforms.
+ */
+#ifndef MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE
+#   define MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE    MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE
+#endif
+#ifndef MVK_ALLOW_METAL_EVENTS		// Deprecated
 #   define MVK_ALLOW_METAL_EVENTS    1
 #endif
-#ifndef MVK_ALLOW_METAL_FENCES
+#ifndef MVK_ALLOW_METAL_FENCES		// Deprecated
 #   define MVK_ALLOW_METAL_FENCES    0
 #endif

--- a/Scripts/runcts
+++ b/Scripts/runcts
@ -108,6 +108,7 @@ export MVK_CONFIG_RESUME_LOST_DEVICE=1
 export MVK_CONFIG_FAST_MATH_ENABLED=1
 export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0  #(2 = VK_EXT_descriptor_indexing enabled)
 export MVK_CONFIG_FORCE_LOW_POWER_GPU=0
+export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2  #(2 = MTLEvents always)


 # -------------- Operation --------------------