Merge pull request #1693 from tellowkrinkle/SingleQueueSemaphore

Replace MTLFence semaphores with forcing a single queue
This commit is contained in:
Bill Hollings 2022-08-30 15:04:58 -04:00 committed by GitHub
commit c652ebc745
7 changed files with 87 additions and 86 deletions


@@ -566,36 +566,34 @@ typedef struct {
VkBool32 forceLowPowerGPU;
/**
* Use MTLFence, if it is available on the device, for VkSemaphore synchronization behaviour.
* Use Metal's implicit guarantee that all operations submitted to a queue will give the same result as
* if they had been run in submission order, to implement VkSemaphore synchronization as no-ops.
*
* This parameter interacts with semaphoreUseMTLEvent. If both are enabled, on GPUs other than
* NVIDIA, semaphoreUseMTLEvent takes priority and MTLEvent will be used if it is available,
* otherwise MTLFence will be used if it is available. On NVIDIA GPUs, MTLEvent is disabled
* for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseMTLFence
* is enabled and MTLFence is available.
* This requires that all submissions be made to the same queue; to guarantee that, MoltenVK will expose
* only one queue to the application.
*
* In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always
* use MTLSharedEvent if it is available on the platform, regardless of the values of
* semaphoreUseMTLEvent or semaphoreUseMTLFence.
*
* The value of this parameter must be changed before creating a VkDevice,
* for the change to take effect.
* The value of this parameter must be changed before creating a VkDevice for the change to take effect.
*
* The initial value of this parameter is set by the
* MVK_ALLOW_METAL_FENCES
* MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE
* runtime environment variable or MoltenVK compile-time build setting.
* If neither is set, this setting is disabled by default, and VkSemaphore will not use MTLFence.
* If neither is set, this setting is enabled by default, and VkSemaphore will force a single queue
* on NVIDIA GPUs and whenever MVK_ALLOW_METAL_EVENTS is not also set.
*/
VkBool32 semaphoreUseMTLFence;
VkBool32 semaphoreUseSingleQueue;
/**
* Use MTLEvent, if it is available on the device, for VkSemaphore synchronization behaviour.
*
* This parameter interacts with semaphoreUseMTLFence. If both are enabled, on GPUs other than
* This parameter interacts with semaphoreUseSingleQueue. If both are enabled, on GPUs other than
* NVIDIA, semaphoreUseMTLEvent takes priority and MTLEvent will be used if it is available,
* otherwise MTLFence will be used if it is available. On NVIDIA GPUs, MTLEvent is disabled
* for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseMTLFence
* is enabled and MTLFence is available.
* for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseSingleQueue
* is enabled.
*
* In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always
* use MTLSharedEvent if it is available on the platform, regardless of the values of
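The two parameters above can also be adjusted per-application without rebuilding MoltenVK. A minimal sketch, assuming MoltenVK's configuration API in vk_mvk_moltenvk.h (vkGetMoltenVKConfigurationMVK / vkSetMoltenVKConfigurationMVK; the helper name is hypothetical and exact signatures may differ between MoltenVK versions). The same switch can be flipped with the MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE environment variable, and either way the change must land before vkCreateDevice():

#include <MoltenVK/vk_mvk_moltenvk.h>

// Hypothetical helper: prefer MTLEvent-based semaphores by turning the
// single-queue style off before any VkDevice is created.
static void preferMTLEventSemaphores(VkInstance instance) {
	MVKConfiguration mvkCfg;
	size_t cfgSize = sizeof(mvkCfg);
	vkGetMoltenVKConfigurationMVK(instance, &mvkCfg, &cfgSize);  // read the current config
	mvkCfg.semaphoreUseSingleQueue = VK_FALSE;                   // fall back to MTLEvent or emulation
	mvkCfg.semaphoreUseMTLEvent    = VK_TRUE;                    // keep MTLEvent allowed
	vkSetMoltenVKConfigurationMVK(instance, &mvkCfg, &cfgSize);  // must precede vkCreateDevice()
}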


@@ -90,6 +90,12 @@ typedef NSUInteger MTLTimestamp;
#pragma mark -
#pragma mark MVKPhysicalDevice
typedef enum {
MVKSemaphoreStyleUseMTLEvent,
MVKSemaphoreStyleUseEmulation,
MVKSemaphoreStyleSingleQueue,
} MVKSemaphoreStyle;
/** VkPhysicalDeviceVulkan12Features entries that did not originate in a prior extension. */
typedef struct MVKPhysicalDeviceVulkan12FeaturesNoExt {
VkBool32 samplerMirrorClampToEdge;
@@ -407,6 +413,7 @@ protected:
void initExtensions();
void initCounterSets();
bool needsCounterSetRetained();
MVKSemaphoreStyle getSemaphoreStyle();
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
void initPipelineCacheUUID();
uint32_t getHighestGPUCapability();
@@ -445,12 +452,6 @@ typedef struct MVKMTLBlitEncoder {
id<MTLCommandBuffer> mtlCmdBuffer = nil;
} MVKMTLBlitEncoder;
typedef enum {
MVKSemaphoreStyleUseMTLEvent,
MVKSemaphoreStyleUseMTLFence,
MVKSemaphoreStyleUseEmulation
} MVKSemaphoreStyle;
/** Represents a Vulkan logical GPU device, associated with a physical device. */
class MVKDevice : public MVKDispatchableVulkanAPIObject {


@@ -1309,17 +1309,22 @@ MVKArrayRef<MVKQueueFamily*> MVKPhysicalDevice::getQueueFamilies() {
qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
// Dedicated graphics queue family...or another general-purpose queue family.
if (specialize) { qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT); }
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
// Single-queue semaphores require all work to be submitted to one queue,
// so don't expose more than one queue to the application.
if (getSemaphoreStyle() != MVKSemaphoreStyleSingleQueue) {
// Dedicated graphics queue family...or another general-purpose queue family.
if (specialize) { qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT); }
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
// Dedicated compute queue family...or another general-purpose queue family.
if (specialize) { qfProps.queueFlags = (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT); }
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
// Dedicated compute queue family...or another general-purpose queue family.
if (specialize) { qfProps.queueFlags = (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT); }
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
// Dedicated transfer queue family...or another general-purpose queue family.
if (specialize) { qfProps.queueFlags = VK_QUEUE_TRANSFER_BIT; }
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
// Dedicated transfer queue family...or another general-purpose queue family.
if (specialize) { qfProps.queueFlags = VK_QUEUE_TRANSFER_BIT; }
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
}
MVKAssert(kMVKQueueFamilyCount >= _queueFamilies.size(), "Adjust value of kMVKQueueFamilyCount.");
}
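Because the single-queue style now hides the dedicated graphics, compute and transfer families, applications that look for specialized families need a fallback. A small application-side sketch using only standard Vulkan calls (the helper and its fallback policy are illustrative, not part of this change):

#include <vulkan/vulkan.h>
#include <vector>
#include <cstdint>

// Pick queue families, falling back to the lone general-purpose family that
// MoltenVK exposes when the single-queue semaphore style is active.
static void pickQueueFamilies(VkPhysicalDevice physDev,
							  uint32_t& graphicsFamily,
							  uint32_t& transferFamily) {
	uint32_t count = 0;
	vkGetPhysicalDeviceQueueFamilyProperties(physDev, &count, nullptr);
	std::vector<VkQueueFamilyProperties> families(count);
	vkGetPhysicalDeviceQueueFamilyProperties(physDev, &count, families.data());

	graphicsFamily = transferFamily = UINT32_MAX;
	for (uint32_t i = 0; i < count; i++) {
		VkQueueFlags flags = families[i].queueFlags;
		if (flags & VK_QUEUE_GRAPHICS_BIT) { graphicsFamily = i; }
		if ((flags & VK_QUEUE_TRANSFER_BIT) && !(flags & VK_QUEUE_GRAPHICS_BIT)) { transferFamily = i; }
	}
	if (transferFamily == UINT32_MAX) { transferFamily = graphicsFamily; }	// only one family exposed
}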
@@ -3110,6 +3115,23 @@ void MVKPhysicalDevice::initCounterSets() {
}
}
MVKSemaphoreStyle MVKPhysicalDevice::getSemaphoreStyle() {
// Decide whether Vulkan semaphores should use MTLEvent or force all work onto a single queue, if available.
// Prefer MTLEvent, because MTLEvent handles sync across MTLCommandBuffers and MTLCommandQueues.
// However, do not allow use of MTLEvents on Rosetta2 (x86 build on M1 runtime) or NVIDIA GPUs,
// which have demonstrated trouble with MTLEvents. In that case, a single queue will be used,
// unless that option has been disabled, in which case CPU emulation will be used.
bool isNVIDIA = _properties.vendorID == kNVVendorId;
bool isRosetta2 = _properties.vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
if (_metalFeatures.events && mvkConfig().semaphoreUseMTLEvent && !(isRosetta2 || isNVIDIA)) {
return MVKSemaphoreStyleUseMTLEvent;
}
if (mvkConfig().semaphoreUseSingleQueue) {
return MVKSemaphoreStyleSingleQueue;
}
return MVKSemaphoreStyleUseEmulation;
}
// Workaround for a bug in Intel Iris Plus Graphics driver where the counterSets array is
// not properly retained internally, and becomes a zombie when counterSets is called more
// than once, which occurs when an app creates more than one VkInstance. This workaround
@@ -3644,8 +3666,8 @@ MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInf
} else {
switch (_vkSemaphoreStyle) {
case MVKSemaphoreStyleUseMTLEvent: return new MVKSemaphoreMTLEvent(this, pCreateInfo, pExportInfo, pImportInfo);
case MVKSemaphoreStyleUseMTLFence: return new MVKSemaphoreMTLFence(this, pCreateInfo, pExportInfo, pImportInfo);
case MVKSemaphoreStyleUseEmulation: return new MVKSemaphoreEmulated(this, pCreateInfo, pExportInfo, pImportInfo);
case MVKSemaphoreStyleSingleQueue: return new MVKSemaphoreSingleQueue(this, pCreateInfo, pExportInfo, pImportInfo);
}
}
}
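Nothing changes on the application side: binary semaphores are created and submitted as usual, and the new MVKSemaphoreSingleQueue simply turns their wait/signal encodings into no-ops. A minimal sketch of such a signal-then-wait pair, using standard Vulkan calls inside an illustrative helper:

#include <vulkan/vulkan.h>

// Sketch: a signal-then-wait pair across two submissions on the same queue.
// With MVKSemaphoreSingleQueue, both encodings are no-ops because Metal already
// executes work on one queue as if it ran in submission order.
static void submitWithSemaphore(VkDevice device, VkQueue queue) {
	VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
	VkSemaphore renderDone = VK_NULL_HANDLE;
	vkCreateSemaphore(device, &semInfo, nullptr, &renderDone);

	VkSubmitInfo submitA = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
	submitA.signalSemaphoreCount = 1;
	submitA.pSignalSemaphores    = &renderDone;
	vkQueueSubmit(queue, 1, &submitA, VK_NULL_HANDLE);	// signals renderDone

	VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
	VkSubmitInfo submitB = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
	submitB.waitSemaphoreCount = 1;
	submitB.pWaitSemaphores    = &renderDone;
	submitB.pWaitDstStageMask  = &waitStage;
	vkQueueSubmit(queue, 1, &submitB, VK_NULL_HANDLE);	// waits on renderDone

	vkQueueWaitIdle(queue);
	vkDestroySemaphore(device, renderDone, nullptr);
}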
@@ -4438,26 +4460,17 @@ void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDe
_pProperties = &_physicalDevice->_properties;
_pMemoryProperties = &_physicalDevice->_memoryProperties;
// Decide whether Vulkan semaphores should use a MTLEvent or MTLFence if they are available.
// Prefer MTLEvent, because MTLEvent handles sync across MTLCommandBuffers and MTLCommandQueues.
// However, do not allow use of MTLEvents on Rosetta2 (x86 build on M1 runtime) or NVIDIA GPUs,
// which have demonstrated trouble with MTLEvents. In that case, since MTLFence use is disabled
// by default, unless MTLFence is deliberately enabled, CPU emulation will be used.
bool isNVIDIA = _pProperties->vendorID == kNVVendorId;
bool isRosetta2 = _pProperties->vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
bool canUseMTLEventForSem4 = _pMetalFeatures->events && mvkConfig().semaphoreUseMTLEvent && !(isRosetta2 || isNVIDIA);
bool canUseMTLFenceForSem4 = _pMetalFeatures->fences && mvkConfig().semaphoreUseMTLFence;
_vkSemaphoreStyle = canUseMTLEventForSem4 ? MVKSemaphoreStyleUseMTLEvent : (canUseMTLFenceForSem4 ? MVKSemaphoreStyleUseMTLFence : MVKSemaphoreStyleUseEmulation);
_vkSemaphoreStyle = _physicalDevice->getSemaphoreStyle();
switch (_vkSemaphoreStyle) {
case MVKSemaphoreStyleUseMTLEvent:
MVKLogInfo("Using MTLEvent for Vulkan semaphores.");
break;
case MVKSemaphoreStyleUseMTLFence:
MVKLogInfo("Using MTLFence for Vulkan semaphores.");
break;
case MVKSemaphoreStyleUseEmulation:
MVKLogInfo("Using emulation for Vulkan semaphores.");
break;
case MVKSemaphoreStyleSingleQueue:
MVKLogInfo("Using Metal implicit guarantees within a single queue for Vulkan semaphores.");
break;
}
}


@@ -201,27 +201,29 @@ protected:
#pragma mark -
#pragma mark MVKSemaphoreMTLFence
#pragma mark MVKSemaphoreSingleQueue
/** An MVKSemaphore that uses MTLFence to provide synchronization. */
class MVKSemaphoreMTLFence : public MVKSemaphore {
/**
* An MVKSemaphore that relies on Metal's built-in ordering guarantees on single-queue submission to provide semaphore-like behaviour.
*
* Relies on Metal's enabled-by-default hazard tracking, and will need to start using MTLFences
* if we start using resources created with MTLHazardTrackingModeUntracked.
*/
class MVKSemaphoreSingleQueue : public MVKSemaphore {
public:
void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
uint64_t deferSignal() override;
void encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
bool isUsingCommandEncoding() override { return true; }
bool isUsingCommandEncoding() override { return false; }
MVKSemaphoreMTLFence(MVKDevice* device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkExportMetalObjectCreateInfoEXT* pExportInfo,
const VkImportMetalSharedEventInfoEXT* pImportInfo);
MVKSemaphoreSingleQueue(MVKDevice* device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkExportMetalObjectCreateInfoEXT* pExportInfo,
const VkImportMetalSharedEventInfoEXT* pImportInfo);
~MVKSemaphoreMTLFence() override;
protected:
id<MTLFence> _mtlFence;
~MVKSemaphoreSingleQueue() override;
};
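The doc comment above is the crux of the change: when every submission lands on one MTLCommandQueue, Metal's own ordering already provides what binary semaphores are asked to guarantee. A standalone sketch of that guarantee in plain Metal (the helper function, buffer names, and sizes are illustrative, not taken from this diff):

#import <Metal/Metal.h>

// Sketch of the Metal guarantee MVKSemaphoreSingleQueue relies on: command buffers
// committed to one MTLCommandQueue behave as if they executed in commit order, so the
// consumer sees the producer's writes without any MTLFence or MTLEvent, provided the
// buffers use Metal's default (tracked) hazard mode.
static void demoImplicitQueueOrdering(id<MTLDevice> mtlDevice) {
	id<MTLCommandQueue> mtlQueue = [mtlDevice newCommandQueue];
	id<MTLBuffer> sharedBuffer   = [mtlDevice newBufferWithLength: 256 options: MTLResourceStorageModeShared];
	id<MTLBuffer> readbackBuffer = [mtlDevice newBufferWithLength: 256 options: MTLResourceStorageModeShared];

	id<MTLCommandBuffer> producer = [mtlQueue commandBuffer];
	id<MTLBlitCommandEncoder> blitA = [producer blitCommandEncoder];
	[blitA fillBuffer: sharedBuffer range: NSMakeRange(0, 256) value: 0xFF];
	[blitA endEncoding];
	[producer commit];

	id<MTLCommandBuffer> consumer = [mtlQueue commandBuffer];
	id<MTLBlitCommandEncoder> blitB = [consumer blitCommandEncoder];
	[blitB copyFromBuffer: sharedBuffer sourceOffset: 0
				 toBuffer: readbackBuffer destinationOffset: 0 size: 256];	// observes the fill, no fence needed
	[blitB endEncoding];
	[consumer commit];
	[consumer waitUntilCompleted];
}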


@@ -77,47 +77,34 @@ MVKSemaphoreImpl::~MVKSemaphoreImpl() {
#pragma mark -
#pragma mark MVKSemaphoreMTLFence
#pragma mark MVKSemaphoreSingleQueue
// Could use any encoder. Assume BLIT is fastest and lightest.
// Nil mtlCmdBuff will do nothing.
void MVKSemaphoreMTLFence::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
[mtlCmdEnc waitForFence: _mtlFence];
[mtlCmdEnc endEncoding];
void MVKSemaphoreSingleQueue::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
// Metal will handle all synchronization for us automatically
}
// Could use any encoder. Assume BLIT is fastest and lightest.
// Nil mtlCmdBuff will do nothing.
void MVKSemaphoreMTLFence::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
[mtlCmdEnc updateFence: _mtlFence];
[mtlCmdEnc endEncoding];
void MVKSemaphoreSingleQueue::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
// Metal will handle all synchronization for us automatically
}
uint64_t MVKSemaphoreMTLFence::deferSignal() {
uint64_t MVKSemaphoreSingleQueue::deferSignal() {
return 0;
}
void MVKSemaphoreMTLFence::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
void MVKSemaphoreSingleQueue::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
encodeSignal(mtlCmdBuff, 0);
}
MVKSemaphoreMTLFence::MVKSemaphoreMTLFence(MVKDevice* device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkExportMetalObjectCreateInfoEXT* pExportInfo,
const VkImportMetalSharedEventInfoEXT* pImportInfo) : MVKSemaphore(device, pCreateInfo) {
_mtlFence = [device->getMTLDevice() newFence]; //retained
MVKSemaphoreSingleQueue::MVKSemaphoreSingleQueue(MVKDevice* device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkExportMetalObjectCreateInfoEXT* pExportInfo,
const VkImportMetalSharedEventInfoEXT* pImportInfo) : MVKSemaphore(device, pCreateInfo) {
if ((pImportInfo && pImportInfo->mtlSharedEvent) || (pExportInfo && pExportInfo->exportObjectType == VK_EXPORT_METAL_OBJECT_TYPE_METAL_SHARED_EVENT_BIT_EXT)) {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "vkCreateEvent(): MTLSharedEvent is not available with VkSemaphores that use MTLFence."));
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "vkCreateEvent(): MTLSharedEvent is not available with VkSemaphores that use implicit synchronization."));
}
}
MVKSemaphoreMTLFence::~MVKSemaphoreMTLFence() {
[_mtlFence release];
}
MVKSemaphoreSingleQueue::~MVKSemaphoreSingleQueue() = default;
#pragma mark -


@@ -50,7 +50,7 @@ static void mvkInitConfigFromEnvVars() {
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.logLevel, MVK_CONFIG_LOG_LEVEL);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.traceVulkanCalls, MVK_CONFIG_TRACE_VULKAN_CALLS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.forceLowPowerGPU, MVK_CONFIG_FORCE_LOW_POWER_GPU);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLFence, MVK_ALLOW_METAL_FENCES);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseSingleQueue, MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLEvent, MVK_ALLOW_METAL_EVENTS);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.autoGPUCaptureScope, MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE);
MVK_SET_FROM_ENV_OR_BUILD_STRING(evCfg.autoGPUCaptureOutputFilepath, MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE, evGPUCapFileStrObj);


@@ -231,16 +231,16 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig);
#endif
/**
* Allow the use of MTLFence or MTLEvent for VkSemaphore synchronization behaviour.
* Allow the use of a single queue or MTLEvent for VkSemaphore synchronization behaviour.
* By default:
* - MVK_ALLOW_METAL_EVENTS is enabled
* - MVK_ALLOW_METAL_FENCES is disabled
* - MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE is enabled
* */
#ifndef MVK_ALLOW_METAL_EVENTS
# define MVK_ALLOW_METAL_EVENTS 1
#endif
#ifndef MVK_ALLOW_METAL_FENCES
# define MVK_ALLOW_METAL_FENCES 0
#ifndef MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE
# define MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE 1
#endif
/** Substitute Metal 2D textures for Vulkan 1D images. Enabled by default. */