Merge remote-tracking branch 'origin/master' into stencil-image-views

2018-09-11 12:14:38 -05:00 · 2018-09-11 12:14:38 -05:00 · ad057e3478
commit ad057e3478
parent 6947fdf57e 3371bd3987
13 changed files with 197 additions and 285 deletions
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@ -60,26 +60,138 @@ extern "C" {
 /**
 * MoltenVK configuration settings.
 *
- * The default value of several of these settings is deterined at build time by the presence
+ * To change the MoltenVK configuration settings, use the vkGetMoltenVKConfigurationMVK() and
- * of a DEBUG build setting, By default the DEBUG build setting is defined when MoltenVK is
+ * vkSetMoltenVKConfigurationMVK() functions to retrieve, modify, and set a copy of this structure.
 * compiled in Debug mode, and not defined when compiled in Release mode. The default values
 * of the other settings is determined by other compile build settings when MoltenVK is compiled.
 * See the description of the individual configuration structure members for more information.
 *
 * To be active, some configuration settings must be set before a VkDevice is created.
 * See the description of the individual configuration structure members for more information.
 *
 * The initial value of several of these settings is determined when MoltenVK is compiled by the
 * presence of a DEBUG build setting. By default the DEBUG build setting is present when MoltenVK
 * is compiled in Debug mode, and not present when compiled in Release mode. The initial values
 * of the other settings are determined by other build settings when MoltenVK is compiled.
 * See the description of the individual configuration structure members for more information.
 */
 typedef struct {
-    VkBool32 debugMode;                            /**< If enabled, several debugging capabilities will be enabled. Shader code will be logged during Runtime Shader Conversion. Adjusts settings that might trigger Metal validation but are otherwise acceptable to Metal runtime. Improves support for Xcode GPU Frame Capture. Default value is true in the presence of the DEBUG build setting, and false otherwise. */
+
-    VkBool32 shaderConversionFlipVertexY;          /**< If enabled, MSL vertex shader code created during Runtime Shader Conversion will flip the Y-axis of each vertex, as Vulkan coordinate system is inverse of OpenGL. Initial value is set by the MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y build setting when MoltenVK is compiled. By default the MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y build setting is set to true. */
+	/**
-	VkBool32 synchronousQueueSubmits;              /**< If enabled, queue command submissions (vkQueueSubmit() & vkQueuePresentKHR()) will be processed on the thread that called the submission function. If disabled, processing will be dispatched to a GCD dispatch_queue whose priority is determined by VkDeviceQueueCreateInfo::pQueuePriorities during vkCreateDevice(). Initial value is set by the MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS build setting when MoltenVK is compiled. By default the MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS build setting is set to false, and command processing will be handled on a prioritizable queue thread. Changing the value of this parameter must be done before creating a VkDevice, for the change to take effect. */
+	 * If enabled, debugging capabilities will be enabled, including logging shader code
-    VkBool32 supportLargeQueryPools;               /**< Metal allows only 8192 occlusion queries per MTLBuffer. If enabled, MoltenVK allocates a MTLBuffer for each query pool, allowing each query pool to support 8192 queries, which may slow performance or cause unexpected behaviour if the query pool is not established prior to a Metal renderpass, or if the query pool is changed within a Metal renderpass. If disabled, one MTLBuffer will be shared by all query pools, which improves performance, but limits the total device queries to 8192. Initial value is set by the MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS build setting when MoltenVK is compiled. By default the MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS build setting is set to true. */
+	 * during runtime shader conversion.
-	VkBool32 presentWithCommandBuffer;             /**< If enabled, each surface presentation is scheduled using a command buffer. Enabling this setting may improve rendering frame synchronization, but may result in reduced frame rates. Initial value is set by the MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER build setting when MoltenVK is compiled. By default the MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER build setting is set to true. */
+	 *
-	VkBool32 swapchainMagFilterUseNearest;         /**< If enabled, swapchain images will use simple Nearest sampling when magnifying the swapchain image to fit a physical display surface. If disabled, swapchain images will use Linear sampling when magnifying the swapchain image to fit a physical display surface. Enabling this setting avoids smearing effects when swapchain images are simple interger multiples of display pixels (eg- macOS Retina, and typical of graphics apps and games), but may cause aliasing effects when using non-integer display scaling. Initial value is set by the MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST build setting when MoltenVK is compiled. By default the MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST build setting is set to true. */
+	 * Initial value is true in the presence of the DEBUG build setting, and false otherwise.
-    VkBool32 displayWatermark;                     /**< If enabled, a MoltenVK logo watermark will be rendered on top of the scene. This can be enabled for publicity during demos. Initial value is set by the MVK_CONFIG_DISPLAY_WATERMARK build setting when MoltenVK is compiled. By default the MVK_CONFIG_DISPLAY_WATERMARK build setting is set to false. */
+	 */
-    VkBool32 performanceTracking;                  /**< If enabled, per-frame performance statistics are tracked, optionally logged, and can be retrieved via the vkGetSwapchainPerformanceMVK() function, and various performance statistics are tracked, logged, and can be retrieved via the vkGetPerformanceStatisticsMVK() function. Initial value is true in the presence of the DEBUG build setting, and false otherwise. */
+    VkBool32 debugMode;
-    uint32_t performanceLoggingFrameCount;         /**< If non-zero, performance statistics will be periodically logged to the console, on a repeating cycle of this many frames per swapchain. The performanceTracking capability must also be enabled. Initial value is 300 in the presence of the DEBUG build setting, and zero otherwise. */
+
-	uint64_t metalCompileTimeout;			       /**< The maximum amount of time, in nanoseconds, to wait for a Metal library, function or pipeline state object to be compiled and created. If an internal error occurs with the Metal compiler, it can stall the thread for up to 30 seconds. Setting this value limits the delay to that amount of time. Initial value is set by the MVK_CONFIG_METAL_COMPILE_TIMEOUT build setting when MoltenVK is compiled. By default the MVK_CONFIG_METAL_COMPILE_TIMEOUT build setting is infinite. */
+	/**
 	 * If enabled, MSL vertex shader code created during runtime shader conversion will
 	 * flip the Y-axis of each vertex, as the Vulkan Y-axis is the inverse of OpenGL.
 	 * An alternate way to reverse the Y-axis is to employ a negative Y-axis value on
 	 * the viewport, in which case this parameter can be disabled.
 	 *
 	 * Initial value is set by the MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y build setting
 	 * when MoltenVK is compiled. By default the MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y
 	 * build setting is set to true.
 	 */
    VkBool32 shaderConversionFlipVertexY;
 	/**
 	 * If enabled, queue command submissions (vkQueueSubmit() & vkQueuePresentKHR())
 	 * will be processed on the thread that called the submission function. If disabled,
 	 * processing will be dispatched to a GCD dispatch_queue whose priority is determined
 	 * by VkDeviceQueueCreateInfo::pQueuePriorities during vkCreateDevice().
 	 *
 	 * Initial value is set by the MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS build setting when
 	 * MoltenVK is compiled. By default the MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS build setting
 	 * is set to false, and command processing will be handled on a prioritizable queue thread.
 	 * Changing the value of this parameter must be done before creating a VkDevice,
 	 * for the change to take effect.
 	 */
 	VkBool32 synchronousQueueSubmits;
 	/**
 	 * The maximum number of command buffers that can be concurrently active per Vulkan command pool.
 	 *
 	 * Initial value is set by the MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_POOL build setting
 	 * when MoltenVK is compiled. By default the MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_POOL
 	 * build setting is set to 64. Changing the value of this parameter must be done before creating
 	 * a VkDevice, for the change to take effect.
 	 */
 	uint32_t maxActiveMetalCommandBuffersPerPool;
 	/**
 	 * Metal allows only 8192 occlusion queries per MTLBuffer. If enabled, MoltenVK
 	 * allocates a MTLBuffer for each query pool, allowing each query pool to support
 	 * 8192 queries, which may slow performance or cause unexpected behaviour if the query
 	 * pool is not established prior to a Metal renderpass, or if the query pool is changed
 	 * within a renderpass. If disabled, one MTLBuffer will be shared by all query pools,
 	 * which improves performance, but limits the total device queries to 8192.
 	 *
 	 * Initial value is set by the MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS build setting
 	 * when MoltenVK is compiled. By default the MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS
 	 * build setting is set to true.
 	 */
 	VkBool32 supportLargeQueryPools;
 	/**
 	 * If enabled, each surface presentation is scheduled using a command buffer. Enabling this
 	 * setting may improve rendering frame synchronization, but may result in reduced frame rates.
 	 *
 	 * Initial value is set by the MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER build setting when MoltenVK
 	 * is compiled. By default the MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER build setting is set to true.
 	 */
 	VkBool32 presentWithCommandBuffer;
 	/**
 	 * If enabled, swapchain images will use simple Nearest sampling when magnifying the
 	 * swapchain image to fit a physical display surface. If disabled, swapchain images will
 	 * use Linear sampling when magnifying the swapchain image to fit a physical display surface.
 	 * Enabling this setting avoids smearing effects when swapchain images are simple integer
 	 * multiples of display pixels (eg- macOS Retina, and typical of graphics apps and games),
 	 * but may cause aliasing effects when using non-integer display scaling.
 	 *
 	 * Initial value is set by the MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST build setting
 	 * when MoltenVK is compiled. By default the MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST
 	 * build setting is set to true.
 	 */
 	VkBool32 swapchainMagFilterUseNearest;
 	/**
 	 * The maximum amount of time, in nanoseconds, to wait for a Metal library, function, or
 	 * pipeline state object to be compiled and created by the Metal compiler. An internal error
 	 * within the Metal compiler can stall the thread for up to 30 seconds. Setting this value
 	 * limits that delay to a specified amount of time, allowing shader compilations to fail fast.
 	 *
 	 * Initial value is set by the MVK_CONFIG_METAL_COMPILE_TIMEOUT build setting when MoltenVK
 	 * is compiled. By default the MVK_CONFIG_METAL_COMPILE_TIMEOUT build setting is infinite.
 	 */
 	uint64_t metalCompileTimeout;
 	/**
 	 * If enabled, per-frame performance statistics are tracked, optionally logged, and can be
 	 * retrieved via the vkGetSwapchainPerformanceMVK() function, and various performance statistics
 	 * are tracked, logged, and can be retrieved via the vkGetPerformanceStatisticsMVK() function.
 	 *
 	 * Initial value is true in the presence of the DEBUG build setting, and false otherwise.
 	 */
 	VkBool32 performanceTracking;
 	/**
 	 * If non-zero, performance statistics will be periodically logged to the console, on a repeating
 	 * cycle of this many frames per swapchain. The performanceTracking capability must also be enabled.
 	 *
 	 * Initial value is 300 in the presence of the DEBUG build setting, and zero otherwise.
 	 */
 	uint32_t performanceLoggingFrameCount;
 	/**
 	 * If enabled, a MoltenVK logo watermark will be rendered on top of the scene.
 	 * This can be enabled for publicity during demos.
 	 *
 	 * Initial value is set by the MVK_CONFIG_DISPLAY_WATERMARK build setting when MoltenVK
 	 * is compiled. By default the MVK_CONFIG_DISPLAY_WATERMARK build setting is set to false.
 	 */
 	VkBool32 displayWatermark;
 } MVKConfiguration;
 /** Features provided by the current implementation of Metal on the current device. */
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@ -438,64 +438,6 @@ protected:
 };
 #pragma mark -
 #pragma mark MVKMTLCommandBufferCountdown
 /**
 * Abstract class that can be initialized with the number of active MTLCommandBuffers and the
 * ID of the MTLCommandBuffer after those tracked by this countdown, counts down as each earlier
 * active MTLCommandBuffer completes, and takes action when the countdown reaches zero.
 *
 * Subclasses must override the finish() member function to perform the action
 * that is to be taken upon completion of the countdown.
 *
 * This class is not thread-safe. When using this class with multiple threads,
 * you must ensure that operations that change the count value are guarded.
 */
 class MVKMTLCommandBufferCountdown : public MVKBaseObject {
 public:
 	/**
 	 * Sets the number of active MTLCommandBuffers and the ID of the next MTLCommandBuffer
 	 * after those tracked by this countdown. This countdown is interested in MTLCommandBuffers
 	 * whose IDs are less than the specified ID.
 	 *
 	 * If the count is zero, the finish() member function is called.
 	 *
 	 * Returns whether the count is zero. If this function returns true, it is possible
 	 * that this instance has completed and has been destroyed. No further references should be
 	 * made to this instance.
 	 */
 	bool setActiveMTLCommandBufferCount(uint32_t count, MVKMTLCommandBufferID mtlCmdBuffID);
 	/**
 	 * Called when the MTLCommandBuffer with the specified ID has completed. If the specified
 	 * ID is less than the ID registered via the setActiveMTLCommandBufferCount() function,
 	 * and the count is not already zero, the count of active MTLCommandBuffers is decremented.
 	 * If the count is zero, the finish() member function is called.
 	 *
 	 * Returns whether the count is now at zero. If this function returns true, it is possible
 	 * that this instance has completed and has been destroyed. No further references should be
 	 * made to this instance.
 	 */
 	bool mtlCommandBufferHasCompleted(MVKMTLCommandBufferID mtlCmdBuffID);
 	/** Returns the current count value. */
 	uint32_t getCount();
 protected:
 	/** Performs the action to take when the count has reached zero. */
 	virtual void finish() = 0;
 	/** If the count is zero, calls the finish() member function. Returns whether the count is zero. */
    bool checkFinished();
 	/** The number of tracked MTLCommandBuffers still being counted down. */
 	uint32_t _activeMTLCommandBufferCount;
 	/** Only completions of MTLCommandBuffers whose ID is less than this value decrement the count. */
 	MVKMTLCommandBufferID _maxMTLCmdBuffID;
 };
 #pragma mark -
 #pragma mark Support functions
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@ -526,35 +526,6 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer,
 }
 #pragma mark -
 #pragma mark MVKMTLCommandBufferCountdown
 // Records the ID threshold and the number of MTLCommandBuffers to count down,
 // then immediately checks whether the countdown is already complete.
 // Returns whether the count is zero, in which case finish() has been called.
 bool MVKMTLCommandBufferCountdown::setActiveMTLCommandBufferCount(uint32_t count, MVKMTLCommandBufferID mtlCmdBuffID) {
 	_maxMTLCmdBuffID = mtlCmdBuffID;
 	_activeMTLCommandBufferCount = count;
 	const bool isAlreadyDone = checkFinished();
 	return isAlreadyDone;
 }
 // Counts down only for MTLCommandBuffers whose ID is below the registered ID threshold,
 // and never below zero, then reports whether the countdown has reached zero.
 bool MVKMTLCommandBufferCountdown::mtlCommandBufferHasCompleted(MVKMTLCommandBufferID mtlCmdBuffID) {
 	const bool isTracked = (mtlCmdBuffID < _maxMTLCmdBuffID);
 	const bool hasRemaining = (_activeMTLCommandBufferCount != 0);
 	if (isTracked && hasRemaining) { --_activeMTLCommandBufferCount; }
 	return checkFinished();
 }
 // Invokes the finish() member function when no active MTLCommandBuffers remain.
 // Returns whether the count was zero. The result is decided before finish() runs,
 // so no member of this instance is read after finish() returns.
 bool MVKMTLCommandBufferCountdown::checkFinished() {
 	if (_activeMTLCommandBufferCount != 0) { return false; }
 	finish();
 	return true;
 }
 // Returns the current value of the countdown.
 uint32_t MVKMTLCommandBufferCountdown::getCount() {
 	return _activeMTLCommandBufferCount;
 }
 #pragma mark -
 #pragma mark Support functions
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@ -557,6 +557,7 @@ protected:
 	MVKResource* addResource(MVKResource* rez);
 	MVKResource* removeResource(MVKResource* rez);
    void initPerformanceTracking();
 	void initQueues(const VkDeviceCreateInfo* pCreateInfo);
    const char* getActivityPerformanceDescription(MVKPerformanceTracker& shaderCompilationEvent);
 	uint64_t getPerformanceTimestampImpl();
 	void addActivityPerformanceImpl(MVKPerformanceTracker& shaderCompilationEvent,
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@ -1056,20 +1056,19 @@ void MVKPhysicalDevice::logGPUInfo() {
 			   [[[NSUUID alloc] initWithUUIDBytes: _properties.pipelineCacheUUID] autorelease].UUIDString.UTF8String);
 }
-/** Initializes the queue families supported by this instance. */
+// Initializes the queue families supported by this instance.
 void MVKPhysicalDevice::initQueueFamilies() {
 	// TODO: determine correct values
 	VkQueueFamilyProperties qfProps;
 	uint32_t qfIdx;
 	qfProps.queueCount = 1;		// In Metal, each family must have a single queue
 	qfIdx = 0;
 	qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
 	qfProps.queueCount = 8;
 	qfProps.timestampValidBits = 64;
 	qfProps.minImageTransferGranularity = { 1, 1, 1};
-	uint32_t qfCount = 1;
+	_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx, &qfProps));
 	for (uint32_t qfIdx = 0; qfIdx < qfCount; qfIdx++) {
 		_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx, &qfProps));
 	}
 }
 MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtlDevice) {
@ -1553,18 +1552,7 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
 												  pCreateInfo->ppEnabledExtensionNames,
 												  getInstance()->getDriverLayer()->getSupportedExtensions()));
-	// Create the queues
+	initQueues(pCreateInfo);
 	uint32_t qrCnt = pCreateInfo->queueCreateInfoCount;
 	for (uint32_t qrIdx = 0; qrIdx < qrCnt; qrIdx++) {
 		const VkDeviceQueueCreateInfo* pQFInfo = &pCreateInfo->pQueueCreateInfos[qrIdx];
 		uint32_t qfIdx = pQFInfo->queueFamilyIndex;
 		MVKQueueFamily* qFam = _physicalDevice->_queueFamilies[qfIdx];
 		_queuesByQueueFamilyIndex.resize(qfIdx + 1);	// Ensure an entry for this queue family exists
 		auto& queues = _queuesByQueueFamilyIndex[qfIdx];
 		for (uint32_t qIdx = 0; qIdx < pQFInfo->queueCount; qIdx++) {
 			queues.push_back(new MVKQueue(this, qFam, qIdx, pQFInfo->pQueuePriorities[qIdx]));
 		}
 	}
 	string logMsg = "Created VkDevice to run on GPU %s with the following Vulkan extensions enabled:";
 	logMsg += _enabledExtensions.enabledNamesString("\n\t\t", true);
@ -1592,6 +1580,21 @@ void MVKDevice::initPerformanceTracking() {
 	_performanceStatistics.queue.mtlQueueAccess = initPerf;
 }
 // Creates the command queues requested in the device creation info.
 // For each VkDeviceQueueCreateInfo, creates queueCount MVKQueue instances in the requested
 // queue family, each with its corresponding priority, and stores them in
 // _queuesByQueueFamilyIndex, indexed by queue family index.
 void MVKDevice::initQueues(const VkDeviceCreateInfo* pCreateInfo) {
 	uint32_t qrCnt = pCreateInfo->queueCreateInfoCount;
 	for (uint32_t qrIdx = 0; qrIdx < qrCnt; qrIdx++) {
 		const VkDeviceQueueCreateInfo* pQFInfo = &pCreateInfo->pQueueCreateInfos[qrIdx];
 		uint32_t qfIdx = pQFInfo->queueFamilyIndex;
 		MVKQueueFamily* qFam = _physicalDevice->_queueFamilies[qfIdx];
 		// Ensure an entry for this queue family exists, but only ever grow the collection.
 		// An unconditional resize(qfIdx + 1) would shrink it whenever a create info with a
 		// lower queue family index follows one with a higher index, discarding (and leaking)
 		// the queues already created for the higher-indexed families.
 		if (_queuesByQueueFamilyIndex.size() <= qfIdx) {
 			_queuesByQueueFamilyIndex.resize(qfIdx + 1);
 		}
 		auto& queues = _queuesByQueueFamilyIndex[qfIdx];
 		for (uint32_t qIdx = 0; qIdx < pQFInfo->queueCount; qIdx++) {
 			queues.push_back(new MVKQueue(this, qFam, qIdx, pQFInfo->pQueuePriorities[qIdx]));
 		}
 	}
 }
 MVKDevice::~MVKDevice() {
 	for (auto& queues : _queuesByQueueFamilyIndex) {
 		mvkDestroyContainerContents(queues);
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@ -345,16 +345,17 @@ void MVKInstance::logVersions() {
 // Init config.
 void MVKInstance::initConfig() {
-	_mvkConfig.debugMode					= MVK_DEBUG;
+	_mvkConfig.debugMode							= MVK_DEBUG;
-	_mvkConfig.shaderConversionFlipVertexY 	= MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y;
+	_mvkConfig.shaderConversionFlipVertexY			= MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y;
-	_mvkConfig.synchronousQueueSubmits		= MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS;
+	_mvkConfig.synchronousQueueSubmits				= MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS;
-	_mvkConfig.supportLargeQueryPools		= MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS;
+	_mvkConfig.maxActiveMetalCommandBuffersPerPool	= MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_POOL;
-	_mvkConfig.presentWithCommandBuffer		= MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER;
+	_mvkConfig.supportLargeQueryPools				= MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS;
-	_mvkConfig.swapchainMagFilterUseNearest	= MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST;
+	_mvkConfig.presentWithCommandBuffer				= MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER;
-	_mvkConfig.displayWatermark				= MVK_CONFIG_DISPLAY_WATERMARK;
+	_mvkConfig.swapchainMagFilterUseNearest			= MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST;
-	_mvkConfig.performanceTracking			= MVK_DEBUG;
+	_mvkConfig.displayWatermark						= MVK_CONFIG_DISPLAY_WATERMARK;
-	_mvkConfig.performanceLoggingFrameCount	= MVK_DEBUG ? 300 : 0;
+	_mvkConfig.performanceTracking					= MVK_DEBUG;
-	_mvkConfig.metalCompileTimeout			= MVK_CONFIG_METAL_COMPILE_TIMEOUT;
+	_mvkConfig.performanceLoggingFrameCount			= MVK_DEBUG ? 300 : 0;
 	_mvkConfig.metalCompileTimeout					= MVK_CONFIG_METAL_COMPILE_TIMEOUT;
 }
 VkResult MVKInstance::verifyLayers(uint32_t count, const char* const* names) {
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@ -46,7 +46,7 @@ public:
 	inline uint32_t getIndex() { return _queueFamilyIndex; }
 	/** Populates the specified properties structure. */
-	void getProperties(VkQueueFamilyProperties* queueProperties) {
+	inline void getProperties(VkQueueFamilyProperties* queueProperties) {
 		if (queueProperties) { *queueProperties = _properties; }
 	}
@ -87,29 +87,6 @@ public:
 	/** Block the current thread until this queue is idle. */
 	VkResult waitIdle(MVKCommandUse cmdBuffUse);
 	/**
 	 * Retrieves a MTLCommandBuffer instance from the contained MTLCommandQueue, adds a 
 	 * completion handler to it so that the mtlCommandBufferHasCompleted() function will 
 	 * be called when the MTLCommandBuffer completes, and returns the MTLCommandBuffer.
 	 */
 	id<MTLCommandBuffer> makeMTLCommandBuffer(NSString* mtlCmdBuffLabel);
 	/** Called automatically when the specified MTLCommandBuffer with the specified ID has completed. */
 	void mtlCommandBufferHasCompleted(id<MTLCommandBuffer> mtlCmdBuff, MVKMTLCommandBufferID mtlCmdBuffID);
 	/**
 	 * Registers the specified countdown object. This function sets the count value
 	 * of the countdown object to the current number of incomplete MTLCommandBuffers,
 	 * and marks the countdown object with the ID of the most recently registered
 	 * MTLCommandBuffer. The countdown object will be decremented each time a
 	 * MTLCommandBuffer completes whose ID is less than the ID of the most recent
 	 * MTLCommandBuffer at the time the countdown object was registered.
 	 *
 	 * If the current number of incomplete MTLCommandBuffers is zero, the countdown
 	 * object will indicate that it is already completed, and will not be registered.
 	 */
 	void registerMTLCommandBufferCountdown(MVKMTLCommandBufferCountdown* countdown);
    /** Returns the command encoding pool. */
    inline MVKCommandEncodingPool* getCommandEncodingPool() { return &_commandEncodingPool; }
@ -161,9 +138,6 @@ protected:
 	dispatch_queue_t _execQueue;
 	id<MTLCommandQueue> _mtlQueue;
 	std::string _name;
 	std::vector<MVKMTLCommandBufferCountdown*> _completionCountdowns;
 	std::mutex _completionLock;
 	uint32_t _activeMTLCommandBufferCount;
 	MVKMTLCommandBufferID _nextMTLCmdBuffID;
    MVKCommandEncodingPool _commandEncodingPool;
 	MVKGPUCaptureScope* _submissionCaptureScope;
@ -171,26 +145,6 @@ protected:
 };
 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmissionCountdown
 /**
 * Counts down MTLCommandBuffers on behalf of an MVKQueueCommandBufferSubmission instance.
 * When the count reaches zero, finish() is called on that submission.
 */
 class MVKQueueCommandBufferSubmissionCountdown : public MVKMTLCommandBufferCountdown {
 public:
 	/** Constructs an instance that counts down on behalf of the specified queue submission. */
 	MVKQueueCommandBufferSubmissionCountdown(MVKQueueCommandBufferSubmission* qSub);
 protected:
 	/** Performs the action to take when the count has reached zero. */
 	virtual void finish();
 	/** The queue submission whose finish() function is called when the countdown completes. */
 	MVKQueueCommandBufferSubmission* _qSub;
 };
 #pragma mark -
 #pragma mark MVKQueueSubmission
@ -247,7 +201,7 @@ public:
 	id<MTLCommandBuffer> getActiveMTLCommandBuffer();
 	/** Commits and releases the currently active MTLCommandBuffer. */
-	void commitActiveMTLCommandBuffer();
+	void commitActiveMTLCommandBuffer(bool signalCompletion = false);
 	/** 
     * Constructs an instance for the device and queue.
@ -265,9 +219,6 @@ public:
 protected:
 	friend MVKCommandEncoder;
    NSString* getMTLCommandBufferName();
 	MVKQueueCommandBufferSubmissionCountdown _cmdBuffCountdown;
 	std::vector<MVKCommandBuffer*> _cmdBuffers;
 	std::vector<MVKSemaphore*> _signalSemaphores;
 	MVKFence* _fence;
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@ -16,6 +16,7 @@
 * limitations under the License.
 */
 #include "MVKInstance.h"
 #include "MVKQueue.h"
 #include "MVKSwapchain.h"
 #include "MVKSync.h"
@ -37,7 +38,8 @@ id<MTLCommandQueue> MVKQueueFamily::getMTLCommandQueue(uint32_t queueIndex) {
 	lock_guard<mutex> lock(_qLock);
 	id<MTLCommandQueue> mtlQ = _mtlQueues[queueIndex];
 	if ( !mtlQ ) {
-		mtlQ = [_physicalDevice->getMTLDevice() newCommandQueue];	// retained
+		uint32_t maxCmdBuffs = _physicalDevice->getInstance()->getMoltenVKConfiguration()->maxActiveMetalCommandBuffersPerPool;
 		mtlQ = [_physicalDevice->getMTLDevice() newCommandQueueWithMaxCommandBufferCount: maxCmdBuffs];		// retained
 		_mtlQueues[queueIndex] = mtlQ;
 	}
 	return mtlQ;
@ -124,62 +126,6 @@ VkResult MVKQueue::waitIdle(MVKCommandUse cmdBuffUse) {
 	return mvkWaitForFences(1, &fence, false);
 }
 // Retrieves a new MTLCommandBuffer from the MTLCommandQueue, labels it, assigns it a unique ID,
 // arranges for mtlCommandBufferHasCompleted() to be called when it completes, and includes it
 // in the running count of active MTLCommandBuffers.
 //
 // This function is guarded against conflict with the mtlCommandBufferHasCompleted()
 // function, but is not threadsafe against calls to this function itself, or to the
 // registerMTLCommandBufferCountdown() function from multiple threads. It is assumed
 // that this function and the registerMTLCommandBufferCountdown() function are called
 // from a single thread.
 id<MTLCommandBuffer> MVKQueue::makeMTLCommandBuffer(NSString* mtlCmdBuffLabel) {
 	// Retrieve a MTLCommandBuffer from the MTLCommandQueue.
 	id<MTLCommandBuffer> mtlCmdBuffer = [_mtlQueue commandBufferWithUnretainedReferences];
    mtlCmdBuffer.label = mtlCmdBuffLabel;
 	// Assign a unique ID to the MTLCommandBuffer, and track when it completes.
 	// The ID is taken and incremented outside the lock, hence the single-thread assumption above.
    MVKMTLCommandBufferID mtlCmdBuffID = _nextMTLCmdBuffID++;
 	// NOTE(review): the handler captures this queue by pointer, so presumably the queue
 	// must outlive every MTLCommandBuffer created here - confirm against the destructor.
 	[mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer> mtlCmdBuff) {
 		this->mtlCommandBufferHasCompleted(mtlCmdBuff, mtlCmdBuffID);
 	}];
    // Keep a running count of the active MTLCommandBuffers.
    // This needs to be guarded against a race condition with a MTLCommandBuffer completing.
    lock_guard<mutex> lock(_completionLock);
 	_activeMTLCommandBufferCount++;
 	return mtlCmdBuffer;
 }
 // This function must be called after all corresponding calls to makeMTLCommandBuffer(),
 // and from the same thread. The countdown is initialized with the current number of active
 // MTLCommandBuffers, and is tracked only if it did not complete immediately.
 void MVKQueue::registerMTLCommandBufferCountdown(MVKMTLCommandBufferCountdown* countdown) {
 	lock_guard<mutex> lock(_completionLock);
 	bool isAlreadyDone = countdown->setActiveMTLCommandBufferCount(_activeMTLCommandBufferCount, _nextMTLCmdBuffID);
 	if (isAlreadyDone) { return; }
 	_completionCountdowns.push_back(countdown);
 }
 // Called when the MTLCommandBuffer with the specified ID has completed. Reduces the running
 // count of active MTLCommandBuffers, notifies every registered countdown of the completion,
 // and stops tracking any countdown that reports it has finished.
 void MVKQueue::mtlCommandBufferHasCompleted(id<MTLCommandBuffer> mtlCmdBuff, MVKMTLCommandBufferID mtlCmdBuffID) {
 	lock_guard<mutex> lock(_completionLock);
 	_activeMTLCommandBufferCount--;
 	// Compact the countdowns in place: each countdown that is still running is written
 	// back at the next free slot, which never lies beyond the slot it was read from.
 	uint32_t keptCnt = 0;
 	for (MVKMTLCommandBufferCountdown* mvkCD : _completionCountdowns) {
 		if (mvkCD->mtlCommandBufferHasCompleted(mtlCmdBuffID)) { continue; }	// finished, so drop it
 		_completionCountdowns[keptCnt] = mvkCD;
 		keptCnt++;
 	}
 	// Trim the slots left over from dropped countdowns. No effect if none were dropped.
 	_completionCountdowns.resize(keptCnt);
 }
 #pragma mark Construction
@ -191,7 +137,6 @@ MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t inde
 	_queueFamily = queueFamily;
 	_index = index;
 	_priority = priority;
 	_activeMTLCommandBufferCount = 0;
 	_nextMTLCmdBuffID = 1;
 	initName();
@ -220,7 +165,7 @@ void MVKQueue::initExecQueue() {
 	}
 }
-// Creates and initializes the Metal queue.
+// Retrieves and initializes the Metal command queue.
 void MVKQueue::initMTLCommandQueue() {
 	uint64_t startTime = _device->getPerformanceTimestamp();
 	_mtlQueue = _queueFamily->getMTLCommandQueue(_index);	// not retained (cached in queue family)
@ -236,15 +181,6 @@ void MVKQueue::initGPUCaptureScopes() {
 }
 MVKQueue::~MVKQueue() {
    // Delay destroying this queue until registerMTLCommandBufferCountdown() is done.
    // registerMTLCommandBufferCountdown() can trigger a queue submission to finish(),
    // which may trigger semaphores that control a queue waitIdle(). If that waitIdle()
    // is being called by the app just prior to device and queue destruction, a rare race
    // condition exists if registerMTLCommandBufferCountdown() does not complete before
    // this queue is destroyed. If _completionLock is destroyed along with this queue,
    // before registerMTLCommandBufferCountdown() completes, a SIGABRT crash will arise
    // in the destructor of the lock created in registerMTLCommandBufferCountdown().
    lock_guard<mutex> lock(_completionLock);
 	destroyExecQueue();
 	_submissionCaptureScope->destroy();
 	_presentationCaptureScope->destroy();
@ -259,16 +195,6 @@ void MVKQueue::destroyExecQueue() {
 }
 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmissionCountdown
 // Remembers the queue submission on whose behalf this countdown runs.
 MVKQueueCommandBufferSubmissionCountdown::MVKQueueCommandBufferSubmissionCountdown(MVKQueueCommandBufferSubmission* qSub)
 	: _qSub(qSub) {}
 // Forwards completion of the countdown to the queue submission supplied at construction.
 void MVKQueueCommandBufferSubmissionCountdown::finish() {
 	_qSub->finish();
 }
 #pragma mark -
 #pragma mark MVKQueueSubmission
@ -296,14 +222,11 @@ void MVKQueueSubmission::recordResult(VkResult vkResult) {
 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission
 std::atomic<uint32_t> _subCount;
 void MVKQueueCommandBufferSubmission::execute() {
 //	MVKLogDebug("Executing submission %p.", this);
-	auto cs = _queue->_submissionCaptureScope;
+	_queue->_submissionCaptureScope->beginScope();
 	cs->beginScope();
    // Execute each command buffer, or if no command buffers, but a fence or semaphores,
    // create an empty MTLCommandBuffer to trigger the semaphores and fence.
@ -319,23 +242,20 @@ void MVKQueueCommandBufferSubmission::execute() {
 		}
    }
-	commitActiveMTLCommandBuffer();
+	// Nothing after this because callback might destroy this instance before this function ends.
-
+	commitActiveMTLCommandBuffer(true);
 	cs->endScope();
    // Register for callback when MTLCommandBuffers have completed
    _queue->registerMTLCommandBufferCountdown(&_cmdBuffCountdown);
 }
 // Returns the MTLCommandBuffer currently being filled by this submission.
 // If none is active yet, one is created, given a label derived from _cmdBuffUse,
 // and enqueued before being cached in _activeMTLCommandBuffer and returned.
 // Subsequent calls return the cached command buffer until it is cleared elsewhere.
 id<MTLCommandBuffer> MVKQueueCommandBufferSubmission::getActiveMTLCommandBuffer() {
 	if ( !_activeMTLCommandBuffer ) {
-		_activeMTLCommandBuffer = _queue->makeMTLCommandBuffer(getMTLCommandBufferName());
+		_activeMTLCommandBuffer = [_queue->_mtlQueue commandBufferWithUnretainedReferences];
 		_activeMTLCommandBuffer.label = mvkMTLCommandBufferLabel(_cmdBuffUse);
 		[_activeMTLCommandBuffer enqueue];
 	}
 	return _activeMTLCommandBuffer;
 }
-void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer() {
+void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) {
 	// Wait on each wait semaphore in turn. It doesn't matter which order they are signalled.
 	// We have delayed this as long as possible to allow as much filling of the MTLCommandBuffer
@ -345,24 +265,26 @@ void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer() {
 		for (auto& ws : _waitSemaphores) { ws->wait(); }
 	}
-	[_activeMTLCommandBuffer commit];
+	if (signalCompletion) {
-	_activeMTLCommandBuffer = nil;			// not retained
+		[_activeMTLCommandBuffer addCompletedHandler: ^(id<MTLCommandBuffer> mtlCmdBuff) {
-}
+			this->finish();
 		}];
 	}
-// Returns an NSString suitable for use as a label
+	// Use temp var because callback may destroy this instance before this function ends.
-NSString* MVKQueueCommandBufferSubmission::getMTLCommandBufferName() {
+	id<MTLCommandBuffer> mtlCmdBuff = _activeMTLCommandBuffer;
-    switch (_cmdBuffUse) {
+	_activeMTLCommandBuffer = nil;			// not retained
-        case kMVKCommandUseQueueSubmit:
+	[mtlCmdBuff commit];
            return [NSString stringWithFormat: @"%@ (virtual for sync)", mvkMTLCommandBufferLabel(_cmdBuffUse)];
        default:
            return mvkMTLCommandBufferLabel(_cmdBuffUse);
    }
 }
 void MVKQueueCommandBufferSubmission::finish() {
 //	MVKLogDebug("Finishing submission %p. Submission count %u.", this, _subCount--);
 	// Performed here instead of as part of execute() for rare case where app destroys queue
 	// immediately after a waitIdle() is cleared by fence below, taking the capture scope with it.
 	_queue->_submissionCaptureScope->endScope();
 	// Signal each of the signal semaphores.
    for (auto& ss : _signalSemaphores) { ss->signal(); }
@ -380,7 +302,7 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKDevice* devi
        : MVKQueueSubmission(device,
 							 queue,
 							 (pSubmit ? pSubmit->waitSemaphoreCount : 0),
-							 (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), _cmdBuffCountdown(this) {
+							 (pSubmit ? pSubmit->pWaitSemaphores : nullptr)) {
    // pSubmit can be null if just tracking the fence alone
    if (pSubmit) {
@ -403,6 +325,7 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKDevice* devi
    _cmdBuffUse= cmdBuffUse;
 	_activeMTLCommandBuffer = nil;
 //	static std::atomic<uint32_t> _subCount;
 //	MVKLogDebug("Creating submission %p. Submission count %u.", this, ++_subCount);
 }
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@ -49,6 +49,14 @@
 #   define MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS    0
 #endif
 /**
 * The number of concurrently active Metal command buffers that can be
 * allocated per Vulkan command pool. Default is Metal's default value of 64.
 *
 * The #ifndef guard keeps any value that was already defined before this point
 * (for example through a compiler build setting); 64 is used only when no such
 * definition exists.
 */
 #ifndef MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_POOL
 #   define MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_POOL    64
 #endif
 /** Support more than 8192 occlusion queries per buffer. Enabled by default. */
 #ifndef MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS
 #   define MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS    1
--- a/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK
+++ b/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK
--- a/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK
+++ b/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK
--- a/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK
+++ b/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK
--- a/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK
+++ b/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MoltenVK