Merge main branch into VK_EXT_layer_settings branch.

This commit is contained in:
Bill Hollings 2023-12-09 13:31:53 -05:00
commit ac46188bba
90 changed files with 4878 additions and 2855 deletions

View File

@ -10,12 +10,12 @@ on:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
# See the following, which includes links to supported macOS versions, including supported Xcode versions
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
jobs:
build:
strategy:
matrix:
xcode: [ "14.3.1" ]
xcode: [ "15.0" ]
platform: [ "all", "macos", "ios" ]
os: [ "macos-13" ]
upload_artifacts: [ true ]
@ -39,6 +39,11 @@ jobs:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v3
# Python 3.12 removed distutils, which is used by glslang::update_glslang_sources.py called from fetchDependencies
- uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Select Xcode version
run: sudo xcode-select -switch "${XCODE_DEV_PATH}"

View File

@ -24,6 +24,9 @@
#include <limits>
#pragma mark -
#pragma mark Operating System versions
typedef float MVKOSVersion;
/*** Constant indicating unsupported functionality in an OS. */
@ -39,44 +42,58 @@ static const MVKOSVersion kMVKOSVersionUnsupported = std::numeric_limits<MVKOSVe
MVKOSVersion mvkOSVersion();
/** Returns a MVKOSVersion built from the version components. */
inline MVKOSVersion mvkMakeOSVersion(uint32_t major, uint32_t minor, uint32_t patch) {
static inline MVKOSVersion mvkMakeOSVersion(uint32_t major, uint32_t minor, uint32_t patch) {
return (float)major + ((float)minor / 100.0f) + ((float)patch / 10000.0f);
}
/** Returns whether the operating system version is at least minVer. */
inline bool mvkOSVersionIsAtLeast(MVKOSVersion minVer) { return mvkOSVersion() >= minVer; }
static inline bool mvkOSVersionIsAtLeast(MVKOSVersion minVer) { return mvkOSVersion() >= minVer; }
/**
* Returns whether the operating system version is at least the appropriate min version.
* The constant kMVKOSVersionUnsupported can be used for either value to cause the test
* to always fail on that OS, which is useful for indidicating functionalty guarded by
* The constant kMVKOSVersionUnsupported can be used for any of the values to cause the test
* to always fail on that OS, which is useful for indicating that functionality guarded by
* this test is not supported on that OS.
*/
inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, MVKOSVersion iOSMinVer, MVKOSVersion visionOSMinVer) {
static inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer,
MVKOSVersion iOSMinVer,
MVKOSVersion visionOSMinVer) {
#if MVK_MACOS
return mvkOSVersionIsAtLeast(macOSMinVer);
#endif
#if MVK_IOS_OR_TVOS
return mvkOSVersionIsAtLeast(iOSMinVer);
#endif
#if MVK_VISIONOS
return mvkOSVersionIsAtLeast(visionOSMinVer);
#elif MVK_IOS_OR_TVOS
return mvkOSVersionIsAtLeast(iOSMinVer);
#endif
}
#pragma mark -
#pragma mark Timestamps
/**
* Returns a monotonic timestamp value for use in Vulkan and performance timestamping.
* Returns a monotonic tick value for use in Vulkan and performance timestamping.
*
* The returned value corresponds to the number of CPU "ticks" since the app was initialized.
*
* Calling this value twice, subtracting the first value from the second, and then multiplying
* the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the
* number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds()
* can be used to perform this calculation.
* The returned value corresponds to the number of CPU ticks since an arbitrary
* point in the past, and does not increment while the system is asleep.
*/
uint64_t mvkGetTimestamp();
/** Returns the number of nanoseconds between each increment of the value returned by mvkGetTimestamp(). */
double mvkGetTimestampPeriod();
/**
* Returns the number of runtime nanoseconds since an arbitrary point in the past,
* excluding any time spent while the system is asleep.
*
* This value corresponds to the timestamps returned by Metal presentation timings.
*/
uint64_t mvkGetRuntimeNanoseconds();
/**
* Returns the number of nanoseconds since an arbitrary point in the past,
* including any time spent while the system is asleep.
*/
uint64_t mvkGetContinuousNanoseconds();
/**
* Returns the number of nanoseconds elapsed between startTimestamp and endTimestamp,
@ -94,73 +111,27 @@ uint64_t mvkGetElapsedNanoseconds(uint64_t startTimestamp = 0, uint64_t endTimes
*/
double mvkGetElapsedMilliseconds(uint64_t startTimestamp = 0, uint64_t endTimestamp = 0);
/** Returns the current absolute time in nanoseconds. */
uint64_t mvkGetAbsoluteTime();
/** Ensures the block is executed on the main thread. */
void mvkDispatchToMainAndWait(dispatch_block_t block);
#pragma mark -
#pragma mark Process environment
/**
* Returns the value of the environment variable at the given name,
* or an empty string if no environment variable with that name exists.
*
* If pWasFound is not null, its value is set to true if the environment
* variable exists, or false if not.
* Sets the value of the environment variable at the given name, into the
* std::string, and returns whether the environment variable was found.
*/
std::string mvkGetEnvVar(std::string varName, bool* pWasFound = nullptr);
bool mvkGetEnvVar(const char* evName, std::string& evStr);
/**
* Returns a pointer to a string containing the value of the environment variable at
* the given name, or returns the default value if the environment variable was not set.
*/
const char* mvkGetEnvVarString(const char* evName, std::string& evStr, const char* defaultValue = "");
/**
* Returns the value of the environment variable at the given name,
* or zero if no environment variable with that name exists.
*
* If pWasFound is not null, its value is set to true if the environment
* variable exists, or false if not.
* or returns the default value if the environment variable was not set.
*/
int64_t mvkGetEnvVarInt64(std::string varName, bool* pWasFound = nullptr);
/**
* Returns the value of the environment variable at the given name,
* or false if no environment variable with that name exists.
*
* If pWasFound is not null, its value is set to true if the environment
* variable exists, or false if not.
*/
bool mvkGetEnvVarBool(std::string varName, bool* pWasFound = nullptr);
#define MVK_SET_FROM_ENV_OR_BUILD_BOOL(cfgVal, EV) \
do { \
bool wasFound = false; \
bool ev = mvkGetEnvVarBool(#EV, &wasFound); \
cfgVal = wasFound ? ev : EV; \
} while(false)
#define MVK_SET_FROM_ENV_OR_BUILD_INT64(cfgVal, EV) \
do { \
bool wasFound = false; \
int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \
cfgVal = wasFound ? ev : EV; \
} while(false)
// Pointer cast permits cfgVal to be an enum var
#define MVK_SET_FROM_ENV_OR_BUILD_INT32(cfgVal, EV) \
do { \
bool wasFound = false; \
int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \
int64_t val = wasFound ? ev : EV; \
*(int32_t*)&cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \
} while(false)
#define MVK_SET_FROM_ENV_OR_BUILD_STRING(cfgVal, EV, strObj) \
do { \
bool wasFound = false; \
std::string ev = mvkGetEnvVar(#EV, &wasFound); \
strObj = wasFound ? std::move(ev) : EV; \
cfgVal = strObj.c_str(); \
} while(false)
double mvkGetEnvVarNumber(const char* evName, double defaultValue = 0.0);
#pragma mark -
@ -178,8 +149,12 @@ uint64_t mvkGetUsedMemorySize();
/** Returns the size of a page of host memory on this platform. */
uint64_t mvkGetHostMemoryPageSize();
#pragma mark -
#pragma mark Threading
/** Returns the number of available CPU cores. */
uint32_t mvkGetAvaliableCPUCores();
/** Ensures the block is executed on the main thread. */
void mvkDispatchToMainAndWait(dispatch_block_t block);

View File

@ -29,6 +29,10 @@
using namespace std;
#pragma mark -
#pragma mark Operating System versions
MVKOSVersion mvkOSVersion() {
static MVKOSVersion _mvkOSVersion = 0;
if ( !_mvkOSVersion ) {
@ -38,64 +42,57 @@ MVKOSVersion mvkOSVersion() {
return _mvkOSVersion;
}
static uint64_t _mvkTimestampBase;
static double _mvkTimestampPeriod;
#pragma mark -
#pragma mark Timestamps
static mach_timebase_info_data_t _mvkMachTimebase;
uint64_t mvkGetTimestamp() { return mach_absolute_time() - _mvkTimestampBase; }
uint64_t mvkGetTimestamp() { return mach_absolute_time(); }
double mvkGetTimestampPeriod() { return _mvkTimestampPeriod; }
uint64_t mvkGetRuntimeNanoseconds() { return mach_absolute_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; }
uint64_t mvkGetContinuousNanoseconds() { return mach_continuous_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; }
uint64_t mvkGetElapsedNanoseconds(uint64_t startTimestamp, uint64_t endTimestamp) {
if (endTimestamp == 0) { endTimestamp = mvkGetTimestamp(); }
return (endTimestamp - startTimestamp) * _mvkTimestampPeriod;
return (endTimestamp - startTimestamp) * _mvkMachTimebase.numer / _mvkMachTimebase.denom;
}
double mvkGetElapsedMilliseconds(uint64_t startTimestamp, uint64_t endTimestamp) {
return mvkGetElapsedNanoseconds(startTimestamp, endTimestamp) / 1e6;
}
uint64_t mvkGetAbsoluteTime() { return mach_continuous_time() * _mvkMachTimebase.numer / _mvkMachTimebase.denom; }
// Initialize timestamping capabilities on app startup.
//Called automatically when the framework is loaded and initialized.
// Initialize timestamp capabilities on app startup.
// Called automatically when the framework is loaded and initialized.
static bool _mvkTimestampsInitialized = false;
__attribute__((constructor)) static void MVKInitTimestamps() {
if (_mvkTimestampsInitialized ) { return; }
_mvkTimestampsInitialized = true;
_mvkTimestampBase = mach_absolute_time();
mach_timebase_info(&_mvkMachTimebase);
_mvkTimestampPeriod = (double)_mvkMachTimebase.numer / (double)_mvkMachTimebase.denom;
}
void mvkDispatchToMainAndWait(dispatch_block_t block) {
if (NSThread.isMainThread) {
block();
} else {
dispatch_sync(dispatch_get_main_queue(), block);
}
}
#pragma mark -
#pragma mark Process environment
string mvkGetEnvVar(string varName, bool* pWasFound) {
bool mvkGetEnvVar(const char* varName, string& evStr) {
@autoreleasepool {
NSDictionary* nsEnv = [[NSProcessInfo processInfo] environment];
NSString* envStr = nsEnv[@(varName.c_str())];
if (pWasFound) { *pWasFound = envStr != nil; }
return envStr ? envStr.UTF8String : "";
NSString* nsStr = nsEnv[@(varName)];
if (nsStr) { evStr = nsStr.UTF8String; }
return nsStr != nil;
}
}
int64_t mvkGetEnvVarInt64(string varName, bool* pWasFound) {
return strtoll(mvkGetEnvVar(varName, pWasFound).c_str(), NULL, 0);
const char* mvkGetEnvVarString(const char* varName, string& evStr, const char* defaultValue) {
return mvkGetEnvVar(varName, evStr) ? evStr.c_str() : defaultValue;
}
bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) {
return mvkGetEnvVarInt64(varName, pWasFound) != 0;
double mvkGetEnvVarNumber(const char* varName, double defaultValue) {
string evStr;
return mvkGetEnvVar(varName, evStr) ? strtod(evStr.c_str(), nullptr) : defaultValue;
}
@ -144,6 +141,7 @@ uint64_t mvkGetUsedMemorySize() {
uint64_t mvkGetHostMemoryPageSize() { return sysconf(_SC_PAGESIZE); }
#pragma mark -
#pragma mark Threading
@ -151,3 +149,11 @@ uint64_t mvkGetHostMemoryPageSize() { return sysconf(_SC_PAGESIZE); }
uint32_t mvkGetAvaliableCPUCores() {
return (uint32_t)[[NSProcessInfo processInfo] activeProcessorCount];
}
void mvkDispatchToMainAndWait(dispatch_block_t block) {
if (NSThread.isMainThread) {
block();
} else {
dispatch_sync(dispatch_get_main_queue(), block);
}
}

View File

@ -3,7 +3,7 @@
archiveVersion = 1;
classes = {
};
objectVersion = 52;
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */

View File

@ -30,15 +30,9 @@
struct demo demo;
}
-(void) dealloc {
demo_cleanup(&demo);
[_displayLink release];
[super dealloc];
}
/** Since this is a single-view app, init Vulkan when the view is loaded. */
-(void) viewDidLoad {
[super viewDidLoad];
/** Since this is a single-view app, initialize Vulkan as view is appearing. */
-(void) viewWillAppear: (BOOL) animated {
[super viewWillAppear: animated];
self.view.contentScaleFactor = UIScreen.mainScreen.nativeScale;
@ -68,6 +62,13 @@
demo_resize(&demo);
}
-(void) viewDidDisappear: (BOOL) animated {
[_displayLink invalidate];
[_displayLink release];
demo_cleanup(&demo);
[super viewDidDisappear: animated];
}
@end

View File

@ -18,6 +18,7 @@
#import "DemoViewController.h"
#import <QuartzCore/CAMetalLayer.h>
#import <CoreVideo/CVDisplayLink.h>
#include <MoltenVK/mvk_vulkan.h>
#include "../../Vulkan-Tools/cube/cube.c"
@ -27,27 +28,34 @@
#pragma mark DemoViewController
@implementation DemoViewController {
CVDisplayLinkRef _displayLink;
CVDisplayLinkRef _displayLink;
struct demo demo;
uint32_t _maxFrameCount;
uint64_t _frameCount;
BOOL _stop;
BOOL _useDisplayLink;
}
-(void) dealloc {
demo_cleanup(&demo);
CVDisplayLinkRelease(_displayLink);
[super dealloc];
}
/** Since this is a single-view app, initialize Vulkan during view loading. */
-(void) viewDidLoad {
[super viewDidLoad];
/** Since this is a single-view app, initialize Vulkan as view is appearing. */
-(void) viewWillAppear {
[super viewWillAppear];
self.view.wantsLayer = YES; // Back the view with a layer created by the makeBackingLayer method.
// Enabling this will sync the rendering loop with the natural display link (60 fps).
// Disabling this will allow the rendering loop to run flat out, limited only by the rendering speed.
bool useDisplayLink = true;
// Enabling this will sync the rendering loop with the natural display link
// (monitor refresh rate, typically 60 fps). Disabling this will allow the
// rendering loop to run flat out, limited only by the rendering speed.
_useDisplayLink = YES;
VkPresentModeKHR vkPresentMode = useDisplayLink ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR;
// If this value is set to zero, the demo will render frames until the window is closed.
// If this value is not zero, it establishes a maximum number of frames that will be
// rendered, and once this count has been reached, the demo will stop rendering.
// Once rendering is finished, if _useDisplayLink is false, the demo will immediately
// clean up the Vulkan objects, or if _useDisplayLink is true, the demo will delay
// cleaning up Vulkan objects until the window is closed.
_maxFrameCount = 0;
VkPresentModeKHR vkPresentMode = _useDisplayLink ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR;
char vkPresentModeStr[64];
sprintf(vkPresentModeStr, "%d", vkPresentMode);
@ -55,19 +63,33 @@
int argc = sizeof(argv)/sizeof(char*);
demo_main(&demo, self.view.layer, argc, argv);
if (useDisplayLink) {
_stop = NO;
_frameCount = 0;
if (_useDisplayLink) {
CVDisplayLinkCreateWithActiveCGDisplays(&_displayLink);
CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, &demo);
CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, self);
CVDisplayLinkStart(_displayLink);
} else {
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
while(true) {
do {
demo_draw(&demo);
}
_stop = _stop || (_maxFrameCount && ++_frameCount >= _maxFrameCount);
} while( !_stop );
demo_cleanup(&demo);
});
}
}
-(void) viewDidDisappear {
_stop = YES;
if (_useDisplayLink) {
CVDisplayLinkRelease(_displayLink);
demo_cleanup(&demo);
}
[super viewDidDisappear];
}
#pragma mark Display loop callback function
@ -78,7 +100,11 @@ static CVReturn DisplayLinkCallback(CVDisplayLinkRef displayLink,
CVOptionFlags flagsIn,
CVOptionFlags* flagsOut,
void* target) {
demo_draw((struct demo*)target);
DemoViewController* demoVC =(DemoViewController*)target;
if ( !demoVC->_stop ) {
demo_draw(&demoVC->demo);
demoVC->_stop = (demoVC->_maxFrameCount && ++demoVC->_frameCount >= demoVC->_maxFrameCount);
}
return kCVReturnSuccess;
}

View File

@ -312,7 +312,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll
- `VK_KHR_16bit_storage`
- `VK_KHR_8bit_storage`
- `VK_KHR_bind_memory2`
- `VK_KHR_buffer_device_address` *(requires GPU Tier 2 argument buffers support)*
- `VK_KHR_buffer_device_address`
- *Requires GPU Tier 2 argument buffers support.*
- `VK_KHR_copy_commands2`
- `VK_KHR_create_renderpass2`
- `VK_KHR_dedicated_allocation`
@ -322,7 +323,8 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll
- `VK_KHR_device_group_creation`
- `VK_KHR_driver_properties`
- `VK_KHR_dynamic_rendering`
- `VK_KHR_fragment_shader_barycentric` *(requires Metal 2.2 on Mac or Metal 2.3 on iOS)*
- `VK_KHR_fragment_shader_barycentric`
- *Requires Metal 2.2 on Mac or Metal 2.3 on iOS.*
- `VK_KHR_get_memory_requirements2`
- `VK_KHR_get_physical_device_properties2`
- `VK_KHR_get_surface_capabilities2`
@ -337,70 +339,103 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll
- `VK_KHR_portability_subset`
- `VK_KHR_push_descriptor`
- `VK_KHR_relaxed_block_layout`
- `VK_KHR_sampler_mirror_clamp_to_edge` *(requires a Mac GPU or Apple family 7 GPU)*
- `VK_KHR_sampler_mirror_clamp_to_edge`
- *Requires a Mac GPU or Apple family 7 GPU.*
- `VK_KHR_sampler_ycbcr_conversion`
- `VK_KHR_separate_depth_stencil_layouts`
- `VK_KHR_shader_draw_parameters`
- `VK_KHR_shader_float_controls`
- `VK_KHR_shader_float16_int8`
- `VK_KHR_shader_non_semantic_info`
- `VK_KHR_shader_subgroup_extended_types` *(requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS)*
- `VK_KHR_shader_subgroup_extended_types`
- *Requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS.*
- `VK_KHR_spirv_1_4`
- `VK_KHR_storage_buffer_storage_class`
- `VK_KHR_surface`
- `VK_KHR_swapchain`
- `VK_KHR_swapchain_mutable_format`
- `VK_KHR_synchronization2`
- `VK_KHR_timeline_semaphore`
- `VK_KHR_uniform_buffer_standard_layout`
- `VK_KHR_variable_pointers`
- `VK_EXT_4444_formats` *(requires 16-bit formats and either native texture swizzling or manual swizzling to be enabled)*
- `VK_EXT_buffer_device_address` *(requires GPU Tier 2 argument buffers support)*
- `VK_EXT_calibrated_timestamps` *(requires Metal 2.2)*
- `VK_EXT_4444_formats`
- *Requires 16-bit formats and either native texture swizzling or manual swizzling to be enabled.*
- `VK_EXT_buffer_device_address`
- *Requires GPU Tier 2 argument buffers support.*
- `VK_EXT_calibrated_timestamps`
- *Requires Metal 2.2.*
- `VK_EXT_debug_marker`
- `VK_EXT_debug_report`
- `VK_EXT_debug_utils`
- `VK_EXT_descriptor_indexing` *(initial release limited to Metal Tier 1: 96/128 textures,
16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using
an Intel GPU, and if Metal argument buffers enabled in config)*
- `VK_EXT_descriptor_indexing`
- *Initial release limited to Metal Tier 1: 96/128 textures,
16 samplers, except macOS 11.0 (Big Sur) or later, or on older versions of macOS using
an Intel GPU, and if Metal argument buffers enabled in config.*
- `VK_EXT_extended_dynamic_state`
- *Requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`.*
- `VK_EXT_extended_dynamic_state2`
- *Primitive restart is always enabled, as Metal does not support disabling it (`VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT`).*
- `VK_EXT_extended_dynamic_state3`
- *Metal does not support `VK_POLYGON_MODE_POINT`.*
- `VK_EXT_external_memory_host`
- `VK_EXT_fragment_shader_interlock` *(requires Metal 2.0 and Raster Order Groups)*
- `VK_EXT_fragment_shader_interlock`
- *Requires Metal 2.0 and Raster Order Groups.*
- `VK_EXT_hdr_metadata`
- *macOS only.*
- `VK_EXT_headless_surface`
- `VK_EXT_host_query_reset`
- `VK_EXT_image_robustness`
- `VK_EXT_inline_uniform_block`
- `VK_EXT_layer_settings`
- `VK_EXT_memory_budget` *(requires Metal 2.0)*
- `VK_EXT_memory_budget`
- *Requires Metal 2.0.*
- `VK_EXT_metal_objects`
- `VK_EXT_metal_surface`
- `VK_EXT_pipeline_creation_cache_control`
- `VK_EXT_pipeline_creation_feedback`
- `VK_EXT_post_depth_coverage` *(iOS and macOS, requires family 4 (A11) or better Apple GPU)*
- `VK_EXT_post_depth_coverage`
- *iOS and macOS, requires family 4 (A11) or better Apple GPU.*
- `VK_EXT_private_data`
- `VK_EXT_robustness2`
- `VK_EXT_sample_locations`
- `VK_EXT_scalar_block_layout`
- `VK_EXT_separate_stencil_usage`
- `VK_EXT_shader_atomic_float` *(requires Metal 3.0)*
- `VK_EXT_shader_demote_to_helper_invocation` *(requires Metal Shading Language 2.3)*
- `VK_EXT_shader_stencil_export` *(requires Mac GPU family 2 or iOS GPU family 5)*
- `VK_EXT_shader_subgroup_ballot` *(requires Mac GPU family 2 or Apple GPU family 4)*
- `VK_EXT_shader_subgroup_vote` *(requires Mac GPU family 2 or Apple GPU family 4)*
- `VK_EXT_shader_atomic_float`
- *Requires Metal 3.0.*
- `VK_EXT_shader_demote_to_helper_invocation`
- *Requires Metal Shading Language 2.3.*
- `VK_EXT_shader_stencil_export`
- *Requires Mac GPU family 2 or iOS GPU family 5.*
- `VK_EXT_shader_subgroup_ballot`
- *Requires Mac GPU family 2 or Apple GPU family 4.*
- `VK_EXT_shader_subgroup_vote`
- *Requires Mac GPU family 2 or Apple GPU family 4.*
- `VK_EXT_shader_viewport_index_layer`
- `VK_EXT_subgroup_size_control` *(requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS)*
- `VK_EXT_subgroup_size_control`
- *Requires Metal 2.1 on Mac or Metal 2.2 and Apple family 4 on iOS.*
- `VK_EXT_surface_maintenance1`
- `VK_EXT_swapchain_colorspace`
- `VK_EXT_swapchain_maintenance1`
- `VK_EXT_vertex_attribute_divisor`
- `VK_EXT_texel_buffer_alignment` *(requires Metal 2.0)*
- `VK_EXT_texture_compression_astc_hdr` *(iOS and macOS, requires family 6 (A13) or better Apple GPU)*
- `VK_MVK_ios_surface` *(iOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)*
- `VK_MVK_macos_surface` *(macOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)*
- `VK_EXT_texel_buffer_alignment`
- *Requires Metal 2.0.*
- `VK_EXT_texture_compression_astc_hdr`
- *iOS and macOS, requires family 6 (A13) or better Apple GPU.*
- `VK_MVK_ios_surface`
- *Obsolete. Use `VK_EXT_metal_surface` instead.*
- `VK_MVK_macos_surface`
- *Obsolete. Use `VK_EXT_metal_surface` instead.*
- `VK_AMD_gpu_shader_half_float`
- `VK_AMD_negative_viewport_height`
- `VK_AMD_shader_image_load_store_lod` *(requires Apple GPU)*
- `VK_AMD_shader_trinary_minmax` *(requires Metal 2.1)*
- `VK_IMG_format_pvrtc` *(requires Apple GPU)*
- `VK_AMD_shader_image_load_store_lod`
- *Requires Apple GPU.*
- `VK_AMD_shader_trinary_minmax`
- *Requires Metal 2.1.*
- `VK_IMG_format_pvrtc`
- *Requires Apple GPU.*
- `VK_INTEL_shader_integer_functions2`
- `VK_NV_fragment_shader_barycentric` *(requires Metal 2.2 on Mac or Metal 2.3 on iOS)*
- `VK_NV_fragment_shader_barycentric`
- *Requires Metal 2.2 on Mac or Metal 2.3 on iOS.*
- `VK_NV_glsl_shader`
In order to visibly display your content on *macOS*, *iOS*, or *tvOS*, you must enable the
@ -517,11 +552,19 @@ you can address the issue as follows:
- Errors encountered during **Runtime Shader Conversion** are logged to the console.
- To help understand conversion issues during **Runtime Shader Conversion**, you can
enable the logging of the *SPIR-V* and *MSL* shader source code during shader conversion,
by turning on the `MVKConfiguration::debugMode` configuration parameter, or setting the
value of the `MVK_CONFIG_DEBUG` runtime environment variable to `1`. See the
[*MoltenVK Configuration*](#moltenvk_config) description above.
Enabling debug mode in **MoltenVK** includes shader conversion logging, which causes both
the incoming *SPIR-V* code and the converted *MSL* source code to be logged to the console
@ -588,9 +631,9 @@ vailable when you request it, resulting in frame delays and visual stuttering.
<a name="timestamping"></a>
### Timestamping
On non-Apple Silicon devices (older Mac devices), the GPU can switch power and performance
states as required by usage. This affects the GPU timestamps retrievable through the Vulkan
API. As a result, the value of `VkPhysicalDeviceLimits::timestampPeriod` can vary over time.
On non-Apple GPUs (older Mac devices), the GPU can switch power and performance states as
required by usage. This affects the GPU timestamps retrievable through the Vulkan API.
As a result, the value of `VkPhysicalDeviceLimits::timestampPeriod` can vary over time.
Consider calling `vkGetPhysicalDeviceProperties()`, when needed, and retrieve the current
value of `VkPhysicalDeviceLimits::timestampPeriod`, to help you calibrate recent GPU
timestamps queried through the Vulkan API.
@ -622,6 +665,8 @@ Known **MoltenVK** Limitations
------------------------------
This section documents the known limitations in this version of **MoltenVK**.
- See [above](#interaction) for known limitations for specific Vulkan extensions.
- On *macOS* versions prior to *macOS 10.15.6*, native host-coherent image device memory is not available.
Because of this, changes made to `VkImage VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` device memory by the CPU
@ -645,4 +690,3 @@ This section documents the known limitations in this version of **MoltenVK**.
use the *Vulkan Loader and Layers* from the *[Vulkan SDK](https://vulkan.lunarg.com/sdk/home)*.
Refer to the *Vulkan SDK [Getting Started](https://vulkan.lunarg.com/doc/sdk/latest/mac/getting_started.html)*
document for more info.

View File

@ -13,15 +13,57 @@ Copyright (c) 2015-2023 [The Brenwill Workshop Ltd.](http://www.brenwill.com)
MoltenVK 1.2.6
MoltenVK 1.2.7
--------------
Released TBD
- Add support for extensions:
- `VK_EXT_extended_dynamic_state3` *(Metal does not support `VK_POLYGON_MODE_POINT`)*
- `VK_EXT_headless_surface`
- `VK_EXT_layer_settings`
- Fix rare case where vertex attribute buffers are not bound to Metal
when no other bindings change between pipelines.
- Fix regression that broke `VK_POLYGON_MODE_LINE`.
- Fix regression in marking rendering state dirty after `vkCmdClearAttachments()`.
- Reduce disk space consumed after running `fetchDependencies` script by removing intermediate file caches.
- Fix rare deadlock during launch via `dlopen()`.
- Fix initial value of `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple Silicon GPUs.
- Fix swapchain and surface bugs when windowing system is accessed from off the main thread.
- Update to latest SPIRV-Cross:
- MSL: Fix regression error in argument buffer runtime arrays.
- MSL: Work around broken cube texture gradients on Apple Silicon.
MoltenVK 1.2.6
--------------
Released 2023/10/17
- Add support for extensions:
- `VK_KHR_synchronization2`
- `VK_EXT_extended_dynamic_state` *(requires Metal 3.1 for `VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE`)*
- `VK_EXT_extended_dynamic_state2`
- Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines.
- Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`.
- Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering.
- Fix deadlock when reporting debug message on `MVKInstance` destruction.
- Fix MSL code used in `vkCmdBlitImage()` on depth-stencil formats.
- Improve behavior of swapchain image presentation stalls caused by Metal regression.
- `VkPhysicalDeviceLimits::timestampPeriod` set to 1.0 on Apple GPUs, and calculated dynamically on non-Apple GPUs.
- Add `MVKConfiguration::timestampPeriodLowPassAlpha` and environment variable
`MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA`, to add a configurable lowpass filter
for varying `VkPhysicalDeviceLimits::timestampPeriod` on non-Apple GPUs.
- Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API.
- Deprecate `MVK_DEBUG` env var, and add `MVK_CONFIG_DEBUG` env var to replace it.
- Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`.
- Update dependency libraries to match _Vulkan SDK 1.3.268_.
- Update to latest SPIRV-Cross:
- MSL: Workaround Metal 3.1 regression bug on recursive input structs.
- MSL: fix extraction of global variables, in case of atomics.
- MSL: Workaround bizarre crash on macOS.
- MSL: runtime array over argument buffers.
- MSL: Make rw texture fences optional.
- MSL: Prevent RAW hazards on read_write textures.
@ -31,6 +73,7 @@ MoltenVK 1.2.5
Released 2023/08/15
- Add support for extensions:
- `VK_KHR_deferred_host_operations`
- `VK_KHR_incremental_present`
- `VK_KHR_shader_non_semantic_info`
- `VK_EXT_4444_formats`

View File

@ -1 +1 @@
bccaa94db814af33d8ef05c153e7c34d8bd4d685
50e90dd74e0e43e243f12a70f0326d2cf8ed3945

View File

@ -1 +1 @@
85c2334e92e215cce34e8e0ed8b2dce4700f4a50
19a863ccce773ff393b186329478b1eb1a519fd3

View File

@ -1 +1 @@
300d9bf6b3cf7b237ee5e2c1d0ae10b9236f82d3
1532001f7edae559af1988293eec90bc5e2607d5

View File

@ -1 +1 @@
76b52ebf77833908dc4c0dd6c70a9c357ac720bd
be564292f00c5bf0d7251c11f1c9618eb1117762

View File

@ -46,7 +46,7 @@
2FEA0A6724902F9F00EEF3AD /* MVKCommonEnvironment.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429D1FB4CF82009FCCB8 /* MVKCommonEnvironment.h */; };
2FEA0A6824902F9F00EEF3AD /* MVKWatermark.h in Headers */ = {isa = PBXBuildFile; fileRef = A98149491FB6A3F7005F00B4 /* MVKWatermark.h */; };
2FEA0A6924902F9F00EEF3AD /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; };
2FEA0A6A24902F9F00EEF3AD /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; };
2FEA0A6A24902F9F00EEF3AD /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; };
2FEA0A6B24902F9F00EEF3AD /* MVKCmdPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB76E1C7DFB4800632CA3 /* MVKCmdPipeline.h */; };
2FEA0A6C24902F9F00EEF3AD /* MVKSmallVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */; };
2FEA0A6D24902F9F00EEF3AD /* MVKPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB78D1C7DFB4800632CA3 /* MVKPipeline.h */; };
@ -90,7 +90,7 @@
2FEA0A9424902F9F00EEF3AD /* MVKCommandPool.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB77B1C7DFB4800632CA3 /* MVKCommandPool.mm */; };
2FEA0A9524902F9F00EEF3AD /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; };
2FEA0A9624902F9F00EEF3AD /* MVKCommandBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7791C7DFB4800632CA3 /* MVKCommandBuffer.mm */; };
2FEA0A9724902F9F00EEF3AD /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; };
2FEA0A9724902F9F00EEF3AD /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; };
2FEA0A9824902F9F00EEF3AD /* MVKBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7801C7DFB4800632CA3 /* MVKBuffer.mm */; };
2FEA0A9924902F9F00EEF3AD /* mvk_datatypes.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A91C7DFB4800632CA3 /* mvk_datatypes.mm */; };
2FEA0A9A24902F9F00EEF3AD /* MVKExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A909F65E213B190700FCD6BE /* MVKExtensions.mm */; };
@ -117,7 +117,7 @@
2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A11C7DFB4800632CA3 /* MVKLayers.mm */; };
2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7881C7DFB4800632CA3 /* MVKFramebuffer.mm */; };
2FEA0AB124902F9F00EEF3AD /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; };
2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; };
2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; };
2FEA0AB324902F9F00EEF3AD /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; };
2FEA0AB424902F9F00EEF3AD /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; };
45003E73214AD4E500E989CB /* MVKExtensions.def in Headers */ = {isa = PBXBuildFile; fileRef = 45003E6F214AD4C900E989CB /* MVKExtensions.def */; };
@ -172,10 +172,10 @@
A94FB7C11C7DFB4800632CA3 /* MVKCmdQueries.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */; };
A94FB7C21C7DFB4800632CA3 /* MVKCmdQueries.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */; };
A94FB7C31C7DFB4800632CA3 /* MVKCmdQueries.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */; };
A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; };
A94FB7C51C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; };
A94FB7C61C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; };
A94FB7C71C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; };
A94FB7C41C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; };
A94FB7C51C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; };
A94FB7C61C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; };
A94FB7C71C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; };
A94FB7C81C7DFB4800632CA3 /* MVKCmdDraw.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */; };
A94FB7C91C7DFB4800632CA3 /* MVKCmdDraw.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */; };
A94FB7CA1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; };
@ -331,6 +331,10 @@
A9B51BD8225E986A00AC74D2 /* MVKOSExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */; };
A9B51BD9225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; };
A9B51BDA225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; };
A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; };
A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; };
A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; };
A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; };
A9C96DD01DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; };
A9C96DD11DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; };
A9C96DD21DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; };
@ -356,8 +360,8 @@
A9E53DE62100B197002781DD /* NSString+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD42100B197002781DD /* NSString+MoltenVK.mm */; };
A9E53DE72100B197002781DD /* MTLTextureDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */; };
A9E53DE82100B197002781DD /* MTLTextureDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */; };
A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; };
A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; };
A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; };
A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; };
A9E53DF32100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */; };
A9E53DF42100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */; };
A9E53DF52100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DF22100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m */; };
@ -416,7 +420,7 @@
DCFD7F0B2A45BC6E007BBBF7 /* MVKCommonEnvironment.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429D1FB4CF82009FCCB8 /* MVKCommonEnvironment.h */; };
DCFD7F0C2A45BC6E007BBBF7 /* MVKWatermark.h in Headers */ = {isa = PBXBuildFile; fileRef = A98149491FB6A3F7005F00B4 /* MVKWatermark.h */; };
DCFD7F0D2A45BC6E007BBBF7 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; };
DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRenderPass.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */; };
DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRendering.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */; };
DCFD7F0F2A45BC6E007BBBF7 /* MVKCmdPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB76E1C7DFB4800632CA3 /* MVKCmdPipeline.h */; };
DCFD7F102A45BC6E007BBBF7 /* MVKSmallVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */; };
DCFD7F112A45BC6E007BBBF7 /* MVKPipeline.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB78D1C7DFB4800632CA3 /* MVKPipeline.h */; };
@ -462,7 +466,7 @@
DCFD7F3A2A45BC6E007BBBF7 /* MVKCommandPool.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB77B1C7DFB4800632CA3 /* MVKCommandPool.mm */; };
DCFD7F3B2A45BC6E007BBBF7 /* MVKCmdDraw.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */; };
DCFD7F3C2A45BC6E007BBBF7 /* MVKCommandBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7791C7DFB4800632CA3 /* MVKCommandBuffer.mm */; };
DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */; };
DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRendering.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */; };
DCFD7F3E2A45BC6E007BBBF7 /* MVKBuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7801C7DFB4800632CA3 /* MVKBuffer.mm */; };
DCFD7F3F2A45BC6E007BBBF7 /* MVKEnvironment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A9A5E9C525C0822700E9085E /* MVKEnvironment.cpp */; };
DCFD7F402A45BC6E007BBBF7 /* mvk_datatypes.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A91C7DFB4800632CA3 /* mvk_datatypes.mm */; };
@ -491,7 +495,7 @@
DCFD7F572A45BC6E007BBBF7 /* MVKFramebuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7881C7DFB4800632CA3 /* MVKFramebuffer.mm */; };
DCFD7F582A45BC6E007BBBF7 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = 453638302508A4C6000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m */; };
DCFD7F592A45BC6E007BBBF7 /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; };
DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.m in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */; };
DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */; };
DCFD7F5B2A45BC6E007BBBF7 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; };
DCFD7F5C2A45BC6E007BBBF7 /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; };
/* End PBXBuildFile section */
@ -591,8 +595,8 @@
A94FB76F1C7DFB4800632CA3 /* MVKCmdPipeline.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdPipeline.mm; sourceTree = "<group>"; };
A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdQueries.h; sourceTree = "<group>"; };
A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdQueries.mm; sourceTree = "<group>"; };
A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdRenderPass.h; sourceTree = "<group>"; };
A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdRenderPass.mm; sourceTree = "<group>"; };
A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdRendering.h; sourceTree = "<group>"; };
A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdRendering.mm; sourceTree = "<group>"; };
A94FB7741C7DFB4800632CA3 /* MVKCmdDraw.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdDraw.h; sourceTree = "<group>"; };
A94FB7751C7DFB4800632CA3 /* MVKCmdDraw.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdDraw.mm; sourceTree = "<group>"; };
A94FB7761C7DFB4800632CA3 /* MVKCommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCommand.h; sourceTree = "<group>"; };
@ -670,6 +674,7 @@
A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKOSExtensions.mm; sourceTree = "<group>"; };
A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKOSExtensions.h; sourceTree = "<group>"; };
A9B8EE0A1A98D796009C5A02 /* libMoltenVK.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libMoltenVK.a; sourceTree = BUILT_PRODUCTS_DIR; };
A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKConfigMembers.def; sourceTree = "<group>"; };
A9C83DCD24533E22003E5261 /* MVKCommandTypePools.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKCommandTypePools.def; sourceTree = "<group>"; };
A9C86CB61C55B8350096CAF2 /* MoltenVKShaderConverter.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = MoltenVKShaderConverter.xcodeproj; path = ../MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj; sourceTree = "<group>"; };
A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKMTLBufferAllocation.h; sourceTree = "<group>"; };
@ -686,7 +691,7 @@
A9E53DD32100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "MTLSamplerDescriptor+MoltenVK.h"; sourceTree = "<group>"; };
A9E53DD42100B197002781DD /* NSString+MoltenVK.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "NSString+MoltenVK.mm"; sourceTree = "<group>"; };
A9E53DD52100B197002781DD /* MTLTextureDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLTextureDescriptor+MoltenVK.m"; sourceTree = "<group>"; };
A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "CAMetalLayer+MoltenVK.m"; sourceTree = "<group>"; };
A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "CAMetalLayer+MoltenVK.mm"; sourceTree = "<group>"; };
A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "MTLRenderPassDescriptor+MoltenVK.h"; sourceTree = "<group>"; };
A9E53DF22100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLRenderPassDescriptor+MoltenVK.m"; sourceTree = "<group>"; };
A9E53DFA21064F84002781DD /* MTLRenderPipelineDescriptor+MoltenVK.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "MTLRenderPipelineDescriptor+MoltenVK.m"; sourceTree = "<group>"; };
@ -739,8 +744,8 @@
A94FB76F1C7DFB4800632CA3 /* MVKCmdPipeline.mm */,
A94FB7701C7DFB4800632CA3 /* MVKCmdQueries.h */,
A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */,
A94FB7721C7DFB4800632CA3 /* MVKCmdRenderPass.h */,
A94FB7731C7DFB4800632CA3 /* MVKCmdRenderPass.mm */,
A94FB7721C7DFB4800632CA3 /* MVKCmdRendering.h */,
A94FB7731C7DFB4800632CA3 /* MVKCmdRendering.mm */,
A94FB76C1C7DFB4800632CA3 /* MVKCmdTransfer.h */,
A94FB76D1C7DFB4800632CA3 /* MVKCmdTransfer.mm */,
A94FB7761C7DFB4800632CA3 /* MVKCommand.h */,
@ -843,6 +848,7 @@
4553AEF62251617100E8EBCD /* MVKBlockObserver.m */,
45557A5121C9EFF3008868BD /* MVKCodec.h */,
45557A4D21C9EFF3008868BD /* MVKCodec.mm */,
A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */,
45557A5721CD83C3008868BD /* MVKDXTnCodec.def */,
A9A5E9C525C0822700E9085E /* MVKEnvironment.cpp */,
A98149431FB6A3F7005F00B4 /* MVKEnvironment.h */,
@ -883,7 +889,7 @@
isa = PBXGroup;
children = (
A9E53DD12100B197002781DD /* CAMetalLayer+MoltenVK.h */,
A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.m */,
A9E53DD62100B197002781DD /* CAMetalLayer+MoltenVK.mm */,
453638312508A4C7000EFFD3 /* MTLRenderPassDepthAttachmentDescriptor+MoltenVK.h */,
4536382F2508A4C6000EFFD3 /* MTLRenderPassDepthAttachmentDescriptor+MoltenVK.m */,
A9E53DEE2100B302002781DD /* MTLRenderPassDescriptor+MoltenVK.h */,
@ -989,7 +995,7 @@
2FEA0A6724902F9F00EEF3AD /* MVKCommonEnvironment.h in Headers */,
2FEA0A6824902F9F00EEF3AD /* MVKWatermark.h in Headers */,
2FEA0A6924902F9F00EEF3AD /* MVKOSExtensions.h in Headers */,
2FEA0A6A24902F9F00EEF3AD /* MVKCmdRenderPass.h in Headers */,
2FEA0A6A24902F9F00EEF3AD /* MVKCmdRendering.h in Headers */,
2FEA0A6B24902F9F00EEF3AD /* MVKCmdPipeline.h in Headers */,
2FEA0A6C24902F9F00EEF3AD /* MVKSmallVectorAllocator.h in Headers */,
2FEA0A6D24902F9F00EEF3AD /* MVKPipeline.h in Headers */,
@ -1008,6 +1014,7 @@
2FEA0A7824902F9F00EEF3AD /* MVKDeviceMemory.h in Headers */,
2FEA0A7924902F9F00EEF3AD /* MVKMTLResourceBindings.h in Headers */,
2FEA0A7A24902F9F00EEF3AD /* MVKExtensions.def in Headers */,
A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */,
2FEA0A7B24902F9F00EEF3AD /* mvk_datatypes.hpp in Headers */,
2FEA0A7C24902F9F00EEF3AD /* MVKCommandEncodingPool.h in Headers */,
2FEA0A7D24902F9F00EEF3AD /* MVKResource.h in Headers */,
@ -1067,9 +1074,10 @@
A9F042A41FB4CF83009FCCB8 /* MVKCommonEnvironment.h in Headers */,
A981495D1FB6A3F7005F00B4 /* MVKWatermark.h in Headers */,
A9B51BD9225E986A00AC74D2 /* MVKOSExtensions.h in Headers */,
A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */,
A94FB7C41C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */,
A94FB7BC1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */,
A9F3D9DC24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */,
A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */,
A94FB7F81C7DFB4800632CA3 /* MVKPipeline.h in Headers */,
A94FB7F01C7DFB4800632CA3 /* MVKImage.h in Headers */,
4553AEFD2251617100E8EBCD /* MVKBlockObserver.h in Headers */,
@ -1143,10 +1151,11 @@
A9F042A51FB4CF83009FCCB8 /* MVKCommonEnvironment.h in Headers */,
A981495E1FB6A3F7005F00B4 /* MVKWatermark.h in Headers */,
A9B51BDA225E986A00AC74D2 /* MVKOSExtensions.h in Headers */,
A94FB7C51C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */,
A94FB7C51C7DFB4800632CA3 /* MVKCmdRendering.h in Headers */,
A94FB7BD1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */,
A9F3D9DD24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */,
A94FB7F91C7DFB4800632CA3 /* MVKPipeline.h in Headers */,
A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */,
A94FB7F11C7DFB4800632CA3 /* MVKImage.h in Headers */,
4553AEFE2251617100E8EBCD /* MVKBlockObserver.h in Headers */,
A94FB7B91C7DFB4800632CA3 /* MVKCmdTransfer.h in Headers */,
@ -1204,6 +1213,7 @@
DCFD7EFD2A45BC6E007BBBF7 /* MVKSync.h in Headers */,
DCFD7EFE2A45BC6E007BBBF7 /* MVKDevice.h in Headers */,
DCFD7EFF2A45BC6E007BBBF7 /* MVKSmallVector.h in Headers */,
A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */,
DCFD7F002A45BC6E007BBBF7 /* MVKCommandPool.h in Headers */,
DCFD7F012A45BC6E007BBBF7 /* MVKShaderModule.h in Headers */,
DCFD7F022A45BC6E007BBBF7 /* MVKVulkanAPIObject.h in Headers */,
@ -1218,7 +1228,7 @@
DCFD7F0B2A45BC6E007BBBF7 /* MVKCommonEnvironment.h in Headers */,
DCFD7F0C2A45BC6E007BBBF7 /* MVKWatermark.h in Headers */,
DCFD7F0D2A45BC6E007BBBF7 /* MVKOSExtensions.h in Headers */,
DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRenderPass.h in Headers */,
DCFD7F0E2A45BC6E007BBBF7 /* MVKCmdRendering.h in Headers */,
DCFD7F0F2A45BC6E007BBBF7 /* MVKCmdPipeline.h in Headers */,
DCFD7F102A45BC6E007BBBF7 /* MVKSmallVectorAllocator.h in Headers */,
DCFD7F112A45BC6E007BBBF7 /* MVKPipeline.h in Headers */,
@ -1664,7 +1674,7 @@
2FEA0A9424902F9F00EEF3AD /* MVKCommandPool.mm in Sources */,
2FEA0A9524902F9F00EEF3AD /* MVKCmdDraw.mm in Sources */,
2FEA0A9624902F9F00EEF3AD /* MVKCommandBuffer.mm in Sources */,
2FEA0A9724902F9F00EEF3AD /* MVKCmdRenderPass.mm in Sources */,
2FEA0A9724902F9F00EEF3AD /* MVKCmdRendering.mm in Sources */,
2FEA0A9824902F9F00EEF3AD /* MVKBuffer.mm in Sources */,
2FEA0A9924902F9F00EEF3AD /* mvk_datatypes.mm in Sources */,
2FEA0A9A24902F9F00EEF3AD /* MVKExtensions.mm in Sources */,
@ -1693,7 +1703,7 @@
2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */,
2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */,
2FEA0AB124902F9F00EEF3AD /* MVKMTLBufferAllocation.mm in Sources */,
2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.m in Sources */,
2FEA0AB224902F9F00EEF3AD /* CAMetalLayer+MoltenVK.mm in Sources */,
2FEA0AB324902F9F00EEF3AD /* MVKCmdDispatch.mm in Sources */,
2FEA0AB424902F9F00EEF3AD /* MVKCmdDebug.mm in Sources */,
);
@ -1724,7 +1734,7 @@
A94FB7D61C7DFB4800632CA3 /* MVKCommandPool.mm in Sources */,
A94FB7CA1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */,
A94FB7D21C7DFB4800632CA3 /* MVKCommandBuffer.mm in Sources */,
A94FB7C61C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */,
A94FB7C61C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */,
A94FB7DE1C7DFB4800632CA3 /* MVKBuffer.mm in Sources */,
A9A5E9C725C0822700E9085E /* MVKEnvironment.cpp in Sources */,
A94FB82A1C7DFB4800632CA3 /* mvk_datatypes.mm in Sources */,
@ -1753,7 +1763,7 @@
A94FB7EE1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
453638382508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */,
A9C96DD21DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */,
A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */,
A9E53DE92100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */,
A9096E5E1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */,
A99C90F0229455B300A061DA /* MVKCmdDebug.mm in Sources */,
);
@ -1784,7 +1794,7 @@
A94FB7D71C7DFB4800632CA3 /* MVKCommandPool.mm in Sources */,
A94FB7CB1C7DFB4800632CA3 /* MVKCmdDraw.mm in Sources */,
A94FB7D31C7DFB4800632CA3 /* MVKCommandBuffer.mm in Sources */,
A94FB7C71C7DFB4800632CA3 /* MVKCmdRenderPass.mm in Sources */,
A94FB7C71C7DFB4800632CA3 /* MVKCmdRendering.mm in Sources */,
A94FB7DF1C7DFB4800632CA3 /* MVKBuffer.mm in Sources */,
A9A5E9C925C0822700E9085E /* MVKEnvironment.cpp in Sources */,
A94FB82B1C7DFB4800632CA3 /* mvk_datatypes.mm in Sources */,
@ -1813,7 +1823,7 @@
A94FB7EF1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
4536383A2508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */,
A9C96DD31DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */,
A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.m in Sources */,
A9E53DEA2100B197002781DD /* CAMetalLayer+MoltenVK.mm in Sources */,
A9096E5F1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */,
A99C90F1229455B300A061DA /* MVKCmdDebug.mm in Sources */,
);
@ -1844,7 +1854,7 @@
DCFD7F3A2A45BC6E007BBBF7 /* MVKCommandPool.mm in Sources */,
DCFD7F3B2A45BC6E007BBBF7 /* MVKCmdDraw.mm in Sources */,
DCFD7F3C2A45BC6E007BBBF7 /* MVKCommandBuffer.mm in Sources */,
DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRenderPass.mm in Sources */,
DCFD7F3D2A45BC6E007BBBF7 /* MVKCmdRendering.mm in Sources */,
DCFD7F3E2A45BC6E007BBBF7 /* MVKBuffer.mm in Sources */,
DCFD7F3F2A45BC6E007BBBF7 /* MVKEnvironment.cpp in Sources */,
DCFD7F402A45BC6E007BBBF7 /* mvk_datatypes.mm in Sources */,
@ -1873,7 +1883,7 @@
DCFD7F572A45BC6E007BBBF7 /* MVKFramebuffer.mm in Sources */,
DCFD7F582A45BC6E007BBBF7 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.m in Sources */,
DCFD7F592A45BC6E007BBBF7 /* MVKMTLBufferAllocation.mm in Sources */,
DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.m in Sources */,
DCFD7F5A2A45BC6E007BBBF7 /* CAMetalLayer+MoltenVK.mm in Sources */,
DCFD7F5B2A45BC6E007BBBF7 /* MVKCmdDispatch.mm in Sources */,
DCFD7F5C2A45BC6E007BBBF7 /* MVKCmdDebug.mm in Sources */,
);

View File

@ -45,13 +45,13 @@ extern "C" {
*/
#define MVK_VERSION_MAJOR 1
#define MVK_VERSION_MINOR 2
#define MVK_VERSION_PATCH 5
#define MVK_VERSION_PATCH 7
#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch))
#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
#define MVK_CONFIGURATION_API_VERSION 37
#define MVK_CONFIGURATION_API_VERSION 38
/** Identifies the level of logging MoltenVK should be limited to outputting. */
typedef enum MVKConfigLogLevel {
@ -138,10 +138,11 @@ typedef enum MVKConfigCompressionAlgorithm {
/** Identifies the style of activity performance logging to use. */
typedef enum MVKConfigActivityPerformanceLoggingStyle {
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF,
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */
	MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE = 3, /**< Log at the end of the VkDevice lifetime, but continue to accumulate across multiple VkDevices throughout the app process. This is useful for testing frameworks that create many VkDevices serially. */
MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF,
} MVKConfigActivityPerformanceLoggingStyle;
/**
@ -786,6 +787,8 @@ typedef struct {
/**
* Controls when MoltenVK should log activity performance events.
*
* The performanceTracking parameter must also be enabled.
*
* The value of this parameter must be changed before creating a VkDevice,
* for the change to take effect.
*
@ -916,6 +919,9 @@ typedef struct {
/**
* Maximize the concurrent executing compilation tasks.
*
* The value of this parameter must be changed before creating a VkInstance,
* for the change to take effect.
*
	 * The initial value of this parameter is set by the
* MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION
* runtime environment variable or MoltenVK compile-time build setting.
@ -923,6 +929,38 @@ typedef struct {
*/
VkBool32 shouldMaximizeConcurrentCompilation;
/**
* This parameter is ignored on Apple Silicon devices.
*
* Non-Apple GPUs can have a dynamic timestamp period, which varies over time according to GPU
* workload. Depending on how often the app samples the VkPhysicalDeviceLimits::timestampPeriod
* value using vkGetPhysicalDeviceProperties(), the app may want up-to-date, but potentially
* volatile values, or it may find average values more useful.
*
* The value of this parameter sets the alpha (A) value of a simple lowpass filter
* on the timestampPeriod value, of the form:
*
* TPout = (1 - A)TPout + (A * TPin)
*
* The alpha value can be set to a float between 0.0 and 1.0. Values of alpha closer to
* 0.0 cause the value of timestampPeriod to vary slowly over time and be less volatile,
* and values of alpha closer to 1.0 cause the value of timestampPeriod to vary quickly
* and be more volatile.
*
* Apps that query the timestampPeriod value infrequently will prefer low volatility, whereas
* apps that query frequently may prefer higher volatility, to track more recent changes.
*
* The value of this parameter can be changed at any time, and will affect subsequent queries.
*
	 * The initial value of this parameter is set by the
* MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA
* runtime environment variable or MoltenVK compile-time build setting.
* If neither is set, this parameter is set to 1.0 by default,
* indicating that the timestampPeriod will vary relatively slowly,
* with the expectation that the app is querying this value infrequently.
*/
float timestampPeriodLowPassAlpha;
} MVKConfiguration;

View File

@ -383,6 +383,9 @@ MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport);
/** Returns the Metal MTLScissorRect corresponding to the specified Vulkan VkRect2D. */
MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect);
/** Returns the Vulkan VkRect2D corresponding to the specified Metal MTLScissorRect. */
VkRect2D mvkVkRect2DFromMTLScissorRect(MTLScissorRect mtlScissorRect);
/** Returns the Metal MTLCompareFunction corresponding to the specified Vulkan VkCompareOp, */
MTLCompareFunction mvkMTLCompareFunctionFromVkCompareOp(VkCompareOp vkOp);
@ -414,13 +417,13 @@ MTLWinding mvkMTLWindingFromSpvExecutionMode(uint32_t spvMode);
MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode(uint32_t spvMode);
/**
* Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPiplineStageFlags,
* Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPipelineStageFlags2,
* taking into consideration whether the barrier is to be placed before or after the specified pipeline stages.
*/
MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, bool placeBarrierBefore);
MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore);
/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags. */
MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess);
/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags2. */
MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess);
#pragma mark -
#pragma mark Geometry conversions

View File

@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier;
*/
#define MVK_PRIVATE_API_VERSION 37
#define MVK_PRIVATE_API_VERSION 38
/** Identifies the type of rounding Metal uses for float to integer conversions in particular calculations. */
@ -151,47 +151,60 @@ typedef struct {
MTLArgumentBuffersTier argumentBuffersTier; /**< The argument buffer tier available on this device, as a Metal enumeration. */
VkBool32 needsSampleDrefLodArrayWorkaround; /**< If true, sampling from arrayed depth images with explicit LoD is broken and needs a workaround. */
VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */
VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */
VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */
} MVKPhysicalDeviceMetalFeatures;
/** MoltenVK performance of a particular type of activity. */
/**
* MoltenVK performance of a particular type of activity.
* Durations are recorded in milliseconds. Memory sizes are recorded in kilobytes.
*/
typedef struct {
uint32_t count; /**< The number of activities of this type. */
double latestDuration; /**< The latest (most recent) duration of the activity, in milliseconds. */
double averageDuration; /**< The average duration of the activity, in milliseconds. */
double minimumDuration; /**< The minimum duration of the activity, in milliseconds. */
double maximumDuration; /**< The maximum duration of the activity, in milliseconds. */
uint32_t count; /**< The number of activities of this type. */
double latest; /**< The latest (most recent) value of the activity. */
double average; /**< The average value of the activity. */
double minimum; /**< The minimum value of the activity. */
double maximum; /**< The maximum value of the activity. */
} MVKPerformanceTracker;
/** MoltenVK performance of shader compilation activities. */
typedef struct {
MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code. */
MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code. */
MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary. */
MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary. */
MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache. */
MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache. */
MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed. */
MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary. */
MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction. */
MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline. */
MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code. */
MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code, in milliseconds. */
MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code, in milliseconds. */
MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary, in milliseconds. */
MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary, in milliseconds. */
MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache, in milliseconds. */
MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache, in milliseconds. */
MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed, in milliseconds. */
MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary, in milliseconds. */
MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction, in milliseconds. */
MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline, in milliseconds. */
MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code, in milliseconds. */
} MVKShaderCompilationPerformance;
/** MoltenVK performance of pipeline cache activities. */
typedef struct {
MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream. */
MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream. */
MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream. */
MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream, in milliseconds. */
MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream, in milliseconds. */
MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream, in milliseconds. */
} MVKPipelineCachePerformance;
/** MoltenVK performance of queue activities. */
typedef struct {
MVKPerformanceTracker mtlQueueAccess; /** Create an MTLCommandQueue or access an existing cached instance. */
MVKPerformanceTracker mtlCommandBufferCompletion; /** Completion of a MTLCommandBuffer on the GPU, from commit to completion callback. */
MVKPerformanceTracker nextCAMetalDrawable; /** Retrieve next CAMetalDrawable from CAMetalLayer during presentation. */
MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */
MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */
MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. */
MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */
MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. */
MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */
MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */
MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS), in milliseconds. */
} MVKQueuePerformance;
/** MoltenVK performance of device activities. */
typedef struct {
MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated, in kilobytes. */
} MVKDevicePerformance;
/**
* MoltenVK performance. You can retrieve a copy of this structure using the vkGetPerformanceStatisticsMVK() function.
*
@ -209,6 +222,7 @@ typedef struct {
MVKShaderCompilationPerformance shaderCompilation; /** Shader compilations activities. */
MVKPipelineCachePerformance pipelineCache; /** Pipeline cache activities. */
MVKQueuePerformance queue; /** Queue activities. */
MVKDevicePerformance device; /** Device activities. */
} MVKPerformanceStatistics;

View File

@ -46,7 +46,7 @@ void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) {
MTLRegion mtlThreadgroupCount = MTLRegionMake3D(_baseGroupX, _baseGroupY, _baseGroupZ, _groupCountX, _groupCountY, _groupCountZ);
cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal
id<MTLComputeCommandEncoder> mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
auto* pipeline = (MVKComputePipeline*)cmdEncoder->_computePipelineState.getPipeline();
auto* pipeline = cmdEncoder->_computePipelineState.getComputePipeline();
if (pipeline->allowsDispatchBase()) {
if ([mtlEncoder respondsToSelector: @selector(setStageInRegion:)]) {
// We'll use the stage-input region to pass the base along to the shader.

View File

@ -37,10 +37,12 @@ class MVKCmdBindVertexBuffers : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
uint32_t startBinding,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets);
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes,
const VkDeviceSize* pStrides);
void encode(MVKCommandEncoder* cmdEncoder) override;

View File

@ -30,20 +30,23 @@
template <size_t N>
VkResult MVKCmdBindVertexBuffers<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t startBinding,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets) {
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes,
const VkDeviceSize* pStrides) {
MVKDevice* mvkDvc = cmdBuff->getDevice();
_bindings.clear(); // Clear for reuse
_bindings.reserve(bindingCount);
MVKMTLBufferBinding b;
for (uint32_t bindIdx = 0; bindIdx < bindingCount; bindIdx++) {
MVKBuffer* mvkBuffer = (MVKBuffer*)pBuffers[bindIdx];
b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(startBinding + bindIdx);
b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(firstBinding + bindIdx);
b.mtlBuffer = mvkBuffer->getMTLBuffer();
b.offset = mvkBuffer->getMTLBufferOffset() + pOffsets[bindIdx];
b.size = pSizes ? (uint32_t)pSizes[bindIdx] : 0;
b.stride = pStrides ? (uint32_t)pStrides[bindIdx] : 0;
_bindings.push_back(b);
}
@ -141,12 +144,11 @@ void MVKCmdDraw::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) {
void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
if (_vertexCount == 0 || _instanceCount == 0) {
// Nothing to do.
return;
}
if (_vertexCount == 0 || _instanceCount == 0) { return; } // Nothing to do.
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
cmdEncoder->restartMetalRenderPassIfNeeded();
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
// Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead.
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@ -169,7 +171,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
} tessParams;
uint32_t outControlPointCount = 0;
if (pipeline->isTessellationPipeline()) {
tessParams.inControlPointCount = pipeline->getInputControlPointCount();
tessParams.inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints();
outControlPointCount = pipeline->getOutputControlPointCount();
tessParams.patchCount = mvkCeilingDivide(_vertexCount, tessParams.inControlPointCount) * _instanceCount;
}
@ -296,13 +298,13 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
uint32_t instanceCount = _instanceCount * viewCount;
cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
vertexStart: _firstVertex
vertexCount: _vertexCount
instanceCount: instanceCount
baseInstance: _firstInstance];
} else {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
vertexStart: _firstVertex
vertexCount: _vertexCount
instanceCount: instanceCount];
@ -366,12 +368,11 @@ void MVKCmdDrawIndexed::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) {
void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
if (_indexCount == 0 || _instanceCount == 0) {
// Nothing to do.
return;
}
if (_indexCount == 0 || _instanceCount == 0) { return; } // Nothing to do.
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
cmdEncoder->restartMetalRenderPassIfNeeded();
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
// Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead.
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@ -398,7 +399,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
} tessParams;
uint32_t outControlPointCount = 0;
if (pipeline->isTessellationPipeline()) {
tessParams.inControlPointCount = pipeline->getInputControlPointCount();
tessParams.inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints();
outControlPointCount = pipeline->getOutputControlPointCount();
tessParams.patchCount = mvkCeilingDivide(_indexCount, tessParams.inControlPointCount) * _instanceCount;
}
@ -530,7 +531,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
uint32_t instanceCount = _instanceCount * viewCount;
cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
indexCount: _indexCount
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
@ -539,7 +540,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
baseVertex: _vertexOffset
baseInstance: _firstInstance];
} else {
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
indexCount: _indexCount
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
@ -646,7 +647,9 @@ void MVKCmdDrawIndirect::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) {
void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
cmdEncoder->restartMetalRenderPassIfNeeded();
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
// Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead.
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@ -683,7 +686,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
// encoding and execution. So we don't know how big to make the buffers.
// We must assume an arbitrarily large number of vertices may be submitted.
// But not too many, or we'll exhaust available VRAM.
inControlPointCount = pipeline->getInputControlPointCount();
inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKMaxDrawIndirectVertexCount;
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
@ -925,7 +928,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
} else {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
indirectBuffer: mtlIndBuff
indirectBufferOffset: mtlIndBuffOfst];
mtlIndBuffOfst += needsInstanceAdjustment ? sizeof(MTLDrawPrimitivesIndirectArguments) : _mtlIndirectBufferStride;
@ -987,6 +990,7 @@ VkResult MVKCmdDrawIndexedIndirect::setContent(MVKCommandBuffer* cmdBuff,
}
void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->restartMetalRenderPassIfNeeded();
encode(cmdEncoder, cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding);
}
@ -996,7 +1000,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
MVKIndexMTLBufferBinding ibb = ibbOrig;
MVKIndexMTLBufferBinding ibbTriFan = ibb;
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
MVKVertexAdjustments vtxAdjmts;
vtxAdjmts.mtlIndexType = ibb.mtlIndexType;
@ -1031,7 +1035,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
// encoding and execution. So we don't know how big to make the buffers.
// We must assume an arbitrarily large number of vertices may be submitted.
// But not too many, or we'll exhaust available VRAM.
inControlPointCount = pipeline->getInputControlPointCount();
inControlPointCount = cmdEncoder->_renderingState.getPatchControlPoints();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKMaxDrawIndirectVertexCount;
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
@ -1312,7 +1316,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
} else {
cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _directCmdFirstInstance);
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
indexBufferOffset: ibb.offset

View File

@ -30,6 +30,34 @@ class MVKDescriptorSet;
class MVKDescriptorUpdateTemplate;
#pragma mark -
#pragma mark MVKCmdExecuteCommands
/**
* Vulkan command to execute secondary command buffers.
* Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
*/
template <size_t N>
class MVKCmdExecuteCommands : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
uint32_t commandBuffersCount,
const VkCommandBuffer* pCommandBuffers);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
MVKSmallVector<MVKCommandBuffer*, N> _secondaryCommandBuffers;
};
// Concrete template class implementations.
typedef MVKCmdExecuteCommands<1> MVKCmdExecuteCommands1;
typedef MVKCmdExecuteCommands<16> MVKCmdExecuteCommandsMulti;
#pragma mark -
#pragma mark MVKCmdPipelineBarrier
@ -41,6 +69,9 @@ template <size_t N>
class MVKCmdPipelineBarrier : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
const VkDependencyInfo* pDependencyInfo);
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
@ -59,8 +90,6 @@ protected:
bool coversTextures();
MVKSmallVector<MVKPipelineBarrier, N> _barriers;
VkPipelineStageFlags _srcStageMask;
VkPipelineStageFlags _dstStageMask;
VkDependencyFlags _dependencyFlags;
};
@ -281,34 +310,26 @@ protected:
#pragma mark -
#pragma mark MVKCmdSetResetEvent
#pragma mark MVKCmdSetEvent
/** Abstract Vulkan command to set or reset an event. */
class MVKCmdSetResetEvent : public MVKCommand {
/** Vulkan command to set an event. */
class MVKCmdSetEvent : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkEvent event,
const VkDependencyInfo* pDependencyInfo);
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkEvent event,
VkPipelineStageFlags stageMask);
protected:
MVKEvent* _mvkEvent;
};
#pragma mark -
#pragma mark MVKCmdSetEvent
/** Vulkan command to set an event. */
class MVKCmdSetEvent : public MVKCmdSetResetEvent {
public:
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
MVKEvent* _mvkEvent;
};
@ -316,14 +337,19 @@ protected:
#pragma mark MVKCmdResetEvent
/** Vulkan command to reset an event. */
class MVKCmdResetEvent : public MVKCmdSetResetEvent {
class MVKCmdResetEvent : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkEvent event,
VkPipelineStageFlags2 stageMask);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
MVKEvent* _mvkEvent;
};
@ -339,6 +365,11 @@ template <size_t N>
class MVKCmdWaitEvents : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
uint32_t eventCount,
const VkEvent* pEvents,
const VkDependencyInfo* pDependencyInfos);
VkResult setContent(MVKCommandBuffer* cmdBuff,
uint32_t eventCount,
const VkEvent* pEvents,

View File

@ -26,9 +26,59 @@
#include "mvk_datatypes.hpp"
#pragma mark -
#pragma mark MVKCmdExecuteCommands
template <size_t N>
VkResult MVKCmdExecuteCommands<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t commandBuffersCount,
const VkCommandBuffer* pCommandBuffers) {
// Add clear values
_secondaryCommandBuffers.clear(); // Clear for reuse
_secondaryCommandBuffers.reserve(commandBuffersCount);
for (uint32_t cbIdx = 0; cbIdx < commandBuffersCount; cbIdx++) {
_secondaryCommandBuffers.push_back(MVKCommandBuffer::getMVKCommandBuffer(pCommandBuffers[cbIdx]));
}
cmdBuff->recordExecuteCommands(_secondaryCommandBuffers.contents());
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdExecuteCommands<N>::encode(MVKCommandEncoder* cmdEncoder) {
for (auto& cb : _secondaryCommandBuffers) { cmdEncoder->encodeSecondary(cb); }
}
template class MVKCmdExecuteCommands<1>;
template class MVKCmdExecuteCommands<16>;
#pragma mark -
#pragma mark MVKCmdPipelineBarrier
template <size_t N>
VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
const VkDependencyInfo* pDependencyInfo) {
_dependencyFlags = pDependencyInfo->dependencyFlags;
_barriers.clear(); // Clear for reuse
_barriers.reserve(pDependencyInfo->memoryBarrierCount +
pDependencyInfo->bufferMemoryBarrierCount +
pDependencyInfo->imageMemoryBarrierCount);
for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) {
_barriers.emplace_back(pDependencyInfo->pMemoryBarriers[i]);
}
for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) {
_barriers.emplace_back(pDependencyInfo->pBufferMemoryBarriers[i]);
}
for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) {
_barriers.emplace_back(pDependencyInfo->pImageMemoryBarriers[i]);
}
return VK_SUCCESS;
}
template <size_t N>
VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
VkPipelineStageFlags srcStageMask,
@ -40,21 +90,19 @@ VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers) {
_srcStageMask = srcStageMask;
_dstStageMask = dstStageMask;
_dependencyFlags = dependencyFlags;
_barriers.clear(); // Clear for reuse
_barriers.reserve(memoryBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount);
for (uint32_t i = 0; i < memoryBarrierCount; i++) {
_barriers.emplace_back(pMemoryBarriers[i]);
_barriers.emplace_back(pMemoryBarriers[i], srcStageMask, dstStageMask);
}
for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
_barriers.emplace_back(pBufferMemoryBarriers[i]);
_barriers.emplace_back(pBufferMemoryBarriers[i], srcStageMask, dstStageMask);
}
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
_barriers.emplace_back(pImageMemoryBarriers[i]);
_barriers.emplace_back(pImageMemoryBarriers[i], srcStageMask, dstStageMask);
}
return VK_SUCCESS;
@ -67,13 +115,9 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
// Calls below invoke MTLBlitCommandEncoder so must apply this first.
// Check if pipeline barriers are available and we are in a renderpass.
if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) {
MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_srcStageMask, false);
MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_dstStageMask, true);
id<MTLResource> resources[_barriers.size()];
uint32_t rezCnt = 0;
for (auto& b : _barriers) {
MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.srcStageMask, false);
MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.dstStageMask, true);
switch (b.type) {
case MVKPipelineBarrier::Memory: {
MTLBarrierScope scope = (mvkMTLBarrierScopeFromVkAccessFlags(b.srcAccessMask) |
@ -84,27 +128,30 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
break;
}
case MVKPipelineBarrier::Buffer:
resources[rezCnt++] = b.mvkBuffer->getMTLBuffer();
case MVKPipelineBarrier::Buffer: {
id<MTLResource> mtlRez = b.mvkBuffer->getMTLBuffer();
[cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: &mtlRez
count: 1
afterStages: srcStages
beforeStages: dstStages];
break;
case MVKPipelineBarrier::Image:
for (uint8_t planeIndex = 0; planeIndex < b.mvkImage->getPlaneCount(); planeIndex++) {
resources[rezCnt++] = b.mvkImage->getMTLTexture(planeIndex);
}
}
case MVKPipelineBarrier::Image: {
uint32_t plnCnt = b.mvkImage->getPlaneCount();
id<MTLResource> mtlRezs[plnCnt];
for (uint8_t plnIdx = 0; plnIdx < plnCnt; plnIdx++) {
mtlRezs[plnIdx] = b.mvkImage->getMTLTexture(plnIdx);
}
[cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: mtlRezs
count: plnCnt
afterStages: srcStages
beforeStages: dstStages];
break;
}
default:
break;
}
}
if (rezCnt) {
[cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: resources
count: rezCnt
afterStages: srcStages
beforeStages: dstStages];
}
} else if (cmdEncoder->getDevice()->_pMetalFeatures->textureBarriers) {
#if !MVK_MACCAT
if (coversTextures()) { [cmdEncoder->_mtlRenderEncoder textureBarrier]; }
@ -138,15 +185,15 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
for (auto& b : _barriers) {
switch (b.type) {
case MVKPipelineBarrier::Memory:
mvkDvc->applyMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse);
mvkDvc->applyMemoryBarrier(b, cmdEncoder, cmdUse);
break;
case MVKPipelineBarrier::Buffer:
b.mvkBuffer->applyBufferMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse);
b.mvkBuffer->applyBufferMemoryBarrier(b, cmdEncoder, cmdUse);
break;
case MVKPipelineBarrier::Image:
b.mvkImage->applyImageMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse);
b.mvkImage->applyImageMemoryBarrier(b, cmdEncoder, cmdUse);
break;
default:
@ -493,19 +540,23 @@ MVKCmdPushDescriptorSetWithTemplate::~MVKCmdPushDescriptorSetWithTemplate() {
#pragma mark -
#pragma mark MVKCmdSetResetEvent
#pragma mark MVKCmdSetEvent
VkResult MVKCmdSetResetEvent::setContent(MVKCommandBuffer* cmdBuff,
VkEvent event,
VkPipelineStageFlags stageMask) {
VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff,
VkEvent event,
VkPipelineStageFlags stageMask) {
_mvkEvent = (MVKEvent*)event;
return VK_SUCCESS;
}
VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff,
VkEvent event,
const VkDependencyInfo* pDependencyInfo) {
_mvkEvent = (MVKEvent*)event;
#pragma mark -
#pragma mark MVKCmdSetEvent
return VK_SUCCESS;
}
void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->signalEvent(_mvkEvent, true);
@ -515,6 +566,14 @@ void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) {
#pragma mark -
#pragma mark MVKCmdResetEvent
VkResult MVKCmdResetEvent::setContent(MVKCommandBuffer* cmdBuff,
VkEvent event,
VkPipelineStageFlags2 stageMask) {
_mvkEvent = (MVKEvent*)event;
return VK_SUCCESS;
}
void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->signalEvent(_mvkEvent, false);
}
@ -523,6 +582,20 @@ void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) {
#pragma mark -
#pragma mark MVKCmdWaitEvents
template <size_t N>
VkResult MVKCmdWaitEvents<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t eventCount,
const VkEvent* pEvents,
const VkDependencyInfo* pDependencyInfos) {
_mvkEvents.clear(); // Clear for reuse
_mvkEvents.reserve(eventCount);
for (uint32_t i = 0; i < eventCount; i++) {
_mvkEvents.push_back((MVKEvent*)pEvents[i]);
}
return VK_SUCCESS;
}
template <size_t N>
VkResult MVKCmdWaitEvents<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t eventCount,

View File

@ -85,7 +85,7 @@ class MVKCmdWriteTimestamp : public MVKCmdQuery {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkPipelineStageFlagBits pipelineStage,
VkPipelineStageFlags2 stage,
VkQueryPool queryPool,
uint32_t query);
@ -94,7 +94,7 @@ public:
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkPipelineStageFlagBits _pipelineStage;
VkPipelineStageFlags2 _stage;
};

View File

@ -77,13 +77,13 @@ void MVKCmdEndQuery::encode(MVKCommandEncoder* cmdEncoder) {
#pragma mark MVKCmdWriteTimestamp
VkResult MVKCmdWriteTimestamp::setContent(MVKCommandBuffer* cmdBuff,
VkPipelineStageFlagBits pipelineStage,
VkPipelineStageFlags2 stage,
VkQueryPool queryPool,
uint32_t query) {
VkResult rslt = MVKCmdQuery::setContent(cmdBuff, queryPool, query);
_pipelineStage = pipelineStage;
_stage = stage;
cmdBuff->recordTimestampCommand();

View File

@ -1,5 +1,5 @@
/*
* MVKCmdRenderPass.h
* MVKCmdRendering.h
*
* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com)
*
@ -46,7 +46,6 @@ public:
protected:
MVKSmallVector<MVKSmallVector<MTLSamplePosition>> _subpassSamplePositions;
MVKRenderPass* _renderPass;
MVKFramebuffer* _framebuffer;
VkRect2D _renderArea;
@ -203,37 +202,28 @@ public:
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
MVKSmallVector<MTLSamplePosition, 8> _samplePositions;
MVKSmallVector<VkSampleLocationEXT, kMVKMaxSampleCount> _sampleLocations;
};
#pragma mark -
#pragma mark MVKCmdExecuteCommands
#pragma mark MVKCmdSetSampleLocationsEnable
/**
* Vulkan command to execute secondary command buffers.
* Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
*/
template <size_t N>
class MVKCmdExecuteCommands : public MVKCommand {
/** Vulkan command to dynamically enable custom sample locations. */
class MVKCmdSetSampleLocationsEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
uint32_t commandBuffersCount,
const VkCommandBuffer* pCommandBuffers);
VkBool32 sampleLocationsEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
MVKSmallVector<MVKCommandBuffer*, N> _secondaryCommandBuffers;
VkBool32 _sampleLocationsEnable;
};
// Concrete template class implementations.
typedef MVKCmdExecuteCommands<1> MVKCmdExecuteCommands1;
typedef MVKCmdExecuteCommands<16> MVKCmdExecuteCommandsMulti;
#pragma mark -
#pragma mark MVKCmdSetViewport
@ -295,25 +285,6 @@ typedef MVKCmdSetScissor<1> MVKCmdSetScissor1;
typedef MVKCmdSetScissor<kMVKMaxViewportScissorCount> MVKCmdSetScissorMulti;
#pragma mark -
#pragma mark MVKCmdSetLineWidth
/** Vulkan command to set the line width. */
class MVKCmdSetLineWidth : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
float lineWidth);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
float _lineWidth;
};
#pragma mark -
#pragma mark MVKCmdSetDepthBias
@ -337,6 +308,25 @@ protected:
};
#pragma mark -
#pragma mark MVKCmdSetDepthBiasEnable
/** Vulkan command to dynamically enable or disable depth bias. */
class MVKCmdSetDepthBiasEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkBool32 depthBiasEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkBool32 _depthBiasEnable;
};
#pragma mark -
#pragma mark MVKCmdSetBlendConstants
@ -352,31 +342,129 @@ public:
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
float _red;
float _green;
float _blue;
float _alpha;
float _blendConstants[4] = {};
};
#pragma mark -
#pragma mark MVKCmdSetDepthBounds
#pragma mark MVKCmdSetDepthTestEnable
/** Vulkan command to set depth bounds. */
class MVKCmdSetDepthBounds : public MVKCommand {
/** Vulkan command to dynamically enable depth testing. */
class MVKCmdSetDepthTestEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
float minDepthBounds,
float maxDepthBounds);
VkBool32 depthTestEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
float _minDepthBounds;
float _maxDepthBounds;
VkBool32 _depthTestEnable;
};
#pragma mark -
#pragma mark MVKCmdSetDepthWriteEnable
/** Vulkan command to dynamically enable depth writing. */
class MVKCmdSetDepthWriteEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkBool32 depthWriteEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkBool32 _depthWriteEnable;
};
#pragma mark -
#pragma mark MVKCmdSetDepthClipEnable
/** Vulkan command to dynamically enable depth clip. */
class MVKCmdSetDepthClipEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkBool32 depthClipEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkBool32 _depthClipEnable;
};
#pragma mark -
#pragma mark MVKCmdSetDepthCompareOp
/** Vulkan command to dynamically set the depth compare operation. */
class MVKCmdSetDepthCompareOp : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkCompareOp depthCompareOp);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkCompareOp _depthCompareOp;
};
#pragma mark -
#pragma mark MVKCmdSetStencilTestEnable
/** Vulkan command to dynamically enable stencil testing. */
class MVKCmdSetStencilTestEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkBool32 stencilTestEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkBool32 _stencilTestEnable;
};
#pragma mark -
#pragma mark MVKCmdSetStencilOp
/** Vulkan command to dynamically set the stencil operations. */
class MVKCmdSetStencilOp : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkStencilFaceFlags faceMask,
VkStencilOp failOp,
VkStencilOp passOp,
VkStencilOp depthFailOp,
VkCompareOp compareOp);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkStencilFaceFlags _faceMask;
VkStencilOp _failOp;
VkStencilOp _passOp;
VkStencilOp _depthFailOp;
VkCompareOp _compareOp;
};
@ -442,3 +530,136 @@ protected:
uint32_t _stencilReference;
};
#pragma mark -
#pragma mark MVKCmdSetCullMode
/** Vulkan command to dynamically set the cull mode. */
class MVKCmdSetCullMode : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkCullModeFlags cullMode);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkCullModeFlags _cullMode;
};
#pragma mark -
#pragma mark MVKCmdSetFrontFace
/** Vulkan command to dynamically set the front facing winding order. */
class MVKCmdSetFrontFace : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkFrontFace frontFace);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkFrontFace _frontFace;
};
#pragma mark -
#pragma mark MVKCmdSetPatchControlPoints
/** Vulkan command to dynamically set the number of patch control points. */
class MVKCmdSetPatchControlPoints : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
uint32_t patchControlPoints);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
uint32_t _patchControlPoints;
};
#pragma mark -
#pragma mark MVKCmdSetPolygonMode
/** Vulkan command to dynamically set the polygon mode. */
class MVKCmdSetPolygonMode : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkPolygonMode polygonMode);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkPolygonMode _polygonMode;
};
#pragma mark -
#pragma mark MVKCmdSetPrimitiveTopology
/** Vulkan command to dynamically set the primitive topology. */
class MVKCmdSetPrimitiveTopology : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkPrimitiveTopology primitiveTopology);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkPrimitiveTopology _primitiveTopology;
};
#pragma mark -
#pragma mark MVKCmdSetPrimitiveRestartEnable
/** Vulkan command to dynamically enable or disable primitive restart functionality. */
class MVKCmdSetPrimitiveRestartEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkBool32 primitiveRestartEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkBool32 _primitiveRestartEnable;
};
#pragma mark -
#pragma mark MVKCmdSetRasterizerDiscardEnable
/** Vulkan command to dynamically enable or disable rasterization. */
class MVKCmdSetRasterizerDiscardEnable : public MVKCommand {
public:
VkResult setContent(MVKCommandBuffer* cmdBuff,
VkBool32 rasterizerDiscardEnable);
void encode(MVKCommandEncoder* cmdEncoder) override;
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
VkBool32 _rasterizerDiscardEnable;
};

View File

@ -1,5 +1,5 @@
/*
* MVKCmdRenderPass.mm
* MVKCmdRendering.mm
*
* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com)
*
@ -16,7 +16,7 @@
* limitations under the License.
*/
#include "MVKCmdRenderPass.h"
#include "MVKCmdRendering.h"
#include "MVKCommandBuffer.h"
#include "MVKCommandPool.h"
#include "MVKFramebuffer.h"
@ -36,30 +36,6 @@ VkResult MVKCmdBeginRenderPassBase::setContent(MVKCommandBuffer* cmdBuff,
_renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass;
_framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer;
_renderArea = pRenderPassBegin->renderArea;
_subpassSamplePositions.clear();
for (const auto* next = (VkBaseInStructure*)pRenderPassBegin->pNext; next; next = next->pNext) {
switch (next->sType) {
case VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT: {
// Build an array of arrays, one array of sample positions for each subpass index.
// For subpasses not included in VkRenderPassSampleLocationsBeginInfoEXT, the resulting array of samples will be empty.
_subpassSamplePositions.resize(_renderPass->getSubpassCount());
auto* pRPSampLocnsInfo = (VkRenderPassSampleLocationsBeginInfoEXT*)next;
for (uint32_t spSLIdx = 0; spSLIdx < pRPSampLocnsInfo->postSubpassSampleLocationsCount; spSLIdx++) {
auto& spsl = pRPSampLocnsInfo->pPostSubpassSampleLocations[spSLIdx];
uint32_t spIdx = spsl.subpassIndex;
auto& spSampPosns = _subpassSamplePositions[spIdx];
for (uint32_t slIdx = 0; slIdx < spsl.sampleLocationsInfo.sampleLocationsCount; slIdx++) {
auto& sl = spsl.sampleLocationsInfo.pSampleLocations[slIdx];
spSampPosns.push_back(MTLSamplePositionMake(sl.x, sl.y));
}
}
break;
}
default:
break;
}
}
cmdBuff->_currentSubpassInfo.beginRenderpass(_renderPass);
@ -86,15 +62,6 @@ VkResult MVKCmdBeginRenderPass<N_CV, N_A>::setContent(MVKCommandBuffer* cmdBuff,
template <size_t N_CV, size_t N_A>
void MVKCmdBeginRenderPass<N_CV, N_A>::encode(MVKCommandEncoder* cmdEncoder) {
// Convert the sample position array of arrays to an array of array-references,
// so that it can be passed to the command encoder.
size_t spSPCnt = _subpassSamplePositions.size();
MVKArrayRef<MTLSamplePosition> spSPRefs[spSPCnt];
for (uint32_t spSPIdx = 0; spSPIdx < spSPCnt; spSPIdx++) {
spSPRefs[spSPIdx] = _subpassSamplePositions[spSPIdx].contents();
}
cmdEncoder->beginRenderpass(this,
_contents,
_renderPass,
@ -102,7 +69,7 @@ void MVKCmdBeginRenderPass<N_CV, N_A>::encode(MVKCommandEncoder* cmdEncoder) {
_renderArea,
_clearValues.contents(),
_attachments.contents(),
MVKArrayRef(spSPRefs, spSPCnt));
kMVKCommandUseBeginRenderPass);
}
template class MVKCmdBeginRenderPass<1, 0>;
@ -217,46 +184,31 @@ void MVKCmdEndRendering::encode(MVKCommandEncoder* cmdEncoder) {
VkResult MVKCmdSetSampleLocations::setContent(MVKCommandBuffer* cmdBuff,
const VkSampleLocationsInfoEXT* pSampleLocationsInfo) {
_sampleLocations.clear();
for (uint32_t slIdx = 0; slIdx < pSampleLocationsInfo->sampleLocationsCount; slIdx++) {
auto& sl = pSampleLocationsInfo->pSampleLocations[slIdx];
_samplePositions.push_back(MTLSamplePositionMake(sl.x, sl.y));
_sampleLocations.push_back(pSampleLocationsInfo->pSampleLocations[slIdx]);
}
return VK_SUCCESS;
}
void MVKCmdSetSampleLocations::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->setDynamicSamplePositions(_samplePositions.contents());
cmdEncoder->_renderingState.setSampleLocations(_sampleLocations.contents(), true);
}
#pragma mark -
#pragma mark MVKCmdExecuteCommands
template <size_t N>
VkResult MVKCmdExecuteCommands<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t commandBuffersCount,
const VkCommandBuffer* pCommandBuffers) {
// Add clear values
_secondaryCommandBuffers.clear(); // Clear for reuse
_secondaryCommandBuffers.reserve(commandBuffersCount);
for (uint32_t cbIdx = 0; cbIdx < commandBuffersCount; cbIdx++) {
_secondaryCommandBuffers.push_back(MVKCommandBuffer::getMVKCommandBuffer(pCommandBuffers[cbIdx]));
}
cmdBuff->recordExecuteCommands(_secondaryCommandBuffers.contents());
#pragma mark MVKCmdSetSampleLocationsEnable
VkResult MVKCmdSetSampleLocationsEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 sampleLocationsEnable) {
_sampleLocationsEnable = sampleLocationsEnable;
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdExecuteCommands<N>::encode(MVKCommandEncoder* cmdEncoder) {
for (auto& cb : _secondaryCommandBuffers) { cmdEncoder->encodeSecondary(cb); }
void MVKCmdSetSampleLocationsEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setSampleLocationsEnable(_sampleLocationsEnable, true);
}
template class MVKCmdExecuteCommands<1>;
template class MVKCmdExecuteCommands<16>;
#pragma mark -
#pragma mark MVKCmdSetViewport
@ -267,7 +219,7 @@ VkResult MVKCmdSetViewport<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t viewportCount,
const VkViewport* pViewports) {
_firstViewport = firstViewport;
_viewports.clear(); // Clear for reuse
_viewports.clear();
_viewports.reserve(viewportCount);
for (uint32_t vpIdx = 0; vpIdx < viewportCount; vpIdx++) {
_viewports.push_back(pViewports[vpIdx]);
@ -278,7 +230,7 @@ VkResult MVKCmdSetViewport<N>::setContent(MVKCommandBuffer* cmdBuff,
template <size_t N>
void MVKCmdSetViewport<N>::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_viewportState.setViewports(_viewports.contents(), _firstViewport, true);
cmdEncoder->_renderingState.setViewports(_viewports.contents(), _firstViewport, true);
}
template class MVKCmdSetViewport<1>;
@ -294,7 +246,7 @@ VkResult MVKCmdSetScissor<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t scissorCount,
const VkRect2D* pScissors) {
_firstScissor = firstScissor;
_scissors.clear(); // Clear for reuse
_scissors.clear();
_scissors.reserve(scissorCount);
for (uint32_t sIdx = 0; sIdx < scissorCount; sIdx++) {
_scissors.push_back(pScissors[sIdx]);
@ -305,31 +257,13 @@ VkResult MVKCmdSetScissor<N>::setContent(MVKCommandBuffer* cmdBuff,
template <size_t N>
void MVKCmdSetScissor<N>::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_scissorState.setScissors(_scissors.contents(), _firstScissor, true);
cmdEncoder->_renderingState.setScissors(_scissors.contents(), _firstScissor, true);
}
template class MVKCmdSetScissor<1>;
template class MVKCmdSetScissor<kMVKMaxViewportScissorCount>;
#pragma mark -
#pragma mark MVKCmdSetLineWidth
VkResult MVKCmdSetLineWidth::setContent(MVKCommandBuffer* cmdBuff,
float lineWidth) {
_lineWidth = lineWidth;
// Validate
if (_lineWidth != 1.0 || cmdBuff->getDevice()->_enabledFeatures.wideLines) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetLineWidth(): The current device does not support wide lines.");
}
return VK_SUCCESS;
}
void MVKCmdSetLineWidth::encode(MVKCommandEncoder* cmdEncoder) {}
#pragma mark -
#pragma mark MVKCmdSetDepthBias
@ -345,9 +279,23 @@ VkResult MVKCmdSetDepthBias::setContent(MVKCommandBuffer* cmdBuff,
}
void MVKCmdSetDepthBias::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthBiasState.setDepthBias(_depthBiasConstantFactor,
_depthBiasSlopeFactor,
_depthBiasClamp);
cmdEncoder->_renderingState.setDepthBias(_depthBiasConstantFactor,
_depthBiasSlopeFactor,
_depthBiasClamp);
}
#pragma mark -
#pragma mark MVKCmdSetDepthBiasEnable
VkResult MVKCmdSetDepthBiasEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 depthBiasEnable) {
_depthBiasEnable = depthBiasEnable;
return VK_SUCCESS;
}
void MVKCmdSetDepthBiasEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setDepthBiasEnable(_depthBiasEnable);
}
@ -356,37 +304,105 @@ void MVKCmdSetDepthBias::encode(MVKCommandEncoder* cmdEncoder) {
VkResult MVKCmdSetBlendConstants::setContent(MVKCommandBuffer* cmdBuff,
const float blendConst[4]) {
_red = blendConst[0];
_green = blendConst[1];
_blue = blendConst[2];
_alpha = blendConst[3];
mvkCopy(_blendConstants, blendConst, 4);
return VK_SUCCESS;
}
void MVKCmdSetBlendConstants::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_blendColorState.setBlendColor(_red, _green, _blue, _alpha, true);
cmdEncoder->_renderingState.setBlendConstants(_blendConstants, true);
}
#pragma mark -
#pragma mark MVKCmdSetDepthBounds
VkResult MVKCmdSetDepthBounds::setContent(MVKCommandBuffer* cmdBuff,
float minDepthBounds,
float maxDepthBounds) {
_minDepthBounds = minDepthBounds;
_maxDepthBounds = maxDepthBounds;
// Validate
if (cmdBuff->getDevice()->_enabledFeatures.depthBounds) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdSetDepthBounds(): The current device does not support setting depth bounds.");
}
#pragma mark MVKCmdSetDepthTestEnable
VkResult MVKCmdSetDepthTestEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 depthTestEnable) {
_depthTestEnable = depthTestEnable;
return VK_SUCCESS;
}
void MVKCmdSetDepthBounds::encode(MVKCommandEncoder* cmdEncoder) {}
void MVKCmdSetDepthTestEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthStencilState.setDepthTestEnable(_depthTestEnable);
}
#pragma mark -
#pragma mark MVKCmdSetDepthWriteEnable
VkResult MVKCmdSetDepthWriteEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 depthWriteEnable) {
_depthWriteEnable = depthWriteEnable;
return VK_SUCCESS;
}
void MVKCmdSetDepthWriteEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthStencilState.setDepthWriteEnable(_depthWriteEnable);
}
#pragma mark -
#pragma mark MVKCmdSetDepthClipEnable
VkResult MVKCmdSetDepthClipEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 depthClipEnable) {
_depthClipEnable = depthClipEnable;
return VK_SUCCESS;
}
void MVKCmdSetDepthClipEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setDepthClipEnable(_depthClipEnable, true);
}
#pragma mark -
#pragma mark MVKCmdSetDepthCompareOp
VkResult MVKCmdSetDepthCompareOp::setContent(MVKCommandBuffer* cmdBuff,
VkCompareOp depthCompareOp) {
_depthCompareOp = depthCompareOp;
return VK_SUCCESS;
}
void MVKCmdSetDepthCompareOp::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthStencilState.setDepthCompareOp(_depthCompareOp);
}
#pragma mark -
#pragma mark MVKCmdSetStencilTestEnable
VkResult MVKCmdSetStencilTestEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 stencilTestEnable) {
_stencilTestEnable = stencilTestEnable;
return VK_SUCCESS;
}
void MVKCmdSetStencilTestEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthStencilState.setStencilTestEnable(_stencilTestEnable);
}
#pragma mark -
#pragma mark MVKCmdSetStencilOp
VkResult MVKCmdSetStencilOp::setContent(MVKCommandBuffer* cmdBuff,
VkStencilFaceFlags faceMask,
VkStencilOp failOp,
VkStencilOp passOp,
VkStencilOp depthFailOp,
VkCompareOp compareOp) {
_faceMask = faceMask;
_failOp = failOp;
_passOp = passOp;
_depthFailOp = depthFailOp;
_compareOp = compareOp;
return VK_SUCCESS;
}
void MVKCmdSetStencilOp::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthStencilState.setStencilOp(_faceMask, _failOp, _passOp, _depthFailOp, _compareOp);
}
#pragma mark -
@ -436,6 +452,103 @@ VkResult MVKCmdSetStencilReference::setContent(MVKCommandBuffer* cmdBuff,
}
void MVKCmdSetStencilReference::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_stencilReferenceValueState.setReferenceValues(_faceMask, _stencilReference);
cmdEncoder->_renderingState.setStencilReferenceValues(_faceMask, _stencilReference);
}
#pragma mark -
#pragma mark MVKCmdSetCullMode
VkResult MVKCmdSetCullMode::setContent(MVKCommandBuffer* cmdBuff,
VkCullModeFlags cullMode) {
_cullMode = cullMode;
return VK_SUCCESS;
}
void MVKCmdSetCullMode::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setCullMode(_cullMode, true);
}
#pragma mark -
#pragma mark MVKCmdSetFrontFace
VkResult MVKCmdSetFrontFace::setContent(MVKCommandBuffer* cmdBuff,
VkFrontFace frontFace) {
_frontFace = frontFace;
return VK_SUCCESS;
}
void MVKCmdSetFrontFace::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setFrontFace(_frontFace, true);
}
#pragma mark -
#pragma mark MVKCmdSetPatchControlPoints
VkResult MVKCmdSetPatchControlPoints::setContent(MVKCommandBuffer* cmdBuff,
uint32_t patchControlPoints) {
_patchControlPoints = patchControlPoints;
return VK_SUCCESS;
}
void MVKCmdSetPatchControlPoints::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setPatchControlPoints(_patchControlPoints, true);
}
#pragma mark -
#pragma mark MVKCmdSetPolygonMode
VkResult MVKCmdSetPolygonMode::setContent(MVKCommandBuffer* cmdBuff,
VkPolygonMode polygonMode) {
_polygonMode = polygonMode;
return VK_SUCCESS;
}
void MVKCmdSetPolygonMode::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setPolygonMode(_polygonMode, true);
}
#pragma mark -
#pragma mark MVKCmdSetPrimitiveTopology
VkResult MVKCmdSetPrimitiveTopology::setContent(MVKCommandBuffer* cmdBuff,
VkPrimitiveTopology primitiveTopology) {
_primitiveTopology = primitiveTopology;
return VK_SUCCESS;
}
void MVKCmdSetPrimitiveTopology::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setPrimitiveTopology(_primitiveTopology, true);
}
#pragma mark -
#pragma mark MVKCmdSetPrimitiveRestartEnable
VkResult MVKCmdSetPrimitiveRestartEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 primitiveRestartEnable) {
_primitiveRestartEnable = primitiveRestartEnable;
return VK_SUCCESS;
}
void MVKCmdSetPrimitiveRestartEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setPrimitiveRestartEnable(_primitiveRestartEnable, true);
}
#pragma mark -
#pragma mark MVKCmdSetRasterizerDiscardEnable
VkResult MVKCmdSetRasterizerDiscardEnable::setContent(MVKCommandBuffer* cmdBuff,
VkBool32 rasterizerDiscardEnable) {
_rasterizerDiscardEnable = rasterizerDiscardEnable;
return VK_SUCCESS;
}
void MVKCmdSetRasterizerDiscardEnable::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setRasterizerDiscardEnable(_rasterizerDiscardEnable, true);
}

View File

@ -504,11 +504,12 @@ void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com
if (cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle &&
_srcImage->needsSwizzle()) {
// Use a view that has a swizzle on it.
srcMTLTex = [[srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat
textureType:srcMTLTex.textureType
levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount)
slices:NSMakeRange(0, srcMTLTex.arrayLength)
swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())] autorelease];
srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat
textureType:srcMTLTex.textureType
levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount)
slices:NSMakeRange(0, srcMTLTex.arrayLength)
swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())];
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) { [srcMTLTex release]; }];
}
cmdEncoder->endCurrentMetalEncoding();
@ -551,9 +552,7 @@ void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com
textureType: MTLTextureType2DArray
levels: NSMakeRange(0, srcMTLTex.mipmapLevelCount)
slices: NSMakeRange(0, srcMTLTex.arrayLength)];
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
[srcMTLTex release];
}];
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) { [srcMTLTex release]; }];
}
blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat(dstPlaneIndex);
blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter);
@ -655,9 +654,7 @@ void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com
#endif
}
id<MTLTexture> stencilMTLTex = [srcMTLTex newTextureViewWithPixelFormat: stencilFmt];
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
[stencilMTLTex release];
}];
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) { [stencilMTLTex release]; }];
[mtlRendEnc setFragmentTexture: stencilMTLTex atIndex: 1];
} else {
[mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 1];
@ -1509,11 +1506,10 @@ void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
// Return to the previous rendering state on the next render activity
cmdEncoder->_graphicsPipelineState.markDirty();
cmdEncoder->_graphicsResourcesState.markDirty();
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->_stencilReferenceValueState.markDirty();
cmdEncoder->_depthBiasState.markDirty();
cmdEncoder->_viewportState.markDirty();
cmdEncoder->_scissorState.markDirty();
cmdEncoder->_renderingState.markDirty();
cmdEncoder->_occlusionQueryState.markDirty();
}
template <size_t N>

View File

@ -143,7 +143,7 @@ public:
bool _needsVisibilityResultMTLBuffer;
/** Called when a MVKCmdExecuteCommands is added to this command buffer. */
void recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers);
void recordExecuteCommands(MVKArrayRef<MVKCommandBuffer*const> secondaryCommandBuffers);
/** Called when a timestamp command is added. */
void recordTimestampCommand();
@ -182,7 +182,6 @@ protected:
friend class MVKCommandEncoder;
friend class MVKCommandPool;
MVKBaseObject* getBaseObject() override { return this; };
void propagateDebugName() override {}
void init(const VkCommandBufferAllocateInfo* pAllocateInfo);
bool canExecute();
@ -251,21 +250,23 @@ public:
const VkRect2D& renderArea,
MVKArrayRef<VkClearValue> clearValues,
MVKArrayRef<MVKImageView*> attachments,
MVKArrayRef<MVKArrayRef<MTLSamplePosition>> subpassSamplePositions,
MVKCommandUse cmdUse = kMVKCommandUseBeginRenderPass);
MVKCommandUse cmdUse);
/** Begins the next render subpass. */
void beginNextSubpass(MVKCommand* subpassCmd, VkSubpassContents renderpassContents);
/** Sets the dynamic custom sample positions to use when rendering. */
void setDynamicSamplePositions(MVKArrayRef<MTLSamplePosition> dynamicSamplePositions);
/** Begins dynamic rendering. */
void beginRendering(MVKCommand* rendCmd, const VkRenderingInfo* pRenderingInfo);
/** Begins a Metal render pass for the current render subpass. */
void beginMetalRenderPass(MVKCommandUse cmdUse);
/**
* If a Metal render pass has started, and it needs to be restarted,
* then end the existing Metal render pass, and start a new one.
*/
void restartMetalRenderPassIfNeeded();
/** If a render encoder is active, encodes store actions for all attachments to it. */
void encodeStoreActions(bool storeOverride = false);
@ -301,14 +302,11 @@ public:
/** Encodes an operation to signal an event to a status. */
void signalEvent(MVKEvent* mvkEvent, bool status);
/**
* If a pipeline is currently bound, returns whether the current pipeline permits dynamic
* setting of the specified state. If no pipeline is currently bound, returns true.
*/
bool supportsDynamicState(VkDynamicState state);
/** Clips the rect to ensure it fits inside the render area. */
VkRect2D clipToRenderArea(VkRect2D rect);
/** Clips the scissor to ensure it fits inside the render area. */
VkRect2D clipToRenderArea(VkRect2D scissor);
MTLScissorRect clipToRenderArea(MTLScissorRect scissor);
/** Called by each graphics draw command to establish any outstanding state just prior to performing the draw. */
void finalizeDrawState(MVKGraphicsStage stage);
@ -362,6 +360,9 @@ public:
/** Returns the push constants associated with the specified shader stage. */
MVKPushConstantsCommandEncoderState* getPushConstants(VkShaderStageFlagBits shaderStage);
/** Encode the buffer binding as a vertex attribute buffer. */
void encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride);
/**
* Copy bytes into the Metal encoder at a Metal vertex buffer index, and optionally indicate
* that this binding might override a desriptor binding. If so, the descriptor binding will
@ -436,37 +437,25 @@ public:
id<MTLRenderCommandEncoder> _mtlRenderEncoder;
/** Tracks the current graphics pipeline bound to the encoder. */
MVKPipelineCommandEncoderState _graphicsPipelineState;
MVKPipelineCommandEncoderState _graphicsPipelineState;
/** Tracks the current graphics resources state of the encoder. */
MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState;
/** Tracks the current compute pipeline bound to the encoder. */
MVKPipelineCommandEncoderState _computePipelineState;
MVKPipelineCommandEncoderState _computePipelineState;
/** Tracks the current viewport state of the encoder. */
MVKViewportCommandEncoderState _viewportState;
/** Tracks the current scissor state of the encoder. */
MVKScissorCommandEncoderState _scissorState;
/** Tracks the current depth bias state of the encoder. */
MVKDepthBiasCommandEncoderState _depthBiasState;
/** Tracks the current blend color state of the encoder. */
MVKBlendColorCommandEncoderState _blendColorState;
/** Tracks the current compute resources state of the encoder. */
MVKComputeResourcesCommandEncoderState _computeResourcesState;
/** Tracks the current depth stencil state of the encoder. */
MVKDepthStencilCommandEncoderState _depthStencilState;
/** Tracks the current stencil reference value state of the encoder. */
MVKStencilReferenceValueCommandEncoderState _stencilReferenceValueState;
/** Tracks the current rendering states of the encoder. */
MVKRenderingCommandEncoderState _renderingState;
/** Tracks the current graphics resources state of the encoder. */
MVKGraphicsResourcesCommandEncoderState _graphicsResourcesState;
/** Tracks the current compute resources state of the encoder. */
MVKComputeResourcesCommandEncoderState _computeResourcesState;
/** The type of primitive that will be rendered. */
MTLPrimitiveType _mtlPrimitiveType;
/** Tracks the occlusion query state of the encoder. */
MVKOcclusionQueryCommandEncoderState _occlusionQueryState;
/** The size of the threadgroup for the compute shader. */
MTLSize _mtlThreadgroupSize;
@ -495,7 +484,6 @@ protected:
void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints);
void encodeTimestampStageCounterSamples();
id<MTLFence> getStageCountersMTLFence();
MVKArrayRef<MTLSamplePosition> getCustomSamplePositions();
NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
template<typename T> void retainIfImmediatelyEncoding(T& mtlEnc);
template<typename T> void endMetalEncoding(T& mtlEnc);
@ -511,8 +499,6 @@ protected:
MVKSmallVector<GPUCounterQuery, 16> _timestampStageCounterQueries;
MVKSmallVector<VkClearValue, kMVKDefaultAttachmentCount> _clearValues;
MVKSmallVector<MVKImageView*, kMVKDefaultAttachmentCount> _attachments;
MVKSmallVector<MTLSamplePosition> _dynamicSamplePositions;
MVKSmallVector<MVKSmallVector<MTLSamplePosition>> _subpassSamplePositions;
id<MTLComputeCommandEncoder> _mtlComputeEncoder;
id<MTLBlitCommandEncoder> _mtlBlitEncoder;
id<MTLFence> _stageCountersMTLFence;
@ -521,7 +507,6 @@ protected:
MVKPushConstantsCommandEncoderState _tessEvalPushConstants;
MVKPushConstantsCommandEncoderState _fragmentPushConstants;
MVKPushConstantsCommandEncoderState _computePushConstants;
MVKOcclusionQueryCommandEncoderState _occlusionQueryState;
MVKPrefillMetalCommandBuffersStyle _prefillStyle;
VkSubpassContents _subpassContents;
uint32_t _renderSubpassIndex;
@ -536,9 +521,6 @@ protected:
#pragma mark -
#pragma mark Support functions
/** Returns a name, suitable for use as a MTLCommandBuffer label, based on the MVKCommandUse. */
NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse);
/** Returns a name, suitable for use as a MTLRenderCommandEncoder label, based on the MVKCommandUse. */
NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse);

View File

@ -25,7 +25,7 @@
#include "MVKFoundation.h"
#include "MTLRenderPassDescriptor+MoltenVK.h"
#include "MVKCmdDraw.h"
#include "MVKCmdRenderPass.h"
#include "MVKCmdRendering.h"
#include <sys/mman.h>
using namespace std;
@ -120,7 +120,7 @@ VkResult MVKCommandBuffer::begin(const VkCommandBufferBeginInfo* pBeginInfo) {
if(_device->shouldPrefillMTLCommandBuffers() && !(_isSecondary || _supportsConcurrentExecution)) {
@autoreleasepool {
_prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(0) retain]; // retained
_prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(kMVKCommandUseBeginCommandBuffer, 0) retain]; // retained
auto prefillStyle = getMVKConfig().prefillMetalCommandBuffers;
if (prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING ||
prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE ) {
@ -260,7 +260,7 @@ bool MVKCommandBuffer::canExecute() {
}
_wasExecuted = true;
return true;
return wasConfigurationSuccessful();
}
// Return the number of bits set in the view mask, with a minimum value of 1.
@ -310,7 +310,7 @@ MVKCommandBuffer::~MVKCommandBuffer() {
}
// Promote the initial visibility buffer and indication of timestamp use from the secondary buffers.
void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers) {
void MVKCommandBuffer::recordExecuteCommands(MVKArrayRef<MVKCommandBuffer*const> secondaryCommandBuffers) {
for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) {
if (cmdBuff->_needsVisibilityResultMTLBuffer) { _needsVisibilityResultMTLBuffer = true; }
if (cmdBuff->_hasStageCounterTimestampCommand) { _hasStageCounterTimestampCommand = true; }
@ -335,11 +335,19 @@ void MVKCommandBuffer::recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline) {
#pragma mark -
#pragma mark MVKCommandEncoder
// Activity performance tracking is put here to deliberately exclude when
// MVKConfiguration::prefillMetalCommandBuffers is set to immediate prefilling,
// because that would include app time between command submissions.
void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff,
MVKCommandEncodingContext* pEncodingContext) {
MVKDevice* mvkDev = getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
beginEncoding(mtlCmdBuff, pEncodingContext);
encodeCommands(_cmdBuffer->_head);
endEncoding();
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime);
}
void MVKCommandEncoder::beginEncoding(id<MTLCommandBuffer> mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) {
@ -434,7 +442,6 @@ void MVKCommandEncoder::beginRendering(MVKCommand* rendCmd, const VkRenderingInf
pRenderingInfo->renderArea,
MVKArrayRef(clearValues, attCnt),
MVKArrayRef(imageViews, attCnt),
MVKArrayRef<MVKArrayRef<MTLSamplePosition>>(),
kMVKCommandUseBeginRendering);
// If we've just created new transient objects, once retained by this encoder,
@ -454,7 +461,6 @@ void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd,
const VkRect2D& renderArea,
MVKArrayRef<VkClearValue> clearValues,
MVKArrayRef<MVKImageView*> attachments,
MVKArrayRef<MVKArrayRef<MTLSamplePosition>> subpassSamplePositions,
MVKCommandUse cmdUse) {
_pEncodingContext->setRenderingContext(renderPass, framebuffer);
_renderArea = renderArea;
@ -463,13 +469,6 @@ void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd,
_clearValues.assign(clearValues.begin(), clearValues.end());
_attachments.assign(attachments.begin(), attachments.end());
// Copy the sample positions array of arrays, one array of sample positions for each subpass index.
_subpassSamplePositions.resize(subpassSamplePositions.size);
for (uint32_t spSPIdx = 0; spSPIdx < subpassSamplePositions.size; spSPIdx++) {
_subpassSamplePositions[spSPIdx].assign(subpassSamplePositions[spSPIdx].begin(),
subpassSamplePositions[spSPIdx].end());
}
setSubpass(passCmd, subpassContents, 0, cmdUse);
}
@ -510,10 +509,6 @@ void MVKCommandEncoder::beginNextMultiviewPass() {
beginMetalRenderPass(kMVKCommandUseNextSubpass);
}
void MVKCommandEncoder::setDynamicSamplePositions(MVKArrayRef<MTLSamplePosition> dynamicSamplePositions) {
_dynamicSamplePositions.assign(dynamicSamplePositions.begin(), dynamicSamplePositions.end());
}
// Retain encoders when prefilling, because prefilling may span multiple autorelease pools.
template<typename T>
void MVKCommandEncoder::retainIfImmediatelyEncoding(T& mtlEnc) {
@ -528,7 +523,6 @@ void MVKCommandEncoder::endMetalEncoding(T& mtlEnc) {
mtlEnc = nil;
}
// Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder.
void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) {
@ -584,8 +578,8 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) {
// If no custom sample positions are established, size will be zero,
// and Metal will default to using default sample postions.
if (_pDeviceMetalFeatures->programmableSamplePositions) {
auto cstmSampPosns = getCustomSamplePositions();
[mtlRPDesc setSamplePositions: cstmSampPosns.data count: cstmSampPosns.size];
auto sampPosns = _renderingState.getSamplePositions();
[mtlRPDesc setSamplePositions: sampPosns.data() count: sampPosns.size()];
}
_mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
@ -599,29 +593,22 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) {
_graphicsPipelineState.beginMetalRenderPass();
_graphicsResourcesState.beginMetalRenderPass();
_viewportState.beginMetalRenderPass();
_scissorState.beginMetalRenderPass();
_depthBiasState.beginMetalRenderPass();
_blendColorState.beginMetalRenderPass();
_depthStencilState.beginMetalRenderPass();
_renderingState.beginMetalRenderPass();
_vertexPushConstants.beginMetalRenderPass();
_tessCtlPushConstants.beginMetalRenderPass();
_tessEvalPushConstants.beginMetalRenderPass();
_fragmentPushConstants.beginMetalRenderPass();
_depthStencilState.beginMetalRenderPass();
_stencilReferenceValueState.beginMetalRenderPass();
_occlusionQueryState.beginMetalRenderPass();
}
// If custom sample positions have been set, return them, otherwise return an empty array.
// For Metal, VkPhysicalDeviceSampleLocationsPropertiesEXT::variableSampleLocations is false.
// As such, Vulkan requires that sample positions must be established at the beginning of
// a renderpass, and that both pipeline and dynamic sample locations must be the same as those
// set for each subpass. Therefore, the only sample positions of use are those set for each
// subpass when the renderpass begins. The pipeline and dynamic sample positions are ignored.
MVKArrayRef<MTLSamplePosition> MVKCommandEncoder::getCustomSamplePositions() {
return (_renderSubpassIndex < _subpassSamplePositions.size()
? _subpassSamplePositions[_renderSubpassIndex].contents()
: MVKArrayRef<MTLSamplePosition>());
void MVKCommandEncoder::restartMetalRenderPassIfNeeded() {
if ( !_mtlRenderEncoder ) { return; }
if (_renderingState.needsMetalRenderPassRestart()) {
encodeStoreActions(true);
beginMetalRenderPass(kMVKCommandUseRestartSubpass);
}
}
void MVKCommandEncoder::encodeStoreActions(bool storeOverride) {
@ -698,24 +685,23 @@ void MVKCommandEncoder::signalEvent(MVKEvent* mvkEvent, bool status) {
mvkEvent->encodeSignal(_mtlCmdBuffer, status);
}
bool MVKCommandEncoder::supportsDynamicState(VkDynamicState state) {
MVKGraphicsPipeline* gpl = (MVKGraphicsPipeline*)_graphicsPipelineState.getPipeline();
return !gpl || gpl->supportsDynamicState(state);
VkRect2D MVKCommandEncoder::clipToRenderArea(VkRect2D rect) {
uint32_t raLeft = max(_renderArea.offset.x, 0);
uint32_t raRight = raLeft + _renderArea.extent.width;
uint32_t raBottom = max(_renderArea.offset.y, 0);
uint32_t raTop = raBottom + _renderArea.extent.height;
rect.offset.x = mvkClamp<uint32_t>(rect.offset.x, raLeft, max(raRight - 1, raLeft));
rect.offset.y = mvkClamp<uint32_t>(rect.offset.y, raBottom, max(raTop - 1, raBottom));
rect.extent.width = min<uint32_t>(rect.extent.width, raRight - rect.offset.x);
rect.extent.height = min<uint32_t>(rect.extent.height, raTop - rect.offset.y);
return rect;
}
VkRect2D MVKCommandEncoder::clipToRenderArea(VkRect2D scissor) {
int32_t raLeft = _renderArea.offset.x;
int32_t raRight = raLeft + _renderArea.extent.width;
int32_t raBottom = _renderArea.offset.y;
int32_t raTop = raBottom + _renderArea.extent.height;
scissor.offset.x = mvkClamp(scissor.offset.x, raLeft, max(raRight - 1, raLeft));
scissor.offset.y = mvkClamp(scissor.offset.y, raBottom, max(raTop - 1, raBottom));
scissor.extent.width = min<int32_t>(scissor.extent.width, raRight - scissor.offset.x);
scissor.extent.height = min<int32_t>(scissor.extent.height, raTop - scissor.offset.y);
return scissor;
MTLScissorRect MVKCommandEncoder::clipToRenderArea(MTLScissorRect scissor) {
return mvkMTLScissorRectFromVkRect2D(clipToRenderArea(mvkVkRect2DFromMTLScissorRect(scissor)));
}
void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) {
@ -725,16 +711,12 @@ void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) {
}
_graphicsPipelineState.encode(stage); // Must do first..it sets others
_graphicsResourcesState.encode(stage); // Before push constants, to allow them to override.
_viewportState.encode(stage);
_scissorState.encode(stage);
_depthBiasState.encode(stage);
_blendColorState.encode(stage);
_depthStencilState.encode(stage);
_renderingState.encode(stage);
_vertexPushConstants.encode(stage);
_tessCtlPushConstants.encode(stage);
_tessEvalPushConstants.encode(stage);
_fragmentPushConstants.encode(stage);
_depthStencilState.encode(stage);
_stencilReferenceValueState.encode(stage);
_occlusionQueryState.encode(stage);
}
@ -823,16 +805,12 @@ void MVKCommandEncoder::endMetalRenderEncoding() {
_graphicsPipelineState.endMetalRenderPass();
_graphicsResourcesState.endMetalRenderPass();
_viewportState.endMetalRenderPass();
_scissorState.endMetalRenderPass();
_depthBiasState.endMetalRenderPass();
_blendColorState.endMetalRenderPass();
_depthStencilState.endMetalRenderPass();
_renderingState.endMetalRenderPass();
_vertexPushConstants.endMetalRenderPass();
_tessCtlPushConstants.endMetalRenderPass();
_tessEvalPushConstants.endMetalRenderPass();
_fragmentPushConstants.endMetalRenderPass();
_depthStencilState.endMetalRenderPass();
_stencilReferenceValueState.endMetalRenderPass();
_occlusionQueryState.endMetalRenderPass();
}
@ -924,6 +902,42 @@ void MVKCommandEncoder::setVertexBytes(id<MTLRenderCommandEncoder> mtlEncoder,
}
}
void MVKCommandEncoder::encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride) {
if (_device->_pMetalFeatures->dynamicVertexStride) {
#if MVK_XCODE_15
NSUInteger mtlStride = isDynamicStride ? b.stride : MTLAttributeStrideStatic;
if (b.isInline) {
[_mtlRenderEncoder setVertexBytes: b.mtlBytes
length: b.size
attributeStride: mtlStride
atIndex: b.index];
} else if (b.justOffset) {
[_mtlRenderEncoder setVertexBufferOffset: b.offset
attributeStride: mtlStride
atIndex: b.index];
} else {
[_mtlRenderEncoder setVertexBuffer: b.mtlBuffer
offset: b.offset
attributeStride: mtlStride
atIndex: b.index];
}
#endif
} else {
if (b.isInline) {
[_mtlRenderEncoder setVertexBytes: b.mtlBytes
length: b.size
atIndex: b.index];
} else if (b.justOffset) {
[_mtlRenderEncoder setVertexBufferOffset: b.offset
atIndex: b.index];
} else {
[_mtlRenderEncoder setVertexBuffer: b.mtlBuffer
offset: b.offset
atIndex: b.index];
}
}
}
void MVKCommandEncoder::setFragmentBytes(id<MTLRenderCommandEncoder> mtlEncoder,
const void* bytes,
NSUInteger length,
@ -1123,39 +1137,35 @@ void MVKCommandEncoder::finishQueries() {
MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer,
MVKPrefillMetalCommandBuffersStyle prefillStyle) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
_cmdBuffer(cmdBuffer),
_graphicsPipelineState(this),
_computePipelineState(this),
_viewportState(this),
_scissorState(this),
_depthBiasState(this),
_blendColorState(this),
_depthStencilState(this),
_stencilReferenceValueState(this),
_graphicsResourcesState(this),
_computeResourcesState(this),
_vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT),
_tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
_tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
_fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT),
_computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT),
_occlusionQueryState(this),
_prefillStyle(prefillStyle){
_cmdBuffer(cmdBuffer),
_graphicsPipelineState(this),
_graphicsResourcesState(this),
_computePipelineState(this),
_computeResourcesState(this),
_depthStencilState(this),
_renderingState(this),
_occlusionQueryState(this),
_vertexPushConstants(this, VK_SHADER_STAGE_VERTEX_BIT),
_tessCtlPushConstants(this, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
_tessEvalPushConstants(this, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
_fragmentPushConstants(this, VK_SHADER_STAGE_FRAGMENT_BIT),
_computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT),
_prefillStyle(prefillStyle){
_pDeviceFeatures = &_device->_enabledFeatures;
_pDeviceMetalFeatures = _device->_pMetalFeatures;
_pDeviceProperties = _device->_pProperties;
_pDeviceMemoryProperties = _device->_pMemoryProperties;
_pActivatedQueries = nullptr;
_mtlCmdBuffer = nil;
_mtlRenderEncoder = nil;
_mtlComputeEncoder = nil;
_mtlComputeEncoderUse = kMVKCommandUseNone;
_mtlBlitEncoder = nil;
_mtlBlitEncoderUse = kMVKCommandUseNone;
_pEncodingContext = nullptr;
_stageCountersMTLFence = nil;
_flushCount = 0;
_pDeviceFeatures = &_device->_enabledFeatures;
_pDeviceMetalFeatures = _device->_pMetalFeatures;
_pDeviceProperties = _device->_pProperties;
_pDeviceMemoryProperties = _device->_pMemoryProperties;
_pActivatedQueries = nullptr;
_mtlCmdBuffer = nil;
_mtlRenderEncoder = nil;
_mtlComputeEncoder = nil;
_mtlComputeEncoderUse = kMVKCommandUseNone;
_mtlBlitEncoder = nil;
_mtlBlitEncoderUse = kMVKCommandUseNone;
_pEncodingContext = nullptr;
_stageCountersMTLFence = nil;
_flushCount = 0;
}
MVKCommandEncoder::~MVKCommandEncoder() {
@ -1169,19 +1179,6 @@ MVKCommandEncoder::~MVKCommandEncoder() {
#pragma mark -
#pragma mark Support functions
NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse) {
switch (cmdUse) {
case kMVKCommandUseEndCommandBuffer: return @"vkEndCommandBuffer (Prefilled) CommandBuffer";
case kMVKCommandUseQueueSubmit: return @"vkQueueSubmit CommandBuffer";
case kMVKCommandUseQueuePresent: return @"vkQueuePresentKHR CommandBuffer";
case kMVKCommandUseQueueWaitIdle: return @"vkQueueWaitIdle CommandBuffer";
case kMVKCommandUseDeviceWaitIdle: return @"vkDeviceWaitIdle CommandBuffer";
case kMVKCommandUseAcquireNextImage: return @"vkAcquireNextImageKHR CommandBuffer";
case kMVKCommandUseInvalidateMappedMemoryRanges: return @"vkInvalidateMappedMemoryRanges CommandBuffer";
default: return @"Unknown Use CommandBuffer";
}
}
NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse) {
switch (cmdUse) {
case kMVKCommandUseBeginRendering: return @"vkCmdBeginRendering RenderEncoder";

View File

@ -21,6 +21,7 @@
#include "MVKMTLResourceBindings.h"
#include "MVKCommandResourceFactory.h"
#include "MVKDevice.h"
#include "MVKPipeline.h"
#include "MVKDescriptor.h"
#include "MVKSmallVector.h"
#include "MVKBitArray.h"
@ -81,7 +82,7 @@ public:
/**
* If the content of this instance is dirty, marks this instance as no longer dirty
* and calls the encodeImpl() function to encode the content onto the Metal encoder.
* Marking dirty is done in advance so that subclass encodeImpl() implementations
* Marking clean is done in advance so that subclass encodeImpl() implementations
* can override to leave this instance in a dirty state.
* Subclasses must override the encodeImpl() function to do the actual work.
*/
@ -96,8 +97,21 @@ public:
MVKCommandEncoderState(MVKCommandEncoder* cmdEncoder) : _cmdEncoder(cmdEncoder) {}
protected:
virtual void encodeImpl(uint32_t stage) = 0;
enum StateScope {
Static = 0,
Dynamic,
Count
};
virtual void encodeImpl(uint32_t stage) = 0;
MVKDevice* getDevice();
bool isDynamicState(MVKRenderStateType state);
template <typename T> T& getContent(T* iVarAry, bool isDynamic) {
return iVarAry[isDynamic ? StateScope::Dynamic : StateScope::Static];
}
template <typename T> T& getContent(T* iVarAry, MVKRenderStateType state) {
return getContent(iVarAry, isDynamicState(state));
}
MVKCommandEncoder* _cmdEncoder;
bool _isDirty = false;
@ -108,20 +122,17 @@ protected:
#pragma mark -
#pragma mark MVKPipelineCommandEncoderState
/** Holds encoder state established by pipeline commands. */
/** Abstract class to hold encoder state established by pipeline commands. */
class MVKPipelineCommandEncoderState : public MVKCommandEncoderState {
public:
void bindPipeline(MVKPipeline* pipeline);
/** Binds the pipeline. */
void bindPipeline(MVKPipeline* pipeline);
/** Returns the currently bound pipeline. */
MVKPipeline* getPipeline();
MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)getPipeline(); }
MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)getPipeline(); }
/** Constructs this instance for the specified command encoder. */
MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder)
: MVKCommandEncoderState(cmdEncoder) {}
MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
protected:
void encodeImpl(uint32_t stage) override;
@ -130,62 +141,6 @@ protected:
};
#pragma mark -
#pragma mark MVKViewportCommandEncoderState
/** Holds encoder state established by viewport commands. */
class MVKViewportCommandEncoderState : public MVKCommandEncoderState {
public:
/**
* Sets one or more of the viewports, starting at the first index.
* The isSettingDynamically indicates that the scissor is being changed dynamically,
* which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR.
*/
void setViewports(const MVKArrayRef<VkViewport> viewports,
uint32_t firstViewport,
bool isSettingDynamically);
/** Constructs this instance for the specified command encoder. */
MVKViewportCommandEncoderState(MVKCommandEncoder* cmdEncoder)
: MVKCommandEncoderState(cmdEncoder) {}
protected:
void encodeImpl(uint32_t stage) override;
MVKSmallVector<VkViewport, kMVKMaxViewportScissorCount> _viewports, _dynamicViewports;
};
#pragma mark -
#pragma mark MVKScissorCommandEncoderState
/** Holds encoder state established by viewport commands. */
class MVKScissorCommandEncoderState : public MVKCommandEncoderState {
public:
/**
* Sets one or more of the scissors, starting at the first index.
* The isSettingDynamically indicates that the scissor is being changed dynamically,
* which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR.
*/
void setScissors(const MVKArrayRef<VkRect2D> scissors,
uint32_t firstScissor,
bool isSettingDynamically);
/** Constructs this instance for the specified command encoder. */
MVKScissorCommandEncoderState(MVKCommandEncoder* cmdEncoder)
: MVKCommandEncoderState(cmdEncoder) {}
protected:
void encodeImpl(uint32_t stage) override;
MVKSmallVector<VkRect2D, kMVKMaxViewportScissorCount> _scissors, _dynamicScissors;
};
#pragma mark -
#pragma mark MVKPushConstantsCommandEncoderState
@ -226,16 +181,29 @@ public:
/** Sets the depth stencil state during pipeline binding. */
void setDepthStencilState(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo);
/**
* Sets the stencil compare mask value of the indicated faces
* to the specified value, from explicit dynamic command.
*/
/** Enables or disables depth testing, from explicit dynamic command. */
void setDepthTestEnable(VkBool32 depthTestEnable);
/** Enables or disables depth writing, from explicit dynamic command. */
void setDepthWriteEnable(VkBool32 depthWriteEnable);
/** Sets the depth compare operation, from explicit dynamic command. */
void setDepthCompareOp(VkCompareOp depthCompareOp);
/** Enables or disables stencil testing, from explicit dynamic command. */
void setStencilTestEnable(VkBool32 stencilTestEnable);
/** Sets the stencil operations of the indicated faces from explicit dynamic command. */
void setStencilOp(VkStencilFaceFlags faceMask,
VkStencilOp failOp,
VkStencilOp passOp,
VkStencilOp depthFailOp,
VkCompareOp compareOp);
/** Sets the stencil compare mask value of the indicated faces from explicit dynamic command. */
void setStencilCompareMask(VkStencilFaceFlags faceMask, uint32_t stencilCompareMask);
/**
* Sets the stencil write mask value of the indicated faces
* to the specified value, from explicit dynamic command.
*/
/** Sets the stencil write mask value of the indicated faces from explicit dynamic command. */
void setStencilWriteMask(VkStencilFaceFlags faceMask, uint32_t stencilWriteMask);
void beginMetalRenderPass() override;
@ -246,96 +214,124 @@ public:
protected:
void encodeImpl(uint32_t stage) override;
void setStencilState(MVKMTLStencilDescriptorData& stencilInfo,
const VkStencilOpState& vkStencil,
bool enabled);
MVKMTLDepthStencilDescriptorData& getData(MVKRenderStateType state) { return getContent(_depthStencilData, state); }
template <typename T> void setContent(T& content, T value) {
if (content != value) {
content = value;
markDirty();
}
}
void setStencilState(MVKMTLStencilDescriptorData& sData, const VkStencilOpState& vkStencil);
void setStencilOp(MVKMTLStencilDescriptorData& sData, VkStencilOp failOp,
VkStencilOp passOp, VkStencilOp depthFailOp, VkCompareOp compareOp);
MVKMTLDepthStencilDescriptorData _depthStencilData = kMVKMTLDepthStencilDescriptorDataDefault;
MVKMTLDepthStencilDescriptorData _depthStencilData[StateScope::Count];
bool _depthTestEnabled[StateScope::Count] = {};
bool _hasDepthAttachment = false;
bool _hasStencilAttachment = false;
};
#pragma mark -
#pragma mark MVKStencilReferenceValueCommandEncoderState
#pragma mark MVKRenderingCommandEncoderState
/** Holds encoder state established by stencil reference values commands. */
class MVKStencilReferenceValueCommandEncoderState : public MVKCommandEncoderState {
public:
/** Sets the stencil references during pipeline binding. */
void setReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo);
/** Sets the stencil state from explicit dynamic command. */
void setReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference);
/** Constructs this instance for the specified command encoder. */
MVKStencilReferenceValueCommandEncoderState(MVKCommandEncoder* cmdEncoder)
: MVKCommandEncoderState(cmdEncoder) {}
protected:
void encodeImpl(uint32_t stage) override;
uint32_t _frontFaceValue = 0;
uint32_t _backFaceValue = 0;
struct MVKDepthBias {
float depthBiasConstantFactor;
float depthBiasSlopeFactor;
float depthBiasClamp;
};
#pragma mark -
#pragma mark MVKDepthBiasCommandEncoderState
/** Holds encoder state established by depth bias commands. */
class MVKDepthBiasCommandEncoderState : public MVKCommandEncoderState {
public:
/** Sets the depth bias during pipeline binding. */
void setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo);
/** Sets the depth bias dynamically. */
void setDepthBias(float depthBiasConstantFactor,
float depthBiasSlopeFactor,
float depthBiasClamp);
/** Constructs this instance for the specified command encoder. */
MVKDepthBiasCommandEncoderState(MVKCommandEncoder* cmdEncoder)
: MVKCommandEncoderState(cmdEncoder) {}
protected:
void encodeImpl(uint32_t stage) override;
float _depthBiasConstantFactor = 0;
float _depthBiasClamp = 0;
float _depthBiasSlopeFactor = 0;
bool _isEnabled = false;
struct MVKStencilReference {
uint32_t frontFaceValue;
uint32_t backFaceValue;
};
struct MVKMTLViewports {
MTLViewport viewports[kMVKMaxViewportScissorCount];
uint32_t viewportCount;
};
#pragma mark -
#pragma mark MVKBlendColorCommandEncoderState
/** Holds encoder state established by blend color commands. */
class MVKBlendColorCommandEncoderState : public MVKCommandEncoderState {
struct MVKMTLScissors {
MTLScissorRect scissors[kMVKMaxViewportScissorCount];
uint32_t scissorCount;
};
/** Holds encoder state established by various rendering state commands. */
class MVKRenderingCommandEncoderState : public MVKCommandEncoderState {
public:
void setCullMode(VkCullModeFlags cullMode, bool isDynamic);
/** Sets the blend color, either as part of pipeline binding, or dynamically. */
void setBlendColor(float red, float green,
float blue, float alpha,
bool isDynamic);
void setFrontFace(VkFrontFace frontFace, bool isDynamic);
/** Constructs this instance for the specified command encoder. */
MVKBlendColorCommandEncoderState(MVKCommandEncoder* cmdEncoder)
: MVKCommandEncoderState(cmdEncoder) {}
void setPolygonMode(VkPolygonMode polygonMode, bool isDynamic);
void setBlendConstants(float blendConstants[4], bool isDynamic);
void setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo);
void setDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor, float depthBiasClamp);
void setDepthBiasEnable(VkBool32 depthBiasEnable);
void setDepthClipEnable(bool depthClip, bool isDynamic);
void setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo);
void setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference);
void setViewports(const MVKArrayRef<VkViewport> viewports, uint32_t firstViewport, bool isDynamic);
void setScissors(const MVKArrayRef<VkRect2D> scissors, uint32_t firstScissor, bool isDynamic);
void setPrimitiveRestartEnable(VkBool32 primitiveRestartEnable, bool isDynamic);
void setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic);
void setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic);
MTLPrimitiveType getPrimitiveType();
void setPatchControlPoints(uint32_t patchControlPoints, bool isDynamic);
uint32_t getPatchControlPoints();
void setSampleLocationsEnable(VkBool32 sampleLocationsEnable, bool isDynamic);
void setSampleLocations(const MVKArrayRef<VkSampleLocationEXT> sampleLocations, bool isDynamic);
MVKArrayRef<MTLSamplePosition> getSamplePositions();
void beginMetalRenderPass() override;
bool needsMetalRenderPassRestart();
bool isDirty(MVKRenderStateType state);
void markDirty() override;
MVKRenderingCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
protected:
void encodeImpl(uint32_t stage) override;
void encodeImpl(uint32_t stage) override;
bool isDrawingTriangles();
template <typename T> void setContent(T* iVarAry, T* pVal, MVKRenderStateType state, bool isDynamic) {
auto* pIVar = &iVarAry[isDynamic ? StateScope::Dynamic : StateScope::Static];
if( !mvkAreEqual(pVal, pIVar) ) {
*pIVar = *pVal;
_dirtyStates.enable(state);
_modifiedStates.enable(state);
MVKCommandEncoderState::markDirty(); // Avoid local markDirty() as it marks all states dirty.
}
}
float _red = 0;
float _green = 0;
float _blue = 0;
float _alpha = 0;
MVKSmallVector<MTLSamplePosition, kMVKMaxSampleCount> _mtlSampleLocations[StateScope::Count] = {};
MVKMTLViewports _mtlViewports[StateScope::Count] = {};
MVKMTLScissors _mtlScissors[StateScope::Count] = {};
MVKColor32 _mtlBlendConstants[StateScope::Count] = {};
MVKDepthBias _mtlDepthBias[StateScope::Count] = {};
MVKStencilReference _mtlStencilReference[StateScope::Count] = {};
MTLCullMode _mtlCullMode[StateScope::Count] = { MTLCullModeNone, MTLCullModeNone };
MTLWinding _mtlFrontFace[StateScope::Count] = { MTLWindingClockwise, MTLWindingClockwise };
MTLPrimitiveType _mtlPrimitiveTopology[StateScope::Count] = { MTLPrimitiveTypePoint, MTLPrimitiveTypePoint };
MTLDepthClipMode _mtlDepthClipEnable[StateScope::Count] = { MTLDepthClipModeClip, MTLDepthClipModeClip };
MTLTriangleFillMode _mtlPolygonMode[StateScope::Count] = { MTLTriangleFillModeFill, MTLTriangleFillModeFill };
uint32_t _mtlPatchControlPoints[StateScope::Count] = {};
MVKRenderStateFlags _dirtyStates;
MVKRenderStateFlags _modifiedStates;
bool _mtlSampleLocationsEnable[StateScope::Count] = {};
bool _mtlDepthBiasEnable[StateScope::Count] = {};
bool _mtlPrimitiveRestartEnable[StateScope::Count] = {};
bool _mtlRasterizerDiscardEnable[StateScope::Count] = {};
bool _cullBothFaces[StateScope::Count] = {};
};
@ -457,7 +453,7 @@ protected:
contents[index] = value;
}
void assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef<MVKMTLTextureBinding> texBindings);
void assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef<const MVKMTLTextureBinding> texBindings);
void encodeMetalArgumentBuffer(MVKShaderStage stage);
virtual void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) = 0;
@ -547,7 +543,7 @@ public:
const char* pStageName,
bool fullImageViewSwizzle,
std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&)> bindBuffer,
std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&, const MVKArrayRef<uint32_t>)> bindImplicitBuffer,
std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&, MVKArrayRef<const uint32_t>)> bindImplicitBuffer,
std::function<void(MVKCommandEncoder*, MVKMTLTextureBinding&)> bindTexture,
std::function<void(MVKCommandEncoder*, MVKMTLSamplerStateBinding&)> bindSampler);

View File

@ -25,20 +25,31 @@
using namespace std;
#define shouldUpdateFace(face) mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_##face##_BIT)
#pragma mark -
#pragma mark MVKCommandEncoderState
MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEncoder->getVulkanAPIObject(); };
MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); }
bool MVKCommandEncoderState::isDynamicState(MVKRenderStateType state) {
auto* gpl = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
return !gpl || gpl->isDynamicState(state);
}
#pragma mark -
#pragma mark MVKPipelineCommandEncoderState
void MVKPipelineCommandEncoderState::bindPipeline(MVKPipeline* pipeline) {
if (pipeline != _pipeline) markDirty();
_pipeline = pipeline;
if (pipeline == _pipeline) { return; }
_pipeline = pipeline;
_pipeline->wasBound(_cmdEncoder);
markDirty();
}
MVKPipeline* MVKPipelineCommandEncoderState::getPipeline() { return _pipeline; }
@ -51,112 +62,6 @@ void MVKPipelineCommandEncoderState::encodeImpl(uint32_t stage) {
}
#pragma mark -
#pragma mark MVKViewportCommandEncoderState
void MVKViewportCommandEncoderState::setViewports(const MVKArrayRef<VkViewport> viewports,
uint32_t firstViewport,
bool isSettingDynamically) {
size_t vpCnt = viewports.size;
uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports;
if ((firstViewport + vpCnt > maxViewports) ||
(firstViewport >= maxViewports) ||
(isSettingDynamically && vpCnt == 0))
return;
auto& usingViewports = isSettingDynamically ? _dynamicViewports : _viewports;
if (firstViewport + vpCnt > usingViewports.size()) {
usingViewports.resize(firstViewport + vpCnt);
}
bool dirty;
bool mustSetDynamically = _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_VIEWPORT);
if (isSettingDynamically || (!mustSetDynamically && vpCnt > 0)) {
dirty = memcmp(&usingViewports[firstViewport], &viewports[0], vpCnt * sizeof(VkViewport)) != 0;
std::copy(viewports.begin(), viewports.end(), usingViewports.begin() + firstViewport);
} else {
dirty = !usingViewports.empty();
usingViewports.clear();
}
if (dirty) markDirty();
}
void MVKViewportCommandEncoderState::encodeImpl(uint32_t stage) {
if (stage != kMVKGraphicsStageRasterization) { return; }
auto& usingViewports = _viewports.size() > 0 ? _viewports : _dynamicViewports;
if (usingViewports.empty()) { return; }
if (_cmdEncoder->_pDeviceFeatures->multiViewport) {
size_t vpCnt = usingViewports.size();
MTLViewport mtlViewports[vpCnt];
for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) {
mtlViewports[vpIdx] = mvkMTLViewportFromVkViewport(usingViewports[vpIdx]);
}
#if MVK_MACOS_OR_IOS
[_cmdEncoder->_mtlRenderEncoder setViewports: mtlViewports count: vpCnt];
#endif
} else {
[_cmdEncoder->_mtlRenderEncoder setViewport: mvkMTLViewportFromVkViewport(usingViewports[0])];
}
}
#pragma mark -
#pragma mark MVKScissorCommandEncoderState
void MVKScissorCommandEncoderState::setScissors(const MVKArrayRef<VkRect2D> scissors,
uint32_t firstScissor,
bool isSettingDynamically) {
size_t sCnt = scissors.size;
uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports;
if ((firstScissor + sCnt > maxScissors) ||
(firstScissor >= maxScissors) ||
(isSettingDynamically && sCnt == 0))
return;
auto& usingScissors = isSettingDynamically ? _dynamicScissors : _scissors;
if (firstScissor + sCnt > usingScissors.size()) {
usingScissors.resize(firstScissor + sCnt);
}
bool dirty;
bool mustSetDynamically = _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_SCISSOR);
if (isSettingDynamically || (!mustSetDynamically && sCnt > 0)) {
dirty = memcmp(&usingScissors[firstScissor], &scissors[0], sCnt * sizeof(VkRect2D)) != 0;
std::copy(scissors.begin(), scissors.end(), usingScissors.begin() + firstScissor);
} else {
dirty = !usingScissors.empty();
usingScissors.clear();
}
if (dirty) markDirty();
}
void MVKScissorCommandEncoderState::encodeImpl(uint32_t stage) {
if (stage != kMVKGraphicsStageRasterization) { return; }
auto& usingScissors = _scissors.size() > 0 ? _scissors : _dynamicScissors;
if (usingScissors.empty()) { return; }
if (_cmdEncoder->_pDeviceFeatures->multiViewport) {
size_t sCnt = usingScissors.size();
MTLScissorRect mtlScissors[sCnt];
for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) {
mtlScissors[sIdx] = mvkMTLScissorRectFromVkRect2D(_cmdEncoder->clipToRenderArea(usingScissors[sIdx]));
}
#if MVK_MACOS_OR_IOS
[_cmdEncoder->_mtlRenderEncoder setScissorRects: mtlScissors count: sCnt];
#endif
} else {
[_cmdEncoder->_mtlRenderEncoder setScissorRect: mvkMTLScissorRectFromVkRect2D(_cmdEncoder->clipToRenderArea(usingScissors[0]))];
}
}
#pragma mark -
#pragma mark MVKPushConstantsCommandEncoderState
@ -165,7 +70,7 @@ void MVKPushConstantsCommandEncoderState:: setPushConstants(uint32_t offset, MVK
// Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size.
// Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader.
size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
size_t pcSize = pushConstants.size;
size_t pcSize = pushConstants.size();
size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign);
mvkEnsureSize(_pushConstants, pcBuffSize);
copy(pushConstants.begin(), pushConstants.end(), _pushConstants.begin() + offset);
@ -245,7 +150,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
}
bool MVKPushConstantsCommandEncoderState::isTessellating() {
MVKGraphicsPipeline* gp = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
auto* gp = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
return gp ? gp->isTessellationPipeline() : false;
}
@ -254,74 +159,84 @@ bool MVKPushConstantsCommandEncoderState::isTessellating() {
#pragma mark MVKDepthStencilCommandEncoderState
void MVKDepthStencilCommandEncoderState:: setDepthStencilState(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) {
auto oldData = _depthStencilData;
auto& depthEnabled = _depthTestEnabled[StateScope::Static];
auto oldDepthEnabled = depthEnabled;
depthEnabled = static_cast<bool>(vkDepthStencilInfo.depthTestEnable);
if (vkDepthStencilInfo.depthTestEnable) {
_depthStencilData.depthCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkDepthStencilInfo.depthCompareOp);
_depthStencilData.depthWriteEnabled = vkDepthStencilInfo.depthWriteEnable;
} else {
_depthStencilData.depthCompareFunction = kMVKMTLDepthStencilDescriptorDataDefault.depthCompareFunction;
_depthStencilData.depthWriteEnabled = kMVKMTLDepthStencilDescriptorDataDefault.depthWriteEnabled;
}
auto& dsData = _depthStencilData[StateScope::Static];
auto oldData = dsData;
dsData.depthCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkDepthStencilInfo.depthCompareOp);
dsData.depthWriteEnabled = vkDepthStencilInfo.depthWriteEnable;
setStencilState(_depthStencilData.frontFaceStencilData, vkDepthStencilInfo.front, vkDepthStencilInfo.stencilTestEnable);
setStencilState(_depthStencilData.backFaceStencilData, vkDepthStencilInfo.back, vkDepthStencilInfo.stencilTestEnable);
dsData.stencilTestEnabled = static_cast<bool>(vkDepthStencilInfo.stencilTestEnable);
setStencilState(dsData.frontFaceStencilData, vkDepthStencilInfo.front);
setStencilState(dsData.backFaceStencilData, vkDepthStencilInfo.back);
if (!(oldData == _depthStencilData)) markDirty();
if (depthEnabled != oldDepthEnabled || dsData != oldData) { markDirty(); }
}
void MVKDepthStencilCommandEncoderState::setStencilState(MVKMTLStencilDescriptorData& stencilInfo,
const VkStencilOpState& vkStencil,
bool enabled) {
if ( !enabled ) {
stencilInfo = kMVKMTLStencilDescriptorDataDefault;
return;
}
stencilInfo.enabled = true;
stencilInfo.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkStencil.compareOp);
stencilInfo.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.failOp);
stencilInfo.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.depthFailOp);
stencilInfo.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.passOp);
if ( !_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) ) {
stencilInfo.readMask = vkStencil.compareMask;
}
if ( !_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) ) {
stencilInfo.writeMask = vkStencil.writeMask;
}
void MVKDepthStencilCommandEncoderState::setStencilState(MVKMTLStencilDescriptorData& sData,
const VkStencilOpState& vkStencil) {
sData.readMask = vkStencil.compareMask;
sData.writeMask = vkStencil.writeMask;
sData.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(vkStencil.compareOp);
sData.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.failOp);
sData.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.depthFailOp);
sData.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(vkStencil.passOp);
}
void MVKDepthStencilCommandEncoderState::setDepthTestEnable(VkBool32 depthTestEnable) {
setContent(_depthTestEnabled[StateScope::Dynamic], static_cast<bool>(depthTestEnable));
}
void MVKDepthStencilCommandEncoderState::setDepthWriteEnable(VkBool32 depthWriteEnable) {
setContent(_depthStencilData[StateScope::Dynamic].depthWriteEnabled, static_cast<bool>(depthWriteEnable));
}
void MVKDepthStencilCommandEncoderState::setDepthCompareOp(VkCompareOp depthCompareOp) {
setContent(_depthStencilData[StateScope::Dynamic].depthCompareFunction,
(uint8_t)mvkMTLCompareFunctionFromVkCompareOp(depthCompareOp));
}
void MVKDepthStencilCommandEncoderState::setStencilTestEnable(VkBool32 stencilTestEnable) {
setContent(_depthStencilData[StateScope::Dynamic].stencilTestEnabled, static_cast<bool>(stencilTestEnable));
}
void MVKDepthStencilCommandEncoderState::setStencilOp(MVKMTLStencilDescriptorData& sData,
VkStencilOp failOp,
VkStencilOp passOp,
VkStencilOp depthFailOp,
VkCompareOp compareOp) {
auto oldData = sData;
sData.stencilCompareFunction = mvkMTLCompareFunctionFromVkCompareOp(compareOp);
sData.stencilFailureOperation = mvkMTLStencilOperationFromVkStencilOp(failOp);
sData.depthFailureOperation = mvkMTLStencilOperationFromVkStencilOp(depthFailOp);
sData.depthStencilPassOperation = mvkMTLStencilOperationFromVkStencilOp(passOp);
if (sData != oldData) { markDirty(); }
}
void MVKDepthStencilCommandEncoderState::setStencilOp(VkStencilFaceFlags faceMask,
VkStencilOp failOp,
VkStencilOp passOp,
VkStencilOp depthFailOp,
VkCompareOp compareOp) {
auto& dsData = _depthStencilData[StateScope::Dynamic];
if (shouldUpdateFace(FRONT)) { setStencilOp(dsData.frontFaceStencilData, failOp, passOp, depthFailOp, compareOp); }
if (shouldUpdateFace(BACK)) { setStencilOp(dsData.backFaceStencilData, failOp, passOp, depthFailOp, compareOp); }
}
// We don't check for dynamic state here, because if this is called before pipeline is set,
// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway.
void MVKDepthStencilCommandEncoderState::setStencilCompareMask(VkStencilFaceFlags faceMask,
uint32_t stencilCompareMask) {
auto oldData = _depthStencilData;
if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) {
_depthStencilData.frontFaceStencilData.readMask = stencilCompareMask;
}
if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) {
_depthStencilData.backFaceStencilData.readMask = stencilCompareMask;
}
if (!(oldData == _depthStencilData)) markDirty();
uint32_t stencilCompareMask) {
auto& dsData = _depthStencilData[StateScope::Dynamic];
if (shouldUpdateFace(FRONT)) { setContent(dsData.frontFaceStencilData.readMask, stencilCompareMask); }
if (shouldUpdateFace(BACK)) { setContent(dsData.backFaceStencilData.readMask, stencilCompareMask); }
}
// We don't check for dynamic state here, because if this is called before pipeline is set,
// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway.
void MVKDepthStencilCommandEncoderState::setStencilWriteMask(VkStencilFaceFlags faceMask,
uint32_t stencilWriteMask) {
auto oldData = _depthStencilData;
if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) {
_depthStencilData.frontFaceStencilData.writeMask = stencilWriteMask;
}
if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) {
_depthStencilData.backFaceStencilData.writeMask = stencilWriteMask;
}
if (!(oldData == _depthStencilData)) markDirty();
uint32_t stencilWriteMask) {
auto& dsData = _depthStencilData[StateScope::Dynamic];
if (shouldUpdateFace(FRONT)) { setContent(dsData.frontFaceStencilData.writeMask, stencilWriteMask); }
if (shouldUpdateFace(BACK)) { setContent(dsData.backFaceStencilData.writeMask, stencilWriteMask); }
}
void MVKDepthStencilCommandEncoderState::beginMetalRenderPass() {
@ -337,132 +252,349 @@ void MVKDepthStencilCommandEncoderState::beginMetalRenderPass() {
if (_hasStencilAttachment != prevHasStencilAttachment) { markDirty(); }
}
// Combine static and dynamic depth/stencil data
void MVKDepthStencilCommandEncoderState::encodeImpl(uint32_t stage) {
auto cmdEncPool = _cmdEncoder->getCommandEncodingPool();
switch (stage) {
case kMVKGraphicsStageRasterization: {
// If renderpass does not have a depth or a stencil attachment, disable corresponding test
MVKMTLDepthStencilDescriptorData adjustedDSData = _depthStencilData;
adjustedDSData.disable(!_hasDepthAttachment, !_hasStencilAttachment);
[_cmdEncoder->_mtlRenderEncoder setDepthStencilState: cmdEncPool->getMTLDepthStencilState(adjustedDSData)];
break;
}
default: // Do nothing on other stages
break;
if (stage != kMVKGraphicsStageRasterization) { return; }
MVKMTLDepthStencilDescriptorData dsData;
if (_hasDepthAttachment && getContent(_depthTestEnabled, DepthTestEnable)) {
dsData.depthCompareFunction = getData(DepthCompareOp).depthCompareFunction;
dsData.depthWriteEnabled = getData(DepthWriteEnable).depthWriteEnabled;
}
if (_hasStencilAttachment && getData(StencilTestEnable).stencilTestEnabled) {
dsData.stencilTestEnabled = true;
auto& frontFace = dsData.frontFaceStencilData;
auto& backFace = dsData.backFaceStencilData;
const auto& srcRM = getData(StencilCompareMask);
frontFace.readMask = srcRM.frontFaceStencilData.readMask;
backFace.readMask = srcRM.backFaceStencilData.readMask;
const auto& srcWM = getData(StencilWriteMask);
frontFace.writeMask = srcWM.frontFaceStencilData.writeMask;
backFace.writeMask = srcWM.backFaceStencilData.writeMask;
const auto& srcSOp = getData(StencilOp);
frontFace.stencilCompareFunction = srcSOp.frontFaceStencilData.stencilCompareFunction;
frontFace.stencilFailureOperation = srcSOp.frontFaceStencilData.stencilFailureOperation;
frontFace.depthFailureOperation = srcSOp.frontFaceStencilData.depthFailureOperation;
frontFace.depthStencilPassOperation = srcSOp.frontFaceStencilData.depthStencilPassOperation;
backFace.stencilCompareFunction = srcSOp.backFaceStencilData.stencilCompareFunction;
backFace.stencilFailureOperation = srcSOp.backFaceStencilData.stencilFailureOperation;
backFace.depthFailureOperation = srcSOp.backFaceStencilData.depthFailureOperation;
backFace.depthStencilPassOperation = srcSOp.backFaceStencilData.depthStencilPassOperation;
}
[_cmdEncoder->_mtlRenderEncoder setDepthStencilState: _cmdEncoder->getCommandEncodingPool()->getMTLDepthStencilState(dsData)];
}
#pragma mark -
#pragma mark MVKRenderingCommandEncoderState
#define getMTLContent(state) getContent(_mtl##state, state)
#define setMTLContent(state) setContent(_mtl##state, &mtl##state, state, isDynamic)
void MVKRenderingCommandEncoderState::setCullMode(VkCullModeFlags cullMode, bool isDynamic) {
auto mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(cullMode);
setMTLContent(CullMode);
_cullBothFaces[isDynamic ? StateScope::Dynamic : StateScope::Static] = (cullMode == VK_CULL_MODE_FRONT_AND_BACK);
}
void MVKRenderingCommandEncoderState::setFrontFace(VkFrontFace frontFace, bool isDynamic) {
auto mtlFrontFace = mvkMTLWindingFromVkFrontFace(frontFace);
setMTLContent(FrontFace);
}
void MVKRenderingCommandEncoderState::setPolygonMode(VkPolygonMode polygonMode, bool isDynamic) {
auto mtlPolygonMode = mvkMTLTriangleFillModeFromVkPolygonMode(polygonMode);
setMTLContent(PolygonMode);
}
void MVKRenderingCommandEncoderState::setBlendConstants(float blendConstants[4], bool isDynamic) {
MVKColor32 mtlBlendConstants;
mvkCopy(mtlBlendConstants.float32, blendConstants, 4);
setMTLContent(BlendConstants);
}
void MVKRenderingCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) {
bool isDynamic = false;
bool mtlDepthBiasEnable = static_cast<bool>(vkRasterInfo.depthBiasEnable);
setMTLContent(DepthBiasEnable);
MVKDepthBias mtlDepthBias = {
.depthBiasConstantFactor = vkRasterInfo.depthBiasConstantFactor,
.depthBiasSlopeFactor = vkRasterInfo.depthBiasSlopeFactor,
.depthBiasClamp = vkRasterInfo.depthBiasClamp
};
setMTLContent(DepthBias);
}
void MVKRenderingCommandEncoderState::setDepthBias(float depthBiasConstantFactor,
float depthBiasSlopeFactor,
float depthBiasClamp) {
bool isDynamic = true;
MVKDepthBias mtlDepthBias = {
.depthBiasConstantFactor = depthBiasConstantFactor,
.depthBiasSlopeFactor = depthBiasSlopeFactor,
.depthBiasClamp = depthBiasClamp
};
setMTLContent(DepthBias);
}
void MVKRenderingCommandEncoderState::setDepthBiasEnable(VkBool32 depthBiasEnable) {
bool isDynamic = true;
bool mtlDepthBiasEnable = static_cast<bool>(depthBiasEnable);
setMTLContent(DepthBiasEnable);
}
void MVKRenderingCommandEncoderState::setDepthClipEnable(bool depthClip, bool isDynamic) {
auto mtlDepthClipEnable = depthClip ? MTLDepthClipModeClip : MTLDepthClipModeClamp;
setMTLContent(DepthClipEnable);
}
void MVKRenderingCommandEncoderState::setStencilReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) {
bool isDynamic = false;
MVKStencilReference mtlStencilReference = {
.frontFaceValue = vkDepthStencilInfo.front.reference,
.backFaceValue = vkDepthStencilInfo.back.reference
};
setMTLContent(StencilReference);
}
void MVKRenderingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFlags faceMask, uint32_t stencilReference) {
bool isDynamic = true;
MVKStencilReference mtlStencilReference = _mtlStencilReference[StateScope::Dynamic];
if (shouldUpdateFace(FRONT)) { mtlStencilReference.frontFaceValue = stencilReference; }
if (shouldUpdateFace(BACK)) { mtlStencilReference.backFaceValue = stencilReference; }
setMTLContent(StencilReference);
}
void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef<VkViewport> viewports,
uint32_t firstViewport,
bool isDynamic) {
uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports;
if (firstViewport >= maxViewports) { return; }
MVKMTLViewports mtlViewports = isDynamic ? _mtlViewports[StateScope::Dynamic] : _mtlViewports[StateScope::Static];
size_t vpCnt = min((uint32_t)viewports.size(), maxViewports - firstViewport);
for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) {
mtlViewports.viewports[firstViewport + vpIdx] = mvkMTLViewportFromVkViewport(viewports[vpIdx]);
mtlViewports.viewportCount = max(mtlViewports.viewportCount, vpIdx + 1);
}
setMTLContent(Viewports);
}
void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef<VkRect2D> scissors,
uint32_t firstScissor,
bool isDynamic) {
uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports;
if (firstScissor >= maxScissors) { return; }
MVKMTLScissors mtlScissors = isDynamic ? _mtlScissors[StateScope::Dynamic] : _mtlScissors[StateScope::Static];
size_t sCnt = min((uint32_t)scissors.size(), maxScissors - firstScissor);
for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) {
mtlScissors.scissors[firstScissor + sIdx] = mvkMTLScissorRectFromVkRect2D(scissors[sIdx]);
mtlScissors.scissorCount = max(mtlScissors.scissorCount, sIdx + 1);
}
setMTLContent(Scissors);
}
void MVKRenderingCommandEncoderState::setPrimitiveRestartEnable(VkBool32 primitiveRestartEnable, bool isDynamic) {
bool mtlPrimitiveRestartEnable = static_cast<bool>(primitiveRestartEnable);
setMTLContent(PrimitiveRestartEnable);
}
void MVKRenderingCommandEncoderState::setRasterizerDiscardEnable(VkBool32 rasterizerDiscardEnable, bool isDynamic) {
bool mtlRasterizerDiscardEnable = static_cast<bool>(rasterizerDiscardEnable);
setMTLContent(RasterizerDiscardEnable);
}
// This value is retrieved, not encoded, so don't mark this encoder as dirty.
void MVKRenderingCommandEncoderState::setPrimitiveTopology(VkPrimitiveTopology topology, bool isDynamic) {
getContent(_mtlPrimitiveTopology, isDynamic) = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(topology);
}
MTLPrimitiveType MVKRenderingCommandEncoderState::getPrimitiveType() {
return getMTLContent(PrimitiveTopology);
}
bool MVKRenderingCommandEncoderState::isDrawingTriangles() {
switch (getPrimitiveType()) {
case MTLPrimitiveTypeTriangle: return true;
case MTLPrimitiveTypeTriangleStrip: return true;
default: return false;
}
}
#pragma mark -
#pragma mark MVKStencilReferenceValueCommandEncoderState
void MVKStencilReferenceValueCommandEncoderState:: setReferenceValues(const VkPipelineDepthStencilStateCreateInfo& vkDepthStencilInfo) {
// If ref values are to be set dynamically, don't set them here.
if (_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { return; }
if (_frontFaceValue != vkDepthStencilInfo.front.reference || _backFaceValue != vkDepthStencilInfo.back.reference)
markDirty();
_frontFaceValue = vkDepthStencilInfo.front.reference;
_backFaceValue = vkDepthStencilInfo.back.reference;
// This value is retrieved, not encoded, so don't mark this encoder as dirty.
void MVKRenderingCommandEncoderState::setPatchControlPoints(uint32_t patchControlPoints, bool isDynamic) {
getContent(_mtlPatchControlPoints, isDynamic) = patchControlPoints;
}
// We don't check for dynamic state here, because if this is called before pipeline is set,
// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway.
void MVKStencilReferenceValueCommandEncoderState::setReferenceValues(VkStencilFaceFlags faceMask,
uint32_t stencilReference) {
bool dirty = false;
if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_FRONT_BIT)) {
dirty |= (_frontFaceValue != stencilReference);
_frontFaceValue = stencilReference;
}
if (mvkAreAllFlagsEnabled(faceMask, VK_STENCIL_FACE_BACK_BIT)) {
dirty |= (_backFaceValue != stencilReference);
_backFaceValue = stencilReference;
}
if (dirty) markDirty();
uint32_t MVKRenderingCommandEncoderState::getPatchControlPoints() {
return getMTLContent(PatchControlPoints);
}
void MVKStencilReferenceValueCommandEncoderState::encodeImpl(uint32_t stage) {
if (stage != kMVKGraphicsStageRasterization) { return; }
[_cmdEncoder->_mtlRenderEncoder setStencilFrontReferenceValue: _frontFaceValue
backReferenceValue: _backFaceValue];
void MVKRenderingCommandEncoderState::setSampleLocationsEnable(VkBool32 sampleLocationsEnable, bool isDynamic) {
bool slEnbl = static_cast<bool>(sampleLocationsEnable);
auto& mtlSampLocEnbl = getContent(_mtlSampleLocationsEnable, isDynamic);
if (slEnbl == mtlSampLocEnbl) { return; }
mtlSampLocEnbl = slEnbl;
// This value is retrieved, not encoded, so don't mark this encoder as dirty.
_dirtyStates.enable(SampleLocationsEnable);
}
void MVKRenderingCommandEncoderState::setSampleLocations(MVKArrayRef<VkSampleLocationEXT> sampleLocations, bool isDynamic) {
auto& mtlSampPosns = getContent(_mtlSampleLocations, isDynamic);
size_t slCnt = sampleLocations.size();
#pragma mark -
#pragma mark MVKDepthBiasCommandEncoderState
// When comparing new vs current, make use of fact that MTLSamplePosition & VkSampleLocationEXT have same memory footprint.
if (slCnt == mtlSampPosns.size() &&
mvkAreEqual((MTLSamplePosition*)sampleLocations.data(),
mtlSampPosns.data(), slCnt)) {
return;
}
void MVKDepthBiasCommandEncoderState::setDepthBias(const VkPipelineRasterizationStateCreateInfo& vkRasterInfo) {
mtlSampPosns.clear();
for (uint32_t slIdx = 0; slIdx < slCnt; slIdx++) {
auto& sl = sampleLocations[slIdx];
mtlSampPosns.push_back(MTLSamplePositionMake(mvkClamp(sl.x, kMVKMinSampleLocationCoordinate, kMVKMaxSampleLocationCoordinate),
mvkClamp(sl.y, kMVKMinSampleLocationCoordinate, kMVKMaxSampleLocationCoordinate)));
}
auto wasEnabled = _isEnabled;
_isEnabled = vkRasterInfo.depthBiasEnable;
// If ref values are to be set dynamically, don't set them here.
if (_cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_DEPTH_BIAS)) { return; }
if (_isEnabled != wasEnabled || _depthBiasConstantFactor != vkRasterInfo.depthBiasConstantFactor
|| _depthBiasSlopeFactor != vkRasterInfo.depthBiasSlopeFactor || _depthBiasClamp != vkRasterInfo.depthBiasClamp) {
markDirty();
_depthBiasConstantFactor = vkRasterInfo.depthBiasConstantFactor;
_depthBiasSlopeFactor = vkRasterInfo.depthBiasSlopeFactor;
_depthBiasClamp = vkRasterInfo.depthBiasClamp;
}
// This value is retrieved, not encoded, so don't mark this encoder as dirty.
_dirtyStates.enable(SampleLocations);
}
// We don't check for dynamic state here, because if this is called before pipeline is set,
// it may not be accurate, and if not dynamic, pipeline will override when it is encoded anyway.
void MVKDepthBiasCommandEncoderState::setDepthBias(float depthBiasConstantFactor,
float depthBiasSlopeFactor,
float depthBiasClamp) {
if (_depthBiasConstantFactor != depthBiasConstantFactor || _depthBiasSlopeFactor != depthBiasSlopeFactor
|| _depthBiasClamp != depthBiasClamp) {
markDirty();
_depthBiasConstantFactor = depthBiasConstantFactor;
_depthBiasSlopeFactor = depthBiasSlopeFactor;
_depthBiasClamp = depthBiasClamp;
}
MVKArrayRef<MTLSamplePosition> MVKRenderingCommandEncoderState::getSamplePositions() {
return getMTLContent(SampleLocationsEnable) ? getMTLContent(SampleLocations).contents() : MVKArrayRef<MTLSamplePosition>();
}
void MVKDepthBiasCommandEncoderState::encodeImpl(uint32_t stage) {
if (stage != kMVKGraphicsStageRasterization) { return; }
if (_isEnabled) {
[_cmdEncoder->_mtlRenderEncoder setDepthBias: _depthBiasConstantFactor
slopeScale: _depthBiasSlopeFactor
clamp: _depthBiasClamp];
} else {
[_cmdEncoder->_mtlRenderEncoder setDepthBias: 0 slopeScale: 0 clamp: 0];
}
// Return whether state is dirty, and mark it not dirty
bool MVKRenderingCommandEncoderState::isDirty(MVKRenderStateType state) {
bool rslt = _dirtyStates.isEnabled(state);
_dirtyStates.disable(state);
return rslt;
}
// Don't force sample location & sample location enable to become dirty if they weren't already, because
// this may cause needsMetalRenderPassRestart() to trigger an unnecessary Metal renderpass restart.
void MVKRenderingCommandEncoderState::markDirty() {
MVKCommandEncoderState::markDirty();
#pragma mark -
#pragma mark MVKBlendColorCommandEncoderState
bool wasSLDirty = _dirtyStates.isEnabled(SampleLocations);
bool wasSLEnblDirty = _dirtyStates.isEnabled(SampleLocationsEnable);
_dirtyStates.enableAll();
void MVKBlendColorCommandEncoderState::setBlendColor(float red, float green,
float blue, float alpha,
bool isDynamic) {
// Abort if we are using dynamic, but call is not dynamic.
if ( !isDynamic && _cmdEncoder->supportsDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS) ) { return; }
if (_red != red || _green != green || _blue != blue || _alpha != alpha) {
markDirty();
_red = red;
_green = green;
_blue = blue;
_alpha = alpha;
}
_dirtyStates.set(SampleLocations, wasSLDirty);
_dirtyStates.set(SampleLocationsEnable, wasSLEnblDirty);
}
void MVKBlendColorCommandEncoderState::encodeImpl(uint32_t stage) {
if (stage != kMVKGraphicsStageRasterization) { return; }
[_cmdEncoder->_mtlRenderEncoder setBlendColorRed: _red green: _green blue: _blue alpha: _alpha];
// Don't call parent beginMetalRenderPass() because it
// will call local markDirty() which is too aggressive.
void MVKRenderingCommandEncoderState::beginMetalRenderPass() {
if (_isModified) {
_dirtyStates = _modifiedStates;
MVKCommandEncoderState::markDirty();
}
}
// Don't use || on isDirty calls, to ensure they both get called, so that the dirty flag of each will be cleared.
bool MVKRenderingCommandEncoderState::needsMetalRenderPassRestart() {
bool isSLDirty = isDirty(SampleLocations);
bool isSLEnblDirty = isDirty(SampleLocationsEnable);
return isSLDirty || isSLEnblDirty;
}
#pragma mark Encoding
void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) {
if (stage != kMVKGraphicsStageRasterization) { return; }
auto& rendEnc = _cmdEncoder->_mtlRenderEncoder;
if (isDirty(PolygonMode)) { [rendEnc setTriangleFillMode: getMTLContent(PolygonMode)]; }
if (isDirty(CullMode)) { [rendEnc setCullMode: getMTLContent(CullMode)]; }
if (isDirty(FrontFace)) { [rendEnc setFrontFacingWinding: getMTLContent(FrontFace)]; }
if (isDirty(BlendConstants)) {
auto& bcFlt = getMTLContent(BlendConstants).float32;
[rendEnc setBlendColorRed: bcFlt[0] green: bcFlt[1] blue: bcFlt[2] alpha: bcFlt[3]];
}
if (isDirty(DepthBiasEnable) || isDirty(DepthBias)) {
if (getMTLContent(DepthBiasEnable)) {
auto& db = getMTLContent(DepthBias);
[rendEnc setDepthBias: db.depthBiasConstantFactor
slopeScale: db.depthBiasSlopeFactor
clamp: db.depthBiasClamp];
} else {
[rendEnc setDepthBias: 0 slopeScale: 0 clamp: 0];
}
}
if (isDirty(DepthClipEnable) && _cmdEncoder->_pDeviceFeatures->depthClamp) {
[rendEnc setDepthClipMode: getMTLContent(DepthClipEnable)];
}
if (isDirty(StencilReference)) {
auto& sr = getMTLContent(StencilReference);
[rendEnc setStencilFrontReferenceValue: sr.frontFaceValue backReferenceValue: sr.backFaceValue];
}
// Validate
// In Metal, primitive restart cannot be disabled.
// Just issue warning here, as it is very likely the app is not actually expecting
// to use primitive restart at all, and is just setting this as a "just-in-case",
// and forcing an error here would be unexpected to the app (including CTS).
auto mtlPrimType = getPrimitiveType();
if (isDirty(PrimitiveRestartEnable) && !getMTLContent(PrimitiveRestartEnable) &&
(mtlPrimType == MTLPrimitiveTypeTriangleStrip || mtlPrimType == MTLPrimitiveTypeLineStrip)) {
reportWarning(VK_ERROR_FEATURE_NOT_PRESENT, "Metal does not support disabling primitive restart.");
}
if (isDirty(Viewports)) {
auto& mtlViewports = getMTLContent(Viewports);
if (_cmdEncoder->_pDeviceFeatures->multiViewport) {
#if MVK_MACOS_OR_IOS
[rendEnc setViewports: mtlViewports.viewports count: mtlViewports.viewportCount];
#endif
} else {
[rendEnc setViewport: mtlViewports.viewports[0]];
}
}
// If rasterizing discard has been dynamically enabled, or culling has been dynamically
// set to front-and-back, emulate this by using zeroed scissor rectangles.
if (isDirty(Scissors)) {
static MTLScissorRect zeroRect = {};
auto mtlScissors = getMTLContent(Scissors);
bool shouldDiscard = ((_mtlRasterizerDiscardEnable[StateScope::Dynamic] && isDynamicState(RasterizerDiscardEnable)) ||
(isDrawingTriangles() && _cullBothFaces[StateScope::Dynamic] && isDynamicState(CullMode)));
for (uint32_t sIdx = 0; sIdx < mtlScissors.scissorCount; sIdx++) {
mtlScissors.scissors[sIdx] = shouldDiscard ? zeroRect : _cmdEncoder->clipToRenderArea(mtlScissors.scissors[sIdx]);
}
if (_cmdEncoder->_pDeviceFeatures->multiViewport) {
#if MVK_MACOS_OR_IOS
[rendEnc setScissorRects: mtlScissors.scissors count: mtlScissors.scissorCount];
#endif
} else {
[rendEnc setScissorRect: mtlScissors.scissors[0]];
}
}
}
#undef getMTLContent
#undef setMTLContent
#pragma mark -
#pragma mark MVKResourcesCommandEncoderState
@ -488,7 +620,7 @@ void MVKResourcesCommandEncoderState::bindDescriptorSet(uint32_t descSetIndex,
// Update dynamic buffer offsets
uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexes().dynamicOffsetBufferIndex;
uint32_t doCnt = descSet->getDynamicOffsetDescriptorCount();
for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size; doIdx++) {
for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size(); doIdx++) {
updateImplicitBuffer(_dynamicOffsets, baseDynOfstIdx + doIdx, dynamicOffsets[dynamicOffsetIndex++]);
}
@ -594,7 +726,7 @@ void MVKResourcesCommandEncoderState::markDirty() {
}
// If a swizzle is needed for this stage, iterates all the bindings and logs errors for those that need texture swizzling.
void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef<MVKMTLTextureBinding> texBindings) {
void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef<const MVKMTLTextureBinding> texBindings) {
if (needsSwizzle) {
for (auto& tb : texBindings) {
VkComponentMapping vkcm = mvkUnpackSwizzle(tb.swizzle);
@ -684,7 +816,7 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag
const char* pStageName,
bool fullImageViewSwizzle,
std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&)> bindBuffer,
std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&, const MVKArrayRef<uint32_t>)> bindImplicitBuffer,
std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&, MVKArrayRef<const uint32_t>)> bindImplicitBuffer,
std::function<void(MVKCommandEncoder*, MVKMTLTextureBinding&)> bindTexture,
std::function<void(MVKCommandEncoder*, MVKMTLSamplerStateBinding&)> bindSampler) {
@ -772,11 +904,16 @@ void MVKGraphicsResourcesCommandEncoderState::markDirty() {
}
}
#if !MVK_XCODE_15
static const NSUInteger MTLAttributeStrideStatic = NSUIntegerMax;
#endif
void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)getPipeline();
auto* pipeline = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle;
bool forTessellation = pipeline->isTessellationPipeline();
bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride);
if (stage == kMVKGraphicsStageVertex) {
encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle,
@ -795,10 +932,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
offset: b.offset
atIndex: b.index];
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t> s)->void {
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef<const uint32_t> s)->void {
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
s.data,
s.size * sizeof(uint32_t),
s.data(),
s.byteSize(),
b.index);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {
@ -812,33 +949,24 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
} else if (!forTessellation && stage == kMVKGraphicsStageRasterization) {
encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle,
[pipeline](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
[pipeline, isDynamicVertexStride](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
// The app may have bound more vertex attribute buffers than used by the pipeline.
// We must not bind those extra buffers to the shader because they might overwrite
// any implicit buffers used by the pipeline.
if (pipeline->isValidVertexBufferIndex(kMVKShaderStageVertex, b.index)) {
if (b.isInline) {
cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder,
b.mtlBytes,
b.size,
b.index);
} else {
if (b.justOffset) {
[cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset
atIndex: b.index];
} else {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer
offset: b.offset
atIndex: b.index];
}
cmdEncoder->encodeVertexAttributeBuffer(b, isDynamicVertexStride);
// Add any translated vertex bindings for this binding
// Add any translated vertex bindings for this binding
if ( !b.isInline ) {
auto xltdVtxBindings = pipeline->getTranslatedVertexBindings();
for (auto& xltdBind : xltdVtxBindings) {
if (b.index == pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.binding)) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer
offset: b.offset + xltdBind.translationOffset
atIndex: pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)];
MVKMTLBufferBinding bx = {
.mtlBuffer = b.mtlBuffer,
.offset = b.offset + xltdBind.translationOffset,
.stride = b.stride,
.index = static_cast<uint16_t>(pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)) };
cmdEncoder->encodeVertexAttributeBuffer(bx, isDynamicVertexStride);
}
}
}
@ -846,10 +974,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
b.isDirty = true; // We haven't written it out, so leave dirty until next time.
}
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t> s)->void {
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef<const uint32_t> s)->void {
cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder,
s.data,
s.size * sizeof(uint32_t),
s.data(),
s.byteSize(),
b.index);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {
@ -879,10 +1007,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
offset: b.offset
atIndex: b.index];
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t> s)->void {
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef<const uint32_t> s)->void {
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
s.data,
s.size * sizeof(uint32_t),
s.data(),
s.byteSize(),
b.index);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {
@ -898,24 +1026,13 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
if (forTessellation && stage == kMVKGraphicsStageRasterization) {
encodeBindings(kMVKShaderStageTessEval, "tessellation evaluation", fullImageViewSwizzle,
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
if (b.isInline)
cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder,
b.mtlBytes,
b.size,
b.index);
else if (b.justOffset)
[cmdEncoder->_mtlRenderEncoder setVertexBufferOffset: b.offset
atIndex: b.index];
else
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer
offset: b.offset
atIndex: b.index];
[isDynamicVertexStride](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
cmdEncoder->encodeVertexAttributeBuffer(b, isDynamicVertexStride);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t> s)->void {
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef<const uint32_t> s)->void {
cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder,
s.data,
s.size * sizeof(uint32_t),
s.data(),
s.byteSize(),
b.index);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {
@ -945,10 +1062,10 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
offset: b.offset
atIndex: b.index];
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t> s)->void {
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef<const uint32_t> s)->void {
cmdEncoder->setFragmentBytes(cmdEncoder->_mtlRenderEncoder,
s.data,
s.size * sizeof(uint32_t),
s.data(),
s.byteSize(),
b.index);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {

View File

@ -23,7 +23,7 @@
#include "MVKCommandEncodingPool.h"
#include "MVKCommand.h"
#include "MVKCmdPipeline.h"
#include "MVKCmdRenderPass.h"
#include "MVKCmdRendering.h"
#include "MVKCmdDispatch.h"
#include "MVKCmdDraw.h"
#include "MVKCmdTransfer.h"
@ -82,7 +82,7 @@ public:
* Returns a retained MTLCommandBuffer created from the indexed queue
* within the queue family for which this command pool was created.
*/
id<MTLCommandBuffer> getMTLCommandBuffer(uint32_t queueIndex);
id<MTLCommandBuffer> getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex);
/** Release any held but unused memory back to the system. */
void trim();

View File

@ -77,8 +77,8 @@ void MVKCommandPool::freeCommandBuffers(uint32_t commandBufferCount,
}
}
id<MTLCommandBuffer> MVKCommandPool::getMTLCommandBuffer(uint32_t queueIndex) {
return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(kMVKCommandUseEndCommandBuffer, true);
id<MTLCommandBuffer> MVKCommandPool::getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex) {
return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(cmdUse, true);
}
// Clear the command type pool member variables.

View File

@ -210,27 +210,24 @@ namespace std {
* change as early as possible.
*/
typedef struct MVKMTLStencilDescriptorData {
bool enabled; /**< Indicates whether stencil testing for this face is enabled. */
uint32_t readMask; /**< The bit-mask to apply when comparing the stencil buffer value to the reference value. */
uint32_t writeMask; /**< The bit-mask to apply when writing values to the stencil buffer. */
uint8_t stencilCompareFunction; /**< The stencil compare function (interpreted as MTLCompareFunction). */
uint8_t stencilFailureOperation; /**< The operation to take when the stencil test fails (interpreted as MTLStencilOperation). */
uint8_t depthFailureOperation; /**< The operation to take when the stencil test passes, but the depth test fails (interpreted as MTLStencilOperation). */
uint8_t depthStencilPassOperation; /**< The operation to take when both the stencil and depth tests pass (interpreted as MTLStencilOperation). */
uint32_t readMask; /**< The bit-mask to apply when comparing the stencil buffer value to the reference value. */
uint32_t writeMask; /**< The bit-mask to apply when writing values to the stencil buffer. */
bool operator==(const MVKMTLStencilDescriptorData& rhs) const { return mvkAreEqual(this, &rhs); }
bool operator!=(const MVKMTLStencilDescriptorData& rhs) const { return !(*this == rhs); }
MVKMTLStencilDescriptorData() {
// Start with all zeros to ensure memory comparisons will work,
// even if the structure contains alignment gaps.
mvkClear(this);
enabled = false;
mvkClear(this); // Clear all memory to ensure memory comparisons will work.
mvkEnableAllFlags(readMask);
mvkEnableAllFlags(writeMask);
stencilCompareFunction = MTLCompareFunctionAlways;
stencilFailureOperation = MTLStencilOperationKeep;
depthFailureOperation = MTLStencilOperationKeep;
depthStencilPassOperation = MTLStencilOperationKeep;
readMask = static_cast<uint32_t>(~0);
writeMask = static_cast<uint32_t>(~0);
}
} MVKMTLStencilDescriptorData;
@ -247,34 +244,32 @@ const MVKMTLStencilDescriptorData kMVKMTLStencilDescriptorDataDefault;
* change as early as possible.
*/
typedef struct MVKMTLDepthStencilDescriptorData {
uint8_t depthCompareFunction; /**< The depth compare function (interpreted as MTLCompareFunction). */
bool depthWriteEnabled; /**< Indicates whether depth writing is enabled. */
MVKMTLStencilDescriptorData frontFaceStencilData;
MVKMTLStencilDescriptorData backFaceStencilData;
uint8_t depthCompareFunction; /**< The depth compare function (interpreted as MTLCompareFunction). */
bool depthWriteEnabled; /**< Indicates whether depth writing is enabled. */
bool stencilTestEnabled; /**< Indicates whether stencil testing is enabled. */
bool operator==(const MVKMTLDepthStencilDescriptorData& rhs) const { return mvkAreEqual(this, &rhs); }
bool operator!=(const MVKMTLDepthStencilDescriptorData& rhs) const { return !(*this == rhs); }
std::size_t hash() const {
return mvkHash((uint64_t*)this, sizeof(*this) / sizeof(uint64_t));
}
/** Disable depth and/or stencil testing. */
void disable(bool disableDepth, bool disableStencil) {
if (disableDepth) {
depthCompareFunction = MTLCompareFunctionAlways;
depthWriteEnabled = false;
}
if (disableStencil) {
frontFaceStencilData = kMVKMTLStencilDescriptorDataDefault;
backFaceStencilData = kMVKMTLStencilDescriptorDataDefault;
}
void disableDepth() {
depthCompareFunction = MTLCompareFunctionAlways;
depthWriteEnabled = false;
}
void disableStencil() {
stencilTestEnabled = false;
frontFaceStencilData = kMVKMTLStencilDescriptorDataDefault;
backFaceStencilData = kMVKMTLStencilDescriptorDataDefault;
}
MVKMTLDepthStencilDescriptorData() {
// Start with all zeros to ensure memory comparisons will work,
// even if the structure contains alignment gaps.
mvkClear(this);
disable(true, true);
mvkClear(this); // Clear all memory to ensure memory comparisons will work.
disableDepth();
disableStencil();
}
} __attribute__((aligned(sizeof(uint64_t)))) MVKMTLDepthStencilDescriptorData;

View File

@ -286,7 +286,7 @@ id<MTLFunction> MVKCommandResourceFactory::newBlitFragFunction(MVKRPSKeyBlitImg&
[msl appendLineMVK: @" constant TexSubrez& subRez [[buffer(0)]]) {"];
[msl appendLineMVK: @" FragmentOutputs out;"];
if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_DEPTH_BIT))) {
[msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod)).%c;", coordArg, sliceArg, swizzleArg[0]];
[msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod));", coordArg, sliceArg];
[msl appendLineMVK];
}
if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT))) {
@ -433,9 +433,10 @@ id<MTLDepthStencilState> MVKCommandResourceFactory::newMTLDepthStencilState(bool
}
id<MTLDepthStencilState> MVKCommandResourceFactory::newMTLDepthStencilState(MVKMTLDepthStencilDescriptorData& dsData) {
MTLStencilDescriptor* fsDesc = newMTLStencilDescriptor(dsData.frontFaceStencilData); // temp retain
MTLStencilDescriptor* bsDesc = newMTLStencilDescriptor(dsData.backFaceStencilData); // temp retain
MTLDepthStencilDescriptor* dsDesc = [MTLDepthStencilDescriptor new]; // temp retain
bool testStencil = dsData.stencilTestEnabled;
auto* fsDesc = testStencil ? newMTLStencilDescriptor(dsData.frontFaceStencilData) : nil; // temp retain
auto* bsDesc = testStencil ? newMTLStencilDescriptor(dsData.backFaceStencilData) : nil; // temp retain
auto* dsDesc = [MTLDepthStencilDescriptor new]; // temp retain
dsDesc.depthCompareFunction = (MTLCompareFunction)dsData.depthCompareFunction;
dsDesc.depthWriteEnabled = dsData.depthWriteEnabled;
dsDesc.frontFaceStencil = fsDesc;
@ -443,16 +444,14 @@ id<MTLDepthStencilState> MVKCommandResourceFactory::newMTLDepthStencilState(MVKM
id<MTLDepthStencilState> dss = [getMTLDevice() newDepthStencilStateWithDescriptor: dsDesc];
[fsDesc release]; // temp release
[bsDesc release]; // temp release
[dsDesc release]; // temp release
[fsDesc release]; // temp release
[bsDesc release]; // temp release
[dsDesc release]; // temp release
return dss;
}
MTLStencilDescriptor* MVKCommandResourceFactory::newMTLStencilDescriptor(MVKMTLStencilDescriptorData& sData) {
if ( !sData.enabled ) { return nil; }
MTLStencilDescriptor* sDesc = [MTLStencilDescriptor new]; // retained
sDesc.stencilCompareFunction = (MTLCompareFunction)sData.stencilCompareFunction;
sDesc.stencilFailureOperation = (MTLStencilOperation)sData.stencilFailureOperation;
@ -623,7 +622,7 @@ id<MTLFunction> MVKCommandResourceFactory::newFunctionNamed(const char* funcName
NSString* nsFuncName = [[NSString alloc] initWithUTF8String: funcName]; // temp retained
id<MTLFunction> mtlFunc = [_mtlLibrary newFunctionWithName: nsFuncName]; // retained
[nsFuncName release]; // temp release
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
return mtlFunc;
}
@ -636,7 +635,7 @@ id<MTLFunction> MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode,
id<MTLLibrary> mtlLib = [getMTLDevice() newLibraryWithSource: mslSrcCode
options: getDevice()->getMTLCompileOptions()
error: &err]; // temp retain
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
if (err) {
reportError(VK_ERROR_INITIALIZATION_FAILED,
@ -645,7 +644,7 @@ id<MTLFunction> MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode,
} else {
startTime = _device->getPerformanceTimestamp();
mtlFunc = [mtlLib newFunctionWithName: funcName];
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
}
[mtlLib release]; // temp release
@ -689,7 +688,7 @@ void MVKCommandResourceFactory::initMTLLibrary() {
options: getDevice()->getMTLCompileOptions()
error: &err]; // retained
MVKAssert( !err, "Could not compile command shaders (Error code %li):\n%s", (long)err.code, err.localizedDescription.UTF8String);
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
}
}

View File

@ -81,18 +81,31 @@ MVK_CMD_TYPE_POOL(EndRenderPass)
MVK_CMD_TYPE_POOLS_FROM_3_THRESHOLDS(BeginRendering, 1, 2, 4)
MVK_CMD_TYPE_POOL(EndRendering)
MVK_CMD_TYPE_POOL(SetSampleLocations)
MVK_CMD_TYPE_POOL(SetSampleLocationsEnable)
MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ExecuteCommands, 1)
MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindDescriptorSetsStatic, 1, 4)
MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BindDescriptorSetsDynamic, 4)
MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetViewport, 1)
MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetScissor, 1)
MVK_CMD_TYPE_POOL(SetLineWidth)
MVK_CMD_TYPE_POOL(SetDepthBias)
MVK_CMD_TYPE_POOL(SetBlendConstants)
MVK_CMD_TYPE_POOL(SetDepthBounds)
MVK_CMD_TYPE_POOL(SetDepthBias)
MVK_CMD_TYPE_POOL(SetDepthBiasEnable)
MVK_CMD_TYPE_POOL(SetDepthTestEnable)
MVK_CMD_TYPE_POOL(SetDepthWriteEnable)
MVK_CMD_TYPE_POOL(SetDepthClipEnable)
MVK_CMD_TYPE_POOL(SetDepthCompareOp)
MVK_CMD_TYPE_POOL(SetStencilTestEnable)
MVK_CMD_TYPE_POOL(SetStencilOp)
MVK_CMD_TYPE_POOL(SetStencilCompareMask)
MVK_CMD_TYPE_POOL(SetStencilWriteMask)
MVK_CMD_TYPE_POOL(SetStencilReference)
MVK_CMD_TYPE_POOL(SetCullMode)
MVK_CMD_TYPE_POOL(SetFrontFace)
MVK_CMD_TYPE_POOL(SetPrimitiveTopology)
MVK_CMD_TYPE_POOL(SetPrimitiveRestartEnable)
MVK_CMD_TYPE_POOL(SetPolygonMode)
MVK_CMD_TYPE_POOL(SetPatchControlPoints)
MVK_CMD_TYPE_POOL(SetRasterizerDiscardEnable)
MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2)
MVK_CMD_TYPE_POOL(BindIndexBuffer)
MVK_CMD_TYPE_POOL(Draw)

View File

@ -99,7 +99,6 @@ public:
protected:
friend class MVKMTLBufferAllocation;
MVKBaseObject* getBaseObject() override { return this; };
MVKMTLBufferAllocation* newObject() override;
void returnAllocationUnlocked(MVKMTLBufferAllocation* ba);
void returnAllocation(MVKMTLBufferAllocation* ba);

View File

@ -67,6 +67,7 @@ typedef struct MVKMTLBufferBinding {
union { id<MTLBuffer> mtlBuffer = nil; id<MTLBuffer> mtlResource; const void* mtlBytes; }; // aliases
VkDeviceSize offset = 0;
uint32_t size = 0;
uint32_t stride = 0;
uint16_t index = 0;
bool justOffset = false;
bool isDirty = true;
@ -78,14 +79,16 @@ typedef struct MVKMTLBufferBinding {
void update(const MVKMTLBufferBinding &other) {
if (mtlBuffer != other.mtlBuffer || size != other.size || other.isInline) {
mtlBuffer = other.mtlBuffer;
offset = other.offset;
size = other.size;
stride = other.stride;
isInline = other.isInline;
offset = other.offset;
justOffset = false;
isOverridden = false;
isDirty = true;
} else if (offset != other.offset) {
} else if (offset != other.offset || stride != other.stride) {
offset = other.offset;
stride = other.stride;
justOffset = !isOverridden && (!isDirty || justOffset);
isOverridden = false;
isDirty = true;
@ -112,8 +115,10 @@ typedef struct MVKPipelineBarrier {
} MVKPipelineBarrierType;
MVKPipelineBarrierType type = None;
VkAccessFlags srcAccessMask = 0;
VkAccessFlags dstAccessMask = 0;
VkPipelineStageFlags2 srcStageMask = 0;
VkAccessFlags2 srcAccessMask = 0;
VkPipelineStageFlags2 dstStageMask = 0;
VkAccessFlags2 dstAccessMask = 0;
uint8_t srcQueueFamilyIndex = 0;
uint8_t dstQueueFamilyIndex = 0;
union { MVKBuffer* mvkBuffer = nullptr; MVKImage* mvkImage; MVKResource* mvkResource; };
@ -136,15 +141,29 @@ typedef struct MVKPipelineBarrier {
bool isBufferBarrier() { return type == Buffer; }
bool isImageBarrier() { return type == Image; }
MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier) :
MVKPipelineBarrier(const VkMemoryBarrier2& vkBarrier) :
type(Memory),
srcStageMask(vkBarrier.srcStageMask),
srcAccessMask(vkBarrier.srcAccessMask),
dstStageMask(vkBarrier.dstStageMask),
dstAccessMask(vkBarrier.dstAccessMask)
{}
MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier) :
type(Buffer),
MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask) :
type(Memory),
srcStageMask(srcStageMask),
srcAccessMask(vkBarrier.srcAccessMask),
dstStageMask(dstStageMask),
dstAccessMask(vkBarrier.dstAccessMask)
{}
MVKPipelineBarrier(const VkBufferMemoryBarrier2& vkBarrier) :
type(Buffer),
srcStageMask(vkBarrier.srcStageMask),
srcAccessMask(vkBarrier.srcAccessMask),
dstStageMask(vkBarrier.dstStageMask),
dstAccessMask(vkBarrier.dstAccessMask),
srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex),
dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex),
@ -153,9 +172,45 @@ typedef struct MVKPipelineBarrier {
size(vkBarrier.size)
{}
MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier) :
type(Image),
MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask) :
type(Buffer),
srcStageMask(srcStageMask),
srcAccessMask(vkBarrier.srcAccessMask),
dstStageMask(dstStageMask),
dstAccessMask(vkBarrier.dstAccessMask),
srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex),
dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex),
mvkBuffer((MVKBuffer*)vkBarrier.buffer),
offset(vkBarrier.offset),
size(vkBarrier.size)
{}
MVKPipelineBarrier(const VkImageMemoryBarrier2& vkBarrier) :
type(Image),
srcStageMask(vkBarrier.srcStageMask),
srcAccessMask(vkBarrier.srcAccessMask),
dstStageMask(vkBarrier.dstStageMask),
dstAccessMask(vkBarrier.dstAccessMask),
srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex),
dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex),
mvkImage((MVKImage*)vkBarrier.image),
newLayout(vkBarrier.newLayout),
aspectMask(vkBarrier.subresourceRange.aspectMask),
baseArrayLayer(vkBarrier.subresourceRange.baseArrayLayer),
layerCount(vkBarrier.subresourceRange.layerCount),
baseMipLevel(vkBarrier.subresourceRange.baseMipLevel),
levelCount(vkBarrier.subresourceRange.levelCount)
{}
MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask) :
type(Image),
srcStageMask(srcStageMask),
srcAccessMask(vkBarrier.srcAccessMask),
dstStageMask(dstStageMask),
dstAccessMask(vkBarrier.dstAccessMask),
srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex),
dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex),

View File

@ -52,16 +52,12 @@ public:
VkResult bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo);
/** Applies the specified global memory barrier. */
void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void applyMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) override;
/** Applies the specified buffer memory barrier. */
void applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void applyBufferMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse);
@ -95,9 +91,7 @@ protected:
friend class MVKDeviceMemory;
void propagateDebugName() override;
bool needsHostReadSync(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier);
bool needsHostReadSync(MVKPipelineBarrier& barrier);
bool overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &overlapOffset, VkDeviceSize &overlapSize);
bool shouldFlushHostMemory();
VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size);

View File

@ -94,25 +94,21 @@ VkResult MVKBuffer::bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo) {
return bindDeviceMemory((MVKDeviceMemory*)pBindInfo->memory, pBindInfo->memoryOffset);
}
void MVKBuffer::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void MVKBuffer::applyMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) {
#if MVK_MACOS
if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) {
if ( needsHostReadSync(barrier) ) {
[cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()];
}
#endif
}
void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) {
#if MVK_MACOS
if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) {
if ( needsHostReadSync(barrier) ) {
[cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()];
}
#endif
@ -120,11 +116,9 @@ void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask,
// Returns whether the specified buffer memory barrier requires a sync between this
// buffer and host memory for the purpose of the host reading texture memory.
bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier) {
bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) {
#if MVK_MACOS
return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
#endif
@ -238,6 +232,7 @@ MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) :
}
void MVKBuffer::initExternalMemory(VkExternalMemoryHandleTypeFlags handleTypes) {
if ( !handleTypes ) { return; }
if (mvkIsOnlyAnyFlagEnabled(handleTypes, VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR)) {
_externalMemoryHandleTypes = handleTypes;
auto& xmProps = getPhysicalDevice()->getExternalBufferProperties(VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR);

View File

@ -729,7 +729,7 @@ void MVKBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
MVKMTLBufferBinding bb;
NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size > dynamicOffsetIndex
NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size() > dynamicOffsetIndex
? dynamicOffsets[dynamicOffsetIndex++] : 0);
if (_mvkBuffer) {
bb.mtlBuffer = _mvkBuffer->getMTLBuffer();

View File

@ -53,7 +53,6 @@ class MVKSemaphore;
class MVKTimelineSemaphore;
class MVKDeferredOperation;
class MVKEvent;
class MVKSemaphoreImpl;
class MVKQueryPool;
class MVKShaderModule;
class MVKPipelineCache;
@ -74,16 +73,22 @@ class MVKPrivateDataSlot;
/** The buffer index to use for vertex content. */
const static uint32_t kMVKVertexContentBufferIndex = 0;
static constexpr uint32_t kMVKVertexContentBufferIndex = 0;
// Parameters to define the sizing of inline collections
const static uint32_t kMVKQueueFamilyCount = 4;
const static uint32_t kMVKQueueCountPerQueueFamily = 1; // Must be 1. See comments in MVKPhysicalDevice::getQueueFamilies()
const static uint32_t kMVKMinSwapchainImageCount = 2;
const static uint32_t kMVKMaxSwapchainImageCount = 3;
const static uint32_t kMVKMaxColorAttachmentCount = 8;
const static uint32_t kMVKMaxViewportScissorCount = 16;
const static uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
static constexpr uint32_t kMVKQueueFamilyCount = 4;
static constexpr uint32_t kMVKQueueCountPerQueueFamily = 1; // Must be 1. See comments in MVKPhysicalDevice::getQueueFamilies()
static constexpr uint32_t kMVKMinSwapchainImageCount = 2;
static constexpr uint32_t kMVKMaxSwapchainImageCount = 3;
static constexpr uint32_t kMVKMaxColorAttachmentCount = 8;
static constexpr uint32_t kMVKMaxViewportScissorCount = 16;
static constexpr uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
static constexpr uint32_t kMVKMaxSampleCount = 8;
static constexpr uint32_t kMVKSampleLocationCoordinateGridSize = 16;
static constexpr float kMVKMinSampleLocationCoordinate = 0.0;
static constexpr float kMVKMaxSampleLocationCoordinate = (float)(kMVKSampleLocationCoordinateGridSize - 1) / (float)kMVKSampleLocationCoordinateGridSize;
static constexpr VkExtent2D kMVKSampleLocationPixelGridSize = { 1, 1 };
static constexpr VkExtent2D kMVKSampleLocationPixelGridSizeNotSupported = { 0, 0 };
#if !MVK_XCODE_12
typedef NSUInteger MTLTimestamp;
@ -398,11 +403,12 @@ protected:
uint64_t getRecommendedMaxWorkingSetSize();
uint64_t getCurrentAllocatedSize();
uint32_t getMaxSamplerCount();
uint32_t getMaxPerSetDescriptorCount();
void initExternalMemoryProperties();
void initExtensions();
void initCounterSets();
bool needsCounterSetRetained();
void updateTimestampsAndPeriod();
void updateTimestampPeriod();
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
void initPipelineCacheUUID();
uint32_t getHighestGPUCapability();
@ -440,6 +446,11 @@ protected:
#pragma mark -
#pragma mark MVKDevice
typedef enum {
MVKActivityPerformanceValueTypeDuration,
MVKActivityPerformanceValueTypeByteCount,
} MVKActivityPerformanceValueType;
typedef struct MVKMTLBlitEncoder {
id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
id<MTLCommandBuffer> mtlCmdBuffer = nil;
@ -677,43 +688,45 @@ public:
void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value);
/** Applies the specified global memory barrier to all resource issued by this device. */
void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void applyMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse);
/**
* If performance is being tracked, returns a monotonic timestamp value for use performance timestamping.
*
* The returned value corresponds to the number of CPU "ticks" since the app was initialized.
*
* Calling this value twice, subtracting the first value from the second, and then multiplying
* the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the
* number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds()
* can be used to perform this calculation.
* Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds()
* to determine the number of nanoseconds or milliseconds between the two calls.
*/
uint64_t getPerformanceTimestamp() { return _isPerformanceTracking ? mvkGetTimestamp() : 0; }
/**
* If performance is being tracked, adds the performance for an activity with a duration
* interval between the start and end times, to the given performance statistics.
* If performance is being tracked, adds the performance for an activity with a duration interval
* between the start and end times, measured in milliseconds, to the given performance statistics.
*
* If endTime is zero or not supplied, the current time is used.
*/
void addActivityPerformance(MVKPerformanceTracker& activityTracker,
void addPerformanceInterval(MVKPerformanceTracker& perfTracker,
uint64_t startTime, uint64_t endTime = 0) {
if (_isPerformanceTracking) {
updateActivityPerformance(activityTracker, startTime, endTime);
// Log call not locked. Very minor chance that the tracker data will be updated during log call,
// resulting in an inconsistent report. Not worth taking lock perf hit for rare inline reporting.
if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) {
logActivityPerformance(activityTracker, _performanceStatistics, true);
}
updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime));
}
};
/**
* If performance is being tracked, adds the performance for an activity
* with a kilobyte count, to the given performance statistics.
*/
void addPerformanceByteCount(MVKPerformanceTracker& perfTracker, uint64_t byteCount) {
if (_isPerformanceTracking) {
updateActivityPerformance(perfTracker, double(byteCount / KIBI));
}
};
/** Updates the given performance statistic. */
void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue);
/** Populates the specified statistics structure from the current activity performance statistics. */
void getPerformanceStatistics(MVKPerformanceStatistics* pPerf);
@ -885,8 +898,10 @@ protected:
template<typename S> void enableFeatures(S* pRequested, VkBool32* pEnabledBools, const VkBool32* pRequestedBools, const VkBool32* pAvailableBools, uint32_t count);
void enableExtensions(const VkDeviceCreateInfo* pCreateInfo);
const char* getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats);
void logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false);
void updateActivityPerformance(MVKPerformanceTracker& activity, uint64_t startTime, uint64_t endTime);
MVKActivityPerformanceValueType getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats);
void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats);
void logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false);
void logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false);
void getDescriptorVariableDescriptorCountLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
VkDescriptorSetLayoutSupport* pSupport,
VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport);
@ -908,7 +923,6 @@ protected:
id<MTLSamplerState> _defaultMTLSamplerState = nil;
id<MTLBuffer> _dummyBlitMTLBuffer = nil;
uint32_t _globalVisibilityQueryCount = 0;
MVKConfigActivityPerformanceLoggingStyle _activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT;
bool _isPerformanceTracking = false;
bool _isCurrentlyAutoGPUCapturing = false;
bool _isUsingMetalArgumentBuffers = false;
@ -952,13 +966,9 @@ public:
bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; };
/** Constructs an instance for the specified device. */
MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); }
virtual ~MVKDeviceTrackingMixin() {}
MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); }
protected:
virtual MVKBaseObject* getBaseObject() = 0;
MVKDevice* _device;
};
@ -973,9 +983,6 @@ public:
/** Constructs an instance for the specified device. */
MVKBaseDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {}
protected:
MVKBaseObject* getBaseObject() override { return this; };
};
@ -992,10 +999,6 @@ public:
/** Constructs an instance for the specified device. */
MVKVulkanAPIDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {}
protected:
MVKBaseObject* getBaseObject() override { return this; };
};
@ -1048,7 +1051,6 @@ public:
protected:
T* newObject() override { return new T(_device); }
MVKBaseObject* getBaseObject() override { return this; };
};
@ -1056,6 +1058,15 @@ protected:
#pragma mark -
#pragma mark Support functions
/**
* Returns an autoreleased array containing the MTLDevices available on this system,
* sorted according to power, with higher power GPU's at the front of the array.
* This ensures that a lazy app that simply grabs the first GPU will get a high-power
* one by default. If MVKConfiguration::forceLowPowerGPU is enabled, the returned
* array will only include low-power devices. The intance may be a nullptr.
*/
NSArray<id<MTLDevice>>* mvkGetAvailableMTLDevicesArray(MVKInstance* instance);
/** Returns the registry ID of the specified device, or zero if the device does not have a registry ID. */
uint64_t mvkGetRegistryID(id<MTLDevice> mtlDevice);

View File

@ -75,9 +75,6 @@ static const uint32_t kAMDRadeonRX5500DeviceId = 0x7340;
static const uint32_t kAMDRadeonRX6800DeviceId = 0x73bf;
static const uint32_t kAMDRadeonRX6700DeviceId = 0x73df;
static const VkExtent2D kMetalSamplePositionGridSize = { 1, 1 };
static const VkExtent2D kMetalSamplePositionGridSizeNotSupported = { 0, 0 };
static const uint32_t kMaxTimeDomains = 2;
#pragma clang diagnostic pop
@ -131,9 +128,9 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
.shaderInputAttachmentArrayDynamicIndexing = _metalFeatures.arrayOfTextures,
.shaderUniformTexelBufferArrayDynamicIndexing = _metalFeatures.arrayOfTextures,
.shaderStorageTexelBufferArrayDynamicIndexing = _metalFeatures.arrayOfTextures,
.shaderUniformBufferArrayNonUniformIndexing = false,
.shaderUniformBufferArrayNonUniformIndexing = true,
.shaderSampledImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers,
.shaderStorageBufferArrayNonUniformIndexing = false,
.shaderStorageBufferArrayNonUniformIndexing = true,
.shaderStorageImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures,
.shaderInputAttachmentArrayNonUniformIndexing = _metalFeatures.arrayOfTextures,
.shaderUniformTexelBufferArrayNonUniformIndexing = _metalFeatures.arrayOfTextures,
@ -320,6 +317,11 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: {
auto* synch2Features = (VkPhysicalDeviceSynchronization2Features*)next;
synch2Features->synchronization2 = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: {
auto* astcHDRFeatures = (VkPhysicalDeviceTextureCompressionASTCHDRFeatures*)next;
astcHDRFeatures->textureCompressionASTC_HDR = _metalFeatures.astcHDRTextures;
@ -382,6 +384,53 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
formatFeatures->formatA4B4G4R4 = canSupport4444;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
auto* extDynState = (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT*)next;
extDynState->extendedDynamicState = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
auto* extDynState2 = (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT*)next;
extDynState2->extendedDynamicState2 = true;
extDynState2->extendedDynamicState2LogicOp = false;
extDynState2->extendedDynamicState2PatchControlPoints = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: {
auto* extDynState3 = (VkPhysicalDeviceExtendedDynamicState3FeaturesEXT*)next;
extDynState3->extendedDynamicState3TessellationDomainOrigin = false;
extDynState3->extendedDynamicState3DepthClampEnable = true;
extDynState3->extendedDynamicState3PolygonMode = true;
extDynState3->extendedDynamicState3RasterizationSamples = false;
extDynState3->extendedDynamicState3SampleMask = false;
extDynState3->extendedDynamicState3AlphaToCoverageEnable = false;
extDynState3->extendedDynamicState3AlphaToOneEnable = false;
extDynState3->extendedDynamicState3LogicOpEnable = false;
extDynState3->extendedDynamicState3ColorBlendEnable = false;
extDynState3->extendedDynamicState3ColorBlendEquation = false;
extDynState3->extendedDynamicState3ColorWriteMask = false;
extDynState3->extendedDynamicState3RasterizationStream = false;
extDynState3->extendedDynamicState3ConservativeRasterizationMode = false;
extDynState3->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
extDynState3->extendedDynamicState3DepthClipEnable = true;
extDynState3->extendedDynamicState3SampleLocationsEnable = true;
extDynState3->extendedDynamicState3ColorBlendAdvanced = false;
extDynState3->extendedDynamicState3ProvokingVertexMode = false;
extDynState3->extendedDynamicState3LineRasterizationMode = false;
extDynState3->extendedDynamicState3LineStippleEnable = false;
extDynState3->extendedDynamicState3DepthClipNegativeOneToOne = false;
extDynState3->extendedDynamicState3ViewportWScalingEnable = false;
extDynState3->extendedDynamicState3ViewportSwizzle = false;
extDynState3->extendedDynamicState3CoverageToColorEnable = false;
extDynState3->extendedDynamicState3CoverageToColorLocation = false;
extDynState3->extendedDynamicState3CoverageModulationMode = false;
extDynState3->extendedDynamicState3CoverageModulationTableEnable = false;
extDynState3->extendedDynamicState3CoverageModulationTable = false;
extDynState3->extendedDynamicState3CoverageReductionMode = false;
extDynState3->extendedDynamicState3RepresentativeFragmentTestEnable = false;
extDynState3->extendedDynamicState3ShadingRateImageEnable = false;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
auto* interlockFeatures = (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT*)next;
interlockFeatures->fragmentShaderSampleInterlock = _metalFeatures.rasterOrderGroups;
@ -451,7 +500,7 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
}
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) {
updateTimestampsAndPeriod();
updateTimestampPeriod();
*properties = _properties;
}
@ -476,9 +525,7 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
supportedProps11.maxMultiviewViewCount = 32;
supportedProps11.maxMultiviewInstanceIndex = canUseInstancingForMultiview() ? uintMax / 32 : uintMax;
supportedProps11.protectedNoFault = false;
supportedProps11.maxPerSetDescriptors = 4 * (_metalFeatures.maxPerStageBufferCount +
_metalFeatures.maxPerStageTextureCount +
_metalFeatures.maxPerStageSamplerCount);
supportedProps11.maxPerSetDescriptors = getMaxPerSetDescriptorCount();
supportedProps11.maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize;
// Create a SSOT for these Vulkan 1.2 properties, which can be queried via two mechanisms here.
@ -730,11 +777,11 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
auto* sampLocnProps = (VkPhysicalDeviceSampleLocationsPropertiesEXT*)next;
sampLocnProps->sampleLocationSampleCounts = _metalFeatures.supportedSampleCounts;
sampLocnProps->maxSampleLocationGridSize = kMetalSamplePositionGridSize;
sampLocnProps->sampleLocationCoordinateRange[0] = 0.0;
sampLocnProps->sampleLocationCoordinateRange[1] = (15.0 / 16.0);
sampLocnProps->sampleLocationSubPixelBits = 4;
sampLocnProps->variableSampleLocations = VK_FALSE;
sampLocnProps->maxSampleLocationGridSize = kMVKSampleLocationPixelGridSize;
sampLocnProps->sampleLocationCoordinateRange[0] = kMVKMinSampleLocationCoordinate;
sampLocnProps->sampleLocationCoordinateRange[1] = kMVKMaxSampleLocationCoordinate;
sampLocnProps->sampleLocationSubPixelBits = mvkPowerOfTwoExponent(kMVKSampleLocationCoordinateGridSize);
sampLocnProps->variableSampleLocations = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
@ -843,8 +890,8 @@ void MVKPhysicalDevice::getMultisampleProperties(VkSampleCountFlagBits samples,
VkMultisamplePropertiesEXT* pMultisampleProperties) {
if (pMultisampleProperties) {
pMultisampleProperties->maxSampleLocationGridSize = (mvkIsOnlyAnyFlagEnabled(samples, _metalFeatures.supportedSampleCounts)
? kMetalSamplePositionGridSize
: kMetalSamplePositionGridSizeNotSupported);
? kMVKSampleLocationPixelGridSize
: kMVKSampleLocationPixelGridSizeNotSupported);
}
}
@ -1155,8 +1202,8 @@ VkResult MVKPhysicalDevice::getSurfaceSupport(uint32_t queueFamilyIndex,
isHeadless = getMTLDevice().isHeadless;
#endif
// If this device is headless or the surface does not have a CAMetalLayer, it is not supported.
*pSupported = !(isHeadless || (surface->getCAMetalLayer() == nil));
// If this device is headless, the surface must be headless.
*pSupported = isHeadless ? surface->isHeadless() : wasConfigurationSuccessful();
return *pSupported ? VK_SUCCESS : surface->getConfigurationResult();
}
@ -1215,13 +1262,12 @@ VkResult MVKPhysicalDevice::getSurfaceCapabilities( const VkPhysicalDeviceSurfac
// The CAlayer underlying the surface must be a CAMetalLayer.
MVKSurface* surface = (MVKSurface*)pSurfaceInfo->surface;
CAMetalLayer* mtlLayer = surface->getCAMetalLayer();
if ( !mtlLayer ) { return surface->getConfigurationResult(); }
if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); }
VkSurfaceCapabilitiesKHR& surfCaps = pSurfaceCapabilities->surfaceCapabilities;
surfCaps.minImageCount = _metalFeatures.minSwapchainImageCount;
surfCaps.maxImageCount = _metalFeatures.maxSwapchainImageCount;
surfCaps.currentExtent = mvkGetNaturalExtent(mtlLayer);
surfCaps.currentExtent = surface->getNaturalExtent();
surfCaps.minImageExtent = { 1, 1 };
surfCaps.maxImageExtent = { _properties.limits.maxImageDimension2D, _properties.limits.maxImageDimension2D };
surfCaps.maxImageArrayLayers = 1;
@ -1300,9 +1346,7 @@ VkResult MVKPhysicalDevice::getSurfaceFormats(MVKSurface* surface,
uint32_t* pCount,
VkSurfaceFormatKHR* pSurfaceFormats) {
// The layer underlying the surface view must be a CAMetalLayer.
CAMetalLayer* mtlLayer = surface->getCAMetalLayer();
if ( !mtlLayer ) { return surface->getConfigurationResult(); }
if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); }
#define addSurfFmt(MTL_FMT) \
do { \
@ -1425,9 +1469,7 @@ VkResult MVKPhysicalDevice::getSurfacePresentModes(MVKSurface* surface,
uint32_t* pCount,
VkPresentModeKHR* pPresentModes) {
// The layer underlying the surface view must be a CAMetalLayer.
CAMetalLayer* mtlLayer = surface->getCAMetalLayer();
if ( !mtlLayer ) { return surface->getConfigurationResult(); }
if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); }
#define ADD_VK_PRESENT_MODE(VK_PM) \
do { \
@ -1455,9 +1497,7 @@ VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface,
uint32_t* pRectCount,
VkRect2D* pRects) {
// The layer underlying the surface view must be a CAMetalLayer.
CAMetalLayer* mtlLayer = surface->getCAMetalLayer();
if ( !mtlLayer ) { return surface->getConfigurationResult(); }
if ( !surface->wasConfigurationSuccessful() ) { return surface->getConfigurationResult(); }
if ( !pRects ) {
*pRectCount = 1;
@ -1469,7 +1509,7 @@ VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface,
*pRectCount = 1;
pRects[0].offset = { 0, 0 };
pRects[0].extent = mvkGetNaturalExtent(mtlLayer);
pRects[0].extent = surface->getNaturalExtent();
return VK_SUCCESS;
}
@ -1525,7 +1565,7 @@ MVKArrayRef<MVKQueueFamily*> MVKPhysicalDevice::getQueueFamilies() {
VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
VkQueueFamilyProperties* pQueueFamilyProperties) {
auto qFams = getQueueFamilies();
uint32_t qfCnt = uint32_t(qFams.size);
uint32_t qfCnt = uint32_t(qFams.size());
// If properties aren't actually being requested yet, simply update the returned count
if ( !pQueueFamilyProperties ) {
@ -1570,21 +1610,25 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
// If needed, update the timestamp period for this device, using a crude lowpass filter to level out
// wild temporary changes, particularly during initial queries before much GPU activity has occurred.
// On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes.
void MVKPhysicalDevice::updateTimestampsAndPeriod() {
if (_properties.vendorID == kAppleVendorId) { return; }
void MVKPhysicalDevice::updateTimestampPeriod() {
if (_properties.vendorID != kAppleVendorId &&
[_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) {
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs;
double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs;
if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero
float tsPeriod = elapsedCPUNanos / elapsedGPUTicks;
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs;
double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs;
// Basic lowpass filter Y = (1 - a)Y + a*X.
// The lower a is, the slower Y will change over time.
static const float a = 0.05;
_properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod);
// Don't update period the first time through, or if no time elapsed.
if (earlierCPUTs && elapsedCPUNanos && elapsedGPUTicks) {
// Basic lowpass filter TPout = (1 - A)TPout + (A * TPin).
// The lower A is, the slower TPout will change over time.
auto& vkTsp = _properties.limits.timestampPeriod;
float a = getMVKConfig().timestampPeriodLowPassAlpha;
float tsPeriod = elapsedCPUNanos / elapsedGPUTicks;
vkTsp = ((1.0 - a) * vkTsp) + (a * tsPeriod);
}
}
}
@ -1689,10 +1733,15 @@ void MVKPhysicalDevice::initMetalFeatures() {
_metalFeatures.minSwapchainImageCount = kMVKMinSwapchainImageCount;
_metalFeatures.maxSwapchainImageCount = kMVKMaxSwapchainImageCount;
_metalFeatures.vertexStrideAlignment = 4;
_metalFeatures.maxPerStageStorageTextureCount = 8;
_metalFeatures.vertexStrideAlignment = supportsMTLGPUFamily(Apple5) ? 1 : 4;
#if MVK_XCODE_15
// Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU.
_metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2));
#endif
// GPU-specific features
switch (_properties.vendorID) {
case kAMDVendorId:
@ -1703,6 +1752,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
if (!mvkOSVersionIsAtLeast(14.0, 17.0, 1.0)) {
_metalFeatures.needsSampleDrefLodArrayWorkaround = true;
}
_metalFeatures.needsCubeGradWorkaround = true;
// fallthrough
case kIntelVendorId:
case kNVVendorId:
@ -2183,6 +2233,8 @@ void MVKPhysicalDevice::initMetalFeatures() {
if ([_mtlDevice respondsToSelector: @selector(argumentBuffersSupport)]) {
_metalFeatures.argumentBuffersTier = _mtlDevice.argumentBuffersSupport;
} else {
_metalFeatures.argumentBuffersTier = MTLArgumentBuffersTier1;
}
#define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \
@ -2343,7 +2395,7 @@ void MVKPhysicalDevice::initFeatures() {
mvkClear(&_vulkan12FeaturesNoExt); // Start with everything cleared
_vulkan12FeaturesNoExt.samplerMirrorClampToEdge = _metalFeatures.samplerMirrorClampToEdge;
_vulkan12FeaturesNoExt.drawIndirectCount = false;
_vulkan12FeaturesNoExt.descriptorIndexing = true;
_vulkan12FeaturesNoExt.descriptorIndexing = _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers;
_vulkan12FeaturesNoExt.samplerFilterMinmax = false;
_vulkan12FeaturesNoExt.shaderOutputViewportIndex = _features.multiViewport;
_vulkan12FeaturesNoExt.shaderOutputLayer = _metalFeatures.layeredRendering;
@ -2404,7 +2456,7 @@ void MVKPhysicalDevice::initLimits() {
_properties.limits.maxVertexInputAttributes = 31;
_properties.limits.maxVertexInputBindings = 31;
_properties.limits.maxVertexInputBindingStride = (2 * KIBI);
_properties.limits.maxVertexInputBindingStride = supportsMTLGPUFamily(Apple2) ? kMVKUndefinedLargeUInt32 : (4 * KIBI);
_properties.limits.maxVertexInputAttributeOffset = _properties.limits.maxVertexInputBindingStride - 1;
_properties.limits.maxPerStageDescriptorSamplers = _metalFeatures.maxPerStageSamplerCount;
@ -2613,7 +2665,10 @@ void MVKPhysicalDevice::initLimits() {
_properties.limits.optimalBufferCopyRowPitchAlignment = 1;
_properties.limits.timestampComputeAndGraphics = VK_TRUE;
_properties.limits.timestampPeriod = mvkGetTimestampPeriod(); // Will be 1.0 on Apple Silicon
// On non-Apple GPU's, this can vary over time, and is calculated based on actual GPU activity.
_properties.limits.timestampPeriod = 1.0;
updateTimestampPeriod();
_properties.limits.pointSizeRange[0] = 1;
switch (_properties.vendorID) {
@ -2633,7 +2688,7 @@ void MVKPhysicalDevice::initLimits() {
_properties.limits.pointSizeGranularity = 1;
_properties.limits.lineWidthRange[0] = 1;
_properties.limits.lineWidthRange[1] = 1;
_properties.limits.lineWidthGranularity = 1;
_properties.limits.lineWidthGranularity = 0;
_properties.limits.standardSampleLocations = VK_TRUE;
_properties.limits.strictLines = _properties.vendorID == kIntelVendorId || _properties.vendorID == kNVVendorId;
@ -2689,7 +2744,7 @@ void MVKPhysicalDevice::initLimits() {
_properties.limits.maxComputeWorkGroupCount[1] = kMVKUndefinedLargeUInt32;
_properties.limits.maxComputeWorkGroupCount[2] = kMVKUndefinedLargeUInt32;
_properties.limits.maxDrawIndexedIndexValue = numeric_limits<uint32_t>::max() - 1; // Support both fullDrawIndexUint32 and automatic primitive restart.
_properties.limits.maxDrawIndexedIndexValue = numeric_limits<uint32_t>::max();
_properties.limits.maxDrawIndirectCount = kMVKUndefinedLargeUInt32;
@ -3056,32 +3111,23 @@ uint64_t MVKPhysicalDevice::getVRAMSize() {
}
}
// If possible, retrieve from the MTLDevice, otherwise from available memory size, or a fixed conservative estimate.
uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() {
#if MVK_MACOS
#if MVK_XCODE_15 || MVK_MACOS
if ( [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) {
return _mtlDevice.recommendedMaxWorkingSetSize;
}
#endif
#if MVK_IOS_OR_TVOS
// GPU and CPU use shared memory. Estimate the current free memory in the system.
uint64_t freeMem = mvkGetAvailableMemorySize();
if (freeMem) { return freeMem; }
#endif
return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory
return freeMem ? freeMem : 256 * MEBI;
}
// If possible, retrieve from the MTLDevice, otherwise use the current memory used by this process.
uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() {
if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) {
return _mtlDevice.currentAllocatedSize;
}
#if MVK_IOS_OR_TVOS
// We can use the current memory used by this process as a reasonable approximation.
return mvkGetUsedMemorySize();
#endif
#if MVK_MACOS
return 0;
#endif
}
// When using argument buffers, Metal imposes a hard limit on the number of MTLSamplerState
@ -3096,6 +3142,13 @@ uint32_t MVKPhysicalDevice::getMaxSamplerCount() {
}
}
// Vulkan imposes a minimum maximum of 1024 descriptors per set.
uint32_t MVKPhysicalDevice::getMaxPerSetDescriptorCount() {
return max(4 * (_metalFeatures.maxPerStageBufferCount +
_metalFeatures.maxPerStageTextureCount +
_metalFeatures.maxPerStageSamplerCount), 1024u);
}
void MVKPhysicalDevice::initExternalMemoryProperties() {
// Common
@ -3149,6 +3202,9 @@ void MVKPhysicalDevice::initExtensions() {
pWritableExtns->vk_KHR_fragment_shader_barycentric.enabled = false;
pWritableExtns->vk_NV_fragment_shader_barycentric.enabled = false;
}
if (!_metalFeatures.arrayOfTextures || !_metalFeatures.arrayOfSamplers) {
pWritableExtns->vk_EXT_descriptor_indexing.enabled = false;
}
// The relevant functions are not available if not built with Xcode 14.
#if MVK_XCODE_14
@ -3249,31 +3305,14 @@ bool MVKPhysicalDevice::needsCounterSetRetained() {
}
void MVKPhysicalDevice::logGPUInfo() {
string devTypeStr;
switch (_properties.deviceType) {
case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
devTypeStr = "Discrete";
break;
case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
devTypeStr = "Integrated";
break;
case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
devTypeStr = "Virtual";
break;
case VK_PHYSICAL_DEVICE_TYPE_CPU:
devTypeStr = "CPU Emulation";
break;
default:
devTypeStr = "Unknown";
break;
}
string logMsg = "GPU device:";
logMsg += "\n\t\tmodel: %s";
logMsg += "\n\t\ttype: %s";
logMsg += "\n\t\tvendorID: %#06x";
logMsg += "\n\t\tdeviceID: %#06x";
logMsg += "\n\t\tpipelineCacheUUID: %s";
logMsg += "\n\t\tGPU memory available: %llu MB";
logMsg += "\n\t\tGPU memory used: %llu MB";
logMsg += "\n\tsupports the following Metal Versions, GPU's and Feature Sets:";
logMsg += "\n\t\tMetal Shading Language %s";
@ -3356,9 +3395,29 @@ void MVKPhysicalDevice::logGPUInfo() {
}
#endif
string devTypeStr;
switch (_properties.deviceType) {
case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
devTypeStr = "Discrete";
break;
case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
devTypeStr = "Integrated";
break;
case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
devTypeStr = "Virtual";
break;
case VK_PHYSICAL_DEVICE_TYPE_CPU:
devTypeStr = "CPU Emulation";
break;
default:
devTypeStr = "Unknown";
break;
}
NSUUID* nsUUID = [[NSUUID alloc] initWithUUIDBytes: _properties.pipelineCacheUUID]; // temp retain
MVKLogInfo(logMsg.c_str(), _properties.deviceName, devTypeStr.c_str(),
MVKLogInfo(logMsg.c_str(), getName(), devTypeStr.c_str(),
_properties.vendorID, _properties.deviceID, nsUUID.UUIDString.UTF8String,
getRecommendedMaxWorkingSetSize() / MEBI, getCurrentAllocatedSize() / MEBI,
SPIRVToMSLConversionOptions::printMSLVersion(_metalFeatures.mslVersion).c_str());
[nsUUID release]; // temp release
}
@ -3366,7 +3425,11 @@ void MVKPhysicalDevice::logGPUInfo() {
MVKPhysicalDevice::~MVKPhysicalDevice() {
mvkDestroyContainerContents(_queueFamilies);
[_timestampMTLCounterSet release];
uint64_t memUsed = getCurrentAllocatedSize(); // Retrieve before releasing MTLDevice
[_mtlDevice release];
MVKLogInfo("Destroyed VkPhysicalDevice for GPU %s with %llu MB of GPU memory still allocated.", getName(), memUsed / MEBI);
}
@ -3375,12 +3438,13 @@ MVKPhysicalDevice::~MVKPhysicalDevice() {
// Returns core device commands and enabled extension device commands.
PFN_vkVoidFunction MVKDevice::getProcAddr(const char* pName) {
MVKEntryPoint* pMVKPA = _physicalDevice->_mvkInstance->getEntryPoint(pName);
uint32_t apiVersion = _physicalDevice->_mvkInstance->_appInfo.apiVersion;
MVKInstance* pMVKInst = _physicalDevice->_mvkInstance;
MVKEntryPoint* pMVKPA = pMVKInst->getEntryPoint(pName);
uint32_t apiVersion = pMVKInst->_appInfo.apiVersion;
bool isSupported = (pMVKPA && // Command exists and...
pMVKPA->isDevice && // ...is a device command and...
pMVKPA->isEnabled(apiVersion, _enabledExtensions)); // ...is a core or enabled extension command.
bool isSupported = (pMVKPA && // Command exists and...
pMVKPA->isDevice && // ...is a device command and...
pMVKPA->isEnabled(apiVersion, _enabledExtensions, &pMVKInst->_enabledExtensions)); // ...is a core or enabled extension command.
return isSupported ? pMVKPA->functionPointer : nullptr;
}
@ -3442,7 +3506,7 @@ void MVKDevice::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateI
for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
descriptorCount += pCreateInfo->pBindings[i].descriptorCount;
}
pSupport->supported = (descriptorCount < ((_physicalDevice->_metalFeatures.maxPerStageBufferCount + _physicalDevice->_metalFeatures.maxPerStageTextureCount + _physicalDevice->_metalFeatures.maxPerStageSamplerCount) * 2));
pSupport->supported = (descriptorCount < _physicalDevice->getMaxPerSetDescriptorCount());
// Check whether the layout has a variable-count descriptor, and if so, whether we can support it.
for (auto* next = (VkBaseOutStructure*)pSupport->pNext; next; next = next->pNext) {
@ -3601,14 +3665,14 @@ void MVKDevice::getCalibratedTimestamps(uint32_t timestampCount,
MTLTimestamp cpuStamp, gpuStamp;
uint64_t cpuStart, cpuEnd;
cpuStart = mvkGetAbsoluteTime();
cpuStart = mvkGetContinuousNanoseconds();
[getMTLDevice() sampleTimestamps: &cpuStamp gpuTimestamp: &gpuStamp];
// Sample again to calculate the maximum deviation. Note that the
// -[MTLDevice sampleTimestamps:gpuTimestamp:] method guarantees that CPU
// timestamps are in nanoseconds. We don't want to call the method again,
// because that could result in an expensive syscall to query the GPU time-
// stamp.
cpuEnd = mvkGetAbsoluteTime();
cpuEnd = mvkGetContinuousNanoseconds();
for (uint32_t tsIdx = 0; tsIdx < timestampCount; ++tsIdx) {
switch (pTimestampInfos[tsIdx].timeDomain) {
case VK_TIME_DOMAIN_DEVICE_EXT:
@ -4172,43 +4236,63 @@ void MVKDevice::removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t val
mvkRemoveFirstOccurance(_awaitingTimelineSem4s, make_pair(sem4, value));
}
void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void MVKDevice::applyMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) {
if (!mvkIsAnyFlagEnabled(dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) ||
if (!mvkIsAnyFlagEnabled(barrier.dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) ||
!mvkIsAnyFlagEnabled(barrier.dstAccessMask, VK_ACCESS_HOST_READ_BIT) ) { return; }
lock_guard<mutex> lock(_rezLock);
for (auto& rez : _resources) {
rez->applyMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse);
rez->applyMemoryBarrier(barrier, cmdEncoder, cmdUse);
}
}
void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity,
uint64_t startTime, uint64_t endTime) {
double currInterval = mvkGetElapsedMilliseconds(startTime, endTime);
void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue) {
lock_guard<mutex> lock(_perfLock);
activity.latestDuration = currInterval;
activity.minimumDuration = ((activity.minimumDuration == 0.0)
? currInterval :
min(currInterval, activity.minimumDuration));
activity.maximumDuration = max(currInterval, activity.maximumDuration);
double totalInterval = (activity.averageDuration * activity.count++) + currInterval;
activity.averageDuration = totalInterval / activity.count;
activity.latest = currentValue;
activity.minimum = ((activity.minimum == 0.0)
? currentValue :
min(currentValue, activity.minimum));
activity.maximum = max(currentValue, activity.maximum);
double total = (activity.average * activity.count++) + currentValue;
activity.average = total / activity.count;
if (_isPerformanceTracking && getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) {
logActivityInline(activity, _performanceStatistics);
}
}
void MVKDevice::logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) {
MVKLogInfo("%s%s%s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d",
(isInline ? "" : " "),
void MVKDevice::logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) {
if (getActivityPerformanceValueType(activity, _performanceStatistics) == MVKActivityPerformanceValueTypeByteCount) {
logActivityByteCount(activity, _performanceStatistics, true);
} else {
logActivityDuration(activity, _performanceStatistics, true);
}
}
void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) {
const char* fmt = (isInline
? "%s performance avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d"
: " %-45s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d");
MVKLogInfo(fmt,
getActivityPerformanceDescription(activity, perfStats),
(isInline ? " performance" : ""),
activity.averageDuration,
activity.latestDuration,
activity.minimumDuration,
activity.maximumDuration,
activity.average,
activity.latest,
activity.minimum,
activity.maximum,
activity.count);
}
void MVKDevice::logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) {
const char* fmt = (isInline
? "%s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d"
: " %-45s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d");
MVKLogInfo(fmt,
getActivityPerformanceDescription(activity, perfStats),
uint64_t(activity.average) / KIBI,
uint64_t(activity.latest) / KIBI,
uint64_t(activity.minimum) / KIBI,
uint64_t(activity.maximum) / KIBI,
activity.count);
}
@ -4218,49 +4302,71 @@ void MVKDevice::logPerformanceSummary() {
MVKPerformanceStatistics perfStats;
getPerformanceStatistics(&perfStats);
logActivityPerformance(perfStats.queue.frameInterval, perfStats);
logActivityPerformance(perfStats.queue.nextCAMetalDrawable, perfStats);
logActivityPerformance(perfStats.queue.mtlCommandBufferCompletion, perfStats);
logActivityPerformance(perfStats.queue.mtlQueueAccess, perfStats);
logActivityPerformance(perfStats.shaderCompilation.hashShaderCode, perfStats);
logActivityPerformance(perfStats.shaderCompilation.spirvToMSL, perfStats);
logActivityPerformance(perfStats.shaderCompilation.mslCompile, perfStats);
logActivityPerformance(perfStats.shaderCompilation.mslLoad, perfStats);
logActivityPerformance(perfStats.shaderCompilation.mslCompress, perfStats);
logActivityPerformance(perfStats.shaderCompilation.mslDecompress, perfStats);
logActivityPerformance(perfStats.shaderCompilation.shaderLibraryFromCache, perfStats);
logActivityPerformance(perfStats.shaderCompilation.functionRetrieval, perfStats);
logActivityPerformance(perfStats.shaderCompilation.functionSpecialization, perfStats);
logActivityPerformance(perfStats.shaderCompilation.pipelineCompile, perfStats);
logActivityPerformance(perfStats.pipelineCache.sizePipelineCache, perfStats);
logActivityPerformance(perfStats.pipelineCache.readPipelineCache, perfStats);
logActivityPerformance(perfStats.pipelineCache.writePipelineCache, perfStats);
#define logDuration(s) logActivityDuration(perfStats.s, perfStats)
#define logByteCount(s) logActivityByteCount(perfStats.s, perfStats)
logDuration(queue.frameInterval);
logDuration(queue.retrieveMTLCommandBuffer);
logDuration(queue.commandBufferEncoding);
logDuration(queue.submitCommandBuffers);
logDuration(queue.mtlCommandBufferExecution);
logDuration(queue.retrieveCAMetalDrawable);
logDuration(queue.presentSwapchains);
logDuration(shaderCompilation.hashShaderCode);
logDuration(shaderCompilation.spirvToMSL);
logDuration(shaderCompilation.mslCompile);
logDuration(shaderCompilation.mslLoad);
logDuration(shaderCompilation.mslCompress);
logDuration(shaderCompilation.mslDecompress);
logDuration(shaderCompilation.shaderLibraryFromCache);
logDuration(shaderCompilation.functionRetrieval);
logDuration(shaderCompilation.functionSpecialization);
logDuration(shaderCompilation.pipelineCompile);
logDuration(pipelineCache.sizePipelineCache);
logDuration(pipelineCache.readPipelineCache);
logDuration(pipelineCache.writePipelineCache);
logByteCount(device.gpuMemoryAllocated);
#undef logDuration
#undef logByteCount
}
const char* MVKDevice::getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) {
if (&activity == &perfStats.shaderCompilation.hashShaderCode) { return "Hash shader SPIR-V code"; }
if (&activity == &perfStats.shaderCompilation.spirvToMSL) { return "Convert SPIR-V to MSL source code"; }
if (&activity == &perfStats.shaderCompilation.mslCompile) { return "Compile MSL source code into a MTLLibrary"; }
if (&activity == &perfStats.shaderCompilation.mslLoad) { return "Load pre-compiled MSL code into a MTLLibrary"; }
if (&activity == &perfStats.shaderCompilation.mslCompress) { return "Compress MSL source code after compiling a MTLLibrary"; }
if (&activity == &perfStats.shaderCompilation.mslDecompress) { return "Decompress MSL source code during pipeline cache write"; }
if (&activity == &perfStats.shaderCompilation.shaderLibraryFromCache) { return "Retrieve shader library from the cache"; }
if (&activity == &perfStats.shaderCompilation.functionRetrieval) { return "Retrieve a MTLFunction from a MTLLibrary"; }
if (&activity == &perfStats.shaderCompilation.functionSpecialization) { return "Specialize a retrieved MTLFunction"; }
if (&activity == &perfStats.shaderCompilation.pipelineCompile) { return "Compile MTLFunctions into a pipeline"; }
if (&activity == &perfStats.pipelineCache.sizePipelineCache) { return "Calculate cache size required to write MSL to pipeline cache"; }
if (&activity == &perfStats.pipelineCache.readPipelineCache) { return "Read MSL from pipeline cache"; }
if (&activity == &perfStats.pipelineCache.writePipelineCache) { return "Write MSL to pipeline cache"; }
if (&activity == &perfStats.queue.mtlQueueAccess) { return "Access MTLCommandQueue"; }
if (&activity == &perfStats.queue.mtlCommandBufferCompletion) { return "Complete MTLCommandBuffer"; }
if (&activity == &perfStats.queue.nextCAMetalDrawable) { return "Retrieve a CAMetalDrawable from CAMetalLayer"; }
if (&activity == &perfStats.queue.frameInterval) { return "Frame interval"; }
return "Unknown performance activity";
#define ifActivityReturnName(s, n) if (&activity == &perfStats.s) return n
ifActivityReturnName(shaderCompilation.hashShaderCode, "Hash shader SPIR-V code");
ifActivityReturnName(shaderCompilation.spirvToMSL, "Convert SPIR-V to MSL source code");
ifActivityReturnName(shaderCompilation.mslCompile, "Compile MSL into a MTLLibrary");
ifActivityReturnName(shaderCompilation.mslLoad, "Load pre-compiled MSL into a MTLLibrary");
ifActivityReturnName(shaderCompilation.mslCompress, "Compress MSL after compiling a MTLLibrary");
ifActivityReturnName(shaderCompilation.mslDecompress, "Decompress MSL for pipeline cache write");
ifActivityReturnName(shaderCompilation.shaderLibraryFromCache, "Retrieve shader library from the cache");
ifActivityReturnName(shaderCompilation.functionRetrieval, "Retrieve a MTLFunction from a MTLLibrary");
ifActivityReturnName(shaderCompilation.functionSpecialization, "Specialize a retrieved MTLFunction");
ifActivityReturnName(shaderCompilation.pipelineCompile, "Compile MTLFunctions into a pipeline");
ifActivityReturnName(pipelineCache.sizePipelineCache, "Calculate pipeline cache size");
ifActivityReturnName(pipelineCache.readPipelineCache, "Read MSL from pipeline cache");
ifActivityReturnName(pipelineCache.writePipelineCache, "Write MSL to pipeline cache");
ifActivityReturnName(queue.retrieveMTLCommandBuffer, "Retrieve a MTLCommandBuffer");
ifActivityReturnName(queue.commandBufferEncoding, "Encode VkCommandBuffer to MTLCommandBuffer");
ifActivityReturnName(queue.submitCommandBuffers, "vkQueueSubmit() encoding to MTLCommandBuffers");
ifActivityReturnName(queue.mtlCommandBufferExecution, "Execute a MTLCommandBuffer on GPU");
ifActivityReturnName(queue.retrieveCAMetalDrawable, "Retrieve a CAMetalDrawable");
ifActivityReturnName(queue.presentSwapchains, "Present swapchains in on GPU");
ifActivityReturnName(queue.frameInterval, "Frame interval");
ifActivityReturnName(device.gpuMemoryAllocated, "GPU memory allocated");
return "Unknown performance activity";
#undef ifActivityReturnName
}
MVKActivityPerformanceValueType MVKDevice::getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) {
if (&activity == &perfStats.device.gpuMemoryAllocated) return MVKActivityPerformanceValueTypeByteCount;
return MVKActivityPerformanceValueTypeDuration;
}
void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) {
lock_guard<mutex> lock(_perfLock);
addPerformanceByteCount(_performanceStatistics.device.gpuMemoryAllocated,
_physicalDevice->getCurrentAllocatedSize());
lock_guard<mutex> lock(_perfLock);
if (pPerf) { *pPerf = _performanceStatistics; }
}
@ -4597,33 +4703,15 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice());
MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s",
_pProperties->deviceName,
_enabledExtensions.getEnabledCount(),
_enabledExtensions.enabledNamesString("\n\t\t", true).c_str());
getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t\t", true).c_str());
}
// Perf stats that last the duration of the app process.
static MVKPerformanceStatistics _processPerformanceStatistics = {};
void MVKDevice::initPerformanceTracking() {
_isPerformanceTracking = getMVKConfig().performanceTracking;
_activityPerformanceLoggingStyle = getMVKConfig().activityPerformanceLoggingStyle;
_performanceStatistics.shaderCompilation.hashShaderCode = {};
_performanceStatistics.shaderCompilation.spirvToMSL = {};
_performanceStatistics.shaderCompilation.mslCompile = {};
_performanceStatistics.shaderCompilation.mslLoad = {};
_performanceStatistics.shaderCompilation.mslCompress = {};
_performanceStatistics.shaderCompilation.mslDecompress = {};
_performanceStatistics.shaderCompilation.shaderLibraryFromCache = {};
_performanceStatistics.shaderCompilation.functionRetrieval = {};
_performanceStatistics.shaderCompilation.functionSpecialization = {};
_performanceStatistics.shaderCompilation.pipelineCompile = {};
_performanceStatistics.pipelineCache.sizePipelineCache = {};
_performanceStatistics.pipelineCache.writePipelineCache = {};
_performanceStatistics.pipelineCache.readPipelineCache = {};
_performanceStatistics.queue.mtlQueueAccess = {};
_performanceStatistics.queue.mtlCommandBufferCompletion = {};
_performanceStatistics.queue.nextCAMetalDrawable = {};
_performanceStatistics.queue.frameInterval = {};
_performanceStatistics = _processPerformanceStatistics;
}
void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo) {
@ -4920,9 +5008,16 @@ void MVKDevice::reservePrivateData(const VkDeviceCreateInfo* pCreateInfo) {
}
MVKDevice::~MVKDevice() {
if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) {
MVKLogInfo("Device activity performance summary:");
logPerformanceSummary();
if (_isPerformanceTracking) {
auto perfLogStyle = getMVKConfig().activityPerformanceLoggingStyle;
if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) {
MVKLogInfo("Device activity performance summary:");
logPerformanceSummary();
} else if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE) {
MVKLogInfo("Process activity performance summary:");
logPerformanceSummary();
_processPerformanceStatistics = _performanceStatistics;
}
}
for (auto& queues : _queuesByQueueFamilyIndex) {
@ -4938,12 +5033,58 @@ MVKDevice::~MVKDevice() {
stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE);
mvkDestroyContainerContents(_privateDataSlots);
MVKLogInfo("Destroyed VkDevice on GPU %s with %d Vulkan extensions enabled.",
getName(), _enabledExtensions.getEnabledCount());
}
#pragma mark -
#pragma mark Support functions
NSArray<id<MTLDevice>>* mvkGetAvailableMTLDevicesArray(MVKInstance* instance) {
NSMutableArray* mtlDevs = [NSMutableArray array]; // autoreleased
#if MVK_MACOS
NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease];
bool forceLowPower = mvkGetMVKConfig(instance).forceLowPowerGPU;
// Populate the array of appropriate MTLDevices
for (id<MTLDevice> md in rawMTLDevs) {
if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; }
}
// Sort by power
[mtlDevs sortUsingComparator: ^(id<MTLDevice> md1, id<MTLDevice> md2) {
BOOL md1IsLP = md1.isLowPower;
BOOL md2IsLP = md2.isLowPower;
if (md1IsLP == md2IsLP) {
// If one device is headless and the other one is not, select the
// one that is not headless first.
BOOL md1IsHeadless = md1.isHeadless;
BOOL md2IsHeadless = md2.isHeadless;
if (md1IsHeadless == md2IsHeadless ) {
return NSOrderedSame;
}
return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending;
}
return md2IsLP ? NSOrderedAscending : NSOrderedDescending;
}];
// If the survey found at least one device, return the array.
if (mtlDevs.count) { return mtlDevs; }
#endif // MVK_MACOS
// For other OS's, or for macOS if the survey returned empty, use the default device.
id<MTLDevice> md = [MTLCreateSystemDefaultDevice() autorelease];
if (md) { [mtlDevs addObject: md]; }
return mtlDevs; // retained
}
uint64_t mvkGetRegistryID(id<MTLDevice> mtlDevice) {
return [mtlDevice respondsToSelector: @selector(registryID)] ? mtlDevice.registryID : 0;
}

View File

@ -55,6 +55,7 @@ MVK_DEVICE_FEATURE(ShaderAtomicInt64, SHADER_ATOMIC_INT64,
MVK_DEVICE_FEATURE(ShaderFloat16Int8, SHADER_FLOAT16_INT8, 2)
MVK_DEVICE_FEATURE(ShaderSubgroupExtendedTypes, SHADER_SUBGROUP_EXTENDED_TYPES, 1)
MVK_DEVICE_FEATURE(SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, 2)
MVK_DEVICE_FEATURE(Synchronization2, SYNCHRONIZATION_2, 1)
MVK_DEVICE_FEATURE(TextureCompressionASTCHDR, TEXTURE_COMPRESSION_ASTC_HDR, 1)
MVK_DEVICE_FEATURE(TimelineSemaphore, TIMELINE_SEMAPHORE, 1)
MVK_DEVICE_FEATURE(UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, 1)
@ -63,6 +64,9 @@ MVK_DEVICE_FEATURE(VulkanMemoryModel, VULKAN_MEMORY_MODEL,
MVK_DEVICE_FEATURE_EXTN(FragmentShaderBarycentric, FRAGMENT_SHADER_BARYCENTRIC, KHR, 1)
MVK_DEVICE_FEATURE_EXTN(PortabilitySubset, PORTABILITY_SUBSET, KHR, 15)
MVK_DEVICE_FEATURE_EXTN(4444Formats, 4444_FORMATS, EXT, 2)
MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, EXT, 1)
MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, EXT, 3)
MVK_DEVICE_FEATURE_EXTN(ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, EXT, 31)
MVK_DEVICE_FEATURE_EXTN(FragmentShaderInterlock, FRAGMENT_SHADER_INTERLOCK, EXT, 3)
MVK_DEVICE_FEATURE_EXTN(PipelineCreationCacheControl, PIPELINE_CREATION_CACHE_CONTROL, EXT, 1)
MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, EXT, 3)

View File

@ -31,6 +31,7 @@
class MVKImage;
class MVKImageView;
class MVKSwapchain;
class MVKQueue;
class MVKCommandEncoder;
@ -73,9 +74,7 @@ protected:
bool overlaps(VkSubresourceLayout& imgLayout, VkDeviceSize offset, VkDeviceSize size);
void propagateDebugName();
MVKImageMemoryBinding* getMemoryBinding() const;
void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void applyImageMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse);
void pullFromDeviceOnCompletion(MVKCommandEncoder* cmdEncoder,
@ -118,9 +117,7 @@ public:
VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset) override;
/** Applies the specified global memory barrier. */
void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void applyMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) override;
@ -132,9 +129,7 @@ protected:
friend MVKImage;
void propagateDebugName() override;
bool needsHostReadSync(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier);
bool needsHostReadSync(MVKPipelineBarrier& barrier);
bool shouldFlushHostMemory();
VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size);
VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size);
@ -250,9 +245,7 @@ public:
virtual VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo);
/** Applies the specified image memory barrier. */
void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void applyImageMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse);
@ -385,14 +378,8 @@ class MVKSwapchainImage : public MVKImage {
public:
/** Binds this resource to the specified offset within the specified memory allocation. */
VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset, uint8_t planeIndex) override;
#pragma mark Metal
/** Returns the Metal texture used by the CAMetalDrawable underlying this image. */
id<MTLTexture> getMTLTexture(uint8_t planeIndex) override;
#pragma mark Construction
@ -406,11 +393,10 @@ public:
protected:
friend class MVKPeerSwapchainImage;
virtual id<CAMetalDrawable> getCAMetalDrawable() = 0;
void detachSwapchain();
std::mutex _detachmentLock;
MVKSwapchain* _swapchain;
std::mutex _swapchainLock;
uint32_t _swapchainIndex;
};
@ -429,6 +415,7 @@ typedef struct MVKSwapchainImageAvailability {
/** Presentation info. */
typedef struct {
MVKPresentableSwapchainImage* presentableImage;
MVKQueue* queue; // The queue on which the vkQueuePresentKHR() command was executed.
MVKFence* fence; // VK_EXT_swapchain_maintenance1 fence signaled when resources can be destroyed
uint64_t desiredPresentTime; // VK_GOOGLE_display_timing desired presentation time in nanoseconds
uint32_t presentID; // VK_GOOGLE_display_timing presentID
@ -451,35 +438,46 @@ public:
#pragma mark Metal
/** Presents the contained drawable to the OS. */
void presentCAMetalDrawable(id<MTLCommandBuffer> mtlCmdBuff, MVKImagePresentInfo presentInfo);
id<MTLTexture> getMTLTexture(uint8_t planeIndex) override;
/** Presents the contained drawable to the OS. */
VkResult presentCAMetalDrawable(id<MTLCommandBuffer> mtlCmdBuff, MVKImagePresentInfo presentInfo);
/** Called when the presentation begins. */
void beginPresentation(const MVKImagePresentInfo& presentInfo);
/** Called via callback when the presentation completes. */
void endPresentation(const MVKImagePresentInfo& presentInfo,
const MVKSwapchainSignaler& signaler,
uint64_t actualPresentTime = 0);
#pragma mark Construction
MVKPresentableSwapchainImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo,
MVKSwapchain* swapchain, uint32_t swapchainIndex);
void destroy() override;
~MVKPresentableSwapchainImage() override;
protected:
friend MVKSwapchain;
id<CAMetalDrawable> getCAMetalDrawable() override;
void addPresentedHandler(id<CAMetalDrawable> mtlDrawable, MVKImagePresentInfo presentInfo);
id<CAMetalDrawable> getCAMetalDrawable();
void addPresentedHandler(id<CAMetalDrawable> mtlDrawable, MVKImagePresentInfo presentInfo, MVKSwapchainSignaler signaler);
void releaseMetalDrawable();
MVKSwapchainImageAvailability getAvailability();
void makeAvailable(const MVKSwapchainSignaler& signaler);
void makeAvailable();
void acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence);
void renderWatermark(id<MTLCommandBuffer> mtlCmdBuff);
VkResult acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence);
MVKSwapchainSignaler getPresentationSignaler();
id<CAMetalDrawable> _mtlDrawable;
id<MTLCommandBuffer> _presentingMTLCmdBuff;
id<CAMetalDrawable> _mtlDrawable = nil;
id<MTLTexture> _mtlTextureHeadless = nil;
MVKSwapchainImageAvailability _availability;
MVKSmallVector<MVKSwapchainSignaler, 1> _availabilitySignalers;
MVKSwapchainSignaler _preSignaler;
MVKSwapchainSignaler _preSignaler = {};
std::mutex _availabilityLock;
uint64_t _presentationStartTime = 0;
};
@ -491,7 +489,8 @@ class MVKPeerSwapchainImage : public MVKSwapchainImage {
public:
/** Binds this resource according to the specified bind information. */
id<MTLTexture> getMTLTexture(uint8_t planeIndex) override;
VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo) override;
@ -501,10 +500,6 @@ public:
const VkImageCreateInfo* pCreateInfo,
MVKSwapchain* swapchain,
uint32_t swapchainIndex);
protected:
id<CAMetalDrawable> getCAMetalDrawable() override;
};

View File

@ -19,13 +19,16 @@
#include "MVKImage.h"
#include "MVKQueue.h"
#include "MVKSwapchain.h"
#include "MVKSurface.h"
#include "MVKCommandBuffer.h"
#include "MVKCmdDebug.h"
#include "MVKFoundation.h"
#include "MVKOSExtensions.h"
#include "MVKCodec.h"
#import "MTLTextureDescriptor+MoltenVK.h"
#import "MTLSamplerDescriptor+MoltenVK.h"
#import "CAMetalLayer+MoltenVK.h"
using namespace std;
using namespace SPIRV_CROSS_NAMESPACE;
@ -303,9 +306,7 @@ MVKImageMemoryBinding* MVKImagePlane::getMemoryBinding() const {
return (_image->_memoryBindings.size() > 1) ? _image->_memoryBindings[_planeIndex] : _image->_memoryBindings[0];
}
void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void MVKImagePlane::applyImageMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) {
@ -322,7 +323,7 @@ void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
: (layerStart + barrier.layerCount));
MVKImageMemoryBinding* memBind = getMemoryBinding();
bool needsSync = memBind->needsHostReadSync(srcStageMask, dstStageMask, barrier);
bool needsSync = memBind->needsHostReadSync(barrier);
bool needsPull = ((!memBind->_mtlTexelBuffer || memBind->_ownsTexelBuffer) &&
memBind->isMemoryHostCoherent() &&
barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL &&
@ -443,13 +444,11 @@ VkResult MVKImageMemoryBinding::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDevi
return _deviceMemory ? _deviceMemory->addImageMemoryBinding(this) : VK_SUCCESS;
}
void MVKImageMemoryBinding::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void MVKImageMemoryBinding::applyMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) {
#if MVK_MACOS
if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) {
if (needsHostReadSync(barrier)) {
for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) {
[cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: _image->_planes[planeIndex]->_mtlTexture];
}
@ -468,9 +467,7 @@ void MVKImageMemoryBinding::propagateDebugName() {
// Returns whether the specified image memory barrier requires a sync between this
// texture and host memory for the purpose of the host reading texture memory.
bool MVKImageMemoryBinding::needsHostReadSync(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier) {
bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) {
#if MVK_MACOS
return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
@ -624,15 +621,13 @@ bool MVKImage::getIsValidViewFormat(VkFormat viewFormat) {
#pragma mark Resource memory
void MVKImage::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
void MVKImage::applyImageMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) {
for (uint8_t planeIndex = 0; planeIndex < _planes.size(); planeIndex++) {
if ( !_hasChromaSubsampling || mvkIsAnyFlagEnabled(barrier.aspectMask, (VK_IMAGE_ASPECT_PLANE_0_BIT << planeIndex)) ) {
_planes[planeIndex]->applyImageMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse);
_planes[planeIndex]->applyImageMemoryBarrier(barrier, cmdEncoder, cmdUse);
}
}
}
@ -1149,6 +1144,7 @@ bool MVKImage::validateLinear(const VkImageCreateInfo* pCreateInfo, bool isAttac
}
void MVKImage::initExternalMemory(VkExternalMemoryHandleTypeFlags handleTypes) {
if ( !handleTypes ) { return; }
if (mvkIsOnlyAnyFlagEnabled(handleTypes, VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR)) {
auto& xmProps = getPhysicalDevice()->getExternalImageProperties(VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR);
for(auto& memoryBinding : _memoryBindings) {
@ -1175,12 +1171,6 @@ VkResult MVKSwapchainImage::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSi
}
#pragma mark Metal
// Overridden to always retrieve the MTLTexture directly from the CAMetalDrawable.
id<MTLTexture> MVKSwapchainImage::getMTLTexture(uint8_t planeIndex) { return [getCAMetalDrawable() texture]; }
#pragma mark Construction
MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device,
@ -1192,8 +1182,9 @@ MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device,
}
void MVKSwapchainImage::detachSwapchain() {
lock_guard<mutex> lock(_swapchainLock);
lock_guard<mutex> lock(_detachmentLock);
_swapchain = nullptr;
_device = nullptr;
}
void MVKSwapchainImage::destroy() {
@ -1217,58 +1208,57 @@ MVKSwapchainImageAvailability MVKPresentableSwapchainImage::getAvailability() {
return _availability;
}
// If present, signal the semaphore for the first waiter for the given image.
static void signalPresentationSemaphore(const MVKSwapchainSignaler& signaler, id<MTLCommandBuffer> mtlCmdBuff) {
if (signaler.semaphore) { signaler.semaphore->encodeDeferredSignal(mtlCmdBuff, signaler.semaphoreSignalToken); }
}
// Signal either or both of the semaphore and fence in the specified tracker pair.
static void signal(const MVKSwapchainSignaler& signaler, id<MTLCommandBuffer> mtlCmdBuff) {
if (signaler.semaphore) { signaler.semaphore->encodeDeferredSignal(mtlCmdBuff, signaler.semaphoreSignalToken); }
if (signaler.fence) { signaler.fence->signal(); }
}
// Tell the semaphore and fence that they are being tracked for future signaling.
static void markAsTracked(const MVKSwapchainSignaler& signaler) {
static void track(const MVKSwapchainSignaler& signaler) {
if (signaler.semaphore) { signaler.semaphore->retain(); }
if (signaler.fence) { signaler.fence->retain(); }
}
// Tell the semaphore and fence that they are no longer being tracked for future signaling.
static void unmarkAsTracked(const MVKSwapchainSignaler& signaler) {
static void signal(MVKSemaphore* semaphore, uint64_t semaphoreSignalToken, id<MTLCommandBuffer> mtlCmdBuff) {
if (semaphore) { semaphore->encodeDeferredSignal(mtlCmdBuff, semaphoreSignalToken); }
}
static void signal(MVKFence* fence) {
if (fence) { fence->signal(); }
}
// Signal the semaphore and fence and tell them that they are no longer being tracked for future signaling.
static void signalAndUntrack(const MVKSwapchainSignaler& signaler) {
signal(signaler.semaphore, signaler.semaphoreSignalToken, nil);
if (signaler.semaphore) { signaler.semaphore->release(); }
signal(signaler.fence);
if (signaler.fence) { signaler.fence->release(); }
}
static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) {
signal(signaler, nil);
unmarkAsTracked(signaler);
}
VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) {
// Now that this image is being acquired, release the existing drawable and its texture.
// This is not done earlier so the texture is retained for any post-processing such as screen captures, etc.
releaseMetalDrawable();
void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) {
lock_guard<mutex> lock(_availabilityLock);
// Upon acquisition, update acquisition ID immediately, to move it to the back of the chain,
// so other images will be preferred if either all images are available or no images are available.
_availability.acquisitionID = _swapchain->getNextAcquisitionID();
// Now that this image is being acquired, release the existing drawable and its texture.
// This is not done earlier so the texture is retained for any post-processing such as screen captures, etc.
releaseMetalDrawable();
auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? semaphore->deferSignal() : 0};
if (_availability.isAvailable) {
_availability.isAvailable = false;
// If signalling through a MTLEvent, and there's no command buffer presenting me, use an ephemeral MTLCommandBuffer.
// If signalling through a MTLEvent, signal through an ephemeral MTLCommandBuffer.
// Another option would be to use MTLSharedEvent in MVKSemaphore, but that might
// impose unacceptable performance costs to handle this particular case.
@autoreleasepool {
MVKSemaphore* mvkSem = signaler.semaphore;
id<MTLCommandBuffer> mtlCmdBuff = (mvkSem && mvkSem->isUsingCommandEncoding()
? _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage)
: nil);
signal(signaler, mtlCmdBuff);
id<MTLCommandBuffer> mtlCmdBuff = nil;
if (mvkSem && mvkSem->isUsingCommandEncoding()) {
mtlCmdBuff = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage);
if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); }
}
signal(signaler.semaphore, signaler.semaphoreSignalToken, mtlCmdBuff);
signal(signaler.fence);
[mtlCmdBuff commit];
}
@ -1276,45 +1266,65 @@ void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* s
} else {
_availabilitySignalers.push_back(signaler);
}
markAsTracked(signaler);
track(signaler);
return getConfigurationResult();
}
// Calling nextDrawable may result in a nil drawable, or a drawable with no pixel format.
// Attempt several times to retrieve a good drawable, and set an error to trigger the
// swapchain to be re-established if one cannot be retrieved.
id<CAMetalDrawable> MVKPresentableSwapchainImage::getCAMetalDrawable() {
while ( !_mtlDrawable ) {
@autoreleasepool { // Reclaim auto-released drawable object before end of loop
uint64_t startTime = _device->getPerformanceTimestamp();
_mtlDrawable = [_swapchain->_mtlLayer.nextDrawable retain];
if ( !_mtlDrawable ) { MVKLogError("CAMetalDrawable could not be acquired."); }
if (_mtlTextureHeadless) { return nil; } // If headless, there is no drawable.
_device->addActivityPerformance(_device->_performanceStatistics.queue.nextCAMetalDrawable, startTime);
if ( !_mtlDrawable ) {
@autoreleasepool {
bool hasInvalidFormat = false;
uint32_t attemptCnt = _swapchain->getImageCount(); // Attempt a resonable number of times
for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) {
uint64_t startTime = _device->getPerformanceTimestamp();
_mtlDrawable = [_swapchain->getCAMetalLayer().nextDrawable retain]; // retained
_device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime);
hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat;
if (hasInvalidFormat) { releaseMetalDrawable(); }
}
if (hasInvalidFormat) {
setConfigurationResult(reportError(VK_ERROR_OUT_OF_DATE_KHR, "CAMetalDrawable with valid format could not be acquired after %d attempts.", attemptCnt));
} else if ( !_mtlDrawable ) {
setConfigurationResult(reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt));
}
}
}
return _mtlDrawable;
}
// If not headless, retrieve the MTLTexture directly from the CAMetalDrawable.
id<MTLTexture> MVKPresentableSwapchainImage::getMTLTexture(uint8_t planeIndex) {
return _mtlTextureHeadless ? _mtlTextureHeadless : getCAMetalDrawable().texture;
}
// Present the drawable and make myself available only once the command buffer has completed.
// Pass MVKImagePresentInfo by value because it may not exist when the callback runs.
void MVKPresentableSwapchainImage::presentCAMetalDrawable(id<MTLCommandBuffer> mtlCmdBuff,
MVKImagePresentInfo presentInfo) {
lock_guard<mutex> lock(_availabilityLock);
_swapchain->willPresentSurface(getMTLTexture(0), mtlCmdBuff);
VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id<MTLCommandBuffer> mtlCmdBuff,
MVKImagePresentInfo presentInfo) {
_swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff);
// According to Apple, it is more performant to call MTLDrawable present from within a
// MTLCommandBuffer scheduled-handler than it is to call MTLCommandBuffer presentDrawable:.
// But get current drawable now, intead of in handler, because a new drawable might be acquired by then.
// Attach present handler before presenting to avoid race condition.
id<CAMetalDrawable> mtlDrwbl = getCAMetalDrawable();
MVKSwapchainSignaler signaler = getPresentationSignaler();
[mtlCmdBuff addScheduledHandler: ^(id<MTLCommandBuffer> mcb) {
addPresentedHandler(mtlDrwbl, presentInfo, signaler);
// Try to do any present mode transitions as late as possible in an attempt
// to avoid visual disruptions on any presents already on the queue.
if (presentInfo.presentMode != VK_PRESENT_MODE_MAX_ENUM_KHR) {
mtlDrwbl.layer.displaySyncEnabledMVK = (presentInfo.presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR);
}
if (presentInfo.hasPresentTime) {
addPresentedHandler(mtlDrwbl, presentInfo);
}
if (presentInfo.desiredPresentTime) {
[mtlDrwbl presentAtTime: (double)presentInfo.desiredPresentTime * 1.0e-9];
} else {
@ -1322,7 +1332,30 @@ void MVKPresentableSwapchainImage::presentCAMetalDrawable(id<MTLCommandBuffer> m
}
}];
MVKSwapchainSignaler signaler;
// Ensure this image, the drawable, and the present fence are not destroyed while
// awaiting MTLCommandBuffer completion. We retain the drawable separately because
// a new drawable might be acquired by this image by then.
// Signal the fence from this callback, because the last one or two presentation
// completion callbacks can occasionally stall.
retain();
[mtlDrwbl retain];
auto* fence = presentInfo.fence;
if (fence) { fence->retain(); }
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
signal(fence);
if (fence) { fence->release(); }
[mtlDrwbl release];
release();
}];
signal(signaler.semaphore, signaler.semaphoreSignalToken, mtlCmdBuff);
return getConfigurationResult();
}
MVKSwapchainSignaler MVKPresentableSwapchainImage::getPresentationSignaler() {
lock_guard<mutex> lock(_availabilityLock);
// Mark this image as available if no semaphores or fences are waiting to be signaled.
_availability.isAvailable = _availabilitySignalers.empty();
if (_availability.isAvailable) {
@ -1331,93 +1364,91 @@ void MVKPresentableSwapchainImage::presentCAMetalDrawable(id<MTLCommandBuffer> m
// when an app uses a single semaphore or fence for more than one swapchain image.
// Because the semaphore or fence will be signaled by more than one image, it will
// get out of sync, and the final use of the image would not be signaled as a result.
signaler = _preSignaler;
return _preSignaler;
} else {
// If this image is not yet available, extract and signal the first semaphore and fence.
MVKSwapchainSignaler signaler;
auto sigIter = _availabilitySignalers.begin();
signaler = *sigIter;
_availabilitySignalers.erase(sigIter);
return signaler;
}
// Ensure this image, the drawable, and the present fence are not destroyed while
// awaiting MTLCommandBuffer completion. We retain the drawable separately because
// a new drawable might be acquired by this image by then.
retain();
[mtlDrwbl retain];
auto* fence = presentInfo.fence;
if (fence) { fence->retain(); }
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
[mtlDrwbl release];
makeAvailable(signaler);
release();
if (fence) {
fence->signal();
fence->release();
}
}];
signalPresentationSemaphore(signaler, mtlCmdBuff);
}
// Pass MVKImagePresentInfo by value because it may not exist when the callback runs.
// Pass MVKImagePresentInfo & MVKSwapchainSignaler by value because they may not exist when the callback runs.
void MVKPresentableSwapchainImage::addPresentedHandler(id<CAMetalDrawable> mtlDrawable,
MVKImagePresentInfo presentInfo) {
MVKImagePresentInfo presentInfo,
MVKSwapchainSignaler signaler) {
beginPresentation(presentInfo);
#if !MVK_OS_SIMULATOR
if ([mtlDrawable respondsToSelector: @selector(addPresentedHandler:)]) {
retain(); // Ensure this image is not destroyed while awaiting presentation
[mtlDrawable addPresentedHandler: ^(id<MTLDrawable> drawable) {
// Since we're in a callback, it's possible that the swapchain has been released by now.
// Lock the swapchain, and test if it is present before doing anything with it.
lock_guard<mutex> cblock(_swapchainLock);
if (_swapchain) { _swapchain->recordPresentTime(presentInfo, drawable.presentedTime * 1.0e9); }
release();
[mtlDrawable addPresentedHandler: ^(id<MTLDrawable> mtlDrwbl) {
endPresentation(presentInfo, signaler, mtlDrwbl.presentedTime * 1.0e9);
}];
return;
}
} else
#endif
// If MTLDrawable.presentedTime/addPresentedHandler isn't supported,
// treat it as if the present happened when requested.
// Since this function may be called in a callback, it's possible that
// the swapchain has been released by the time this function runs.
// Lock the swapchain, and test if it is present before doing anything with it.
lock_guard<mutex> lock(_swapchainLock);
if (_swapchain) {_swapchain->recordPresentTime(presentInfo); }
{
// If MTLDrawable.presentedTime/addPresentedHandler isn't supported,
// treat it as if the present happened when requested.
endPresentation(presentInfo, signaler);
}
}
// Resets the MTLTexture and CAMetalDrawable underlying this image.
// Ensure this image and the swapchain are not destroyed while awaiting presentation
void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) {
retain();
_swapchain->beginPresentation(presentInfo);
_presentationStartTime = getDevice()->getPerformanceTimestamp();
}
void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo,
const MVKSwapchainSignaler& signaler,
uint64_t actualPresentTime) {
// If the presentation time is not available, use the current nanosecond runtime clock,
// which should be reasonably accurate (sub-ms) to the presentation time. The presentation
// time will not be available if the presentation did not actually happen, such as when
// running headless, or on a test harness that is not attached to the windowing system.
if (actualPresentTime == 0) { actualPresentTime = mvkGetRuntimeNanoseconds(); }
{ // Scope to avoid deadlock if release() is run within detachment lock
// If I have become detached from the swapchain, it means the swapchain, and possibly the
// VkDevice, have been destroyed by the time of this callback, so do not reference them.
lock_guard<mutex> lock(_detachmentLock);
if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); }
if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); }
}
// Makes an image available for acquisition by the app.
// If any semaphores are waiting to be signaled when this image becomes available, the
// earliest semaphore is signaled, and this image remains unavailable for other uses.
signalAndUntrack(signaler);
release();
}
// Releases the CAMetalDrawable underlying this image.
void MVKPresentableSwapchainImage::releaseMetalDrawable() {
for (uint8_t planeIndex = 0; planeIndex < _planes.size(); ++planeIndex) {
_planes[planeIndex]->releaseMTLTexture();
}
[_mtlDrawable release];
_mtlDrawable = nil;
}
// Makes an image available for acquisition by the app.
// If any semaphores are waiting to be signaled when this image becomes available, the
// earliest semaphore is signaled, and this image remains unavailable for other uses.
void MVKPresentableSwapchainImage::makeAvailable(const MVKSwapchainSignaler& signaler) {
lock_guard<mutex> lock(_availabilityLock);
signalAndUnmarkAsTracked(signaler);
}
// Signal, untrack, and release any signalers that are tracking.
// Release the drawable before the lock, as it may trigger completion callback.
void MVKPresentableSwapchainImage::makeAvailable() {
	releaseMetalDrawable();
	lock_guard<mutex> lock(_availabilityLock);
	if ( !_availability.isAvailable ) {
		// NOTE(review): each signaler is passed to BOTH signalAndUnmarkAsTracked() and
		// signalAndUntrack() below. This looks like merge residue (one function appears to
		// be the renamed form of the other); confirm that only one call per signaler should
		// remain, otherwise fences/semaphores may be signaled twice.
		signalAndUnmarkAsTracked(_preSignaler);
		signalAndUntrack(_preSignaler);
		for (auto& sig : _availabilitySignalers) {
			signalAndUnmarkAsTracked(sig);
			signalAndUntrack(sig);
		}
		// All tracked signalers have been handled; mark the image acquirable again.
		_availabilitySignalers.clear();
		_availability.isAvailable = true;
	}
}
#pragma mark Construction
MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device,
@ -1426,17 +1457,34 @@ MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device,
uint32_t swapchainIndex) :
MVKSwapchainImage(device, pCreateInfo, swapchain, swapchainIndex) {
_mtlDrawable = nil;
_availability.acquisitionID = _swapchain->getNextAcquisitionID();
_availability.isAvailable = true;
_preSignaler = MVKSwapchainSignaler{nullptr, nullptr, 0};
if (swapchain->isHeadless()) {
@autoreleasepool {
MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: getMTLPixelFormat()
width: pCreateInfo->extent.width
height: pCreateInfo->extent.height
mipmapped: NO];
mtlTexDesc.usageMVK = MTLTextureUsageRenderTarget;
mtlTexDesc.storageModeMVK = MTLStorageModePrivate;
_mtlTextureHeadless = [[getMTLDevice() newTextureWithDescriptor: mtlTexDesc] retain]; // retained
}
}
}
void MVKPresentableSwapchainImage::destroy() {
	// Release the Metal drawable and any headless backing texture
	// before handing destruction off to the base swapchain image.
	releaseMetalDrawable();
	if (_mtlTextureHeadless) {
		[_mtlTextureHeadless release];
		_mtlTextureHeadless = nil;
	}
	MVKSwapchainImage::destroy();
}
// Unsignaled signalers will exist if this image is acquired more than it is presented.
// Ensure they are signaled and untracked so the fences and semaphores will be released.
// NOTE(review): makeAvailable() itself begins by calling releaseMetalDrawable(), so the
// explicit call here appears to be merge residue. It is harmless only because
// releaseMetalDrawable() nils _mtlDrawable after releasing it (second call is a no-op) —
// confirm whether both lines should remain.
MVKPresentableSwapchainImage::~MVKPresentableSwapchainImage() {
	releaseMetalDrawable();
	makeAvailable();
}
@ -1464,8 +1512,8 @@ VkResult MVKPeerSwapchainImage::bindDeviceMemory2(const VkBindImageMemoryInfo* p
#pragma mark Metal
id<CAMetalDrawable> MVKPeerSwapchainImage::getCAMetalDrawable() {
return ((MVKSwapchainImage*)_swapchain->getPresentableImage(_swapchainIndex))->getCAMetalDrawable();
id<MTLTexture> MVKPeerSwapchainImage::getMTLTexture(uint8_t planeIndex) {
return ((MVKSwapchainImage*)_swapchain->getPresentableImage(_swapchainIndex))->getMTLTexture(planeIndex);
}
@ -1627,6 +1675,14 @@ VkResult MVKImageViewPlane::initSwizzledMTLPixelFormat(const VkImageViewCreateIn
adjustAnyComponentSwizzleValue(a, R, A, B, G, R);
break;
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
// Metal doesn't support this directly, so use a swizzle to get the ordering right.
adjustAnyComponentSwizzleValue(r, B, B, G, R, A);
adjustAnyComponentSwizzleValue(g, G, B, G, R, A);
adjustAnyComponentSwizzleValue(b, R, B, G, R, A);
adjustAnyComponentSwizzleValue(a, A, B, G, R, A);
break;
default:
break;
}

View File

@ -41,9 +41,14 @@ typedef struct MVKEntryPoint {
bool isDevice;
bool isCore() { return !ext1Name && !ext2Name; }
bool isEnabled(uint32_t enabledVersion, const MVKExtensionList& extList) {
return ((isCore() && MVK_VULKAN_API_VERSION_CONFORM(enabledVersion) >= apiVersion) ||
extList.isEnabled(ext1Name) || extList.isEnabled(ext2Name));
bool isEnabled(uint32_t enabledVersion, const MVKExtensionList& extList, const MVKExtensionList* instExtList = nullptr) {
bool isAPISupported = MVK_VULKAN_API_VERSION_CONFORM(enabledVersion) >= apiVersion;
auto isExtnSupported = [this, isAPISupported](const MVKExtensionList& extList) {
return extList.isEnabled(this->ext1Name) && (isAPISupported || !this->ext2Name || extList.isEnabled(this->ext2Name));
};
return ((isCore() && isAPISupported) ||
isExtnSupported(extList) ||
(instExtList && isExtnSupported(*instExtList)));
}
} MVKEntryPoint;
@ -115,6 +120,9 @@ public:
MVKSurface* createSurface(const VkMetalSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator);
MVKSurface* createSurface(const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator);
MVKSurface* createSurface(const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo,
const VkAllocationCallbacks* pAllocator);
@ -181,9 +189,8 @@ protected:
void propagateDebugName() override {}
void initProcAddrs();
void initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo);
void initMVKConfig(const VkInstanceCreateInfo* pCreateInfo);
NSArray<id<MTLDevice>>* getAvailableMTLDevicesArray();
void initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo);
VkDebugReportFlagsEXT getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel);
VkDebugUtilsMessageSeverityFlagBitsEXT getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel);
VkDebugUtilsMessageTypeFlagsEXT getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel);
@ -197,6 +204,7 @@ protected:
MVKSmallVector<MVKDebugReportCallback*> _debugReportCallbacks;
MVKSmallVector<MVKDebugUtilsMessenger*> _debugUtilMessengers;
std::unordered_map<std::string, MVKEntryPoint> _entryPoints;
std::string _autoGPUCaptureOutputFilepath;
std::mutex _dcbLock;
bool _hasDebugReportCallbacks;
bool _hasDebugUtilsMessengers;

View File

@ -102,6 +102,11 @@ MVKSurface* MVKInstance::createSurface(const VkMetalSurfaceCreateInfoEXT* pCreat
return new MVKSurface(this, pCreateInfo, pAllocator);
}
// Creates and returns a new surface from the headless-surface create info.
MVKSurface* MVKInstance::createSurface(const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo,
									   const VkAllocationCallbacks* pAllocator) {
	auto* mvkSrfc = new MVKSurface(this, pCreateInfo, pAllocator);
	return mvkSrfc;
}
MVKSurface* MVKInstance::createSurface(const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo,
const VkAllocationCallbacks* pAllocator) {
return new MVKSurface(this, pCreateInfo, pAllocator);
@ -238,94 +243,37 @@ void MVKInstance::debugReportMessage(MVKVulkanAPIObject* mvkAPIObj, MVKConfigLog
VkDebugReportFlagsEXT MVKInstance::getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel) {
switch (logLevel) {
case MVK_CONFIG_LOG_LEVEL_DEBUG:
return VK_DEBUG_REPORT_DEBUG_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_INFO:
return VK_DEBUG_REPORT_INFORMATION_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_WARNING:
return VK_DEBUG_REPORT_WARNING_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_ERROR:
default:
return VK_DEBUG_REPORT_ERROR_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_REPORT_ERROR_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_REPORT_WARNING_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_REPORT_INFORMATION_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_REPORT_DEBUG_BIT_EXT;
default: return VK_DEBUG_REPORT_ERROR_BIT_EXT;
}
}
VkDebugUtilsMessageSeverityFlagBitsEXT MVKInstance::getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) {
switch (logLevel) {
case MVK_CONFIG_LOG_LEVEL_DEBUG:
return VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_INFO:
return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_WARNING:
return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_ERROR:
default:
return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
default: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
}
}
VkDebugUtilsMessageTypeFlagsEXT MVKInstance::getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) {
switch (logLevel) {
case MVK_CONFIG_LOG_LEVEL_DEBUG:
case MVK_CONFIG_LOG_LEVEL_INFO:
case MVK_CONFIG_LOG_LEVEL_WARNING:
return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_ERROR:
default:
return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT;
case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT;
default: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT;
}
}
#pragma mark Object Creation
// Returns an autoreleased array containing the MTLDevices available on this system, sorted according
// to power, with higher power GPU's at the front of the array. This ensures that a lazy app that simply
// grabs the first GPU will get a high-power one by default. If MVKConfiguration::forceLowPowerGPU is set,
// the returned array will only include low-power devices.
NSArray<id<MTLDevice>>* MVKInstance::getAvailableMTLDevicesArray() {
	NSMutableArray* mtlDevs = [NSMutableArray array];

#if MVK_MACOS
	NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease];
	if (rawMTLDevs) {
		bool forceLowPower = getMVKConfig().forceLowPowerGPU;

		// Populate the array of appropriate MTLDevices
		for (id<MTLDevice> md in rawMTLDevs) {
			if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; }
		}

		// Sort by power: high-power (non-low-power) devices first; among equal-power
		// devices, a device attached to a display sorts ahead of a headless one.
		[mtlDevs sortUsingComparator: ^(id<MTLDevice> md1, id<MTLDevice> md2) {
			BOOL md1IsLP = md1.isLowPower;
			BOOL md2IsLP = md2.isLowPower;
			if (md1IsLP == md2IsLP) {
				// If one device is headless and the other one is not, select the
				// one that is not headless first.
				BOOL md1IsHeadless = md1.isHeadless;
				BOOL md2IsHeadless = md2.isHeadless;
				if (md1IsHeadless == md2IsHeadless ) {
					return NSOrderedSame;
				}
				return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending;
			}
			return md2IsLP ? NSOrderedAscending : NSOrderedDescending;
		}];
	}
#endif	// MVK_MACOS

#if MVK_IOS_OR_TVOS
	// iOS/tvOS expose a single system default device.
	id<MTLDevice> md = [MTLCreateSystemDefaultDevice() autorelease];
	if (md) { [mtlDevs addObject: md]; }
#endif	// MVK_IOS_OR_TVOS

	return mtlDevs;		// NOTE(review): mtlDevs comes from +[NSMutableArray array], so it is
						// autoreleased, not retained — the original "// retained" note looks stale.
}
MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExtensions(this) {
initDebugCallbacks(pCreateInfo); // Do before any creation activities
@ -347,7 +295,7 @@ MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExte
// This effort creates a number of autoreleased instances of Metal
// and other Obj-C classes, so wrap it all in an autorelease pool.
@autoreleasepool {
NSArray<id<MTLDevice>>* mtlDevices = getAvailableMTLDevicesArray();
NSArray<id<MTLDevice>>* mtlDevices = mvkGetAvailableMTLDevicesArray(this);
_physicalDevices.reserve(mtlDevices.count);
for (id<MTLDevice> mtlDev in mtlDevices) {
_physicalDevices.push_back(new MVKPhysicalDevice(this, mtlDev));
@ -398,20 +346,13 @@ void MVKInstance::initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo) {
}
}
#define STR(NAME) #NAME
#define CHECK_CONFIG(name, configSetting, type) \
if(mvkStringsAreEqual(pSetting->pSettingName, STR(MVK_CONFIG_##name))) { \
_mvkConfig.configSetting = *(type*)(pSetting->pValues); \
continue; \
}
// If the VK_EXT_layer_settings extension is enabled, initialize the local
// If the VK_EXT_layer_settings extension is enabled, initialize the local
// MVKConfiguration from the global version built from environment variables.
void MVKInstance::initMVKConfig(const VkInstanceCreateInfo* pCreateInfo) {
if ( !_enabledExtensions.vk_EXT_layer_settings.enabled ) { return; }
_mvkConfig = getMVKConfig();
_mvkConfig = mvkConfig();
VkLayerSettingsCreateInfoEXT* pLSCreateInfo = nil;
for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
@ -429,42 +370,15 @@ void MVKInstance::initMVKConfig(const VkInstanceCreateInfo* pCreateInfo) {
for (uint32_t lsIdx = 0; lsIdx < pLSCreateInfo->settingCount; lsIdx++) {
const auto* pSetting = &pLSCreateInfo->pSettings[lsIdx];
CHECK_CONFIG(DEBUG, debugMode, VkBool32);
CHECK_CONFIG(SHADER_CONVERSION_FLIP_VERTEX_Y, shaderConversionFlipVertexY, VkBool32);
CHECK_CONFIG(SYNCHRONOUS_QUEUE_SUBMITS, synchronousQueueSubmits, VkBool32);
CHECK_CONFIG(PREFILL_METAL_COMMAND_BUFFERS, prefillMetalCommandBuffers, MVKPrefillMetalCommandBuffersStyle);
CHECK_CONFIG(MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE, maxActiveMetalCommandBuffersPerQueue, uint32_t);
CHECK_CONFIG(SUPPORT_LARGE_QUERY_POOLS, supportLargeQueryPools, VkBool32);
CHECK_CONFIG(PRESENT_WITH_COMMAND_BUFFER, presentWithCommandBuffer, VkBool32);
CHECK_CONFIG(SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST, swapchainMinMagFilterUseNearest, VkBool32);
CHECK_CONFIG(METAL_COMPILE_TIMEOUT, metalCompileTimeout, uint64_t);
CHECK_CONFIG(PERFORMANCE_TRACKING, performanceTracking, VkBool32);
CHECK_CONFIG(PERFORMANCE_LOGGING_FRAME_COUNT, performanceLoggingFrameCount, uint32_t);
CHECK_CONFIG(ACTIVITY_PERFORMANCE_LOGGING_STYLE, activityPerformanceLoggingStyle, MVKConfigActivityPerformanceLoggingStyle);
CHECK_CONFIG(DISPLAY_WATERMARK, displayWatermark, VkBool32);
CHECK_CONFIG(SPECIALIZED_QUEUE_FAMILIES, specializedQueueFamilies, VkBool32);
CHECK_CONFIG(SWITCH_SYSTEM_GPU, switchSystemGPU, VkBool32);
CHECK_CONFIG(FULL_IMAGE_VIEW_SWIZZLE, fullImageViewSwizzle, VkBool32);
CHECK_CONFIG(DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX, defaultGPUCaptureScopeQueueFamilyIndex, VkBool32);
CHECK_CONFIG(DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX, defaultGPUCaptureScopeQueueIndex, VkBool32);
CHECK_CONFIG(FAST_MATH_ENABLED, fastMathEnabled, MVKConfigFastMath);
CHECK_CONFIG(LOG_LEVEL, logLevel, MVKConfigLogLevel);
CHECK_CONFIG(TRACE_VULKAN_CALLS, traceVulkanCalls, MVKConfigTraceVulkanCalls);
CHECK_CONFIG(FORCE_LOW_POWER_GPU, forceLowPowerGPU, VkBool32);
CHECK_CONFIG(VK_SEMAPHORE_SUPPORT_STYLE, semaphoreSupportStyle, MVKVkSemaphoreSupportStyle);
CHECK_CONFIG(AUTO_GPU_CAPTURE_SCOPE, autoGPUCaptureScope, MVKConfigAutoGPUCaptureScope);
CHECK_CONFIG(AUTO_GPU_CAPTURE_OUTPUT_FILE, autoGPUCaptureOutputFilepath, const char*);
CHECK_CONFIG(TEXTURE_1D_AS_2D, texture1DAs2D, VkBool32);
CHECK_CONFIG(PREALLOCATE_DESCRIPTORS, preallocateDescriptors, VkBool32);
CHECK_CONFIG(USE_COMMAND_POOLING, useCommandPooling, VkBool32);
CHECK_CONFIG(USE_MTLHEAP, useMTLHeap, VkBool32);
CHECK_CONFIG(API_VERSION_TO_ADVERTISE, apiVersionToAdvertise, uint32_t);
CHECK_CONFIG(ADVERTISE_EXTENSIONS, advertiseExtensions, uint32_t);
CHECK_CONFIG(RESUME_LOST_DEVICE, resumeLostDevice, VkBool32);
CHECK_CONFIG(USE_METAL_ARGUMENT_BUFFERS, useMetalArgumentBuffers, MVKUseMetalArgumentBuffers);
CHECK_CONFIG(SHADER_COMPRESSION_ALGORITHM, shaderSourceCompressionAlgorithm, MVKConfigCompressionAlgorithm);
CHECK_CONFIG(SHOULD_MAXIMIZE_CONCURRENT_COMPILATION, shouldMaximizeConcurrentCompilation, VkBool32);
#define STR(name) #name
#define MVK_CONFIG_MEMBER(member, mbrType, name) \
if(mvkStringsAreEqual(pSetting->pSettingName, STR(MVK_CONFIG_##name))) { \
_mvkConfig.member = *(mbrType*)(pSetting->pValues); \
continue; \
}
#include "MVKConfigMembers.def"
}
mvkSetConfig(_mvkConfig, _mvkConfig, _autoGPUCaptureOutputFilepath);
}
#define ADD_ENTRY_POINT_MAP(name, func, api, ext1, ext2, isDev) \
@ -507,8 +421,8 @@ void MVKInstance::initMVKConfig(const VkInstanceCreateInfo* pCreateInfo) {
#define ADD_INST_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, VK_##EXT##_EXTENSION_NAME, nullptr, false)
#define ADD_DVC_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, VK_##EXT##_EXTENSION_NAME, nullptr, true)
#define ADD_INST_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, false)
#define ADD_DVC_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, true)
#define ADD_INST_EXT2_ENTRY_POINT(func, API, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_API_VERSION_##API, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, false)
#define ADD_DVC_EXT2_ENTRY_POINT(func, API, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_API_VERSION_##API, VK_##EXT1##_EXTENSION_NAME, VK_##EXT2##_EXTENSION_NAME, true)
// Add an open function, not tied to core or an extension.
#define ADD_INST_OPEN_ENTRY_POINT(func) ADD_ENTRY_POINT(func, 0, nullptr, nullptr, false)
@ -553,21 +467,23 @@ void MVKInstance::initProcAddrs() {
ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfacePresentModesKHR, KHR_SURFACE);
ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfaceCapabilities2KHR, KHR_GET_SURFACE_CAPABILITIES_2);
ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceSurfaceFormats2KHR, KHR_GET_SURFACE_CAPABILITIES_2);
ADD_INST_EXT_ENTRY_POINT(vkCreateHeadlessSurfaceEXT, EXT_HEADLESS_SURFACE);
ADD_INST_EXT_ENTRY_POINT(vkCreateMetalSurfaceEXT, EXT_METAL_SURFACE);
ADD_INST_EXT_ENTRY_POINT(vkCreateDebugReportCallbackEXT, EXT_DEBUG_REPORT);
ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, EXT_DEBUG_REPORT);
ADD_INST_EXT_ENTRY_POINT(vkDebugReportMessageEXT, EXT_DEBUG_REPORT);
ADD_INST_EXT_ENTRY_POINT(vkSetDebugUtilsObjectNameEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkSetDebugUtilsObjectTagEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkQueueBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkQueueEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkQueueInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkCmdBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkCmdEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkCmdInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
// n.b. Despite that VK_EXT_debug_utils is an instance extension, these functions are device functions.
ADD_DVC_EXT_ENTRY_POINT(vkSetDebugUtilsObjectNameEXT, EXT_DEBUG_UTILS);
ADD_DVC_EXT_ENTRY_POINT(vkSetDebugUtilsObjectTagEXT, EXT_DEBUG_UTILS);
ADD_DVC_EXT_ENTRY_POINT(vkQueueBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_DVC_EXT_ENTRY_POINT(vkQueueEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_DVC_EXT_ENTRY_POINT(vkQueueInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_DVC_EXT_ENTRY_POINT(vkCmdBeginDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_DVC_EXT_ENTRY_POINT(vkCmdEndDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_DVC_EXT_ENTRY_POINT(vkCmdInsertDebugUtilsLabelEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkCreateDebugUtilsMessengerEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkDestroyDebugUtilsMessengerEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkSubmitDebugUtilsMessageEXT, EXT_DEBUG_UTILS);
ADD_INST_EXT_ENTRY_POINT(vkCreateMetalSurfaceEXT, EXT_METAL_SURFACE);
#ifdef VK_USE_PLATFORM_IOS_MVK
ADD_INST_EXT_ENTRY_POINT(vkCreateIOSSurfaceMVK, MVK_IOS_SURFACE);
@ -762,16 +678,16 @@ void MVKInstance::initProcAddrs() {
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdResetEvent2, KHR, KHR_SYNCHRONIZATION_2);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdResolveImage2, KHR, KHR_COPY_COMMANDS_2);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetCullMode, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBiasEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBiasEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthBoundsTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthCompareOp, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetDepthWriteEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetEvent2, KHR, KHR_SYNCHRONIZATION_2);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetFrontFace, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveRestartEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveRestartEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetPrimitiveTopology, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetRasterizerDiscardEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetRasterizerDiscardEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE_2);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetScissorWithCount, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetStencilOp, EXT, EXT_EXTENDED_DYNAMIC_STATE);
ADD_DVC_1_3_PROMOTED_ENTRY_POINT(vkCmdSetStencilTestEnable, EXT, EXT_EXTENDED_DYNAMIC_STATE);
@ -796,16 +712,16 @@ void MVKInstance::initProcAddrs() {
ADD_DVC_EXT_ENTRY_POINT(vkMapMemory2KHR, KHR_MAP_MEMORY_2);
ADD_DVC_EXT_ENTRY_POINT(vkUnmapMemory2KHR, KHR_MAP_MEMORY_2);
ADD_DVC_EXT_ENTRY_POINT(vkCmdPushDescriptorSetKHR, KHR_PUSH_DESCRIPTOR);
ADD_DVC_EXT2_ENTRY_POINT(vkCmdPushDescriptorSetWithTemplateKHR, KHR_PUSH_DESCRIPTOR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
ADD_DVC_EXT2_ENTRY_POINT(vkCmdPushDescriptorSetWithTemplateKHR, 1_1, KHR_PUSH_DESCRIPTOR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
ADD_DVC_EXT_ENTRY_POINT(vkCreateSwapchainKHR, KHR_SWAPCHAIN);
ADD_DVC_EXT_ENTRY_POINT(vkDestroySwapchainKHR, KHR_SWAPCHAIN);
ADD_DVC_EXT_ENTRY_POINT(vkGetSwapchainImagesKHR, KHR_SWAPCHAIN);
ADD_DVC_EXT_ENTRY_POINT(vkAcquireNextImageKHR, KHR_SWAPCHAIN);
ADD_DVC_EXT_ENTRY_POINT(vkQueuePresentKHR, KHR_SWAPCHAIN);
ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupPresentCapabilitiesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupPresentCapabilitiesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, 1_1, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT_ENTRY_POINT(vkGetCalibratedTimestampsEXT, EXT_CALIBRATED_TIMESTAMPS);
ADD_DVC_EXT_ENTRY_POINT(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, EXT_CALIBRATED_TIMESTAMPS);
ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectTagEXT, EXT_DEBUG_MARKER);
@ -825,6 +741,29 @@ void MVKInstance::initProcAddrs() {
ADD_DVC_EXT_ENTRY_POINT(vkReleaseSwapchainImagesEXT, EXT_SWAPCHAIN_MAINTENANCE_1);
ADD_DVC_EXT_ENTRY_POINT(vkGetRefreshCycleDurationGOOGLE, GOOGLE_DISPLAY_TIMING);
ADD_DVC_EXT_ENTRY_POINT(vkGetPastPresentationTimingGOOGLE, GOOGLE_DISPLAY_TIMING);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEXT, EXT_EXTENDED_DYNAMIC_STATE_2);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetPatchControlPointsEXT, EXT_EXTENDED_DYNAMIC_STATE_2);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetAlphaToCoverageEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetAlphaToOneEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendAdvancedEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorBlendEquationEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetColorWriteMaskEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetConservativeRasterizationModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClampEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClipEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDepthClipNegativeOneToOneEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetExtraPrimitiveOverestimationSizeEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLineRasterizationModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLineStippleEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetPolygonModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetProvokingVertexModeEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetRasterizationSamplesEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetRasterizationStreamEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleLocationsEnableEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleMaskEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
ADD_DVC_EXT_ENTRY_POINT(vkCmdSetTessellationDomainOriginEXT, EXT_EXTENDED_DYNAMIC_STATE_3);
}
void MVKInstance::logVersions() {
@ -850,6 +789,11 @@ MVKInstance::~MVKInstance() {
_useCreationCallbacks = true;
mvkDestroyContainerContents(_physicalDevices);
// Since this message may invoke debug callbacks, do it before locking callbacks.
MVKLogInfo("Destroying VkInstance for Vulkan version %s with %d Vulkan extensions enabled.",
mvkGetVulkanVersionString(_appInfo.apiVersion).c_str(),
_enabledExtensions.getEnabledCount());
lock_guard<mutex> lock(_dcbLock);
mvkDestroyContainerContents(_debugReportCallbacks);
}

View File

@ -132,7 +132,10 @@ public:
/** Returns the debug report object type of this object. */
VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT; }
/** Binds this pipeline to the specified command encoder. */
/** Called when the pipeline has been bound to the command encoder. */
virtual void wasBound(MVKCommandEncoder* cmdEncoder) {}
/** Encodes this pipeline to the command encoder. */
virtual void encode(MVKCommandEncoder* cmdEncoder, uint32_t stage = 0) = 0;
/** Binds the push constants to a command encoder. */
@ -218,6 +221,56 @@ struct MVKStagedDescriptorBindingUse {
MVKBitArray stages[4] = {};
};
/** Enumeration identifying different state content types. */
enum MVKRenderStateType {
	Unknown = 0,				// Sentinel; MVKRenderStateFlags maps it to an empty flag mask.
	BlendConstants,
	CullMode,
	DepthBias,
	DepthBiasEnable,
	DepthBounds,
	DepthBoundsTestEnable,
	DepthClipEnable,
	DepthCompareOp,
	DepthTestEnable,
	DepthWriteEnable,
	FrontFace,
	LineWidth,
	LogicOp,
	LogicOpEnable,
	PatchControlPoints,
	PolygonMode,
	PrimitiveRestartEnable,
	PrimitiveTopology,
	RasterizerDiscardEnable,
	SampleLocations,
	SampleLocationsEnable,
	Scissors,
	StencilCompareMask,
	StencilOp,
	StencilReference,
	StencilTestEnable,
	StencilWriteMask,
	VertexStride,
	Viewports,
	MVKRenderStateTypeCount		// One more than the highest state value; sizes the flag bits (Unknown excluded).
};
/**
 * Boolean tracking of rendering state.
 *
 * Each MVKRenderStateType (except Unknown) occupies one bit of _stateFlags.
 * Query members are const so the flags can be read through const references.
 */
struct MVKRenderStateFlags {
	/** Sets the flag for the state. Unknown is ignored. */
	void enable(MVKRenderStateType rs) { if (rs) { mvkEnableFlags(_stateFlags, getFlagMask(rs)); } }
	/** Clears the flag for the state. Unknown is ignored. */
	void disable(MVKRenderStateType rs) { if (rs) { mvkDisableFlags(_stateFlags, getFlagMask(rs)); } }
	/** Sets or clears the flag for the state, per val. */
	void set(MVKRenderStateType rs, bool val) { val ? enable(rs) : disable(rs); }
	void enableAll() { mvkEnableAllFlags(_stateFlags); }
	void disableAll() { mvkDisableAllFlags(_stateFlags); }
	/** Returns whether the flag for the state is set. Unknown always returns false. */
	bool isEnabled(MVKRenderStateType rs) const { return mvkIsAnyFlagEnabled(_stateFlags, getFlagMask(rs)); }
protected:
	// Unknown (0) maps to an empty mask so it can never be set or queried.
	uint32_t getFlagMask(MVKRenderStateType rs) const { return rs ? (1u << (rs - 1u)) : 0u; }

	uint32_t _stateFlags = 0;
	static_assert(sizeof(_stateFlags) * 8 >= MVKRenderStateTypeCount - 1, "_stateFlags is too small to support the number of flags in MVKRenderStateType.");	// Ignore Unknown type
};
/** Represents a Vulkan graphics pipeline. */
class MVKGraphicsPipeline : public MVKPipeline {
@ -226,18 +279,16 @@ public:
/** Returns the number and order of stages in this pipeline. Draw commands must encode this pipeline once per stage. */
void getStages(MVKPiplineStages& stages);
/** Binds this pipeline to the specified command encoder. */
virtual void wasBound(MVKCommandEncoder* cmdEncoder) override;
void encode(MVKCommandEncoder* cmdEncoder, uint32_t stage = 0) override;
/** Returns whether this pipeline permits dynamic setting of the specified state. */
bool supportsDynamicState(VkDynamicState state);
/** Returns whether this pipeline permits dynamic setting of the state. */
bool isDynamicState(MVKRenderStateType state) { return _dynamicState.isEnabled(state); }
/** Returns whether this pipeline has tessellation shaders. */
bool isTessellationPipeline() { return _tessInfo.patchControlPoints > 0; }
/** Returns the number of input tessellation patch control points. */
uint32_t getInputControlPointCount() { return _tessInfo.patchControlPoints; }
/** Returns the number of output tessellation patch control points. */
uint32_t getOutputControlPointCount() { return _outputControlPointCount; }
@ -271,9 +322,6 @@ public:
/** Returns true if the tessellation control shader needs a buffer to store its per-patch output. */
bool needsTessCtlPatchOutputBuffer() { return _needsTessCtlPatchOutputBuffer; }
/** Returns whether this pipeline has custom sample positions enabled. */
bool isUsingCustomSamplePositions() { return _isUsingCustomSamplePositions; }
/** Returns the Vulkan primitive topology. */
VkPrimitiveTopology getVkPrimitiveTopology() { return _vkPrimitiveTopology; }
@ -286,9 +334,6 @@ public:
*/
bool isValidVertexBufferIndex(MVKShaderStage stage, uint32_t mtlBufferIndex);
/** Returns the custom samples used by this pipeline. */
MVKArrayRef<MTLSamplePosition> getCustomSamplePositions() { return _customSamplePositions.contents(); }
/** Returns the Metal vertex buffer index to use for the specified vertex attribute binding number. */
uint32_t getMetalBufferIndexForVertexAttributeBinding(uint32_t binding) { return _device->getMetalBufferIndexForVertexAttributeBinding(binding); }
@ -320,7 +365,8 @@ protected:
id<MTLComputePipelineState> getOrCompilePipeline(MTLComputePipelineDescriptor* plDesc, id<MTLComputePipelineState>& plState, const char* compilerType);
bool compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, MVKMTLFunction* pVtxFunctions, VkPipelineCreationFeedback* pVertexFB);
bool compileTessControlStageState(MTLComputePipelineDescriptor* tcPLDesc, VkPipelineCreationFeedback* pTessCtlFB);
void initCustomSamplePositions(const VkGraphicsPipelineCreateInfo* pCreateInfo);
void initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo);
void initSampleLocations(const VkGraphicsPipelineCreateInfo* pCreateInfo);
void initMTLRenderPipelineState(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, VkPipelineCreationFeedback* pPipelineFB, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo* pTessCtlSS, VkPipelineCreationFeedback* pTessCtlFB, const VkPipelineShaderStageCreateInfo* pTessEvalSS, VkPipelineCreationFeedback* pTessEvalFB, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB);
void initShaderConversionConfig(SPIRVToMSLConversionConfiguration& shaderConfig, const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData);
void initReservedVertexAttributeBufferCount(const VkGraphicsPipelineCreateInfo* pCreateInfo);
@ -356,11 +402,11 @@ protected:
VkPipelineTessellationStateCreateInfo _tessInfo;
VkPipelineRasterizationStateCreateInfo _rasterInfo;
VkPipelineDepthStencilStateCreateInfo _depthStencilInfo;
MVKRenderStateFlags _dynamicState;
MVKSmallVector<VkViewport, kMVKMaxViewportScissorCount> _viewports;
MVKSmallVector<VkRect2D, kMVKMaxViewportScissorCount> _scissors;
MVKSmallVector<VkDynamicState> _dynamicState;
MVKSmallVector<MTLSamplePosition> _customSamplePositions;
MVKSmallVector<VkSampleLocationEXT> _sampleLocations;
MVKSmallVector<MVKTranslatedVertexBinding> _translatedVertexBindings;
MVKSmallVector<MVKZeroDivisorVertexBinding> _zeroDivisorVertexBindings;
MVKSmallVector<MVKStagedMTLArgumentEncoders> _mtlArgumentEncoders;
@ -374,11 +420,7 @@ protected:
id<MTLComputePipelineState> _mtlTessControlStageState = nil;
id<MTLRenderPipelineState> _mtlPipelineState = nil;
float _blendConstants[4] = { 0.0, 0.0, 0.0, 1.0 };
MTLCullMode _mtlCullMode;
MTLWinding _mtlFrontWinding;
MTLTriangleFillMode _mtlFillMode;
MTLDepthClipMode _mtlDepthClipMode;
float _blendConstants[4] = {};
MVKShaderImplicitRezBinding _reservedVertexAttributeBufferCount;
MVKShaderImplicitRezBinding _viewRangeBufferIndex;
MVKShaderImplicitRezBinding _outputBufferIndex;
@ -387,6 +429,8 @@ protected:
uint32_t _tessCtlPatchOutputBufferIndex = 0;
uint32_t _tessCtlLevelBufferIndex = 0;
bool _primitiveRestartEnable = true;
bool _hasRasterInfo = false;
bool _needsVertexSwizzleBuffer = false;
bool _needsVertexBufferSizeBuffer = false;
bool _needsVertexDynamicOffsetBuffer = false;
@ -407,7 +451,7 @@ protected:
bool _needsFragmentViewRangeBuffer = false;
bool _isRasterizing = false;
bool _isRasterizingColor = false;
bool _isUsingCustomSamplePositions = false;
bool _sampleLocationsEnable = false;
};
@ -419,7 +463,6 @@ class MVKComputePipeline : public MVKPipeline {
public:
/** Binds this pipeline to the specified command encoder. */
void encode(MVKCommandEncoder* cmdEncoder, uint32_t = 0) override;
/** Returns if this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). */

View File

@ -49,7 +49,7 @@ void MVKPipelineLayout::bindDescriptorSets(MVKCommandEncoder* cmdEncoder,
MVKArrayRef<uint32_t> dynamicOffsets) {
if (!cmdEncoder) { clearConfigurationResult(); }
uint32_t dynamicOffsetIndex = 0;
size_t dsCnt = descriptorSets.size;
size_t dsCnt = descriptorSets.size();
for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) {
MVKDescriptorSet* descSet = descriptorSets[dsIdx];
uint32_t dslIdx = firstSet + dsIdx;
@ -229,6 +229,13 @@ MVKPipeline::MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVK
#pragma mark -
#pragma mark MVKGraphicsPipeline
// Set retrieve-only rendering state when pipeline is bound, as it's too late at draw command.
void MVKGraphicsPipeline::wasBound(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_renderingState.setPatchControlPoints(_tessInfo.patchControlPoints, false);
cmdEncoder->_renderingState.setSampleLocations(_sampleLocations.contents(), false);
cmdEncoder->_renderingState.setSampleLocationsEnable(_sampleLocationsEnable, false);
}
void MVKGraphicsPipeline::getStages(MVKPiplineStages& stages) {
if (isTessellationPipeline()) {
stages.push_back(kMVKGraphicsStageVertex);
@ -292,24 +299,21 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage)
// Depth stencil state - Cleared _depthStencilInfo values will disable depth testing
cmdEncoder->_depthStencilState.setDepthStencilState(_depthStencilInfo);
cmdEncoder->_stencilReferenceValueState.setReferenceValues(_depthStencilInfo);
// Rasterization
cmdEncoder->_blendColorState.setBlendColor(_blendConstants[0], _blendConstants[1],
_blendConstants[2], _blendConstants[3], false);
cmdEncoder->_depthBiasState.setDepthBias(_rasterInfo);
cmdEncoder->_viewportState.setViewports(_viewports.contents(), 0, false);
cmdEncoder->_scissorState.setScissors(_scissors.contents(), 0, false);
cmdEncoder->_mtlPrimitiveType = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(_vkPrimitiveTopology);
[mtlCmdEnc setCullMode: _mtlCullMode];
[mtlCmdEnc setFrontFacingWinding: _mtlFrontWinding];
[mtlCmdEnc setTriangleFillMode: _mtlFillMode];
if (_device->_enabledFeatures.depthClamp) {
[mtlCmdEnc setDepthClipMode: _mtlDepthClipMode];
}
cmdEncoder->_renderingState.setPrimitiveTopology(_vkPrimitiveTopology, false);
cmdEncoder->_renderingState.setPrimitiveRestartEnable(_primitiveRestartEnable, false);
cmdEncoder->_renderingState.setBlendConstants(_blendConstants, false);
cmdEncoder->_renderingState.setStencilReferenceValues(_depthStencilInfo);
cmdEncoder->_renderingState.setViewports(_viewports.contents(), 0, false);
cmdEncoder->_renderingState.setScissors(_scissors.contents(), 0, false);
if (_hasRasterInfo) {
cmdEncoder->_renderingState.setCullMode(_rasterInfo.cullMode, false);
cmdEncoder->_renderingState.setFrontFace(_rasterInfo.frontFace, false);
cmdEncoder->_renderingState.setPolygonMode(_rasterInfo.polygonMode, false);
cmdEncoder->_renderingState.setDepthBias(_rasterInfo);
cmdEncoder->_renderingState.setDepthClipEnable( !_rasterInfo.depthClampEnable, false );
}
break;
}
@ -320,21 +324,6 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage)
cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer);
}
bool MVKGraphicsPipeline::supportsDynamicState(VkDynamicState state) {
for (auto& ds : _dynamicState) {
if (state == ds) {
// Some dynamic states have other restrictions
switch (state) {
case VK_DYNAMIC_STATE_DEPTH_BIAS:
return _rasterInfo.depthBiasEnable;
default:
return true;
}
}
}
return false;
}
static const char vtxCompilerType[] = "Vertex stage pipeline for tessellation";
bool MVKGraphicsPipeline::compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc,
@ -414,6 +403,10 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device,
const VkGraphicsPipelineCreateInfo* pCreateInfo) :
MVKPipeline(device, pipelineCache, (MVKPipelineLayout*)pCreateInfo->layout, pCreateInfo->flags, parent) {
// Extract dynamic state first, as it can affect many configurations.
initDynamicState(pCreateInfo);
// Determine rasterization early, as various other structs are validated and interpreted in this context.
const VkPipelineRenderingCreateInfo* pRendInfo = getRenderingCreateInfo(pCreateInfo);
_isRasterizing = !isRasterizationDisabled(pCreateInfo);
@ -509,17 +502,12 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device,
initMTLRenderPipelineState(pCreateInfo, reflectData, pPipelineFB, pVertexSS, pVertexFB, pTessCtlSS, pTessCtlFB, pTessEvalSS, pTessEvalFB, pFragmentSS, pFragmentFB);
if ( !_hasValidMTLPipelineStates ) { return; }
// Track dynamic state
const VkPipelineDynamicStateCreateInfo* pDS = pCreateInfo->pDynamicState;
if (pDS) {
for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) {
_dynamicState.push_back(pDS->pDynamicStates[i]);
}
}
// Blending - must ignore allowed bad pColorBlendState pointer if rasterization disabled or no color attachments
if (_isRasterizingColor && pCreateInfo->pColorBlendState) {
memcpy(&_blendConstants, &pCreateInfo->pColorBlendState->blendConstants, sizeof(_blendConstants));
mvkCopy(_blendConstants, pCreateInfo->pColorBlendState->blendConstants, 4);
} else {
static float defaultBlendConstants[4] = { 0, 0.0, 0.0, 1.0 };
mvkCopy(_blendConstants, defaultBlendConstants, 4);
}
// Topology
@ -527,27 +515,13 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device,
? pCreateInfo->pInputAssemblyState->topology
: VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
_primitiveRestartEnable = pCreateInfo->pInputAssemblyState ? pCreateInfo->pInputAssemblyState->primitiveRestartEnable : true;
// Rasterization
_mtlCullMode = MTLCullModeNone;
_mtlFrontWinding = MTLWindingCounterClockwise;
_mtlFillMode = MTLTriangleFillModeFill;
_mtlDepthClipMode = MTLDepthClipModeClip;
bool hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState);
if (hasRasterInfo) {
_mtlCullMode = mvkMTLCullModeFromVkCullModeFlags(_rasterInfo.cullMode);
_mtlFrontWinding = mvkMTLWindingFromVkFrontFace(_rasterInfo.frontFace);
_mtlFillMode = mvkMTLTriangleFillModeFromVkPolygonMode(_rasterInfo.polygonMode);
if (_rasterInfo.depthClampEnable) {
if (_device->_enabledFeatures.depthClamp) {
_mtlDepthClipMode = MTLDepthClipModeClamp;
} else {
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device does not support depth clamping."));
}
}
}
_hasRasterInfo = mvkSetOrClear(&_rasterInfo, pCreateInfo->pRasterizationState);
// Must run after _isRasterizing and _dynamicState are populated
initCustomSamplePositions(pCreateInfo);
initSampleLocations(pCreateInfo);
// Depth stencil content - clearing will disable depth and stencil testing
// Must ignore allowed bad pDepthStencilState pointer if rasterization disabled or no depth or stencil attachment format
@ -557,26 +531,84 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device,
// Viewports and scissors - must ignore allowed bad pViewportState pointer if rasterization is disabled
auto pVPState = _isRasterizing ? pCreateInfo->pViewportState : nullptr;
if (pVPState) {
uint32_t vpCnt = pVPState->viewportCount;
// If viewports are dynamic, ignore them here.
uint32_t vpCnt = (pVPState->pViewports && !isDynamicState(Viewports)) ? pVPState->viewportCount : 0;
_viewports.reserve(vpCnt);
for (uint32_t vpIdx = 0; vpIdx < vpCnt; vpIdx++) {
// If viewport is dyanamic, we still add a dummy so that the count will be tracked.
VkViewport vp;
if ( !supportsDynamicState(VK_DYNAMIC_STATE_VIEWPORT) ) { vp = pVPState->pViewports[vpIdx]; }
_viewports.push_back(vp);
_viewports.push_back(pVPState->pViewports[vpIdx]);
}
uint32_t sCnt = pVPState->scissorCount;
// If scissors are dynamic, ignore them here.
uint32_t sCnt = (pVPState->pScissors && !isDynamicState(Scissors)) ? pVPState->scissorCount : 0;
_scissors.reserve(sCnt);
for (uint32_t sIdx = 0; sIdx < sCnt; sIdx++) {
// If scissor is dyanamic, we still add a dummy so that the count will be tracked.
VkRect2D sc;
if ( !supportsDynamicState(VK_DYNAMIC_STATE_SCISSOR) ) { sc = pVPState->pScissors[sIdx]; }
_scissors.push_back(sc);
_scissors.push_back(pVPState->pScissors[sIdx]);
}
}
}
static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) {
switch (vkDynamicState) {
case VK_DYNAMIC_STATE_BLEND_CONSTANTS: return BlendConstants;
case VK_DYNAMIC_STATE_CULL_MODE: return CullMode;
case VK_DYNAMIC_STATE_DEPTH_BIAS: return DepthBias;
case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE: return DepthBiasEnable;
case VK_DYNAMIC_STATE_DEPTH_BOUNDS: return DepthBounds;
case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE: return DepthBoundsTestEnable;
case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT: return DepthClipEnable;
case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT: return DepthClipEnable;
case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP: return DepthCompareOp;
case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: return DepthTestEnable;
case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: return DepthWriteEnable;
case VK_DYNAMIC_STATE_FRONT_FACE: return FrontFace;
case VK_DYNAMIC_STATE_LINE_WIDTH: return LineWidth;
case VK_DYNAMIC_STATE_LOGIC_OP_EXT: return LogicOp;
case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: return LogicOpEnable;
case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: return PatchControlPoints;
case VK_DYNAMIC_STATE_POLYGON_MODE_EXT: return PolygonMode;
case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE: return PrimitiveRestartEnable;
case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: return PrimitiveTopology;
case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE: return RasterizerDiscardEnable;
case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: return SampleLocations;
case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT: return SampleLocationsEnable;
case VK_DYNAMIC_STATE_SCISSOR: return Scissors;
case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: return Scissors;
case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: return StencilCompareMask;
case VK_DYNAMIC_STATE_STENCIL_OP: return StencilOp;
case VK_DYNAMIC_STATE_STENCIL_REFERENCE: return StencilReference;
case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE: return StencilTestEnable;
case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: return StencilWriteMask;
case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: return VertexStride;
case VK_DYNAMIC_STATE_VIEWPORT: return Viewports;
case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT: return Viewports;
default: return Unknown;
}
}
// This is executed first during pipeline creation. Do not depend on any internal state here.
void MVKGraphicsPipeline::initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo) {
const auto* pDS = pCreateInfo->pDynamicState;
if ( !pDS ) { return; }
for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) {
auto dynStateType = getRenderStateType(pDS->pDynamicStates[i]);
bool isDynamic = true;
// Some dynamic states have other restrictions
switch (dynStateType) {
case VertexStride:
isDynamic = _device->_pMetalFeatures->dynamicVertexStride;
if ( !isDynamic ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device and platform does not support VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE (macOS 14.0 or iOS/tvOS 17.0, plus either Apple4 or Mac2 GPU).")); }
break;
default:
break;
}
if (isDynamic) { _dynamicState.enable(dynStateType); }
}
}
// Either returns an existing pipeline state or compiles a new one.
id<MTLRenderPipelineState> MVKGraphicsPipeline::getOrCompilePipeline(MTLRenderPipelineDescriptor* plDesc,
id<MTLRenderPipelineState>& plState) {
@ -603,7 +635,7 @@ id<MTLComputePipelineState> MVKGraphicsPipeline::getOrCompilePipeline(MTLCompute
}
// Must run after _isRasterizing and _dynamicState are populated
void MVKGraphicsPipeline::initCustomSamplePositions(const VkGraphicsPipelineCreateInfo* pCreateInfo) {
void MVKGraphicsPipeline::initSampleLocations(const VkGraphicsPipelineCreateInfo* pCreateInfo) {
// Must ignore allowed bad pMultisampleState pointer if rasterization disabled
if ( !(_isRasterizing && pCreateInfo->pMultisampleState) ) { return; }
@ -612,12 +644,9 @@ void MVKGraphicsPipeline::initCustomSamplePositions(const VkGraphicsPipelineCrea
switch (next->sType) {
case VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT: {
auto* pSampLocnsCreateInfo = (VkPipelineSampleLocationsStateCreateInfoEXT*)next;
_isUsingCustomSamplePositions = pSampLocnsCreateInfo->sampleLocationsEnable;
if (_isUsingCustomSamplePositions && !supportsDynamicState(VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT)) {
for (uint32_t slIdx = 0; slIdx < pSampLocnsCreateInfo->sampleLocationsInfo.sampleLocationsCount; slIdx++) {
auto& sl = pSampLocnsCreateInfo->sampleLocationsInfo.pSampleLocations[slIdx];
_customSamplePositions.push_back(MTLSamplePositionMake(sl.x, sl.y));
}
_sampleLocationsEnable = pSampLocnsCreateInfo->sampleLocationsEnable;
for (uint32_t slIdx = 0; slIdx < pSampLocnsCreateInfo->sampleLocationsInfo.sampleLocationsCount; slIdx++) {
_sampleLocations.push_back(pSampLocnsCreateInfo->sampleLocationsInfo.pSampleLocations[slIdx]);
}
break;
}
@ -1311,6 +1340,10 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto
return true;
}
#if !MVK_XCODE_15
static const NSUInteger MTLBufferLayoutStrideDynamic = NSUIntegerMax;
#endif
template<class T>
bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc,
const VkPipelineVertexInputStateCreateInfo* pVI,
@ -1328,31 +1361,30 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc,
}
// Vertex buffer bindings
uint32_t vbCnt = pVI->vertexBindingDescriptionCount;
bool isVtxStrideStatic = !isDynamicState(VertexStride);
uint32_t maxBinding = 0;
uint32_t vbCnt = pVI->vertexBindingDescriptionCount;
for (uint32_t i = 0; i < vbCnt; i++) {
const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i];
if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) {
// Vulkan allows any stride, but Metal only allows multiples of 4.
// TODO: We could try to expand the buffer to the required alignment in that case.
VkDeviceSize mtlVtxStrideAlignment = _device->_pMetalFeatures->vertexStrideAlignment;
if ((pVKVB->stride % mtlVtxStrideAlignment) != 0) {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", mtlVtxStrideAlignment));
// Vulkan allows any stride, but Metal requires multiples of 4 on older GPUs.
if (isVtxStrideStatic && (pVKVB->stride % _device->_pMetalFeatures->vertexStrideAlignment) != 0) {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", _device->_pMetalFeatures->vertexStrideAlignment));
return false;
}
maxBinding = max(pVKVB->binding, maxBinding);
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
auto vbDesc = inputDesc.layouts[vbIdx];
if (pVKVB->stride == 0) {
if (isVtxStrideStatic && pVKVB->stride == 0) {
// Stride can't be 0, it will be set later to attributes' maximum offset + size
// to prevent it from being larger than the underlying buffer permits.
vbDesc.stride = 0;
vbDesc.stepFunction = (decltype(vbDesc.stepFunction))MTLStepFunctionConstant;
vbDesc.stepRate = 0;
} else {
vbDesc.stride = pVKVB->stride;
vbDesc.stride = isVtxStrideStatic ? pVKVB->stride : MTLBufferLayoutStrideDynamic;
vbDesc.stepFunction = (decltype(vbDesc.stepFunction))mvkMTLStepFunctionFromVkVertexInputRate(pVKVB->inputRate, isTessellationPipeline());
vbDesc.stepRate = 1;
}
@ -1386,52 +1418,54 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc,
if (shaderConfig.isShaderInputLocationUsed(pVKVA->location)) {
uint32_t vaBinding = pVKVA->binding;
uint32_t vaOffset = pVKVA->offset;
auto vaDesc = inputDesc.attributes[pVKVA->location];
auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format);
// Vulkan allows offsets to exceed the buffer stride, but Metal doesn't.
// If this is the case, fetch a translated artificial buffer binding, using the same MTLBuffer,
// but that is translated so that the reduced VA offset fits into the binding stride.
const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions;
uint32_t attrSize = 0;
for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) {
if (pVKVB->binding == pVKVA->binding) {
attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format);
if (pVKVB->stride == 0) {
// The step is set to constant, but we need to change stride to be non-zero for metal.
// Look for the maximum offset + size to set as the stride.
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
auto vbDesc = inputDesc.layouts[vbIdx];
uint32_t strideLowBound = vaOffset + attrSize;
if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound;
} else if (vaOffset && vaOffset + attrSize > pVKVB->stride) {
// Move vertex attribute offset into the stride. This vertex attribute may be
// combined with other vertex attributes into the same translated buffer binding.
// But if the reduced offset combined with the vertex attribute size still won't
// fit into the buffer binding stride, force the vertex attribute offset to zero,
// effectively dedicating this vertex attribute to its own buffer binding.
uint32_t origOffset = vaOffset;
vaOffset %= pVKVB->stride;
if (vaOffset + attrSize > pVKVB->stride) {
vaOffset = 0;
if (isVtxStrideStatic) {
const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions;
uint32_t attrSize = 0;
for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) {
if (pVKVB->binding == pVKVA->binding) {
attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format);
if (pVKVB->stride == 0) {
// The step is set to constant, but we need to change stride to be non-zero for metal.
// Look for the maximum offset + size to set as the stride.
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
auto vbDesc = inputDesc.layouts[vbIdx];
uint32_t strideLowBound = vaOffset + attrSize;
if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound;
} else if (vaOffset && vaOffset + attrSize > pVKVB->stride) {
// Move vertex attribute offset into the stride. This vertex attribute may be
// combined with other vertex attributes into the same translated buffer binding.
// But if the reduced offset combined with the vertex attribute size still won't
// fit into the buffer binding stride, force the vertex attribute offset to zero,
// effectively dedicating this vertex attribute to its own buffer binding.
uint32_t origOffset = vaOffset;
vaOffset %= pVKVB->stride;
if (vaOffset + attrSize > pVKVB->stride) {
vaOffset = 0;
}
vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding);
if (zeroDivisorBindings.count(pVKVB->binding)) {
zeroDivisorBindings.insert(vaBinding);
}
}
vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding);
if (zeroDivisorBindings.count(pVKVB->binding)) {
zeroDivisorBindings.insert(vaBinding);
}
break;
}
break;
}
if (pVKVB->stride && attrSize > pVKVB->stride) {
/* Metal does not support overlapping loads. Truncate format vector length to prevent an assertion
* and hope it's not used by the shader. */
MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride);
reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). Changing descriptor format from %s to %s.",
attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat));
mtlFormat = (decltype(vaDesc.format))newFormat;
}
}
auto vaDesc = inputDesc.attributes[pVKVA->location];
auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format);
if (pVKVB->stride && attrSize > pVKVB->stride) {
/* Metal does not support overlapping loads. Truncate format vector length to prevent an assertion
* and hope it's not used by the shader. */
MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride);
reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). Changing descriptor format from %s to %s.",
attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat));
mtlFormat = (decltype(vaDesc.format))newFormat;
}
vaDesc.format = mtlFormat;
vaDesc.bufferIndex = (decltype(vaDesc.bufferIndex))getMetalBufferIndexForVertexAttributeBinding(vaBinding);
vaDesc.offset = vaOffset;
@ -1607,7 +1641,7 @@ void MVKGraphicsPipeline::addFragmentOutputToPipeline(MTLRenderPipelineDescripto
// Multisampling - must ignore allowed bad pMultisampleState pointer if rasterization disabled
if (_isRasterizing && pCreateInfo->pMultisampleState) {
plDesc.sampleCount = mvkSampleCountFromVkSampleCountFlagBits(pCreateInfo->pMultisampleState->rasterizationSamples);
plDesc.rasterSampleCount = mvkSampleCountFromVkSampleCountFlagBits(pCreateInfo->pMultisampleState->rasterizationSamples);
plDesc.alphaToCoverageEnabled = pCreateInfo->pMultisampleState->alphaToCoverageEnable;
plDesc.alphaToOneEnabled = pCreateInfo->pMultisampleState->alphaToOneEnable;
@ -1646,6 +1680,7 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu
shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff;
shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff;
shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff;
shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = _device->_pMetalFeatures->needsCubeGradWorkaround;
MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
layout->populateShaderConversionConfig(shaderConfig);
@ -1701,6 +1736,7 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu
shaderConfig.options.mslOptions.multiview = mvkIsMultiview(pRendInfo->viewMask);
shaderConfig.options.mslOptions.multiview_layered_rendering = getPhysicalDevice()->canUseInstancingForMultiview();
shaderConfig.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT);
shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0);
#if MVK_MACOS
shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
#endif
@ -1897,17 +1933,22 @@ void MVKGraphicsPipeline::addPrevStageOutputToShaderConversionConfig(SPIRVToMSLC
}
}
// We render points if either the topology or polygon fill mode dictate it
// We render points if either the static topology or static polygon fill mode dictate it
bool MVKGraphicsPipeline::isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo) {
return ((pCreateInfo->pInputAssemblyState && (pCreateInfo->pInputAssemblyState->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST)) ||
(pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT)));
return ((pCreateInfo->pInputAssemblyState &&
(pCreateInfo->pInputAssemblyState->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) &&
!isDynamicState(PrimitiveTopology)) ||
(pCreateInfo->pRasterizationState &&
(pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT) &&
!isDynamicState(PolygonMode)));
}
// We disable rasterization if either rasterizerDiscard is enabled or the cull mode dictates it.
// We disable rasterization if either static rasterizerDiscard is enabled or the static cull mode dictates it.
bool MVKGraphicsPipeline::isRasterizationDisabled(const VkGraphicsPipelineCreateInfo* pCreateInfo) {
return (pCreateInfo->pRasterizationState &&
(pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
((pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK) && pCreateInfo->pInputAssemblyState &&
((pCreateInfo->pRasterizationState->rasterizerDiscardEnable && !isDynamicState(RasterizerDiscardEnable)) ||
((pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK) && !isDynamicState(CullMode) &&
pCreateInfo->pInputAssemblyState &&
(mvkMTLPrimitiveTopologyClassFromVkPrimitiveTopology(pCreateInfo->pInputAssemblyState->topology) == MTLPrimitiveTopologyClassTriangle))));
}
@ -2105,6 +2146,7 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI
shaderConfig.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageCompute];
shaderConfig.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute];
shaderConfig.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute];
shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0);
MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderConfig, pSS->pSpecializationInfo, this, pStageFB);
if ( !func.getMTLFunction() ) {
@ -2269,7 +2311,7 @@ VkResult MVKPipelineCache::writeDataImpl(size_t* pDataSize, void* pData) {
// Serializes the data in this cache to a stream
void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) {
#if MVK_USE_CEREAL
MVKPerformanceTracker& activityTracker = isCounting
MVKPerformanceTracker& perfTracker = isCounting
? _device->_performanceStatistics.pipelineCache.sizePipelineCache
: _device->_performanceStatistics.pipelineCache.writePipelineCache;
@ -2297,7 +2339,7 @@ void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) {
writer(cacheIter.getShaderConversionConfig());
writer(cacheIter.getShaderConversionResultInfo());
writer(cacheIter.getCompressedMSL());
_device->addActivityPerformance(activityTracker, startTime);
_device->addPerformanceInterval(perfTracker, startTime);
}
}
@ -2366,7 +2408,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) {
// Add the shader library to the staging cache.
MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey);
_device->addActivityPerformance(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime);
slCache->addShaderLibrary(&shaderConversionConfig, resultInfo, compressedMSL);
break;
@ -2475,7 +2517,9 @@ namespace SPIRV_CROSS_NAMESPACE {
opt.force_sample_rate_shading,
opt.manual_helper_invocation_updates,
opt.check_discarded_frag_stores,
opt.sample_dref_lod_array_as_grad);
opt.sample_dref_lod_array_as_grad,
opt.replace_recursive_inputs,
opt.agx_manual_cube_grad_fixup);
}
template<class Archive>

View File

@ -20,7 +20,7 @@
#include "MVKBaseObject.h"
#include "MVKOSExtensions.h"
#include "mvk_datatypes.h"
#include "mvk_datatypes.hpp"
#include <spirv_msl.hpp>
#include <unordered_map>

View File

@ -529,6 +529,7 @@ MTLClearColor MVKPixelFormats::getMTLClearColor(VkClearValue vkClearValue, VkFor
#define OFFSET_SNORM(COLOR, BIT_WIDTH) OFFSET_NORM(-1.0, COLOR, BIT_WIDTH - 1)
switch (vkFormat) {
case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
OFFSET_UNORM(red, 4)
@ -831,7 +832,7 @@ void MVKPixelFormats::initVkFormatCapabilities() {
addVkFormatDesc( R4G4_UNORM_PACK8, Invalid, Invalid, Invalid, Invalid, 1, 1, 1, ColorFloat );
addVkFormatDesc( R4G4B4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat );
addVkFormatDesc( B4G4R4A4_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat );
addVkFormatDescSwizzled( B4G4R4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, B, G, R, A );
addVkFormatDescSwizzled( A4R4G4B4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, G, B, A, R );
addVkFormatDescSwizzled( A4B4G4R4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat, A, B, G, R );
@ -1482,26 +1483,21 @@ void MVKPixelFormats::addMTLVertexFormatCapabilities(id<MTLDevice> mtlDevice,
}
}
// If supporting a physical device, retrieve the MTLDevice from it,
// otherwise create a temp copy of the system default MTLDevice.
// If supporting a physical device, retrieve the MTLDevice from it, otherwise
// retrieve the array of physical GPU devices, and use the first one.
// Retrieving the GPUs creates a number of autoreleased instances of Metal
// and other Obj-C classes, so wrap it all in an autorelease pool.
void MVKPixelFormats::modifyMTLFormatCapabilities() {
if (_physicalDevice) {
modifyMTLFormatCapabilities(_physicalDevice->getMTLDevice());
} else {
#if MVK_IOS_OR_TVOS
id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice(); // temp retained
#endif
#if MVK_MACOS
NSArray<id<MTLDevice>>* mtlDevices = MTLCopyAllDevices(); // temp retained
id<MTLDevice> mtlDevice = [mtlDevices count] > 0 ? [mtlDevices[0] retain] : MTLCreateSystemDefaultDevice(); // temp retained
[mtlDevices release]; // temp release
#endif
modifyMTLFormatCapabilities(mtlDevice);
[mtlDevice release]; // release temp instance
@autoreleasepool {
auto* mtlDevs = mvkGetAvailableMTLDevicesArray(nullptr);
if (mtlDevs.count) { modifyMTLFormatCapabilities(mtlDevs[0]); }
}
}
}
// Mac Catalyst does not support feature sets, so we redefine them to GPU families in MVKDevice.h.
#if MVK_MACCAT
#define addFeatSetMTLPixFmtCaps(FEAT_SET, MTL_FMT, CAPS) \

View File

@ -56,7 +56,7 @@ public:
virtual void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder);
/** Finishes the specified queries and marks them as available. */
virtual void finishQueries(const MVKArrayRef<uint32_t> queries);
virtual void finishQueries(MVKArrayRef<const uint32_t> queries);
/** Resets the results and availability status of the specified queries. */
virtual void resetResults(uint32_t firstQuery, uint32_t queryCount, MVKCommandEncoder* cmdEncoder);
@ -212,7 +212,7 @@ class MVKTimestampQueryPool : public MVKGPUCounterQueryPool {
public:
void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override;
void finishQueries(const MVKArrayRef<uint32_t> queries) override;
void finishQueries(MVKArrayRef<const uint32_t> queries) override;
#pragma mark Construction

View File

@ -52,7 +52,7 @@ void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
}
// Mark queries as available
void MVKQueryPool::finishQueries(const MVKArrayRef<uint32_t> queries) {
void MVKQueryPool::finishQueries(MVKArrayRef<const uint32_t> queries) {
lock_guard<mutex> lock(_availabilityLock);
for (uint32_t qry : queries) {
if (_availability[qry] == DeviceAvailable) {
@ -379,9 +379,9 @@ void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncod
}
// If not using MTLCounterSampleBuffer, update timestamp values, then mark queries as available
void MVKTimestampQueryPool::finishQueries(const MVKArrayRef<uint32_t> queries) {
void MVKTimestampQueryPool::finishQueries(MVKArrayRef<const uint32_t> queries) {
if ( !_mtlCounterBuffer ) {
uint64_t ts = mvkGetTimestamp();
uint64_t ts = mvkGetElapsedNanoseconds();
for (uint32_t qry : queries) { _timestamps[qry] = ts; }
}
MVKQueryPool::finishQueries(queries);

View File

@ -86,10 +86,14 @@ public:
/** Returns a pointer to the Vulkan instance. */
MVKInstance* getInstance() override { return _device->getInstance(); }
/** Return the name of this queue. */
const std::string& getName() { return _name; }
#pragma mark Queue submissions
/** Submits the specified command buffers to the queue. */
VkResult submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse);
template <typename S>
VkResult submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse);
/** Submits the specified presentation command to the queue. */
VkResult submit(const VkPresentInfoKHR* pPresentInfo);
@ -97,10 +101,6 @@ public:
/** Block the current thread until this queue is idle. */
VkResult waitIdle(MVKCommandUse cmdUse);
/** Return the name of this queue. */
const std::string& getName() { return _name; }
#pragma mark Metal
/** Returns the Metal queue underlying this queue. */
@ -135,36 +135,53 @@ protected:
friend class MVKQueueCommandBufferSubmission;
friend class MVKQueuePresentSurfaceSubmission;
MVKBaseObject* getBaseObject() override { return this; };
void propagateDebugName() override;
void initName();
void initExecQueue();
void initMTLCommandQueue();
void initGPUCaptureScopes();
void destroyExecQueue();
VkResult submit(MVKQueueSubmission* qSubmit);
NSString* getMTLCommandBufferLabel(MVKCommandUse cmdUse);
void handleMTLCommandBufferError(id<MTLCommandBuffer> mtlCmdBuff);
MVKQueueFamily* _queueFamily;
uint32_t _index;
float _priority;
dispatch_queue_t _execQueue;
id<MTLCommandQueue> _mtlQueue;
std::string _name;
NSString* _mtlCmdBuffLabelEndCommandBuffer;
NSString* _mtlCmdBuffLabelQueueSubmit;
NSString* _mtlCmdBuffLabelQueuePresent;
NSString* _mtlCmdBuffLabelDeviceWaitIdle;
NSString* _mtlCmdBuffLabelQueueWaitIdle;
NSString* _mtlCmdBuffLabelAcquireNextImage;
NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges;
MVKGPUCaptureScope* _submissionCaptureScope;
dispatch_queue_t _execQueue;
id<MTLCommandQueue> _mtlQueue = nil;
NSString* _mtlCmdBuffLabelBeginCommandBuffer = nil;
NSString* _mtlCmdBuffLabelQueueSubmit = nil;
NSString* _mtlCmdBuffLabelQueuePresent = nil;
NSString* _mtlCmdBuffLabelDeviceWaitIdle = nil;
NSString* _mtlCmdBuffLabelQueueWaitIdle = nil;
NSString* _mtlCmdBuffLabelAcquireNextImage = nil;
NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil;
MVKGPUCaptureScope* _submissionCaptureScope = nil;
float _priority;
uint32_t _index;
};
#pragma mark -
#pragma mark MVKQueueSubmission
typedef struct MVKSemaphoreSubmitInfo {
private:
MVKSemaphore* _semaphore;
public:
uint64_t value;
VkPipelineStageFlags2 stageMask;
uint32_t deviceIndex;
void encodeWait(id<MTLCommandBuffer> mtlCmdBuff);
void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff);
MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo);
MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, VkPipelineStageFlags stageMask);
MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other);
MVKSemaphoreSubmitInfo& operator=(const MVKSemaphoreSubmitInfo& other);
~MVKSemaphoreSubmitInfo();
} MVKSemaphoreSubmitInfo;
/** This is an abstract class for an operation that can be submitted to an MVKQueue. */
class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin {
@ -178,11 +195,16 @@ public:
*
* Upon completion of this function, no further calls should be made to this instance.
*/
virtual void execute() = 0;
virtual VkResult execute() = 0;
MVKQueueSubmission(MVKQueue* queue,
uint32_t waitSemaphoreInfoCount,
const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos);
MVKQueueSubmission(MVKQueue* queue,
uint32_t waitSemaphoreCount,
const VkSemaphore* pWaitSemaphores);
const VkSemaphore* pWaitSemaphores,
const VkPipelineStageFlags* pWaitDstStageMask);
~MVKQueueSubmission() override;
@ -190,15 +212,25 @@ protected:
friend class MVKQueue;
virtual void finish() = 0;
MVKDevice* getDevice() { return _queue->getDevice(); }
MVKQueue* _queue;
MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _waitSemaphores;
MVKSmallVector<MVKSemaphoreSubmitInfo> _waitSemaphores;
};
#pragma mark -
#pragma mark MVKQueueCommandBufferSubmission
typedef struct MVKCommandBufferSubmitInfo {
MVKCommandBuffer* commandBuffer;
uint32_t deviceMask;
MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo);
MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer);
} MVKCommandBufferSubmitInfo;
/**
* Submits an empty set of command buffers to the queue.
* Used for fence-only command submissions.
@ -206,9 +238,17 @@ protected:
class MVKQueueCommandBufferSubmission : public MVKQueueSubmission {
public:
void execute() override;
VkResult execute() override;
MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse);
MVKQueueCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo2* pSubmit,
VkFence fence,
MVKCommandUse cmdUse);
MVKQueueCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo* pSubmit,
VkFence fence,
MVKCommandUse cmdUse);
~MVKQueueCommandBufferSubmission() override;
@ -217,16 +257,16 @@ protected:
id<MTLCommandBuffer> getActiveMTLCommandBuffer();
void setActiveMTLCommandBuffer(id<MTLCommandBuffer> mtlCmdBuff);
void commitActiveMTLCommandBuffer(bool signalCompletion = false);
VkResult commitActiveMTLCommandBuffer(bool signalCompletion = false);
void finish() override;
virtual void submitCommandBuffers() {}
MVKCommandEncodingContext _encodingContext;
MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _signalSemaphores;
MVKFence* _fence;
id<MTLCommandBuffer> _activeMTLCommandBuffer;
MVKCommandUse _commandUse;
bool _emulatedWaitDone; //Used to track if we've already waited for emulated semaphores.
MVKSmallVector<MVKSemaphoreSubmitInfo> _signalSemaphores;
MVKFence* _fence = nullptr;
id<MTLCommandBuffer> _activeMTLCommandBuffer = nil;
MVKCommandUse _commandUse = kMVKCommandUseNone;
bool _emulatedWaitDone = false; //Used to track if we've already waited for emulated semaphores.
};
@ -238,25 +278,20 @@ template <size_t N>
class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission {
public:
MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence) :
MVKQueueCommandBufferSubmission(queue, pSubmit, fence, kMVKCommandUseQueueSubmit) {
MVKQueueFullCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo2* pSubmit,
VkFence fence,
MVKCommandUse cmdUse);
// pSubmit can be null if just tracking the fence alone
if (pSubmit) {
uint32_t cbCnt = pSubmit->commandBufferCount;
_cmdBuffers.reserve(cbCnt);
for (uint32_t i = 0; i < cbCnt; i++) {
MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]);
_cmdBuffers.push_back(cb);
setConfigurationResult(cb->getConfigurationResult());
}
}
}
MVKQueueFullCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo* pSubmit,
VkFence fence,
MVKCommandUse cmdUse);
protected:
void submitCommandBuffers() override;
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
MVKSmallVector<MVKCommandBufferSubmitInfo, N> _cmdBuffers;
};
@ -267,7 +302,7 @@ protected:
class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission {
public:
void execute() override;
VkResult execute() override;
MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
const VkPresentInfoKHR* pPresentInfo);

View File

@ -18,6 +18,7 @@
#include "MVKInstance.h"
#include "MVKQueue.h"
#include "MVKSurface.h"
#include "MVKSwapchain.h"
#include "MVKSync.h"
#include "MVKFoundation.h"
@ -68,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); }
// Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased.
// This is critical for apps that don't use standard OS autoreleasing runloop threading.
static inline void execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { qSubmit->execute(); } }
static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } }
// Executes the submmission, either immediately, or by dispatching to an execution queue.
// Submissions to the execution queue are wrapped in a dedicated autoreleasepool.
@ -79,43 +80,50 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils
VkResult rslt = qSubmit->getConfigurationResult(); // Extract result before submission to avoid race condition with early destruction
// Extract result before submission to avoid race condition with early destruction
// Submit regardless of config result, to ensure submission semaphores and fences are signalled.
// The submissions will ensure a misconfiguration will be safe to execute.
VkResult rslt = qSubmit->getConfigurationResult();
if (_execQueue) {
dispatch_async(_execQueue, ^{ execute(qSubmit); } );
} else {
execute(qSubmit);
rslt = execute(qSubmit);
}
return rslt;
}
VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse) {
static inline uint32_t getCommandBufferCount(const VkSubmitInfo2* pSubmitInfo) { return pSubmitInfo->commandBufferInfoCount; }
static inline uint32_t getCommandBufferCount(const VkSubmitInfo* pSubmitInfo) { return pSubmitInfo->commandBufferCount; }
template <typename S>
VkResult MVKQueue::submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse) {
// Fence-only submission
if (submitCount == 0 && fence) {
return submit(new MVKQueueCommandBufferSubmission(this, nullptr, fence, cmdUse));
return submit(new MVKQueueCommandBufferSubmission(this, (S*)nullptr, fence, cmdUse));
}
VkResult rslt = VK_SUCCESS;
for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) {
VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? fence : VK_NULL_HANDLE; // last one gets the fence
const VkSubmitInfo* pVkSub = &pSubmits[sIdx];
const S* pVkSub = &pSubmits[sIdx];
MVKQueueCommandBufferSubmission* mvkSub;
uint32_t cbCnt = pVkSub->commandBufferCount;
uint32_t cbCnt = getCommandBufferCount(pVkSub);
if (cbCnt <= 1) {
mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil);
mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil, cmdUse);
} else if (cbCnt <= 16) {
mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil);
mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil, cmdUse);
} else if (cbCnt <= 32) {
mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil);
mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil, cmdUse);
} else if (cbCnt <= 64) {
mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil);
mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil, cmdUse);
} else if (cbCnt <= 128) {
mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil);
mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil, cmdUse);
} else if (cbCnt <= 256) {
mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil);
mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil, cmdUse);
} else {
mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil);
mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil, cmdUse);
}
VkResult subRslt = submit(mvkSub);
@ -124,33 +132,30 @@ VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, Vk
return rslt;
}
// Concrete implementations of templated MVKQueue::submit().
template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence, MVKCommandUse cmdUse);
template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse);
VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) {
return submit(new MVKQueuePresentSurfaceSubmission(this, pPresentInfo));
}
// Create an empty submit struct and fence, submit to queue and wait on fence.
VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
VkResult rslt = _device->getConfigurationResult();
if (rslt != VK_SUCCESS) { return rslt; }
VkFenceCreateInfo vkFenceInfo = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
};
auto* mtlCmdBuff = getMTLCommandBuffer(cmdUse);
[mtlCmdBuff commit];
[mtlCmdBuff waitUntilCompleted];
// The MVKFence is retained by the command submission, and may outlive this function while
// the command submission finishes, so we can't allocate MVKFence locally on the stack.
MVKFence* mvkFence = new MVKFence(_device, &vkFenceInfo);
VkFence vkFence = (VkFence)mvkFence;
submit(0, nullptr, vkFence, cmdUse);
VkResult rslt = mvkWaitForFences(_device, 1, &vkFence, false);
mvkFence->destroy();
return rslt;
return VK_SUCCESS;
}
id<MTLCommandBuffer> MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) {
id<MTLCommandBuffer> mtlCmdBuff = nil;
MVKDevice* mvkDev = getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
#if MVK_XCODE_12
if ([_mtlQueue respondsToSelector: @selector(commandBufferWithDescriptor:)]) {
MTLCommandBufferDescriptor* mtlCmdBuffDesc = [MTLCommandBufferDescriptor new]; // temp retain
@ -167,53 +172,126 @@ id<MTLCommandBuffer> MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool re
} else {
mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences];
}
setLabelIfNotNil(mtlCmdBuff, getMTLCommandBufferLabel(cmdUse));
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime);
NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse);
setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel);
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mtlCB) { handleMTLCommandBufferError(mtlCB); }];
if ( !mtlCmdBuff ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "%s could not be acquired.", mtlCmdBuffLabel.UTF8String); }
return mtlCmdBuff;
}
NSString* MVKQueue::getMTLCommandBufferLabel(MVKCommandUse cmdUse) {
#define CASE_GET_LABEL(cmdUse) \
case kMVKCommandUse ##cmdUse: \
if ( !_mtlCmdBuffLabel ##cmdUse ) { _mtlCmdBuffLabel ##cmdUse = [[NSString stringWithFormat: @"%@ on Queue %d-%d", mvkMTLCommandBufferLabel(kMVKCommandUse ##cmdUse), _queueFamily->getIndex(), _index] retain]; } \
return _mtlCmdBuffLabel ##cmdUse
#define CASE_GET_LABEL(cu) \
case kMVKCommandUse ##cu: \
if ( !_mtlCmdBuffLabel ##cu ) { _mtlCmdBuffLabel ##cu = [[NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(kMVKCommandUse ##cu), _queueFamily->getIndex(), _index] retain]; } \
return _mtlCmdBuffLabel ##cu
switch (cmdUse) {
CASE_GET_LABEL(EndCommandBuffer);
CASE_GET_LABEL(BeginCommandBuffer);
CASE_GET_LABEL(QueueSubmit);
CASE_GET_LABEL(QueuePresent);
CASE_GET_LABEL(QueueWaitIdle);
CASE_GET_LABEL(DeviceWaitIdle);
CASE_GET_LABEL(AcquireNextImage);
CASE_GET_LABEL(InvalidateMappedMemoryRanges);
default: return mvkMTLCommandBufferLabel(cmdUse);
default:
MVKAssert(false, "Uncached MTLCommandBuffer label for command use %s.", mvkVkCommandName(cmdUse));
return [NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(cmdUse), _queueFamily->getIndex(), _index];
}
#undef CASE_GET_LABEL
}
#if MVK_XCODE_12
static const char* mvkStringFromMTLCommandEncoderErrorState(MTLCommandEncoderErrorState errState) {
switch (errState) {
case MTLCommandEncoderErrorStateUnknown: return "unknown";
case MTLCommandEncoderErrorStateAffected: return "affected";
case MTLCommandEncoderErrorStateCompleted: return "completed";
case MTLCommandEncoderErrorStateFaulted: return "faulted";
case MTLCommandEncoderErrorStatePending: return "pending";
}
return "unknown";
}
#endif
void MVKQueue::handleMTLCommandBufferError(id<MTLCommandBuffer> mtlCmdBuff) {
if (mtlCmdBuff.status != MTLCommandBufferStatusError) { return; }
// If a command buffer error has occurred, report the error. If the error affects
// the physical device, always mark both the device and physical device as lost.
// If the error is local to this command buffer, optionally mark the device (but not the
// physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice.
VkResult vkErr = VK_ERROR_UNKNOWN;
bool markDeviceLoss = !getMVKConfig().resumeLostDevice;
bool markPhysicalDeviceLoss = false;
switch (mtlCmdBuff.error.code) {
case MTLCommandBufferErrorBlacklisted:
case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement.
#if MVK_MACOS && !MVK_MACCAT
case MTLCommandBufferErrorDeviceRemoved:
#endif
vkErr = VK_ERROR_DEVICE_LOST;
markDeviceLoss = true;
markPhysicalDeviceLoss = true;
break;
case MTLCommandBufferErrorTimeout:
vkErr = VK_TIMEOUT;
break;
#if MVK_XCODE_13
case MTLCommandBufferErrorStackOverflow:
#endif
case MTLCommandBufferErrorPageFault:
case MTLCommandBufferErrorOutOfMemory:
default:
vkErr = VK_ERROR_OUT_OF_DEVICE_MEMORY;
break;
}
reportError(vkErr, "MTLCommandBuffer \"%s\" execution failed (code %li): %s",
mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : "",
mtlCmdBuff.error.code, mtlCmdBuff.error.localizedDescription.UTF8String);
if (markDeviceLoss) { getDevice()->markLost(markPhysicalDeviceLoss); }
#if MVK_XCODE_12
if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) {
if (NSArray<id<MTLCommandBufferEncoderInfo>>* mtlEncInfo = mtlCmdBuff.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) {
MVKLogInfo("Encoders for %p \"%s\":", mtlCmdBuff, mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : "");
for (id<MTLCommandBufferEncoderInfo> enc in mtlEncInfo) {
MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromMTLCommandEncoderErrorState(enc.errorState));
if (enc.debugSignposts.count > 0) {
MVKLogInfo(" Debug signposts:");
for (NSString* signpost in enc.debugSignposts) {
MVKLogInfo(" - %s", signpost.UTF8String);
}
}
}
}
}
if ([mtlCmdBuff respondsToSelector: @selector(logs)]) {
bool isFirstMsg = true;
for (id<MTLFunctionLog> log in mtlCmdBuff.logs) {
if (isFirstMsg) {
MVKLogInfo("Shader log messages:");
isFirstMsg = false;
}
MVKLogInfo("%s", log.description.UTF8String);
}
}
#endif
}
#pragma mark Construction
#define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED
MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority)
: MVKDeviceTrackingMixin(device) {
MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority) : MVKDeviceTrackingMixin(device) {
_queueFamily = queueFamily;
_index = index;
_priority = priority;
_mtlCmdBuffLabelEndCommandBuffer = nil;
_mtlCmdBuffLabelQueueSubmit = nil;
_mtlCmdBuffLabelQueuePresent = nil;
_mtlCmdBuffLabelDeviceWaitIdle = nil;
_mtlCmdBuffLabelQueueWaitIdle = nil;
_mtlCmdBuffLabelAcquireNextImage = nil;
_mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil;
initName();
initExecQueue();
initMTLCommandQueue();
initGPUCaptureScopes();
}
void MVKQueue::initName() {
@ -236,23 +314,15 @@ void MVKQueue::initExecQueue() {
}
}
// Retrieves and initializes the Metal command queue.
// Retrieves and initializes the Metal command queue and Xcode GPU capture scopes
void MVKQueue::initMTLCommandQueue() {
uint64_t startTime = _device->getPerformanceTimestamp();
_mtlQueue = _queueFamily->getMTLCommandQueue(_index); // not retained (cached in queue family)
_device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, startTime);
}
// Initializes Xcode GPU capture scopes
void MVKQueue::initGPUCaptureScopes() {
_submissionCaptureScope = new MVKGPUCaptureScope(this);
if (_queueFamily->getIndex() == getMVKConfig().defaultGPUCaptureScopeQueueFamilyIndex &&
_index == getMVKConfig().defaultGPUCaptureScopeQueueIndex) {
getDevice()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME, _mtlQueue);
_submissionCaptureScope->makeDefault();
}
_submissionCaptureScope->beginScope(); // Allow Xcode to capture the first frame if desired.
}
@ -261,7 +331,7 @@ MVKQueue::~MVKQueue() {
destroyExecQueue();
_submissionCaptureScope->destroy();
[_mtlCmdBuffLabelEndCommandBuffer release];
[_mtlCmdBuffLabelBeginCommandBuffer release];
[_mtlCmdBuffLabelQueueSubmit release];
[_mtlCmdBuffLabelQueuePresent release];
[_mtlCmdBuffLabelDeviceWaitIdle release];
@ -282,23 +352,89 @@ void MVKQueue::destroyExecQueue() {
#pragma mark -
#pragma mark MVKQueueSubmission
void MVKSemaphoreSubmitInfo::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
if (_semaphore) { _semaphore->encodeWait(mtlCmdBuff, value); }
}
void MVKSemaphoreSubmitInfo::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) {
if (_semaphore) { _semaphore->encodeSignal(mtlCmdBuff, value); }
}
MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo) :
_semaphore((MVKSemaphore*)semaphoreSubmitInfo.semaphore),
value(semaphoreSubmitInfo.value),
stageMask(semaphoreSubmitInfo.stageMask),
deviceIndex(semaphoreSubmitInfo.deviceIndex) {
if (_semaphore) { _semaphore->retain(); }
}
MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphore semaphore,
VkPipelineStageFlags stageMask) :
_semaphore((MVKSemaphore*)semaphore),
value(0),
stageMask(stageMask),
deviceIndex(0) {
if (_semaphore) { _semaphore->retain(); }
}
MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other) :
_semaphore(other._semaphore),
value(other.value),
stageMask(other.stageMask),
deviceIndex(other.deviceIndex) {
if (_semaphore) { _semaphore->retain(); }
}
MVKSemaphoreSubmitInfo& MVKSemaphoreSubmitInfo::operator=(const MVKSemaphoreSubmitInfo& other) {
// Retain new object first in case it's the same object
if (other._semaphore) {other._semaphore->retain(); }
if (_semaphore) { _semaphore->release(); }
_semaphore = other._semaphore;
value = other.value;
stageMask = other.stageMask;
deviceIndex = other.deviceIndex;
return *this;
}
MVKSemaphoreSubmitInfo::~MVKSemaphoreSubmitInfo() {
if (_semaphore) { _semaphore->release(); }
}
MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo) :
commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBufferInfo.commandBuffer)),
deviceMask(commandBufferInfo.deviceMask) {}
MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer) :
commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBuffer)),
deviceMask(0) {}
MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
uint32_t waitSemaphoreInfoCount,
const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) {
_queue = queue;
_queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
_waitSemaphores.reserve(waitSemaphoreInfoCount);
for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) {
_waitSemaphores.emplace_back(pWaitSemaphoreSubmitInfos[i]);
}
}
MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
uint32_t waitSemaphoreCount,
const VkSemaphore* pWaitSemaphores) {
const VkSemaphore* pWaitSemaphores,
const VkPipelineStageFlags* pWaitDstStageMask) {
_queue = queue;
_queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
_waitSemaphores.reserve(waitSemaphoreCount);
for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
auto* sem4 = (MVKSemaphore*)pWaitSemaphores[i];
sem4->retain();
uint64_t sem4Val = 0;
_waitSemaphores.emplace_back(sem4, sem4Val);
_waitSemaphores.emplace_back(pWaitSemaphores[i], pWaitDstStageMask ? pWaitDstStageMask[i] : 0);
}
}
MVKQueueSubmission::~MVKQueueSubmission() {
for (auto s : _waitSemaphores) { s.first->release(); }
_queue->release();
}
@ -306,22 +442,22 @@ MVKQueueSubmission::~MVKQueueSubmission() {
#pragma mark -
#pragma mark MVKQueueCommandBufferSubmission
void MVKQueueCommandBufferSubmission::execute() {
VkResult MVKQueueCommandBufferSubmission::execute() {
_queue->_submissionCaptureScope->beginScope();
// If using encoded semaphore waiting, do so now.
for (auto& ws : _waitSemaphores) { ws.first->encodeWait(getActiveMTLCommandBuffer(), ws.second); }
for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); }
// Submit each command buffer.
submitCommandBuffers();
// If using encoded semaphore signaling, do so now.
for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(getActiveMTLCommandBuffer(), ss.second); }
for (auto& ss : _signalSemaphores) { ss.encodeSignal(getActiveMTLCommandBuffer()); }
// Commit the last MTLCommandBuffer.
// Nothing after this because callback might destroy this instance before this function ends.
commitActiveMTLCommandBuffer(true);
return commitActiveMTLCommandBuffer(true);
}
// Returns the active MTLCommandBuffer, lazily retrieving it from the queue if needed.
@ -341,24 +477,11 @@ void MVKQueueCommandBufferSubmission::setActiveMTLCommandBuffer(id<MTLCommandBuf
[_activeMTLCommandBuffer enqueue];
}
#if MVK_XCODE_12
static const char* mvkStringFromErrorState(MTLCommandEncoderErrorState errState) {
switch (errState) {
case MTLCommandEncoderErrorStateUnknown: return "unknown";
case MTLCommandEncoderErrorStateAffected: return "affected";
case MTLCommandEncoderErrorStateCompleted: return "completed";
case MTLCommandEncoderErrorStateFaulted: return "faulted";
case MTLCommandEncoderErrorStatePending: return "pending";
}
return "unknown";
}
#endif
// Commits and releases the currently active MTLCommandBuffer, optionally signalling
// when the MTLCommandBuffer is done. The first time this is called, it will wait on
// any semaphores. We have delayed signalling the semaphores as long as possible to
// allow as much filling of the MTLCommandBuffer as possible before forcing a wait.
void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) {
VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) {
// If using inline semaphore waiting, do so now.
// When prefilled command buffers are used, multiple commits will happen because native semaphore
@ -368,7 +491,7 @@ void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCo
// should be more performant when prefilled command buffers aren't used, because we spend time encoding commands
// first, thus giving the command buffer signalling these semaphores more time to complete.
if ( !_emulatedWaitDone ) {
for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, ws.second); }
for (auto& ws : _waitSemaphores) { ws.encodeWait(nil); }
_emulatedWaitDone = true;
}
@ -380,72 +503,29 @@ void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCo
// If we need to signal completion, use getActiveMTLCommandBuffer() to ensure at least
// one MTLCommandBuffer is used, otherwise if this instance has no content, it will not
// finish(), signal the fence and semaphores ,and be destroyed.
// finish(), signal the fence and semaphores, and be destroyed.
// Use temp var for MTLCommandBuffer commit and release because completion callback
// may destroy this instance before this function ends.
id<MTLCommandBuffer> mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer;
_activeMTLCommandBuffer = nil;
MVKDevice* mvkDev = _queue->getDevice();
MVKDevice* mvkDev = getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mtlCB) {
if (mtlCB.status == MTLCommandBufferStatusError) {
// If a command buffer error has occurred, report the error. If the error affects
// the physical device, always mark both the device and physical device as lost.
// If the error is local to this command buffer, optionally mark the device (but not the
// physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice.
getVulkanAPIObject()->reportError(VK_ERROR_DEVICE_LOST, "MTLCommandBuffer \"%s\" execution failed (code %li): %s", mtlCB.label ? mtlCB.label.UTF8String : "", mtlCB.error.code, mtlCB.error.localizedDescription.UTF8String);
switch (mtlCB.error.code) {
case MTLCommandBufferErrorBlacklisted:
case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement.
#if MVK_MACOS && !MVK_MACCAT
case MTLCommandBufferErrorDeviceRemoved:
#endif
mvkDev->markLost(true);
break;
default:
if ( !getMVKConfig().resumeLostDevice ) { mvkDev->markLost(); }
break;
}
#if MVK_XCODE_12
if (getMVKConfig().debugMode) {
if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) {
if (NSArray<id<MTLCommandBufferEncoderInfo>>* mtlEncInfo = mtlCB.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) {
MVKLogInfo("Encoders for %p \"%s\":", mtlCB, mtlCB.label ? mtlCB.label.UTF8String : "");
for (id<MTLCommandBufferEncoderInfo> enc in mtlEncInfo) {
MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromErrorState(enc.errorState));
if (enc.debugSignposts.count > 0) {
MVKLogInfo(" Debug signposts:");
for (NSString* signpost in enc.debugSignposts) {
MVKLogInfo(" - %s", signpost.UTF8String);
}
}
}
}
}
}
#endif
}
#if MVK_XCODE_12
if (getMVKConfig().debugMode && [mtlCB respondsToSelector: @selector(logs)]) {
bool isFirstMsg = true;
for (id<MTLFunctionLog> log in mtlCB.logs) {
if (isFirstMsg) {
MVKLogInfo("Shader log messages:");
isFirstMsg = false;
}
MVKLogInfo("%s", log.description.UTF8String);
}
}
#endif
// Ensure finish() is the last thing the completetion callback does.
mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferCompletion, startTime);
if (signalCompletion) { this->finish(); }
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime);
if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does.
}];
// Retrieve the result before committing MTLCommandBuffer, because finish() will destroy this instance.
VkResult rslt = mtlCmdBuff ? getConfigurationResult() : VK_ERROR_OUT_OF_POOL_MEMORY;
[mtlCmdBuff commit];
[mtlCmdBuff release]; // retained
// If we need to signal completion, but an error occurred and the MTLCommandBuffer
// was not created, call the finish() function directly.
if (signalCompletion && !mtlCmdBuff) { finish(); }
return rslt;
}
// Be sure to retain() any API objects referenced in this function, and release() them in the
@ -460,7 +540,7 @@ void MVKQueueCommandBufferSubmission::finish() {
_queue->_submissionCaptureScope->endScope();
// If using inline semaphore signaling, do so now.
for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(nil, ss.second); }
for (auto& ss : _signalSemaphores) { ss.encodeSignal(nil); }
// If a fence exists, signal it.
if (_fence) { _fence->signal(); }
@ -472,18 +552,53 @@ void MVKQueueCommandBufferSubmission::finish() {
// be destroyed on the waiting thread before this submission is done with them. We therefore
// retain() each here to ensure they live long enough for this submission to finish using them.
MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo* pSubmit,
const VkSubmitInfo2* pSubmit,
VkFence fence,
MVKCommandUse cmdUse) :
MVKQueueSubmission(queue,
(pSubmit ? pSubmit->waitSemaphoreCount : 0),
(pSubmit ? pSubmit->pWaitSemaphores : nullptr)),
_commandUse(cmdUse),
_emulatedWaitDone(false) {
pSubmit ? pSubmit->waitSemaphoreInfoCount : 0,
pSubmit ? pSubmit->pWaitSemaphoreInfos : nullptr),
_fence((MVKFence*)fence),
_commandUse(cmdUse) {
if (_fence) { _fence->retain(); }
// pSubmit can be null if just tracking the fence alone
if (pSubmit) {
uint32_t ssCnt = pSubmit->signalSemaphoreInfoCount;
_signalSemaphores.reserve(ssCnt);
for (uint32_t i = 0; i < ssCnt; i++) {
_signalSemaphores.emplace_back(pSubmit->pSignalSemaphoreInfos[i]);
}
}
}
// On device loss, the fence and signal semaphores may be signalled early, and they might then
// be destroyed on the waiting thread before this submission is done with them. We therefore
// retain() each here to ensure they live long enough for this submission to finish using them.
MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo* pSubmit,
VkFence fence,
MVKCommandUse cmdUse)
: MVKQueueSubmission(queue,
pSubmit ? pSubmit->waitSemaphoreCount : 0,
pSubmit ? pSubmit->pWaitSemaphores : nullptr,
pSubmit ? pSubmit->pWaitDstStageMask : nullptr),
_fence((MVKFence*)fence),
_commandUse(cmdUse) {
if (_fence) { _fence->retain(); }
// pSubmit can be null if just tracking the fence alone
if (pSubmit) {
VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr;
uint32_t ssCnt = pSubmit->signalSemaphoreCount;
_signalSemaphores.reserve(ssCnt);
for (uint32_t i = 0; i < ssCnt; i++) {
_signalSemaphores.emplace_back(pSubmit->pSignalSemaphores[i], 0);
}
VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr;
for (const auto* next = (const VkBaseInStructure*)pSubmit->pNext; next; next = next->pNext) {
switch (next->sType) {
case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
@ -494,37 +609,66 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue
}
}
if (pTimelineSubmit) {
// Presentation doesn't support timeline semaphores, so handle wait values here.
uint32_t wsCnt = pTimelineSubmit->waitSemaphoreValueCount;
for (uint32_t i = 0; i < wsCnt; i++) {
_waitSemaphores[i].second = pTimelineSubmit->pWaitSemaphoreValues[i];
uint32_t wsvCnt = pTimelineSubmit->waitSemaphoreValueCount;
for (uint32_t i = 0; i < wsvCnt; i++) {
_waitSemaphores[i].value = pTimelineSubmit->pWaitSemaphoreValues[i];
}
uint32_t ssvCnt = pTimelineSubmit->signalSemaphoreValueCount;
for (uint32_t i = 0; i < ssvCnt; i++) {
_signalSemaphores[i].value = pTimelineSubmit->pSignalSemaphoreValues[i];
}
}
uint32_t ssCnt = pSubmit->signalSemaphoreCount;
_signalSemaphores.reserve(ssCnt);
for (uint32_t i = 0; i < ssCnt; i++) {
auto* sem4 = (MVKSemaphore*)pSubmit->pSignalSemaphores[i];
sem4->retain();
uint64_t sem4Val = pTimelineSubmit ? pTimelineSubmit->pSignalSemaphoreValues[i] : 0;
_signalSemaphores.emplace_back(sem4, sem4Val);
}
}
_fence = (MVKFence*)fence;
if (_fence) { _fence->retain(); }
_activeMTLCommandBuffer = nil;
}
MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
if (_fence) { _fence->release(); }
for (auto s : _signalSemaphores) { s.first->release(); }
}
template <size_t N>
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); }
MVKDevice* mvkDev = getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
for (auto& cbInfo : _cmdBuffers) { cbInfo.commandBuffer->submit(this, &_encodingContext); }
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime);
}
template <size_t N>
MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo2* pSubmit,
VkFence fence,
MVKCommandUse cmdUse)
: MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) {
if (pSubmit) {
uint32_t cbCnt = pSubmit->commandBufferInfoCount;
_cmdBuffers.reserve(cbCnt);
for (uint32_t i = 0; i < cbCnt; i++) {
_cmdBuffers.emplace_back(pSubmit->pCommandBufferInfos[i]);
setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult());
}
}
}
template <size_t N>
MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQueue* queue,
const VkSubmitInfo* pSubmit,
VkFence fence,
MVKCommandUse cmdUse)
: MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) {
if (pSubmit) {
uint32_t cbCnt = pSubmit->commandBufferCount;
_cmdBuffers.reserve(cbCnt);
for (uint32_t i = 0; i < cbCnt; i++) {
_cmdBuffers.emplace_back(pSubmit->pCommandBuffers[i]);
setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult());
}
}
}
@ -534,24 +678,34 @@ void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
// If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting.
// If the semaphores are not encodable, wait on them inline after presenting.
// The semaphores know what to do.
void MVKQueuePresentSurfaceSubmission::execute() {
id<MTLCommandBuffer> mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent);
[mtlCmdBuff enqueue];
for (auto& ws : _waitSemaphores) { ws.first->encodeWait(mtlCmdBuff, 0); }
VkResult MVKQueuePresentSurfaceSubmission::execute() {
// MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early.
// Although testing could not determine which objects were being lost, queue present MTLCommandBuffers
// are used only once per frame, and retain so few objects, that blanket retention is still performant.
id<MTLCommandBuffer> mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true);
// Add completion handler that will destroy this submission only once the MTLCommandBuffer
// is finished with the resources retained here, including the wait semaphores.
// Completion handlers are also added in presentCAMetalDrawable() to retain the swapchain images.
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
this->finish();
}];
for (int i = 0; i < _presentInfo.size(); i++ ) {
_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]);
for (auto& ws : _waitSemaphores) {
ws.encodeWait(mtlCmdBuff); // Encoded semaphore waits
ws.encodeWait(nil); // Inline semaphore waits
}
for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, 0); }
[mtlCmdBuff commit];
for (int i = 0; i < _presentInfo.size(); i++ ) {
setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]));
}
if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } // Check after images may set error.
// Add completion callback to the MTLCommandBuffer to call finish(),
// or if the MTLCommandBuffer could not be created, call finish() directly.
// Retrieve the result first, because finish() will destroy this instance.
VkResult rslt = getConfigurationResult();
if (mtlCmdBuff) {
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mtlCB) { this->finish(); }];
[mtlCmdBuff commit];
} else {
finish();
}
return rslt;
}
void MVKQueuePresentSurfaceSubmission::finish() {
@ -563,7 +717,7 @@ void MVKQueuePresentSurfaceSubmission::finish() {
cs->beginScope();
if (_queue->_queueFamily->getIndex() == getMVKConfig().defaultGPUCaptureScopeQueueFamilyIndex &&
_queue->_index == getMVKConfig().defaultGPUCaptureScopeQueueIndex) {
_queue->getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME);
getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME);
}
this->destroy();
@ -571,7 +725,7 @@ void MVKQueuePresentSurfaceSubmission::finish() {
MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
const VkPresentInfoKHR* pPresentInfo)
: MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) {
: MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores, nullptr) {
const VkPresentTimesInfoGOOGLE* pPresentTimesInfo = nullptr;
const VkSwapchainPresentFenceInfoEXT* pPresentFenceInfo = nullptr;
@ -623,6 +777,7 @@ MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* que
for (uint32_t scIdx = 0; scIdx < scCnt; scIdx++) {
MVKSwapchain* mvkSC = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx];
MVKImagePresentInfo presentInfo = {}; // Start with everything zeroed
presentInfo.queue = _queue;
presentInfo.presentableImage = mvkSC->getPresentableImage(pPresentInfo->pImageIndices[scIdx]);
presentInfo.presentMode = pPresentModes ? pPresentModes[scIdx] : VK_PRESENT_MODE_MAX_ENUM_KHR;
presentInfo.fence = pFences ? (MVKFence*)pFences[scIdx] : nullptr;

View File

@ -116,8 +116,8 @@ public:
void populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc,
uint32_t passIdx,
MVKFramebuffer* framebuffer,
const MVKArrayRef<MVKImageView*> attachments,
const MVKArrayRef<VkClearValue> clearValues,
MVKArrayRef<MVKImageView*const> attachments,
MVKArrayRef<const VkClearValue> clearValues,
bool isRenderingEntireAttachment,
bool loadOverride = false);
@ -126,7 +126,7 @@ public:
* when the render area is smaller than the full framebuffer size.
*/
void populateClearAttachments(MVKClearAttachments& clearAtts,
const MVKArrayRef<VkClearValue> clearValues);
MVKArrayRef<const VkClearValue> clearValues);
/**
* Populates the specified vector with VkClearRects for clearing views of a specified multiview
@ -140,11 +140,11 @@ public:
/** If a render encoder is active, sets the store actions for all attachments to it. */
void encodeStoreActions(MVKCommandEncoder* cmdEncoder,
bool isRenderingEntireAttachment,
const MVKArrayRef<MVKImageView*> attachments,
MVKArrayRef<MVKImageView*const> attachments,
bool storeOverride = false);
/** Resolves any resolve attachments that cannot be handled by native Metal subpass resolve behavior. */
void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef<MVKImageView*> attachments);
void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef<MVKImageView*const> attachments);
MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo,
const VkRenderPassInputAttachmentAspectCreateInfo* pInputAspects,
@ -265,6 +265,22 @@ protected:
#pragma mark -
#pragma mark MVKRenderPass
/** Collects together VkSubpassDependency and VkMemoryBarrier2. */
typedef struct MVKSubpassDependency {
uint32_t srcSubpass;
uint32_t dstSubpass;
VkPipelineStageFlags2 srcStageMask;
VkPipelineStageFlags2 dstStageMask;
VkAccessFlags2 srcAccessMask;
VkAccessFlags2 dstAccessMask;
VkDependencyFlags dependencyFlags;
int32_t viewOffset;
MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset);
MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar);
} MVKSubpassDependency;
/** Represents a Vulkan render pass. */
class MVKRenderPass : public MVKVulkanAPIDeviceObject {
@ -308,7 +324,7 @@ protected:
MVKSmallVector<MVKAttachmentDescription> _attachments;
MVKSmallVector<MVKRenderSubpass> _subpasses;
MVKSmallVector<VkSubpassDependency2> _subpassDependencies;
MVKSmallVector<MVKSubpassDependency> _subpassDependencies;
VkRenderingFlags _renderingFlags = 0;
};

View File

@ -138,8 +138,8 @@ uint32_t MVKRenderSubpass::getViewCountUpToMetalPass(uint32_t passIdx) const {
void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc,
uint32_t passIdx,
MVKFramebuffer* framebuffer,
const MVKArrayRef<MVKImageView*> attachments,
const MVKArrayRef<VkClearValue> clearValues,
MVKArrayRef<MVKImageView*const> attachments,
MVKArrayRef<const VkClearValue> clearValues,
bool isRenderingEntireAttachment,
bool loadOverride) {
MVKPixelFormats* pixFmts = _renderPass->getPixelFormats();
@ -279,7 +279,7 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor*
void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder,
bool isRenderingEntireAttachment,
const MVKArrayRef<MVKImageView*> attachments,
MVKArrayRef<MVKImageView*const> attachments,
bool storeOverride) {
if (!cmdEncoder->_mtlRenderEncoder) { return; }
if (!_renderPass->getDevice()->_pMetalFeatures->deferredStoreActions) { return; }
@ -308,7 +308,7 @@ void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder,
}
void MVKRenderSubpass::populateClearAttachments(MVKClearAttachments& clearAtts,
const MVKArrayRef<VkClearValue> clearValues) {
MVKArrayRef<const VkClearValue> clearValues) {
uint32_t caCnt = getColorAttachmentCount();
for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
uint32_t attIdx = _colorAttachments[caIdx].attachment;
@ -394,7 +394,7 @@ MVKMTLFmtCaps MVKRenderSubpass::getRequiredFormatCapabilitiesForAttachmentAt(uin
return caps;
}
void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef<MVKImageView*> attachments) {
void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef<MVKImageView*const> attachments) {
MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
size_t raCnt = _resolveAttachments.size();
for (uint32_t raIdx = 0; raIdx < raCnt; raIdx++) {
@ -904,6 +904,26 @@ MVKAttachmentDescription::MVKAttachmentDescription(MVKRenderPass* renderPass,
#pragma mark -
#pragma mark MVKRenderPass
MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset) :
srcSubpass(spDep.srcSubpass),
dstSubpass(spDep.dstSubpass),
srcStageMask(spDep.srcStageMask),
dstStageMask(spDep.dstStageMask),
srcAccessMask(spDep.srcAccessMask),
dstAccessMask(spDep.dstAccessMask),
dependencyFlags(spDep.dependencyFlags),
viewOffset(viewOffset) {}
MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar) :
srcSubpass(spDep.srcSubpass),
dstSubpass(spDep.dstSubpass),
srcStageMask(pMemBar ? pMemBar->srcStageMask : spDep.srcStageMask),
dstStageMask(pMemBar ? pMemBar->dstStageMask : spDep.dstStageMask),
srcAccessMask(pMemBar ? pMemBar->srcAccessMask : spDep.srcAccessMask),
dstAccessMask(pMemBar ? pMemBar->dstAccessMask : spDep.dstAccessMask),
dependencyFlags(spDep.dependencyFlags),
viewOffset(spDep.viewOffset) {}
VkExtent2D MVKRenderPass::getRenderAreaGranularity() {
if (_device->_pMetalFeatures->tileBasedDeferredRendering) {
// This is the tile area.
@ -954,19 +974,7 @@ MVKRenderPass::MVKRenderPass(MVKDevice* device,
}
_subpassDependencies.reserve(pCreateInfo->dependencyCount);
for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
VkSubpassDependency2 dependency = {
.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
.pNext = nullptr,
.srcSubpass = pCreateInfo->pDependencies[i].srcSubpass,
.dstSubpass = pCreateInfo->pDependencies[i].dstSubpass,
.srcStageMask = pCreateInfo->pDependencies[i].srcStageMask,
.dstStageMask = pCreateInfo->pDependencies[i].dstStageMask,
.srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask,
.dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask,
.dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags,
.viewOffset = viewOffsets ? viewOffsets[i] : 0,
};
_subpassDependencies.push_back(dependency);
_subpassDependencies.emplace_back(pCreateInfo->pDependencies[i], viewOffsets ? viewOffsets[i] : 0);
}
// Link attachments to subpasses
@ -991,7 +999,19 @@ MVKRenderPass::MVKRenderPass(MVKDevice* device,
}
_subpassDependencies.reserve(pCreateInfo->dependencyCount);
for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
_subpassDependencies.push_back(pCreateInfo->pDependencies[i]);
auto& spDep = pCreateInfo->pDependencies[i];
const VkMemoryBarrier2* pMemoryBarrier2 = nullptr;
for (auto* next = (const VkBaseInStructure*)spDep.pNext; next; next = next->pNext) {
switch (next->sType) {
case VK_STRUCTURE_TYPE_MEMORY_BARRIER_2:
pMemoryBarrier2 = (const VkMemoryBarrier2*)next;
break;
default:
break;
}
}
_subpassDependencies.emplace_back(spDep, pMemoryBarrier2);
}
// Link attachments to subpasses

View File

@ -60,9 +60,7 @@ public:
}
/** Applies the specified global memory barrier. */
virtual void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
virtual void applyMemoryBarrier(MVKPipelineBarrier& barrier,
MVKCommandEncoder* cmdEncoder,
MVKCommandUse cmdUse) = 0;

View File

@ -43,7 +43,7 @@ using namespace mvk;
typedef struct MVKMTLFunction {
SPIRVToMSLConversionResultInfo shaderConversionResults;
MTLSize threadGroupSize;
inline id<MTLFunction> getMTLFunction() { return _mtlFunction; }
id<MTLFunction> getMTLFunction() { return _mtlFunction; }
MVKMTLFunction(id<MTLFunction> mtlFunc, const SPIRVToMSLConversionResultInfo scRslts, MTLSize tgSize);
MVKMTLFunction(const MVKMTLFunction& other);

View File

@ -36,10 +36,11 @@ MVKMTLFunction::MVKMTLFunction(const MVKMTLFunction& other) {
}
MVKMTLFunction& MVKMTLFunction::operator=(const MVKMTLFunction& other) {
if (_mtlFunction != other._mtlFunction) {
[_mtlFunction release];
_mtlFunction = [other._mtlFunction retain]; // retained
}
// Retain new object first in case it's the same object
[other._mtlFunction retain];
[_mtlFunction release];
_mtlFunction = other._mtlFunction;
shaderConversionResults = other.shaderConversionResults;
threadGroupSize = other.threadGroupSize;
return *this;
@ -80,7 +81,7 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe
uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : mvkDev->getPerformanceTimestamp();
id<MTLFunction> mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease];
mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
if (pShaderFeedback) {
if (mtlFunc) {
mvkEnableFlags(pShaderFeedback->flags, VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT);
@ -156,7 +157,7 @@ void MVKShaderLibrary::compressMSL(const string& msl) {
MVKDevice* mvkDev = _owner->getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
_compressedMSL.compress(msl, getMVKConfig().shaderSourceCompressionAlgorithm);
mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime);
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime);
}
// Decompresses the cached MSL into the string.
@ -164,7 +165,7 @@ void MVKShaderLibrary::decompressMSL(string& msl) {
MVKDevice* mvkDev = _owner->getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
_compressedMSL.decompress(msl);
mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime);
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime);
}
MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
@ -207,7 +208,7 @@ MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
handleCompilationError(err, "Compiled shader module creation");
[shdrData release];
}
mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime);
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime);
}
MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) {
@ -283,7 +284,7 @@ MVKShaderLibrary* MVKShaderLibraryCache::findShaderLibrary(SPIRVToMSLConversionC
if (slPair.first.matches(*pShaderConfig)) {
pShaderConfig->alignWith(slPair.first);
MVKDevice* mvkDev = _owner->getDevice();
mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime);
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime);
if (pShaderFeedback) {
pShaderFeedback->duration += mvkGetElapsedNanoseconds(startTime);
}
@ -363,7 +364,7 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig,
GLSLToSPIRVConversionResult glslConversionResult;
uint64_t startTime = _device->getPerformanceTimestamp();
bool wasConverted = _glslConverter.convert(getMVKGLSLConversionShaderStage(pShaderConfig), glslConversionResult, shouldLogCode, false);
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime);
if (wasConverted) {
if (shouldLogCode) { MVKLogInfo("%s", glslConversionResult.resultLog.c_str()); }
@ -376,7 +377,7 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig,
uint64_t startTime = _device->getPerformanceTimestamp();
bool wasConverted = _spvConverter.convert(*pShaderConfig, conversionResult, shouldLogCode, shouldLogCode, shouldLogEstimatedGLSL);
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime);
if (wasConverted) {
if (shouldLogCode) { MVKLogInfo("%s", conversionResult.resultLog.c_str()); }
@ -436,7 +437,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
uint64_t startTime = _device->getPerformanceTimestamp();
codeHash = mvkHash(pCreateInfo->pCode, spvCount);
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
_spvConverter.setSPIRV(pCreateInfo->pCode, spvCount);
@ -450,7 +451,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
uint64_t startTime = _device->getPerformanceTimestamp();
codeHash = mvkHash(&magicNum);
codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash);
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
SPIRVToMSLConversionResult conversionResult;
conversionResult.msl = pMSLCode;
@ -466,7 +467,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
uint64_t startTime = _device->getPerformanceTimestamp();
codeHash = mvkHash(&magicNum);
codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash);
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
_directMSLLibrary = new MVKShaderLibrary(this, (void*)(pMSLCode), mslCodeLen);
@ -479,7 +480,7 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
uint64_t startTime = _device->getPerformanceTimestamp();
codeHash = mvkHash(pGLSL, codeSize);
_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
_glslConverter.setGLSL(pGLSL, glslLen);
} else {

View File

@ -24,17 +24,8 @@
#import <Metal/Metal.h>
#import <QuartzCore/CAMetalLayer.h>
#ifdef VK_USE_PLATFORM_IOS_MVK
# define PLATFORM_VIEW_CLASS UIView
# import <UIKit/UIView.h>
#endif
#ifdef VK_USE_PLATFORM_MACOS_MVK
# define PLATFORM_VIEW_CLASS NSView
# import <AppKit/NSView.h>
#endif
class MVKInstance;
class MVKSwapchain;
@class MVKBlockObserver;
@ -55,12 +46,17 @@ public:
/** Returns a pointer to the Vulkan instance. */
MVKInstance* getInstance() override { return _mvkInstance; }
/** Returns the CAMetalLayer underlying this surface. */
inline CAMetalLayer* getCAMetalLayer() {
std::lock_guard<std::mutex> lock(_layerLock);
return _mtlCAMetalLayer;
}
/** Returns the CAMetalLayer underlying this surface. */
CAMetalLayer* getCAMetalLayer();
/** Returns the extent of this surface. */
VkExtent2D getExtent();
/** Returns the extent for which the underlying CAMetalLayer will not need to be scaled when composited. */
VkExtent2D getNaturalExtent();
/** Returns whether this surface is headless. */
bool isHeadless() { return !_mtlCAMetalLayer && wasConfigurationSuccessful(); }
#pragma mark Construction
@ -68,6 +64,10 @@ public:
const VkMetalSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator);
MVKSurface(MVKInstance* mvkInstance,
const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator);
MVKSurface(MVKInstance* mvkInstance,
const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo,
const VkAllocationCallbacks* pAllocator);
@ -75,13 +75,18 @@ public:
~MVKSurface() override;
protected:
friend class MVKSwapchain;
void propagateDebugName() override {}
void initLayerObserver();
void setActiveSwapchain(MVKSwapchain* swapchain);
void initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName, bool isHeadless);
void releaseLayer();
MVKInstance* _mvkInstance;
CAMetalLayer* _mtlCAMetalLayer;
MVKBlockObserver* _layerObserver;
std::mutex _layerLock;
MVKInstance* _mvkInstance = nullptr;
CAMetalLayer* _mtlCAMetalLayer = nil;
MVKBlockObserver* _layerObserver = nil;
MVKSwapchain* _activeSwapchain = nullptr;
VkExtent2D _headlessExtent = {0xFFFFFFFF, 0xFFFFFFFF};
};

View File

@ -17,11 +17,26 @@
*/
#include "MVKSurface.h"
#include "MVKSwapchain.h"
#include "MVKInstance.h"
#include "MVKFoundation.h"
#include "MVKOSExtensions.h"
#include "mvk_datatypes.hpp"
#import "CAMetalLayer+MoltenVK.h"
#import "MVKBlockObserver.h"
#ifdef VK_USE_PLATFORM_IOS_MVK
# define PLATFORM_VIEW_CLASS UIView
# import <UIKit/UIView.h>
#endif
#ifdef VK_USE_PLATFORM_MACOS_MVK
# define PLATFORM_VIEW_CLASS NSView
# import <AppKit/NSView.h>
#endif
// We need to double-dereference the name to first convert to the platform symbol, then to a string.
#define STR_PLATFORM(NAME) #NAME
#define STR(NAME) STR_PLATFORM(NAME)
@ -29,54 +44,68 @@
#pragma mark MVKSurface
CAMetalLayer* MVKSurface::getCAMetalLayer() {
std::lock_guard<std::mutex> lock(_layerLock);
return _mtlCAMetalLayer;
}
VkExtent2D MVKSurface::getExtent() {
return _mtlCAMetalLayer ? mvkVkExtent2DFromCGSize(_mtlCAMetalLayer.drawableSize) : _headlessExtent;
}
VkExtent2D MVKSurface::getNaturalExtent() {
return _mtlCAMetalLayer ? mvkVkExtent2DFromCGSize(_mtlCAMetalLayer.naturalDrawableSizeMVK) : _headlessExtent;
}
// Per spec, headless surface extent is set from the swapchain.
void MVKSurface::setActiveSwapchain(MVKSwapchain* swapchain) {
_activeSwapchain = swapchain;
_headlessExtent = swapchain->getImageExtent();
}
MVKSurface::MVKSurface(MVKInstance* mvkInstance,
const VkMetalSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) {
initLayer((CAMetalLayer*)pCreateInfo->pLayer, "vkCreateMetalSurfaceEXT", false);
}
_mtlCAMetalLayer = (CAMetalLayer*)[pCreateInfo->pLayer retain];
initLayerObserver();
MVKSurface::MVKSurface(MVKInstance* mvkInstance,
const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) {
initLayer(nil, "vkCreateHeadlessSurfaceEXT", true);
}
// pCreateInfo->pView can be either a CAMetalLayer or a view (NSView/UIView).
MVKSurface::MVKSurface(MVKInstance* mvkInstance,
const Vk_PLATFORM_SurfaceCreateInfoMVK* pCreateInfo,
const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) {
MVKLogWarn("%s() is deprecated. Use vkCreateMetalSurfaceEXT() from the VK_EXT_metal_surface extension.", STR(vkCreate_PLATFORM_SurfaceMVK));
// Get the platform object contained in pView
id<NSObject> obj = (id<NSObject>)pCreateInfo->pView;
// If it's a view (NSView/UIView), extract the layer, otherwise assume it's already a CAMetalLayer.
id<NSObject> obj = (id<NSObject>)pCreateInfo->pView;
if ([obj isKindOfClass: [PLATFORM_VIEW_CLASS class]]) {
if ( !NSThread.isMainThread ) {
MVKLogInfo("%s(): You are not calling this function from the main thread. %s should only be accessed from the main thread. When using this function outside the main thread, consider passing the CAMetalLayer itself in %s::pView, instead of the %s.",
STR(vkCreate_PLATFORM_SurfaceMVK), STR(PLATFORM_VIEW_CLASS), STR(Vk_PLATFORM_SurfaceCreateInfoMVK), STR(PLATFORM_VIEW_CLASS));
}
obj = ((PLATFORM_VIEW_CLASS*)obj).layer;
__block id<NSObject> layer;
mvkDispatchToMainAndWait(^{ layer = ((PLATFORM_VIEW_CLASS*)obj).layer; });
obj = layer;
}
// Confirm that we were provided with a CAMetalLayer
if ([obj isKindOfClass: [CAMetalLayer class]]) {
_mtlCAMetalLayer = (CAMetalLayer*)[obj retain]; // retained
} else {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED,
"%s(): On-screen rendering requires a layer of type CAMetalLayer.",
STR(vkCreate_PLATFORM_SurfaceMVK)));
_mtlCAMetalLayer = nil;
}
initLayerObserver();
initLayer([obj isKindOfClass: CAMetalLayer.class] ? (CAMetalLayer*)obj : nil, STR(vkCreate_PLATFORM_SurfaceMVK), false);
}
// Sometimes, the owning view can replace its CAMetalLayer. In that case, the client needs to recreate the surface.
void MVKSurface::initLayerObserver() {
void MVKSurface::initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName, bool isHeadless) {
_layerObserver = nil;
if ( ![_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) { return; }
_mtlCAMetalLayer = [mtlLayer retain]; // retained
if ( !_mtlCAMetalLayer && !isHeadless ) { setConfigurationResult(reportError(VK_ERROR_SURFACE_LOST_KHR, "%s(): On-screen rendering requires a layer of type CAMetalLayer.", vkFuncName)); }
_layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) {
if ( ![path isEqualToString: @"layer"] ) { return; }
this->releaseLayer();
} forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"];
// Sometimes, the owning view can replace its CAMetalLayer.
// When that happens, the app needs to recreate the surface.
if ([_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]]) {
_layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) {
if ([path isEqualToString: @"layer"]) { this->releaseLayer(); }
} forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"];
}
}
void MVKSurface::releaseLayer() {

View File

@ -23,13 +23,10 @@
#include "MVKSmallVector.h"
#include <mutex>
#import "CAMetalLayer+MoltenVK.h"
#import <Metal/Metal.h>
class MVKWatermark;
@class MVKBlockObserver;
#pragma mark -
#pragma mark MVKSwapchain
@ -45,11 +42,20 @@ public:
/** Returns the debug report object type of this object. */
VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT; }
/** Returns the CAMetalLayer underlying the surface used by this swapchain. */
CAMetalLayer* getCAMetalLayer();
/** Returns whether the surface is headless. */
bool isHeadless();
/** Returns the number of images in this swapchain. */
inline uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); }
uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); }
/** Returns the size of the images in this swapchain. */
VkExtent2D getImageExtent() { return _imageExtent; }
/** Returns the image at the specified index. */
inline MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; }
MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; }
/**
* Returns the array of presentable images associated with this swapchain.
@ -76,19 +82,8 @@ public:
/** Releases swapchain images. */
VkResult releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo);
/** Returns whether the parent surface is now lost and this swapchain must be recreated. */
bool getIsSurfaceLost() { return _surfaceLost; }
/** Returns whether this swapchain is optimally sized for the surface. */
bool hasOptimalSurface();
/** Returns the status of the surface. Surface loss takes precedence over sub-optimal errors. */
VkResult getSurfaceStatus() {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; }
if ( !hasOptimalSurface() ) { return VK_SUBOPTIMAL_KHR; }
return VK_SUCCESS;
}
VkResult getSurfaceStatus();
/** Adds HDR metadata to this swapchain. */
void setHDRMetadataEXT(const VkHdrMetadataEXT& metadata);
@ -118,45 +113,29 @@ protected:
VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo,
uint32_t imgCnt);
void initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt);
void releaseLayer();
void releaseUndisplayedSurfaces();
bool getIsSurfaceLost();
bool hasOptimalSurface();
uint64_t getNextAcquisitionID();
void willPresentSurface(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff);
void renderWatermark(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff);
void markFrameInterval();
void recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0);
void beginPresentation(const MVKImagePresentInfo& presentInfo);
void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0);
void forceUnpresentedImageCompletion();
CAMetalLayer* _mtlLayer = nil;
MVKSurface* _surface = nullptr;
MVKWatermark* _licenseWatermark = nullptr;
MVKSmallVector<MVKPresentableSwapchainImage*, kMVKMaxSwapchainImageCount> _presentableImages;
MVKSmallVector<VkPresentModeKHR, 2> _compatiblePresentModes;
static const int kMaxPresentationHistory = 60;
VkPastPresentationTimingGOOGLE _presentTimingHistory[kMaxPresentationHistory];
std::atomic<uint64_t> _currentAcquisitionID = 0;
MVKBlockObserver* _layerObserver = nil;
std::mutex _presentHistoryLock;
std::mutex _layerLock;
uint64_t _lastFrameTime = 0;
VkExtent2D _mtlLayerDrawableExtent = {0, 0};
VkExtent2D _imageExtent = {0, 0};
std::atomic<uint32_t> _unpresentedImageCount = 0;
uint32_t _currentPerfLogFrameCount = 0;
uint32_t _presentHistoryCount = 0;
uint32_t _presentHistoryIndex = 0;
uint32_t _presentHistoryHeadIndex = 0;
std::atomic<bool> _surfaceLost = false;
bool _isDeliberatelyScaled = false;
};
#pragma mark -
#pragma mark Support functions
/**
 * Returns the natural extent of the CAMetalLayer.
 *
 * The natural extent is the size of the bounds property of the layer,
 * multiplied by the contentsScale property of the layer, rounded
 * to nearest integer using half-to-even rounding.
 */
static inline VkExtent2D mvkGetNaturalExtent(CAMetalLayer* mtlLayer) {
	CGSize naturalSize = mtlLayer.naturalDrawableSizeMVK;
	return mvkVkExtent2DFromCGSize(naturalSize);
}

View File

@ -26,9 +26,11 @@
#include "MVKWatermarkTextureContent.h"
#include "MVKWatermarkShaderSource.h"
#include "mvk_datatypes.hpp"
#include <libkern/OSByteOrder.h>
#import "CAMetalLayer+MoltenVK.h"
#import "MVKBlockObserver.h"
#include <libkern/OSByteOrder.h>
using namespace std;
@ -47,6 +49,10 @@ void MVKSwapchain::propagateDebugName() {
}
}
// Returns the CAMetalLayer backing this swapchain, by delegating to the parent surface.
CAMetalLayer* MVKSwapchain::getCAMetalLayer() { return _surface->getCAMetalLayer(); }
// Returns whether this swapchain renders to a headless surface, by delegating to the parent surface.
bool MVKSwapchain::isHeadless() { return _surface->isHeadless(); }
VkResult MVKSwapchain::getImages(uint32_t* pCount, VkImage* pSwapchainImages) {
// Get the number of surface images
@ -95,9 +101,8 @@ VkResult MVKSwapchain::acquireNextImage(uint64_t timeout,
// Return the index of the image with the shortest wait,
// and signal the semaphore and fence when it's available
*pImageIndex = minWaitImage->_swapchainIndex;
minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence);
return getSurfaceStatus();
VkResult rslt = minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence);
return rslt ? rslt : getSurfaceStatus();
}
VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo) {
@ -105,38 +110,39 @@ VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pRel
getPresentableImage(pReleaseInfo->pImageIndices[imgIdxIdx])->makeAvailable();
}
return VK_SUCCESS;
return _surface->getConfigurationResult();
}
// Atomically increments and returns the next image-acquisition counter value (_currentAcquisitionID is atomic).
uint64_t MVKSwapchain::getNextAcquisitionID() { return ++_currentAcquisitionID; }
// Releases any surfaces that are not currently being displayed,
// so they can be used by a different swapchain.
// Currently a no-op.
void MVKSwapchain::releaseUndisplayedSurfaces() {}
// Returns whether the parent surface has been lost, propagating the surface's
// configuration result onto this swapchain as a side effect of the query.
bool MVKSwapchain::getIsSurfaceLost() {
	const VkResult surfaceResult = _surface->getConfigurationResult();
	setConfigurationResult(surfaceResult);
	return (surfaceResult != VK_SUCCESS);
}
// Returns the status of the surface backing this swapchain.
// Device configuration failure takes precedence, then surface loss,
// then a sub-optimally sized surface.
VkResult MVKSwapchain::getSurfaceStatus() {
	VkResult devResult = _device->getConfigurationResult();
	if (devResult != VK_SUCCESS) { return devResult; }
	if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; }
	return hasOptimalSurface() ? VK_SUCCESS : VK_SUBOPTIMAL_KHR;
}
// This swapchain is optimally sized for the surface if the app has specified deliberate
// swapchain scaling, or the CAMetalLayer drawableSize has not changed since the swapchain
// was created, and the CAMetalLayer will not need to be scaled when composited.
// This swapchain is optimally sized for the surface if the app has specified
// deliberate swapchain scaling, or the surface extent has not changed since the
// swapchain was created, and the surface will not need to be scaled when composited.
bool MVKSwapchain::hasOptimalSurface() {
if (_isDeliberatelyScaled) { return true; }
VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(_mtlLayer.drawableSize);
return (mvkVkExtent2DsAreEqual(drawExtent, _mtlLayerDrawableExtent) &&
mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(_mtlLayer)));
VkExtent2D surfExtent = _surface->getExtent();
return (mvkVkExtent2DsAreEqual(surfExtent, _imageExtent) &&
mvkVkExtent2DsAreEqual(surfExtent, _surface->getNaturalExtent()));
}
#pragma mark Rendering
// Called automatically when a swapchain image is about to be presented to the surface by the queue.
// Activities include marking the frame interval and rendering the watermark if needed.
void MVKSwapchain::willPresentSurface(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff) {
	// Record frame timing first, then encode any watermark rendering into the command buffer.
	markFrameInterval();
	renderWatermark(mtlTexture, mtlCmdBuff);
}
// If the product has not been fully licensed, renders the watermark image to the surface.
// Renders the watermark image to the surface.
void MVKSwapchain::renderWatermark(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff) {
if (getMVKConfig().displayWatermark) {
if ( !_licenseWatermark ) {
@ -158,22 +164,22 @@ void MVKSwapchain::renderWatermark(id<MTLTexture> mtlTexture, id<MTLCommandBuffe
}
// Calculates and remembers the time interval between frames.
// Not threadsafe. Ensure this is called from a threadsafe environment.
void MVKSwapchain::markFrameInterval() {
if ( !(getMVKConfig().performanceTracking || _licenseWatermark) ) { return; }
uint64_t prevFrameTime = _lastFrameTime;
_lastFrameTime = mvkGetTimestamp();
if (prevFrameTime == 0) { return; } // First frame starts at first presentation
_device->addActivityPerformance(_device->_performanceStatistics.queue.frameInterval, prevFrameTime, _lastFrameTime);
_device->updateActivityPerformance(_device->_performanceStatistics.queue.frameInterval, mvkGetElapsedMilliseconds(prevFrameTime, _lastFrameTime));
uint32_t perfLogCntLimit = getMVKConfig().performanceLoggingFrameCount;
if ((perfLogCntLimit > 0) && (++_currentPerfLogFrameCount >= perfLogCntLimit)) {
auto& mvkCfg = getMVKConfig();
bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT;
if (shouldLogOnFrames && (mvkCfg.performanceLoggingFrameCount > 0) && (++_currentPerfLogFrameCount >= mvkCfg.performanceLoggingFrameCount)) {
_currentPerfLogFrameCount = 0;
MVKLogInfo("Performance statistics reporting every: %d frames, avg FPS: %.2f, elapsed time: %.3f seconds:",
perfLogCntLimit,
(1000.0 / _device->_performanceStatistics.queue.frameInterval.averageDuration),
mvkCfg.performanceLoggingFrameCount,
(1000.0 / _device->_performanceStatistics.queue.frameInterval.average),
mvkGetElapsedMilliseconds() / 1000.0);
if (getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) {
_device->logPerformanceSummary();
@ -181,6 +187,125 @@ void MVKSwapchain::markFrameInterval() {
}
}
// Populates pRefreshCycleDuration with the display refresh period in nanoseconds,
// derived from the refresh rate of the screen currently displaying the layer.
// Falls back to 60 fps when the screen is unavailable or reports a zero rate
// (and to a fixed 90 fps on visionOS).
VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) {
	if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
	auto* screen = getCAMetalLayer().screenMVK;		// Will be nil if headless
#if MVK_MACOS && !MVK_MACCAT
	double framesPerSecond = 60;
	if (screen) {
		// Look up the display mode of the CGDirectDisplay backing this NSScreen.
		CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue];
		CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId);
		framesPerSecond = CGDisplayModeGetRefreshRate(mode);
		CGDisplayModeRelease(mode);
#if MVK_XCODE_13
		if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)])
			framesPerSecond = [screen maximumFramesPerSecond];
#endif
		// Builtin panels, e.g., on MacBook, report a zero refresh rate.
		if (framesPerSecond == 0)
			framesPerSecond = 60.0;
	}
#elif MVK_IOS_OR_TVOS || MVK_MACCAT
	NSInteger framesPerSecond = 60;
	if ([screen respondsToSelector: @selector(maximumFramesPerSecond)]) {
		framesPerSecond = screen.maximumFramesPerSecond;
	}
#elif MVK_VISIONOS
	NSInteger framesPerSecond = 90;		// TODO: See if this can be obtained from OS instead
#endif
	pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond;
	return VK_SUCCESS;
}
// Returns available past presentation timings, consuming them from the circular
// history buffer. If pPresentationTimings is null, only reports (via pCount) the
// number of timings currently held. Returns VK_INCOMPLETE when the caller's
// array is too small to hold the entire available history.
VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) {
	if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }

	std::lock_guard<std::mutex> lock(_presentHistoryLock);

	// Query-only call: report how many timings are available.
	if ( !pPresentationTimings ) {
		*pCount = _presentHistoryCount;
		return VK_SUCCESS;
	}

	VkResult rslt = (*pCount >= _presentHistoryCount) ? VK_SUCCESS : VK_INCOMPLETE;
	uint32_t xferCount = std::min(_presentHistoryCount, *pCount);
	*pCount = xferCount;

	// Drain the oldest entries from the circular buffer into the caller's array.
	for (uint32_t dstIdx = 0; dstIdx < xferCount; dstIdx++) {
		pPresentationTimings[dstIdx] = _presentTimingHistory[_presentHistoryHeadIndex];
		_presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory;
		_presentHistoryCount--;
	}
	return rslt;
}
// Tracks that an image presentation has been submitted and has not yet completed.
// Balanced by endPresentation(); _unpresentedImageCount is atomic.
void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) {
	_unpresentedImageCount++;
}
// Called when a presentation completes. Decrements the in-flight presentation
// count, marks the frame interval, and records the presentation timing into the
// circular history buffer consumed by getPastPresentationTiming().
void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) {
	_unpresentedImageCount--;

	std::lock_guard<std::mutex> lock(_presentHistoryLock);
	markFrameInterval();

	// Grow the history until it is full, then advance the head so the oldest entry is overwritten.
	if (_presentHistoryCount < kMaxPresentationHistory) {
		_presentHistoryCount++;
	} else {
		_presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory;
	}

	auto& timing = _presentTimingHistory[_presentHistoryIndex];
	timing.presentID = presentInfo.presentID;
	timing.desiredPresentTime = presentInfo.desiredPresentTime;
	timing.actualPresentTime = actualPresentTime;
	// These details are not available in Metal
	timing.earliestPresentTime = actualPresentTime;
	timing.presentMargin = 0;
	_presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory;
}
// Because of a regression in Metal, the most recent one or two presentations may not complete
// and call back. To work around this, if there are any uncompleted presentations, change the
// drawableSize of the CAMetalLayer, which will trigger presentation completion and callbacks.
// The drawableSize will be set to a correct size by the next swapchain created on the same surface.
void MVKSwapchain::forceUnpresentedImageCompletion() {
	if (_unpresentedImageCount) {
		// {1,1} is a deliberately throw-away size, used only to force the resize side effect.
		getCAMetalLayer().drawableSize = { 1,1 };
	}
}
// Marks the layer, or specific regions of it, as needing to be redrawn,
// in support of incremental present regions.
void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) {
	auto* mtlLayer = getCAMetalLayer();

	// Without region info, the entire layer is considered dirty.
	if ( !pRegion || pRegion->rectangleCount == 0 ) {
		[mtlLayer setNeedsDisplay];
		return;
	}

	for (uint32_t rectIdx = 0; rectIdx < pRegion->rectangleCount; ++rectIdx) {
		CGRect dirtyRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[rectIdx]);
#if MVK_MACOS
		// VK_KHR_incremental_present specifies an upper-left origin, but macOS by default
		// uses a lower-left origin.
		dirtyRect.origin.y = mtlLayer.bounds.size.height - dirtyRect.origin.y;
#endif
		// We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them
		// in points, which is pixels / contentsScale.
		CGFloat pixelsPerPoint = mtlLayer.contentsScale;
		dirtyRect.origin.x /= pixelsPerPoint;
		dirtyRect.origin.y /= pixelsPerPoint;
		dirtyRect.size.width /= pixelsPerPoint;
		dirtyRect.size.height /= pixelsPerPoint;
		[mtlLayer setNeedsDisplayInRect: dirtyRect];
	}
}
#if MVK_MACOS
struct CIE1931XY {
uint16_t x;
@ -237,19 +362,32 @@ void MVKSwapchain::setHDRMetadataEXT(const VkHdrMetadataEXT& metadata) {
CAEDRMetadata* caMetadata = [CAEDRMetadata HDR10MetadataWithDisplayInfo: colorVolData
contentInfo: lightLevelData
opticalOutputScale: 1];
_mtlLayer.EDRMetadata = caMetadata;
auto* mtlLayer = getCAMetalLayer();
mtlLayer.EDRMetadata = caMetadata;
mtlLayer.wantsExtendedDynamicRangeContent = YES;
[caMetadata release];
[colorVolData release];
[lightLevelData release];
_mtlLayer.wantsExtendedDynamicRangeContent = YES;
#endif
}
#pragma mark Construction
MVKSwapchain::MVKSwapchain(MVKDevice* device,
const VkSwapchainCreateInfoKHR* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {
MVKSwapchain::MVKSwapchain(MVKDevice* device, const VkSwapchainCreateInfoKHR* pCreateInfo)
: MVKVulkanAPIDeviceObject(device),
_surface((MVKSurface*)pCreateInfo->surface),
_imageExtent(pCreateInfo->imageExtent) {
// Check if oldSwapchain is properly set
auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain;
if (oldSwapchain == _surface->_activeSwapchain) {
_surface->setActiveSwapchain(this);
} else {
setConfigurationResult(reportError(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR, "vkCreateSwapchainKHR(): pCreateInfo->oldSwapchain does not match the VkSwapchain that is in use by the surface"));
return;
}
memset(_presentTimingHistory, 0, sizeof(_presentTimingHistory));
// Retrieve the scaling and present mode structs if they are supplied.
@ -280,10 +418,6 @@ MVKSwapchain::MVKSwapchain(MVKDevice* device,
}
}
// If applicable, release any surfaces (not currently being displayed) from the old swapchain.
MVKSwapchain* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain;
if (oldSwapchain) { oldSwapchain->releaseUndisplayedSurfaces(); }
uint32_t imgCnt = mvkClamp(pCreateInfo->minImageCount,
_device->_pMetalFeatures->minSwapchainImageCount,
_device->_pMetalFeatures->maxSwapchainImageCount);
@ -333,85 +467,86 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo,
VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo,
uint32_t imgCnt) {
MVKSurface* mvkSrfc = (MVKSurface*)pCreateInfo->surface;
_mtlLayer = mvkSrfc->getCAMetalLayer();
if ( !_mtlLayer ) {
setConfigurationResult(mvkSrfc->getConfigurationResult());
_surfaceLost = true;
return;
}
auto* mtlLayer = getCAMetalLayer();
if ( !mtlLayer || getIsSurfaceLost() ) { return; }
auto minMagFilter = getMVKConfig().swapchainMinMagFilterUseNearest ? kCAFilterNearest : kCAFilterLinear;
_mtlLayer.device = getMTLDevice();
_mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat);
_mtlLayer.maximumDrawableCountMVK = imgCnt;
_mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR);
_mtlLayer.minificationFilter = minMagFilter;
_mtlLayer.magnificationFilter = minMagFilter;
_mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo);
_mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT));
// Remember the extent to later detect if it has changed under the covers,
// and set the drawable size of the CAMetalLayer from the extent.
_mtlLayerDrawableExtent = pCreateInfo->imageExtent;
_mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_mtlLayerDrawableExtent);
mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_imageExtent);
mtlLayer.device = getMTLDevice();
mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat);
mtlLayer.maximumDrawableCountMVK = imgCnt;
mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR);
mtlLayer.minificationFilter = minMagFilter;
mtlLayer.magnificationFilter = minMagFilter;
mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo);
mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT));
// Because of a regression in Metal, the most recent one or two presentations may not
// complete and call back. Changing the CAMetalLayer drawableSize will force any incomplete
// presentations on the oldSwapchain to complete and call back, but if the drawableSize
// is not changing from the previous, we force those completions first.
auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain;
if (oldSwapchain && mvkVkExtent2DsAreEqual(pCreateInfo->imageExtent, _surface->getExtent())) {
oldSwapchain->forceUnpresentedImageCompletion();
}
if (pCreateInfo->compositeAlpha != VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
_mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
}
switch (pCreateInfo->imageColorSpace) {
case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB;
mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
break;
case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_BT709_NONLINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709;
mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
break;
case VK_COLOR_SPACE_BT2020_LINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
#if MVK_XCODE_12
case VK_COLOR_SPACE_HDR10_ST2084_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_HDR10_HLG_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG;
mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
#endif
case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT:
_mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998;
mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
break;
case VK_COLOR_SPACE_PASS_THROUGH_EXT:
_mtlLayer.colorspace = nil;
_mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
mtlLayer.colorspace = nil;
mtlLayer.wantsExtendedDynamicRangeContentMVK = NO;
break;
default:
setConfigurationResult(reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "vkCreateSwapchainKHR(): Metal does not support VkColorSpaceKHR value %d.", pCreateInfo->imageColorSpace));
@ -421,22 +556,6 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo,
// TODO: set additional CAMetalLayer properties before extracting drawables:
// - presentsWithTransaction
// - drawsAsynchronously
if ( [_mtlLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) {
// Sometimes, the owning view can replace its CAMetalLayer. In that case, the client
// needs to recreate the swapchain, or no content will be displayed.
_layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) {
if ( ![path isEqualToString: @"layer"] ) { return; }
this->releaseLayer();
} forObject: _mtlLayer.delegate atKeyPath: @"layer"];
}
}
// Marks the surface as lost and releases the KVO observer that watched for the
// owning view replacing its CAMetalLayer. Guarded by _layerLock for thread safety.
void MVKSwapchain::releaseLayer() {
	std::lock_guard<std::mutex> lock(_layerLock);
	_surfaceLost = true;
	[_layerObserver release];
	_layerObserver = nil;
}
// Initializes the array of images used for the surface of this swapchain.
@ -460,13 +579,12 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo
}
VkExtent2D imgExtent = pCreateInfo->imageExtent;
VkImageCreateInfo imgInfo = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = VK_NULL_HANDLE,
.imageType = VK_IMAGE_TYPE_2D,
.format = getPixelFormats()->getVkFormat(_mtlLayer.pixelFormat),
.extent = { imgExtent.width, imgExtent.height, 1 },
.format = pCreateInfo->imageFormat,
.extent = mvkVkExtent3DFromVkExtent2D(imgExtent),
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
@ -492,133 +610,34 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo
_presentableImages.push_back(_device->createPresentableSwapchainImage(&imgInfo, this, imgIdx, nullptr));
}
NSString* screenName = @"Main Screen";
auto* mtlLayer = getCAMetalLayer();
if (mtlLayer) {
NSString* screenName = @"Main Screen";
#if MVK_MACOS && !MVK_MACCAT
if ([_mtlLayer.screenMVK respondsToSelector:@selector(localizedName)]) {
screenName = _mtlLayer.screenMVK.localizedName;
}
#endif
MVKLogInfo("Created %d swapchain images with initial size (%d, %d) and contents scale %.1f for screen %s.",
imgCnt, imgExtent.width, imgExtent.height, _mtlLayer.contentsScale, screenName.UTF8String);
}
// Populates pRefreshCycleDuration with the display refresh period in nanoseconds,
// derived per-platform from the screen backing the CAMetalLayer.
// Falls back to 60 fps when the rate is unavailable or reported as zero
// (and to a fixed 90 fps on visionOS).
VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) {
	if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
#if MVK_VISIONOS
	// TODO: See if this can be obtained from OS instead
	NSInteger framesPerSecond = 90;
#elif MVK_IOS_OR_TVOS || MVK_MACCAT
	NSInteger framesPerSecond = 60;
	UIScreen* screen = _mtlLayer.screenMVK;
	if ([screen respondsToSelector: @selector(maximumFramesPerSecond)]) {
		framesPerSecond = screen.maximumFramesPerSecond;
	}
#elif MVK_MACOS && !MVK_MACCAT
	// Look up the display mode of the CGDirectDisplay backing this NSScreen.
	NSScreen* screen = _mtlLayer.screenMVK;
	CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue];
	CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId);
	double framesPerSecond = CGDisplayModeGetRefreshRate(mode);
	CGDisplayModeRelease(mode);
#if MVK_XCODE_13
	if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)])
		framesPerSecond = [screen maximumFramesPerSecond];
#endif
	// Builtin panels, e.g., on MacBook, report a zero refresh rate.
	if (framesPerSecond == 0)
		framesPerSecond = 60.0;
#endif
	pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond;
	return VK_SUCCESS;
}
VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
VkResult res = VK_SUCCESS;
std::lock_guard<std::mutex> lock(_presentHistoryLock);
if (pPresentationTimings == nullptr) {
*pCount = _presentHistoryCount;
} else {
uint32_t countRemaining = std::min(_presentHistoryCount, *pCount);
uint32_t outIndex = 0;
res = (*pCount >= _presentHistoryCount) ? VK_SUCCESS : VK_INCOMPLETE;
*pCount = countRemaining;
while (countRemaining > 0) {
pPresentationTimings[outIndex] = _presentTimingHistory[_presentHistoryHeadIndex];
countRemaining--;
_presentHistoryCount--;
_presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory;
outIndex++;
auto* screen = mtlLayer.screenMVK;
if ([screen respondsToSelector:@selector(localizedName)]) {
screenName = screen.localizedName;
}
}
return res;
}
// Records a completed presentation into the circular timing-history buffer.
// When the buffer is full, the head advances so the oldest entry is overwritten.
// Guarded by _presentHistoryLock for thread safety.
void MVKSwapchain::recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) {
	std::lock_guard<std::mutex> lock(_presentHistoryLock);
	if (_presentHistoryCount < kMaxPresentationHistory) {
		_presentHistoryCount++;
	} else {
		_presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory;
	}
	// If actual present time is not available, use desired time instead, and if that
	// hasn't been set, use the current time, which should be reasonably accurate (sub-ms),
	// since we are here as part of the addPresentedHandler: callback.
	if (actualPresentTime == 0) { actualPresentTime = presentInfo.desiredPresentTime; }
	if (actualPresentTime == 0) { actualPresentTime = CACurrentMediaTime() * 1.0e9; }
	_presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID;
	_presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime;
	_presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime;
	// These details are not available in Metal
	_presentTimingHistory[_presentHistoryIndex].earliestPresentTime = actualPresentTime;
	_presentTimingHistory[_presentHistoryIndex].presentMargin = 0;
	_presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory;
}
void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) {
if (!pRegion || pRegion->rectangleCount == 0) {
[_mtlLayer setNeedsDisplay];
return;
}
for (uint32_t i = 0; i < pRegion->rectangleCount; ++i) {
CGRect cgRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[i]);
#if MVK_MACOS
// VK_KHR_incremental_present specifies an upper-left origin, but macOS by default
// uses a lower-left origin.
cgRect.origin.y = _mtlLayer.bounds.size.height - cgRect.origin.y;
#endif
// We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them
// in points, which is pixels / contentsScale.
CGFloat scaleFactor = _mtlLayer.contentsScale;
cgRect.origin.x /= scaleFactor;
cgRect.origin.y /= scaleFactor;
cgRect.size.width /= scaleFactor;
cgRect.size.height /= scaleFactor;
[_mtlLayer setNeedsDisplayInRect:cgRect];
MVKLogInfo("Created %d swapchain images with size (%d, %d) and contents scale %.1f in layer %s (%p) on screen %s.",
imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, mtlLayer.name.UTF8String, mtlLayer, screenName.UTF8String);
} else {
MVKLogInfo("Created %d swapchain images with size (%d, %d) on headless surface.", imgCnt, imgExtent.width, imgExtent.height);
}
}
// A retention loop exists between the swapchain and its images. The swapchain images
// retain the swapchain because they can be in flight when the app destroys the swapchain.
// Release the images now, when the app destroys the swapchain, so they will be destroyed when
// no longer held by the presentation flow, and will in turn release the swapchain for destruction.
void MVKSwapchain::destroy() {
	// If this swapchain was not replaced by a new swapchain, remove this swapchain
	// from the surface, and force any outstanding presentations to complete.
	if (_surface->_activeSwapchain == this) {
		_surface->_activeSwapchain = nullptr;
		forceUnpresentedImageCompletion();
	}
	// Release each presentable image; in-flight images are destroyed once the presentation flow releases them.
	for (auto& img : _presentableImages) { _device->destroyPresentableSwapchainImage(img, NULL); }
	MVKVulkanAPIDeviceObject::destroy();
}
MVKSwapchain::~MVKSwapchain() {
	// Destroy the watermark renderer, if one was created.
	if (_licenseWatermark) { _licenseWatermark->destroy(); }
	releaseLayer();
}

View File

@ -63,6 +63,9 @@ public:
/** Returns whether this instance is in a reserved state. */
bool isReserved();
/** Returns the number of outstanding reservations. */
uint32_t getReservationCount();
/**
* Blocks processing on the current thread until any or all (depending on configuration) outstanding
* reservations have been released, or until the specified timeout interval in nanoseconds expires.
@ -89,20 +92,19 @@ public:
* require a separate call to the release() function to cause the semaphore to stop blocking.
*/
MVKSemaphoreImpl(bool waitAll = true, uint32_t reservationCount = 0)
: _shouldWaitAll(waitAll), _reservationCount(reservationCount) {}
: _reservationCount(reservationCount), _shouldWaitAll(waitAll) {}
/** Destructor. */
~MVKSemaphoreImpl();
private:
bool operator()();
inline bool isClear() { return _reservationCount == 0; } // Not thread-safe
bool isClear() { return _reservationCount == 0; } // Not thread-safe
std::mutex _lock;
std::condition_variable _blocker;
bool _shouldWaitAll;
uint32_t _reservationCount;
bool _shouldWaitAll;
};

View File

@ -50,6 +50,11 @@ bool MVKSemaphoreImpl::isReserved() {
return !isClear();
}
// Returns the number of outstanding reservations, read under the protection of _lock.
uint32_t MVKSemaphoreImpl::getReservationCount() {
	lock_guard<mutex> lock(_lock);
	return _reservationCount;
}
bool MVKSemaphoreImpl::wait(uint64_t timeout, bool reserveAgain) {
unique_lock<mutex> lock(_lock);
@ -123,7 +128,7 @@ uint64_t MVKSemaphoreMTLEvent::deferSignal() {
}
void MVKSemaphoreMTLEvent::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t deferToken) {
if (mtlCmdBuff) { [mtlCmdBuff encodeSignalEvent: _mtlEvent value: deferToken]; }
[mtlCmdBuff encodeSignalEvent: _mtlEvent value: deferToken];
}
MVKSemaphoreMTLEvent::MVKSemaphoreMTLEvent(MVKDevice* device,
@ -583,7 +588,7 @@ void MVKMetalCompiler::compile(unique_lock<mutex>& lock, dispatch_block_t block)
if (_compileError) { handleError(); }
mvkDev->addActivityPerformance(*_pPerformanceTracker, _startTime);
mvkDev->addPerformanceInterval(*_pPerformanceTracker, _startTime);
}
void MVKMetalCompiler::handleError() {

View File

@ -91,6 +91,7 @@ MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE
MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE, 10.11, 8.0, 1.0)
MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(KHR_swapchain_mutable_format, KHR_SWAPCHAIN_MUTABLE_FORMAT, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(KHR_synchronization2, KHR_SYNCHRONIZATION_2, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(KHR_timeline_semaphore, KHR_TIMELINE_SEMAPHORE, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(KHR_uniform_buffer_standard_layout, KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(KHR_variable_pointers, KHR_VARIABLE_POINTERS, DEVICE, 10.11, 8.0, 1.0)
@ -102,9 +103,13 @@ MVK_EXTENSION(EXT_debug_marker, EXT_DEBUG_MARKER,
MVK_EXTENSION(EXT_debug_report, EXT_DEBUG_REPORT, INSTANCE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_debug_utils, EXT_DEBUG_UTILS, INSTANCE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_descriptor_indexing, EXT_DESCRIPTOR_INDEXING, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_extended_dynamic_state, EXT_EXTENDED_DYNAMIC_STATE, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_extended_dynamic_state2, EXT_EXTENDED_DYNAMIC_STATE_2, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_extended_dynamic_state3, EXT_EXTENDED_DYNAMIC_STATE_3, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_external_memory_host, EXT_EXTERNAL_MEMORY_HOST, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, DEVICE, 10.13, 11.0, 1.0)
MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, DEVICE, 10.15, MVK_NA, MVK_NA)
MVK_EXTENSION(EXT_headless_surface, EXT_HEADLESS_SURFACE, INSTANCE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_host_query_reset, EXT_HOST_QUERY_RESET, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_image_robustness, EXT_IMAGE_ROBUSTNESS, DEVICE, 10.11, 8.0, 1.0)
MVK_EXTENSION(EXT_inline_uniform_block, EXT_INLINE_UNIFORM_BLOCK, DEVICE, 10.11, 8.0, 1.0)

View File

@ -23,12 +23,10 @@
#import <QuartzCore/QuartzCore.h>
#if MVK_IOS_OR_TVOS || MVK_MACCAT
# define PLATFORM_SCREEN_CLASS UIScreen
# include <UIKit/UIScreen.h>
#endif
#if MVK_MACOS && !MVK_MACCAT
# define PLATFORM_SCREEN_CLASS NSScreen
# include <AppKit/NSScreen.h>
#endif
@ -76,9 +74,16 @@
*/
@property(nonatomic, readwrite) CFStringRef colorspaceNameMVK;
#if !MVK_VISIONOS
#if MVK_IOS_OR_TVOS || MVK_MACCAT
/** Returns the screen on which this layer is rendering. */
@property(nonatomic, readonly) PLATFORM_SCREEN_CLASS* screenMVK;
@property(nonatomic, readonly) UIScreen* screenMVK;
#endif
#if MVK_MACOS && !MVK_MACCAT
/** Returns the screen on which this layer is rendering. */
@property(nonatomic, readonly) NSScreen* screenMVK;
@property(nonatomic, readonly) NSScreen* privateScreenMVKImpl;
#endif
@end

View File

@ -18,6 +18,7 @@
#include "CAMetalLayer+MoltenVK.h"
#include "MVKOSExtensions.h"
#if MVK_MACOS && !MVK_MACCAT
# include <AppKit/NSApplication.h>
@ -88,6 +89,13 @@
#if MVK_MACOS && !MVK_MACCAT
-(NSScreen*) screenMVK {
__block NSScreen* screen;
mvkDispatchToMainAndWait(^{ screen = self.privateScreenMVKImpl; });
return screen;
}
// Search for the screen currently displaying the layer, and default to the main screen if it can't be found.
-(NSScreen*) privateScreenMVKImpl {
// If this layer has a delegate that is an NSView, and the view is in a window, retrieve the screen from the window.
if ([self.delegate isKindOfClass: NSView.class]) {
NSWindow* window = ((NSView*)self.delegate).window;

View File

@ -57,7 +57,7 @@ public:
void reportMessage(MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4);
/**
* Report a Vulkan error message, on behalf of the object, which may be nil.
* Report a message, on behalf of the object, which may be nil.
* Reporting includes logging to a standard system logging stream, and if the object
* is not nil and has access to the VkInstance, the message will also be forwarded
* to the VkInstance for output to the Vulkan debug report messaging API.
@ -65,14 +65,19 @@ public:
static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4);
/**
* Report a Vulkan error message, on behalf of the object, which may be nil.
* Report a Vulkan result message. This includes logging to a standard system logging stream,
* and some subclasses will also forward the message to their VkInstance for output to the
* Vulkan debug report messaging API.
*/
VkResult reportResult(VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5);
/**
* Report a Vulkan result message, on behalf of the object, which may be nil.
* Reporting includes logging to a standard system logging stream, and if the object
* is not nil and has access to the VkInstance, the message will also be forwarded
* to the VkInstance for output to the Vulkan debug report messaging API.
*
* This is the core reporting implementation. Other similar functions delegate here.
*/
static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0);
static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5);
/**
* Report a Vulkan error message. This includes logging to a standard system logging stream,
@ -90,19 +95,29 @@ public:
static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) __printflike(3, 4);
/**
* Report a Vulkan error message, on behalf of the object, which may be nil.
* Report a Vulkan warning message. This includes logging to a standard system logging stream,
* and some subclasses will also forward the message to their VkInstance for output to the
* Vulkan debug report messaging API.
*/
VkResult reportWarning(VkResult vkRslt, const char* format, ...) __printflike(3, 4);
/**
* Report a Vulkan warning message, on behalf of the object, which may be nil.
* Reporting includes logging to a standard system logging stream, and if the object
* is not nil and has access to the VkInstance, the message will also be forwarded
* to the VkInstance for output to the Vulkan debug report messaging API.
*
* This is the core reporting implementation. Other similar functions delegate here.
*/
static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) __printflike(3, 0);
static VkResult reportWarning(MVKBaseObject* mvkObj, VkResult vkRslt, const char* format, ...) __printflike(3, 4);
/** Destroys this object. Default behaviour simply deletes it. Subclasses may override to delay deletion. */
virtual void destroy() { delete this; }
virtual ~MVKBaseObject() {}
virtual ~MVKBaseObject() {}
protected:
static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(4, 0);
static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0);
};
@ -135,7 +150,7 @@ public:
* Called when this instance has been retained as a reference by another object,
* indicating that this instance will not be deleted until that reference is released.
*/
void retain() { _refCount++; }
void retain() { _refCount.fetch_add(1, std::memory_order_relaxed); }
/**
* Called when this instance has been released as a reference from another object.
@ -146,7 +161,7 @@ public:
* Note that the destroy() function is called on the BaseClass.
* Releasing will not call any overridden destroy() function in a descendant class.
*/
void release() { if (--_refCount == 0) { BaseClass::destroy(); } }
void release() { if (_refCount.fetch_sub(1, std::memory_order_acq_rel) == 1) { BaseClass::destroy(); } }
/**
* Marks this instance as destroyed. If all previous references to this instance
@ -158,15 +173,10 @@ public:
MVKReferenceCountingMixin() : _refCount(1) {}
/** Copy starts with fresh reference counts. */
MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) {
_refCount = 1;
}
MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) : _refCount(1) {}
/** Copy starts with fresh reference counts. */
MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) {
_refCount = 1;
return *this;
}
/** Don't overwrite refcounted objects. */
MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) = delete;
protected:
std::atomic<uint32_t> _refCount;
@ -202,3 +212,15 @@ public:
protected:
VkResult _configurationResult = VK_SUCCESS;
};
#pragma mark -
#pragma mark Support functions
/**
* If the object is not a nullptr, returns the MoltenVK configuration info for the
* VkInstance that created the object, otherwise returns the global configuration info.
*/
static inline const MVKConfiguration& mvkGetMVKConfig(MVKBaseObject* mvkObj) {
return mvkObj ? mvkObj->getMVKConfig() : mvkConfig();
}

View File

@ -27,24 +27,19 @@
using namespace std;
static const char* getReportingLevelString(MVKConfigLogLevel logLevel) {
switch (logLevel) {
case MVK_CONFIG_LOG_LEVEL_DEBUG:
return "mvk-debug";
case MVK_CONFIG_LOG_LEVEL_INFO:
return "mvk-info";
case MVK_CONFIG_LOG_LEVEL_WARNING:
return "mvk-warn";
case MVK_CONFIG_LOG_LEVEL_ERROR:
default:
return "mvk-error";
}
}
#pragma mark -
#pragma mark MVKBaseObject
static const char* getReportingLevelString(MVKConfigLogLevel logLevel) {
switch (logLevel) {
case MVK_CONFIG_LOG_LEVEL_ERROR: return "mvk-error";
case MVK_CONFIG_LOG_LEVEL_WARNING: return "mvk-warn";
case MVK_CONFIG_LOG_LEVEL_INFO: return "mvk-info";
case MVK_CONFIG_LOG_LEVEL_DEBUG: return "mvk-debug";
default: return "mvk-unknown";
}
}
string MVKBaseObject::getClassName() { return mvk::getTypeName(this); }
const MVKConfiguration& MVKBaseObject::getMVKConfig() {
@ -109,10 +104,43 @@ void MVKBaseObject::reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLe
free(redoBuff);
}
// Reports a VkResult message on behalf of this object, at the given log level.
// Packages the variadic arguments into a va_list and delegates to the static
// core implementation, returning the same VkResult so callers can use this
// call as an expression (e.g. `return reportResult(...)`).
VkResult MVKBaseObject::reportResult(VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) {
va_list args;
va_start(args, format);
VkResult rslt = reportResult(this, vkErr, logLevel, format, args);
va_end(args);
return rslt;
}
// Static variant: reports a VkResult message on behalf of mvkObj, which may be nil.
// Packages the variadic arguments into a va_list and delegates to the core
// va_list implementation, returning the same VkResult for expression-style use.
VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) {
va_list args;
va_start(args, format);
VkResult rslt = reportResult(mvkObj, vkErr, logLevel, format, args);
va_end(args);
return rslt;
}
// Core result-reporting implementation; the varargs overloads delegate here.
// Prepends the textual name of vkRslt to the format string, forwards the
// combined message to reportMessage() (which logs, and may forward to the
// VkInstance debug-report machinery), and returns vkRslt unchanged.
VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) {
// Prepend the result code to the format string
const char* vkRsltName = mvkVkResultName(vkRslt);
size_t rsltLen = strlen(vkRsltName) + strlen(format) + 4;	// room for ": " separator and null terminator (one byte spare)
char fmtStr[rsltLen];	// NOTE(review): runtime-sized array is a compiler extension in C++ — confirm it is acceptable on all supported toolchains
snprintf(fmtStr, rsltLen, "%s: %s", vkRsltName, format);
// Report the message
va_list lclArgs;
va_copy(lclArgs, args);	// copy so the caller's va_list remains valid after this call
reportMessage(mvkObj, logLevel, fmtStr, lclArgs);
va_end(lclArgs);
return vkRslt;
}
VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) {
va_list args;
va_start(args, format);
VkResult rslt = reportError(this, vkErr, format, args);
VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args);
va_end(args);
return rslt;
}
@ -120,25 +148,23 @@ VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) {
VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) {
va_list args;
va_start(args, format);
VkResult rslt = reportError(mvkObj, vkErr, format, args);
VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args);
va_end(args);
return rslt;
}
// This is the core reporting implementation. Other similar functions delegate here.
VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) {
// Prepend the error code to the format string
const char* vkRsltName = mvkVkResultName(vkErr);
size_t rsltLen = strlen(vkRsltName) + strlen(format) + 4;
char fmtStr[rsltLen];
snprintf(fmtStr, rsltLen, "%s: %s", vkRsltName, format);
// Report the error
va_list lclArgs;
va_copy(lclArgs, args);
reportMessage(mvkObj, MVK_CONFIG_LOG_LEVEL_ERROR, fmtStr, lclArgs);
va_end(lclArgs);
return vkErr;
// Reports a warning-level VkResult message on behalf of this object.
// Thin varargs wrapper that delegates to the core reportResult() implementation
// at MVK_CONFIG_LOG_LEVEL_WARNING, returning the same VkResult.
VkResult MVKBaseObject::reportWarning(VkResult vkErr, const char* format, ...) {
va_list args;
va_start(args, format);
VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args);
va_end(args);
return rslt;
}
// Static variant: reports a warning-level VkResult message on behalf of mvkObj,
// which may be nil. Delegates to the core reportResult() implementation at
// MVK_CONFIG_LOG_LEVEL_WARNING, returning the same VkResult.
VkResult MVKBaseObject::reportWarning(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) {
va_list args;
va_start(args, format);
VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args);
va_end(args);
return rslt;
}

View File

@ -90,7 +90,11 @@ public:
* and optionally clears that bit. If no bits are set, returns the size() of this bit array.
*/
size_t getIndexOfFirstSetBit(size_t startIndex, bool shouldClear) {
size_t startSecIdx = std::max(getIndexOfSection(startIndex), _clearedSectionCount);
size_t startSecIdx = getIndexOfSection(startIndex);
if (startSecIdx < _clearedSectionCount) {
startSecIdx = _clearedSectionCount;
startIndex = 0;
}
size_t bitIdx = startSecIdx << SectionMaskSize;
size_t secCnt = getSectionCount();
for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) {
@ -101,6 +105,7 @@ public:
if (shouldClear) { clearBit(bitIdx); }
return std::min(bitIdx, _bitCount);
}
startIndex = 0;
}
return std::min(bitIdx, _bitCount);
}

View File

@ -0,0 +1,86 @@
/*
* MVKConfigMembers.def
*
* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// The items in the list below describe the members of the MVKConfiguration struct.
// When a new member is added to the MVKConfiguration struct, a corresponding description
// must be added here.
//
// To use this file, define the macro:
//
// MVK_CONFIG_MEMBER(member, mbrType, name)
//
// and if strings are handled differently:
//
// MVK_CONFIG_MEMBER_STRING(member, mbrType, name)
//
// then #include this file inline with your code.
//
// The name parameter is the name of the configuration parameter, which is used as the name
// of the environment variable, and build setting, that sets the config value, and is entered
// here without the "MVK_CONFIG_" prefix.
#ifndef MVK_CONFIG_MEMBER
#error MVK_CONFIG_MEMBER must be defined before including this file
#endif
#ifndef MVK_CONFIG_MEMBER_STRING
#	define MVK_CONFIG_MEMBER_STRING(member, mbrType, name) MVK_CONFIG_MEMBER(member, mbrType, name)
#endif
MVK_CONFIG_MEMBER(debugMode, VkBool32, DEBUG)
MVK_CONFIG_MEMBER(shaderConversionFlipVertexY, VkBool32, SHADER_CONVERSION_FLIP_VERTEX_Y)
MVK_CONFIG_MEMBER(synchronousQueueSubmits, VkBool32, SYNCHRONOUS_QUEUE_SUBMITS)
MVK_CONFIG_MEMBER(prefillMetalCommandBuffers, MVKPrefillMetalCommandBuffersStyle, PREFILL_METAL_COMMAND_BUFFERS)
MVK_CONFIG_MEMBER(maxActiveMetalCommandBuffersPerQueue, uint32_t, MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE)
MVK_CONFIG_MEMBER(supportLargeQueryPools, VkBool32, SUPPORT_LARGE_QUERY_POOLS)
MVK_CONFIG_MEMBER(presentWithCommandBuffer, VkBool32, PRESENT_WITH_COMMAND_BUFFER)
MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MAG_FILTER_USE_NEAREST) // Deprecated legacy renaming
MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST)
MVK_CONFIG_MEMBER(metalCompileTimeout, uint64_t, METAL_COMPILE_TIMEOUT)
MVK_CONFIG_MEMBER(performanceTracking, VkBool32, PERFORMANCE_TRACKING)
MVK_CONFIG_MEMBER(performanceLoggingFrameCount, uint32_t, PERFORMANCE_LOGGING_FRAME_COUNT)
MVK_CONFIG_MEMBER(activityPerformanceLoggingStyle, MVKConfigActivityPerformanceLoggingStyle, ACTIVITY_PERFORMANCE_LOGGING_STYLE)
MVK_CONFIG_MEMBER(displayWatermark, VkBool32, DISPLAY_WATERMARK)
MVK_CONFIG_MEMBER(specializedQueueFamilies, VkBool32, SPECIALIZED_QUEUE_FAMILIES)
MVK_CONFIG_MEMBER(switchSystemGPU, VkBool32, SWITCH_SYSTEM_GPU)
MVK_CONFIG_MEMBER(fullImageViewSwizzle, VkBool32, FULL_IMAGE_VIEW_SWIZZLE)
MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueFamilyIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX)
MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX)
MVK_CONFIG_MEMBER(fastMathEnabled, MVKConfigFastMath, FAST_MATH_ENABLED)
MVK_CONFIG_MEMBER(logLevel, MVKConfigLogLevel, LOG_LEVEL)
MVK_CONFIG_MEMBER(traceVulkanCalls, MVKConfigTraceVulkanCalls, TRACE_VULKAN_CALLS)
MVK_CONFIG_MEMBER(forceLowPowerGPU, VkBool32, FORCE_LOW_POWER_GPU)
MVK_CONFIG_MEMBER(semaphoreUseMTLFence, VkBool32, ALLOW_METAL_FENCES) // Deprecated legacy
MVK_CONFIG_MEMBER(semaphoreSupportStyle, MVKVkSemaphoreSupportStyle, VK_SEMAPHORE_SUPPORT_STYLE)
MVK_CONFIG_MEMBER(autoGPUCaptureScope, MVKConfigAutoGPUCaptureScope, AUTO_GPU_CAPTURE_SCOPE)
MVK_CONFIG_MEMBER_STRING(autoGPUCaptureOutputFilepath, char*, AUTO_GPU_CAPTURE_OUTPUT_FILE)
MVK_CONFIG_MEMBER(texture1DAs2D, VkBool32, TEXTURE_1D_AS_2D)
MVK_CONFIG_MEMBER(preallocateDescriptors, VkBool32, PREALLOCATE_DESCRIPTORS)
MVK_CONFIG_MEMBER(useCommandPooling, VkBool32, USE_COMMAND_POOLING)
MVK_CONFIG_MEMBER(useMTLHeap, VkBool32, USE_MTLHEAP)
MVK_CONFIG_MEMBER(apiVersionToAdvertise, uint32_t, API_VERSION_TO_ADVERTISE)
MVK_CONFIG_MEMBER(advertiseExtensions, uint32_t, ADVERTISE_EXTENSIONS)
MVK_CONFIG_MEMBER(resumeLostDevice, VkBool32, RESUME_LOST_DEVICE)
MVK_CONFIG_MEMBER(useMetalArgumentBuffers, MVKUseMetalArgumentBuffers, USE_METAL_ARGUMENT_BUFFERS)
MVK_CONFIG_MEMBER(shaderSourceCompressionAlgorithm, MVKConfigCompressionAlgorithm, SHADER_COMPRESSION_ALGORITHM)
MVK_CONFIG_MEMBER(shouldMaximizeConcurrentCompilation, VkBool32, SHOULD_MAXIMIZE_CONCURRENT_COMPILATION)
MVK_CONFIG_MEMBER(timestampPeriodLowPassAlpha, float, TIMESTAMP_PERIOD_LOWPASS_ALPHA)
#undef MVK_CONFIG_MEMBER
#undef MVK_CONFIG_MEMBER_STRING

View File

@ -18,57 +18,42 @@
#include "MVKEnvironment.h"
#include "MVKOSExtensions.h"
#include "MVKFoundation.h"
// Return the expected size of MVKConfiguration, based on contents of MVKConfigMembers.def.
// Sums sizeof(mbrType) over every member listed in MVKConfigMembers.def, by redefining
// MVK_CONFIG_MEMBER as an accumulation statement and re-including the .def file.
// NOTE(review): this assumes MVKConfiguration contains no inter-member padding;
// the static_assert comparing this to sizeof(MVKConfiguration) will catch a mismatch.
static constexpr uint32_t getExpectedMVKConfigurationSize() {
#define MVK_CONFIG_MEMBER(member, mbrType, name) cfgSize += sizeof(mbrType);
uint32_t cfgSize = 0;
#include "MVKConfigMembers.def"
return cfgSize;
}
static bool _mvkConfigInitialized = false;
static void mvkInitConfigFromEnvVars() {
static_assert(getExpectedMVKConfigurationSize() == sizeof(MVKConfiguration), "MVKConfigMembers.def does not match the members of MVKConfiguration.");
_mvkConfigInitialized = true;
MVKConfiguration evCfg;
std::string evGPUCapFileStrObj;
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.debugMode, MVK_CONFIG_DEBUG);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shaderConversionFlipVertexY, MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.synchronousQueueSubmits, MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.prefillMetalCommandBuffers, MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.maxActiveMetalCommandBuffersPerQueue, MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.supportLargeQueryPools, MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.presentWithCommandBuffer, MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST); // Deprecated legacy env var
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST);
MVK_SET_FROM_ENV_OR_BUILD_INT64 (evCfg.metalCompileTimeout, MVK_CONFIG_METAL_COMPILE_TIMEOUT);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.performanceTracking, MVK_CONFIG_PERFORMANCE_TRACKING);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.performanceLoggingFrameCount, MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.activityPerformanceLoggingStyle, MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.displayWatermark, MVK_CONFIG_DISPLAY_WATERMARK);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.specializedQueueFamilies, MVK_CONFIG_SPECIALIZED_QUEUE_FAMILIES);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.switchSystemGPU, MVK_CONFIG_SWITCH_SYSTEM_GPU);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.fullImageViewSwizzle, MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueFamilyIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.fastMathEnabled, MVK_CONFIG_FAST_MATH_ENABLED);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.logLevel, MVK_CONFIG_LOG_LEVEL);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.traceVulkanCalls, MVK_CONFIG_TRACE_VULKAN_CALLS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.forceLowPowerGPU, MVK_CONFIG_FORCE_LOW_POWER_GPU);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLFence, MVK_ALLOW_METAL_FENCES);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.semaphoreSupportStyle, MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.autoGPUCaptureScope, MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE);
MVK_SET_FROM_ENV_OR_BUILD_STRING(evCfg.autoGPUCaptureOutputFilepath, MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE, evGPUCapFileStrObj);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.texture1DAs2D, MVK_CONFIG_TEXTURE_1D_AS_2D);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.preallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useCommandPooling, MVK_CONFIG_USE_COMMAND_POOLING);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useMTLHeap, MVK_CONFIG_USE_MTLHEAP);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.apiVersionToAdvertise, MVK_CONFIG_API_VERSION_TO_ADVERTISE);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.advertiseExtensions, MVK_CONFIG_ADVERTISE_EXTENSIONS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.resumeLostDevice, MVK_CONFIG_RESUME_LOST_DEVICE);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.useMetalArgumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.shaderSourceCompressionAlgorithm, MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shouldMaximizeConcurrentCompilation, MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION);
#define STR(name) #name
// Support legacy environment variable MVK_DEBUG, but only if it has been explicitly set as an environment variable.
bool legacyDebugWasFound = false;
bool legacyDebugEV = mvkGetEnvVarBool("MVK_DEBUG", &legacyDebugWasFound);
if (legacyDebugWasFound) { evCfg.debugMode = legacyDebugEV; }
#define MVK_CONFIG_MEMBER(member, mbrType, name) \
evCfg.member = (mbrType)mvkGetEnvVarNumber(STR(MVK_CONFIG_##name), MVK_CONFIG_##name);
#define MVK_CONFIG_MEMBER_STRING(member, mbrType, name) \
evCfg.member = mvkGetEnvVarString(STR(MVK_CONFIG_##name), evGPUCapFileStrObj, MVK_CONFIG_##name);
#include "MVKConfigMembers.def"
// At this point, debugMode has been set by env var MVK_CONFIG_DEBUG.
// MVK_CONFIG_DEBUG replaced the deprecated MVK_DEBUG env var, so for
// legacy use, if the MVK_DEBUG env var is explicitly set, override debugMode.
double noEV = -3.1415; // An unlikely env var value.
double cvMVKDebug = mvkGetEnvVarNumber("MVK_DEBUG", noEV);
if (cvMVKDebug != noEV) { evCfg.debugMode = cvMVKDebug; }
// Deprecated legacy VkSemaphore MVK_ALLOW_METAL_FENCES and MVK_ALLOW_METAL_EVENTS config.
// Legacy MVK_ALLOW_METAL_EVENTS is covered by MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE,
@ -76,9 +61,7 @@ static void mvkInitConfigFromEnvVars() {
// disabled, disable semaphoreUseMTLEvent (aliased as semaphoreSupportStyle value
// MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE), and let mvkSetConfig()
// further process legacy behavior of MVK_ALLOW_METAL_FENCES.
bool sem4UseMTLEvent;
MVK_SET_FROM_ENV_OR_BUILD_BOOL(sem4UseMTLEvent, MVK_ALLOW_METAL_EVENTS);
if ( !sem4UseMTLEvent ) {
if ( !mvkGetEnvVarNumber("MVK_CONFIG_ALLOW_METAL_EVENTS", 1.0) ) {
evCfg.semaphoreUseMTLEvent = (MVKVkSemaphoreSupportStyle)false; // Disabled. Also semaphoreSupportStyle MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE.
}
@ -86,18 +69,16 @@ static void mvkInitConfigFromEnvVars() {
// MVK_CONFIG_PERFORMANCE_LOGGING_INLINE env var was used, and activityPerformanceLoggingStyle
// was not already set by MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE, set
// activityPerformanceLoggingStyle to MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE.
bool logPerfInline;
MVK_SET_FROM_ENV_OR_BUILD_BOOL(logPerfInline, MVK_CONFIG_PERFORMANCE_LOGGING_INLINE);
bool logPerfInline = mvkGetEnvVarNumber("MVK_CONFIG_PERFORMANCE_LOGGING_INLINE", 0.0);
if (logPerfInline && evCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) {
evCfg.activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE;
}
mvkSetConfig(evCfg);
}
static MVKConfiguration _mvkConfig;
static std::string _autoGPUCaptureOutputFile;
static std::string _autoGPUCaptureOutputFilepath;
// Returns the MoltenVK config, lazily initializing it if necessary.
// We initialize lazily instead of in a library constructor function to
@ -109,29 +90,41 @@ const MVKConfiguration& mvkConfig() {
return _mvkConfig;
}
// Sets config content, and updates any content that needs baking, including copying the contents
// of strings from the incoming MVKConfiguration member to a corresponding std::string, and then
// repointing the MVKConfiguration member to the contents of the std::string.
void mvkSetConfig(const MVKConfiguration& mvkConfig) {
_mvkConfig = mvkConfig;
mvkSetConfig(_mvkConfig, mvkConfig, _autoGPUCaptureOutputFilepath);
}
// Sets destination config content from the source content, validates content,
// and updates any content that needs baking, including copying the contents of
// strings from the incoming MVKConfiguration member to a corresponding std::string,
// and then repointing the MVKConfiguration member to the contents of the std::string.
void mvkSetConfig(MVKConfiguration& dstMVKConfig,
const MVKConfiguration& srcMVKConfig,
std::string& autoGPUCaptureOutputFilepath) {
dstMVKConfig = srcMVKConfig;
// Ensure the API version is supported, and add the VK_HEADER_VERSION.
_mvkConfig.apiVersionToAdvertise = std::min(_mvkConfig.apiVersionToAdvertise, MVK_VULKAN_API_VERSION);
_mvkConfig.apiVersionToAdvertise = VK_MAKE_VERSION(VK_VERSION_MAJOR(_mvkConfig.apiVersionToAdvertise),
VK_VERSION_MINOR(_mvkConfig.apiVersionToAdvertise),
VK_HEADER_VERSION);
dstMVKConfig.apiVersionToAdvertise = std::min(dstMVKConfig.apiVersionToAdvertise, MVK_VULKAN_API_VERSION);
dstMVKConfig.apiVersionToAdvertise = VK_MAKE_VERSION(VK_VERSION_MAJOR(dstMVKConfig.apiVersionToAdvertise),
VK_VERSION_MINOR(dstMVKConfig.apiVersionToAdvertise),
VK_HEADER_VERSION);
// Deprecated legacy support for specific case where both legacy semaphoreUseMTLEvent
// (now aliased to semaphoreSupportStyle) and legacy semaphoreUseMTLFence are explicitly
// disabled by the app. In this case the app had been using CPU emulation, so use
// MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK.
if ( !_mvkConfig.semaphoreUseMTLEvent && !_mvkConfig.semaphoreUseMTLFence ) {
_mvkConfig.semaphoreSupportStyle = MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK;
if ( !dstMVKConfig.semaphoreUseMTLEvent && !dstMVKConfig.semaphoreUseMTLFence ) {
dstMVKConfig.semaphoreSupportStyle = MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK;
}
// Set capture file path string
if (_mvkConfig.autoGPUCaptureOutputFilepath) {
_autoGPUCaptureOutputFile = _mvkConfig.autoGPUCaptureOutputFilepath;
if (dstMVKConfig.autoGPUCaptureOutputFilepath) {
autoGPUCaptureOutputFilepath = dstMVKConfig.autoGPUCaptureOutputFilepath;
}
_mvkConfig.autoGPUCaptureOutputFilepath = (char*)_autoGPUCaptureOutputFile.c_str();
dstMVKConfig.autoGPUCaptureOutputFilepath = autoGPUCaptureOutputFilepath.c_str();
// Clamp timestampPeriodLowPassAlpha between 0.0 and 1.0.
dstMVKConfig.timestampPeriodLowPassAlpha = mvkClamp(dstMVKConfig.timestampPeriodLowPassAlpha, 0.0f, 1.0f);
}

View File

@ -23,6 +23,7 @@
#include "mvk_vulkan.h"
#include "mvk_config.h"
#include "MVKLogging.h"
#include <string>
// Expose MoltenVK Apple surface extension functionality
@ -69,7 +70,7 @@
#endif
#if MVK_TVOS
# define MVK_SUPPORT_IOSURFACE_BOOL (__TV_OS_VERSION_MIN_REQUIRED >= __TVOS_11_0)
# define MVK_SUPPORT_IOSURFACE_BOOL (__TV_OS_VERSION_MIN_REQUIRED >= __TVOS_11_0)
#endif
@ -79,9 +80,25 @@
/** Global function to access MoltenVK configuration info. */
const MVKConfiguration& mvkConfig();
/** Global function to update MoltenVK configuration info. */
/** Sets the MoltenVK global configuration content. */
void mvkSetConfig(const MVKConfiguration& mvkConfig);
/**
* Sets the content from the source config into the destination
* config, while using the string object to retain string content.
*/
void mvkSetConfig(MVKConfiguration& dstMVKConfig,
const MVKConfiguration& srcMVKConfig,
std::string& autoGPUCaptureOutputFilepath);
/**
* Enable debug mode.
* By default, disabled for Release builds and enabled for Debug builds.
*/
#ifndef MVK_CONFIG_DEBUG
# define MVK_CONFIG_DEBUG MVK_DEBUG
#endif
/** Flip the vertex coordinate in shaders. Enabled by default. */
#ifndef MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y
# define MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y 1
@ -244,11 +261,17 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig);
#ifndef MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE
# define MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE
#endif
#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated
# define MVK_ALLOW_METAL_EVENTS 1
#ifndef MVK_CONFIG_ALLOW_METAL_EVENTS
# define MVK_CONFIG_ALLOW_METAL_EVENTS 1
#endif
#ifndef MVK_ALLOW_METAL_FENCES // Deprecated
# define MVK_ALLOW_METAL_FENCES 1
#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated
# define MVK_ALLOW_METAL_EVENTS MVK_CONFIG_ALLOW_METAL_EVENTS
#endif
#ifndef MVK_CONFIG_ALLOW_METAL_FENCES
# define MVK_CONFIG_ALLOW_METAL_FENCES 1
#endif
#ifndef MVK_ALLOW_METAL_FENCES // Deprecated
# define MVK_ALLOW_METAL_FENCES MVK_CONFIG_ALLOW_METAL_FENCES
#endif
/** Substitute Metal 2D textures for Vulkan 1D images. Enabled by default. */
@ -303,3 +326,11 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig);
#ifndef MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION
# define MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION 0
#endif
/**
* The alpha value of a lowpass filter tracking VkPhysicalDeviceLimits::timestampPeriod.
* This can be set to a float between 0.0 and 1.0.
*/
#ifndef MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA
# define MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA 1.0
#endif

View File

@ -21,6 +21,44 @@
#define CASE_STRINGIFY(V) case V: return #V
// Returns a human-readable name (generally the originating Vulkan API call) for the
// command identified by cmdUse. Used when constructing log and debug-report messages.
// Unrecognized values fall through to "Unknown Vulkan command".
const char* mvkVkCommandName(MVKCommandUse cmdUse) {
switch (cmdUse) {
case kMVKCommandUseBeginCommandBuffer: return "vkBeginCommandBuffer (prefilled VkCommandBuffer)";
case kMVKCommandUseQueueSubmit: return "vkQueueSubmit";
case kMVKCommandUseAcquireNextImage: return "vkAcquireNextImageKHR";
case kMVKCommandUseQueuePresent: return "vkQueuePresentKHR";
case kMVKCommandUseQueueWaitIdle: return "vkQueueWaitIdle";
case kMVKCommandUseDeviceWaitIdle: return "vkDeviceWaitIdle";
case kMVKCommandUseInvalidateMappedMemoryRanges: return "vkInvalidateMappedMemoryRanges";
case kMVKCommandUseBeginRendering: return "vkCmdBeginRendering";
case kMVKCommandUseBeginRenderPass: return "vkCmdBeginRenderPass";
case kMVKCommandUseNextSubpass: return "vkCmdNextSubpass";
case kMVKCommandUseRestartSubpass: return "Metal renderpass restart";
case kMVKCommandUsePipelineBarrier: return "vkCmdPipelineBarrier";
case kMVKCommandUseBlitImage: return "vkCmdBlitImage";
case kMVKCommandUseCopyImage: return "vkCmdCopyImage";
case kMVKCommandUseResolveImage: return "vkCmdResolveImage (resolve stage)";
case kMVKCommandUseResolveExpandImage: return "vkCmdResolveImage (expand stage)";
case kMVKCommandUseResolveCopyImage: return "vkCmdResolveImage (copy stage)";
case kMVKCommandUseCopyBuffer: return "vkCmdCopyBuffer";
case kMVKCommandUseCopyBufferToImage: return "vkCmdCopyBufferToImage";
case kMVKCommandUseCopyImageToBuffer: return "vkCmdCopyImageToBuffer";
case kMVKCommandUseFillBuffer: return "vkCmdFillBuffer";
case kMVKCommandUseUpdateBuffer: return "vkCmdUpdateBuffer";
case kMVKCommandUseClearAttachments: return "vkCmdClearAttachments";
case kMVKCommandUseClearColorImage: return "vkCmdClearColorImage";
case kMVKCommandUseClearDepthStencilImage: return "vkCmdClearDepthStencilImage";
case kMVKCommandUseResetQueryPool: return "vkCmdResetQueryPool";
case kMVKCommandUseDispatch: return "vkCmdDispatch";
case kMVKCommandUseTessellationVertexTessCtl: return "vkCmdDraw (vertex and tess control stages)";
case kMVKCommandUseDrawIndirectConvertBuffers: return "vkCmdDrawIndirect (convert indirect buffers)";
case kMVKCommandUseCopyQueryPoolResults: return "vkCmdCopyQueryPoolResults";
case kMVKCommandUseAccumOcclusionQuery: return "Post-render-pass occlusion query accumulation";
case kMVKCommandUseRecordGPUCounterSample: return "Record GPU Counter Sample";
default: return "Unknown Vulkan command";
}
}
const char* mvkVkResultName(VkResult vkResult) {
switch (vkResult) {

View File

@ -60,10 +60,13 @@ typedef struct {
#pragma mark -
#pragma mark Vulkan support
/** A generic 32-bit color permitting float, int32, or uint32 values. */
typedef VkClearColorValue MVKColor32;
/** Tracks the Vulkan command currently being used. */
typedef enum : uint8_t {
kMVKCommandUseNone = 0, /**< No use defined. */
kMVKCommandUseEndCommandBuffer, /**< vkEndCommandBuffer (prefilled VkCommandBuffer). */
kMVKCommandUseBeginCommandBuffer, /**< vkBeginCommandBuffer (prefilled VkCommandBuffer). */
kMVKCommandUseQueueSubmit, /**< vkQueueSubmit. */
kMVKCommandUseAcquireNextImage, /**< vkAcquireNextImageKHR. */
kMVKCommandUseQueuePresent, /**< vkQueuePresentKHR. */
@ -73,7 +76,7 @@ typedef enum : uint8_t {
kMVKCommandUseBeginRendering, /**< vkCmdBeginRendering. */
kMVKCommandUseBeginRenderPass, /**< vkCmdBeginRenderPass. */
kMVKCommandUseNextSubpass, /**< vkCmdNextSubpass. */
kMVKCommandUseRestartSubpass, /**< Restart a subpass because of explicit or implicit barrier. */
kMVKCommandUseRestartSubpass, /**< Create a new Metal renderpass due to Metal requirements. */
kMVKCommandUsePipelineBarrier, /**< vkCmdPipelineBarrier. */
kMVKCommandUseBlitImage, /**< vkCmdBlitImage. */
kMVKCommandUseCopyImage, /**< vkCmdCopyImage. */
@ -99,11 +102,14 @@ typedef enum : uint8_t {
/** Represents a given stage of a graphics pipeline. */
enum MVKGraphicsStage {
kMVKGraphicsStageVertex = 0, /**< The vertex shader stage. */
kMVKGraphicsStageTessControl, /**< The tessellation control shader stage. */
kMVKGraphicsStageVertex = 0, /**< The tessellation vertex compute shader stage. */
kMVKGraphicsStageTessControl, /**< The tessellation control compute shader stage. */
kMVKGraphicsStageRasterization /**< The rest of the pipeline. */
};
/** Returns the name of the command defined by the command use. */
const char* mvkVkCommandName(MVKCommandUse cmdUse);
/** Returns the name of the result value. */
const char* mvkVkResultName(VkResult vkResult);
@ -139,7 +145,7 @@ static inline std::string mvkGetMoltenVKVersionString(uint32_t mvkVersion) {
/** Returns whether the specified positive value is a power-of-two. */
template<typename T>
static constexpr bool mvkIsPowerOfTwo(T value) {
return value && ((value & (value - 1)) == 0);
return value > 0 && ((value & (value - 1)) == 0);
}
/**
@ -275,21 +281,21 @@ void mvkFlipVertically(void* rowMajorData, uint32_t rowCount, size_t bytesPerRow
* They are ridiculously large numbers, but low enough to be safely used as both
* uint and int values without risking overflowing between positive and negative values.
*/
static int32_t kMVKUndefinedLargePositiveInt32 = mvkEnsurePowerOfTwo(std::numeric_limits<int32_t>::max() / 2);
static int32_t kMVKUndefinedLargeNegativeInt32 = -kMVKUndefinedLargePositiveInt32;
static uint32_t kMVKUndefinedLargeUInt32 = kMVKUndefinedLargePositiveInt32;
static int64_t kMVKUndefinedLargePositiveInt64 = mvkEnsurePowerOfTwo(std::numeric_limits<int64_t>::max() / 2);
static int64_t kMVKUndefinedLargeNegativeInt64 = -kMVKUndefinedLargePositiveInt64;
static uint64_t kMVKUndefinedLargeUInt64 = kMVKUndefinedLargePositiveInt64;
static constexpr int32_t kMVKUndefinedLargePositiveInt32 = mvkEnsurePowerOfTwo(std::numeric_limits<int32_t>::max() / 2);
static constexpr int32_t kMVKUndefinedLargeNegativeInt32 = -kMVKUndefinedLargePositiveInt32;
static constexpr uint32_t kMVKUndefinedLargeUInt32 = kMVKUndefinedLargePositiveInt32;
static constexpr int64_t kMVKUndefinedLargePositiveInt64 = mvkEnsurePowerOfTwo(std::numeric_limits<int64_t>::max() / 2);
static constexpr int64_t kMVKUndefinedLargeNegativeInt64 = -kMVKUndefinedLargePositiveInt64;
static constexpr uint64_t kMVKUndefinedLargeUInt64 = kMVKUndefinedLargePositiveInt64;
#pragma mark Vulkan structure support functions
/** Returns a VkExtent2D created from the width and height of a VkExtent3D. */
static inline VkExtent2D mvkVkExtent2DFromVkExtent3D(VkExtent3D e) { return {e.width, e.height }; }
static constexpr VkExtent2D mvkVkExtent2DFromVkExtent3D(VkExtent3D e) { return {e.width, e.height }; }
/** Returns a VkExtent3D, created from a VkExtent2D, and with depth of 1. */
static inline VkExtent3D mvkVkExtent3DFromVkExtent2D(VkExtent2D e) { return {e.width, e.height, 1U }; }
static constexpr VkExtent3D mvkVkExtent3DFromVkExtent2D(VkExtent2D e) { return {e.width, e.height, 1U }; }
/** Returns whether the two Vulkan extents are equal by comparing their respective components. */
static constexpr bool mvkVkExtent2DsAreEqual(VkExtent2D e1, VkExtent2D e2) {
@ -330,13 +336,13 @@ static constexpr uint32_t mvkPackSwizzle(VkComponentMapping components) {
}
/** Unpacks a single 32-bit word containing four swizzle components. */
static inline VkComponentMapping mvkUnpackSwizzle(uint32_t packed) {
VkComponentMapping components;
components.r = (VkComponentSwizzle)((packed >> 0) & 0xFF);
components.g = (VkComponentSwizzle)((packed >> 8) & 0xFF);
components.b = (VkComponentSwizzle)((packed >> 16) & 0xFF);
components.a = (VkComponentSwizzle)((packed >> 24) & 0xFF);
return components;
static constexpr VkComponentMapping mvkUnpackSwizzle(uint32_t packed) {
return {
.r = (VkComponentSwizzle)((packed >> 0) & 0xFF),
.g = (VkComponentSwizzle)((packed >> 8) & 0xFF),
.b = (VkComponentSwizzle)((packed >> 16) & 0xFF),
.a = (VkComponentSwizzle)((packed >> 24) & 0xFF),
};
}
/**
@ -350,8 +356,8 @@ static inline VkComponentMapping mvkUnpackSwizzle(uint32_t packed) {
* and matches any value.
*/
static constexpr bool mvkVKComponentSwizzlesMatch(VkComponentSwizzle cs1,
VkComponentSwizzle cs2,
VkComponentSwizzle csPos) {
VkComponentSwizzle cs2,
VkComponentSwizzle csPos) {
return ((cs1 == cs2) ||
((cs1 == VK_COMPONENT_SWIZZLE_IDENTITY) && (cs2 == csPos)) ||
((cs2 == VK_COMPONENT_SWIZZLE_IDENTITY) && (cs1 == csPos)) ||
@ -381,24 +387,24 @@ static constexpr bool mvkVkComponentMappingsMatch(VkComponentMapping cm1, VkComp
/** Rounds the value to nearest integer using half-to-even rounding. */
static inline double mvkRoundHalfToEven(const double val) {
return val - std::remainder(val, 1.0); // remainder() uses half-to-even rounding, and unfortunately isn't constexpr until C++23.
return val - std::remainder(val, 1.0); // remainder() uses half-to-even rounding, but unfortunately isn't constexpr until C++23.
}
/** Returns whether the value will fit inside the numeric type. */
template<typename T, typename Tval>
const bool mvkFits(const Tval& val) {
static constexpr bool mvkFits(const Tval& val) {
return val <= std::numeric_limits<T>::max();
}
/** Clamps the value between the lower and upper bounds, inclusive. */
template<typename T>
const T& mvkClamp(const T& val, const T& lower, const T& upper) {
static constexpr const T& mvkClamp(const T& val, const T& lower, const T& upper) {
return std::min(std::max(val, lower), upper);
}
/** Returns the result of a division, rounded up. */
template<typename T, typename U>
constexpr typename std::common_type<T, U>::type mvkCeilingDivide(T numerator, U denominator) {
static constexpr typename std::common_type<T, U>::type mvkCeilingDivide(T numerator, U denominator) {
typedef typename std::common_type<T, U>::type R;
// Short circuit very common usecase of dividing by one.
return (denominator == 1) ? numerator : (R(numerator) + denominator - 1) / denominator;
@ -424,18 +430,18 @@ struct MVKAbs<R, T, false> {
/** Returns the absolute value of the difference of two numbers. */
template<typename T, typename U>
constexpr typename std::common_type<T, U>::type mvkAbsDiff(T x, U y) {
static constexpr typename std::common_type<T, U>::type mvkAbsDiff(T x, U y) {
return x >= y ? x - y : y - x;
}
/** Returns the greatest common divisor of two numbers. */
template<typename T>
constexpr T mvkGreatestCommonDivisorImpl(T a, T b) {
static constexpr T mvkGreatestCommonDivisorImpl(T a, T b) {
return b == 0 ? a : mvkGreatestCommonDivisorImpl(b, a % b);
}
template<typename T, typename U>
constexpr typename std::common_type<T, U>::type mvkGreatestCommonDivisor(T a, U b) {
static constexpr typename std::common_type<T, U>::type mvkGreatestCommonDivisor(T a, U b) {
typedef typename std::common_type<T, U>::type R;
typedef typename std::make_unsigned<R>::type UI;
return static_cast<R>(mvkGreatestCommonDivisorImpl(static_cast<UI>(MVKAbs<R, T>::eval(a)), static_cast<UI>(MVKAbs<R, U>::eval(b))));
@ -443,7 +449,7 @@ constexpr typename std::common_type<T, U>::type mvkGreatestCommonDivisor(T a, U
/** Returns the least common multiple of two numbers. */
template<typename T, typename U>
constexpr typename std::common_type<T, U>::type mvkLeastCommonMultiple(T a, U b) {
static constexpr typename std::common_type<T, U>::type mvkLeastCommonMultiple(T a, U b) {
typedef typename std::common_type<T, U>::type R;
return (a == 0 && b == 0) ? 0 : MVKAbs<R, T>::eval(a) / mvkGreatestCommonDivisor(a, b) * MVKAbs<R, U>::eval(b);
}
@ -460,7 +466,7 @@ constexpr typename std::common_type<T, U>::type mvkLeastCommonMultiple(T a, U b)
* value returned by previous calls as the seed in subsequent calls.
*/
template<class N>
std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 5381) {
static constexpr std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 5381) {
std::size_t hash = seed;
for (std::size_t i = 0; i < count; i++) { hash = ((hash << 5) + hash) ^ pVals[i]; }
return hash;
@ -475,25 +481,26 @@ std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 53
*/
template<typename Type>
struct MVKArrayRef {
Type* data;
const size_t size;
public:
constexpr Type* begin() const { return _data; }
constexpr Type* end() const { return &_data[_size]; }
constexpr Type* data() const { return _data; }
constexpr size_t size() const { return _size; }
constexpr size_t byteSize() const { return _size * sizeof(Type); }
constexpr Type& operator[]( const size_t i ) const { return _data[i]; }
constexpr MVKArrayRef() : MVKArrayRef(nullptr, 0) {}
constexpr MVKArrayRef(Type* d, size_t s) : _data(d), _size(s) {}
template <typename Other, std::enable_if_t<std::is_convertible_v<Other(*)[], Type(*)[]>, bool> = true>
constexpr MVKArrayRef(MVKArrayRef<Other> other) : _data(other.data()), _size(other.size()) {}
const Type* begin() const { return data; }
const Type* end() const { return &data[size]; }
const Type& operator[]( const size_t i ) const { return data[i]; }
Type& operator[]( const size_t i ) { return data[i]; }
MVKArrayRef<Type>& operator=(const MVKArrayRef<Type>& other) {
data = other.data;
*(size_t*)&size = other.size;
return *this;
}
MVKArrayRef() : MVKArrayRef(nullptr, 0) {}
MVKArrayRef(Type* d, size_t s) : data(d), size(s) {}
protected:
Type* _data;
size_t _size;
};
/** Ensures the size of the specified container is at least the specified size. */
template<typename C, typename S>
void mvkEnsureSize(C& container, S size) {
static void mvkEnsureSize(C& container, S size) {
if (size > container.size()) { container.resize(size); }
}
@ -502,7 +509,7 @@ void mvkEnsureSize(C& container, S size) {
* each object, including freeing the object memory, and clearing the container.
*/
template<typename C>
void mvkDestroyContainerContents(C& container) {
static void mvkDestroyContainerContents(C& container) {
for (auto elem : container) { elem->destroy(); }
container.clear();
}
@ -513,7 +520,7 @@ void mvkDestroyContainerContents(C& container) {
*/
#ifdef __OBJC__
template<typename C>
void mvkReleaseContainerContents(C& container) {
static void mvkReleaseContainerContents(C& container) {
for (auto elem : container) { [elem release]; }
container.clear();
}
@ -521,14 +528,14 @@ void mvkReleaseContainerContents(C& container) {
/** Returns whether the container contains an item equal to the value. */
template<class C, class T>
bool mvkContains(C& container, const T& val) {
static constexpr bool mvkContains(C& container, const T& val) {
for (const T& cVal : container) { if (cVal == val) { return true; } }
return false;
}
/** Removes the first occurance of the specified value from the specified container. */
template<class C, class T>
void mvkRemoveFirstOccurance(C& container, T val) {
static void mvkRemoveFirstOccurance(C& container, T val) {
for (auto iter = container.begin(), end = container.end(); iter != end; iter++) {
if( *iter == val ) {
container.erase(iter);
@ -539,7 +546,7 @@ void mvkRemoveFirstOccurance(C& container, T val) {
/** Removes all occurances of the specified value from the specified container. */
template<class C, class T>
void mvkRemoveAllOccurances(C& container, T val) {
static void mvkRemoveAllOccurances(C& container, T val) {
container.erase(std::remove(container.begin(), container.end(), val), container.end());
}
@ -548,7 +555,7 @@ void mvkRemoveAllOccurances(C& container, T val) {
/** Selects and returns one of the values, based on the platform OS. */
template<typename T>
const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) {
static constexpr const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) {
#if MVK_IOS_OR_TVOS
return iOSVal;
#endif
@ -562,22 +569,29 @@ const T& mvkSelectPlatformValue(const T& macOSVal, const T& iOSVal) {
* The optional count allows clearing multiple elements in an array.
*/
template<typename T>
void mvkClear(T* pVal, size_t count = 1) { if (pVal) { memset(pVal, 0, sizeof(T) * count); } }
static void mvkClear(T* pDst, size_t count = 1) {
if ( !pDst ) { return; } // Bad pointer
if constexpr(std::is_arithmetic_v<T>) { if (count == 1) { *pDst = static_cast<T>(0); } } // Fast clear of a single primitive
memset(pDst, 0, sizeof(T) * count); // Memory clear of complex content or array
}
/**
* If pVal is not null, overrides the const declaration, and clears the memory occupied by *pVal
* by writing zeros to all bytes. The optional count allows clearing multiple elements in an array.
*/
template<typename T>
void mvkClear(const T* pVal, size_t count = 1) { mvkClear((T*)pVal, count); }
static void mvkClear(const T* pVal, size_t count = 1) { mvkClear((T*)pVal, count); }
/**
* If pSrc and pDst are both not null, copies the contents of the source value to the
* destination value. The optional count allows copying of multiple elements in an array.
*/
template<typename T>
void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) {
if (pSrc && pDst) { memcpy(pDst, pSrc, sizeof(T) * count); }
static void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) {
if ( !pDst || !pSrc ) { return; } // Bad pointers
if (pDst == pSrc) { return; } // Same object
if constexpr(std::is_arithmetic_v<T>) { if (count == 1) { *pDst = *pSrc; } } // Fast copy of a single primitive
memcpy(pDst, pSrc, sizeof(T) * count); // Memory copy of complex content or array
}
/**
@ -585,8 +599,11 @@ void mvkCopy(T* pDst, const T* pSrc, size_t count = 1) {
* otherwise returns false. The optional count allows comparing multiple elements in an array.
*/
template<typename T>
bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) {
return (pV1 && pV2) ? (memcmp(pV1, pV2, sizeof(T) * count) == 0) : false;
static constexpr bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) {
if ( !pV2 || !pV2 ) { return false; } // Bad pointers
if (pV1 == pV2) { return true; } // Same object
if constexpr(std::is_arithmetic_v<T>) { if (count == 1) { return *pV1 == *pV2; } } // Fast compare of a single primitive
return memcmp(pV1, pV2, sizeof(T) * count) == 0; // Memory compare of complex content or array
}
/**
@ -595,7 +612,7 @@ bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) {
* which works on individual chars or char arrays, not strings.
* Returns false if either string is null.
*/
static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2, size_t count = 1) {
static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2) {
return pV1 && pV2 && (pV1 == pV2 || strcmp(pV1, pV2) == 0);
}
@ -628,10 +645,18 @@ static constexpr bool mvkSetOrClear(T* pDest, const T* pSrc) {
template<typename Tv, typename Tm>
void mvkEnableFlags(Tv& value, const Tm bitMask) { value = (Tv)(value | bitMask); }
/** Enables all the flags (sets bits to 1) within the value parameter. */
template<typename Tv>
void mvkEnableAllFlags(Tv& value) { value = ~static_cast<Tv>(0); }
/** Disables the flags (sets bits to 0) within the value parameter specified by the bitMask parameter. */
template<typename Tv, typename Tm>
void mvkDisableFlags(Tv& value, const Tm bitMask) { value = (Tv)(value & ~(Tv)bitMask); }
/** Enables all the flags (sets bits to 1) within the value parameter. */
template<typename Tv>
void mvkDisableAllFlags(Tv& value) { value = static_cast<Tv>(0); }
/** Returns whether the specified value has ANY of the flags specified in bitMask enabled (set to 1). */
template<typename Tv, typename Tm>
static constexpr bool mvkIsAnyFlagEnabled(Tv value, const Tm bitMask) { return ((value & bitMask) != 0); }

View File

@ -57,9 +57,9 @@ extern "C" {
* MVKLogErrorIf(cond, fmt, ...) - same as MVKLogError if boolean "cond" condition expression evaluates to YES,
* otherwise logs nothing.
*
* MVKLogWarning(fmt, ...) - recommended for not immediately harmful errors
* MVKLogWarn(fmt, ...) - recommended for not immediately harmful errors
* - will print if MVK_LOG_LEVEL_WARNING is set on.
* MVKLogWarningIf(cond, fmt, ...) - same as MVKLogWarning if boolean "cond" condition expression evaluates to YES,
* MVKLogWarnIf(cond, fmt, ...) - same as MVKLogWarn if boolean "cond" condition expression evaluates to YES,
* otherwise logs nothing.
*
* MVKLogInfo(fmt, ...) - recommended for general, infrequent, information messages
@ -67,7 +67,7 @@ extern "C" {
* MVKLogInfoIf(cond, fmt, ...) - same as MVKLogInfo if boolean "cond" condition expression evaluates to YES,
* otherwise logs nothing.
*
* MVKLogDebug(fmt, ...) - recommended for temporary use during debugging
* MVKLogDebug(fmt, ...) - recommended for temporary use during debugging
* - will print if MVK_LOG_LEVEL_DEBUG is set on.
* MVKLogDebugIf(cond, fmt, ...) - same as MVKLogDebug if boolean "cond" condition expression evaluates to YES,
* otherwise logs nothing.
@ -148,11 +148,11 @@ extern "C" {
// Warning logging - for not immediately harmful errors
#if MVK_LOG_LEVEL_WARNING
# define MVKLogWarning(fmt, ...) MVKLogWarningImpl(fmt, ##__VA_ARGS__)
# define MVKLogWarningIf(cond, fmt, ...) if(cond) { MVKLogWarningImpl(fmt, ##__VA_ARGS__); }
# define MVKLogWarn(fmt, ...) MVKLogWarnImpl(fmt, ##__VA_ARGS__)
# define MVKLogWarnIf(cond, fmt, ...) if(cond) { MVKLogWarnImpl(fmt, ##__VA_ARGS__); }
#else
# define MVKLogWarning(...)
# define MVKLogWarningIf(cond, fmt, ...)
# define MVKLogWarn(...)
# define MVKLogWarnIf(cond, fmt, ...)
#endif
// Info logging - for general, non-performance affecting information messages
@ -182,11 +182,11 @@ extern "C" {
# define MVKLogTraceIf(cond, fmt, ...)
#endif
#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__)
#define MVKLogWarningImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__)
#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__)
#define MVKLogDebugImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__)
#define MVKLogTraceImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__)
#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__)
#define MVKLogWarnImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__)
#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__)
#define MVKLogDebugImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__)
#define MVKLogTraceImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__)
// Assertions
#ifdef NS_BLOCK_ASSERTIONS

View File

@ -298,12 +298,12 @@ public:
reverse_iterator rbegin() const { return reverse_iterator( end() ); }
reverse_iterator rend() const { return reverse_iterator( begin() ); }
const MVKArrayRef<Type> contents() const { return MVKArrayRef<Type>(data(), size()); }
MVKArrayRef<Type> contents() { return MVKArrayRef<Type>(data(), size()); }
MVKArrayRef<const Type> contents() const { return MVKArrayRef<const Type>(data(), size()); }
MVKArrayRef< Type> contents() { return MVKArrayRef< Type>(data(), size()); }
const Type &operator[]( const size_t i ) const { return alc[i]; }
const Type &operator[]( const size_t i ) const { return alc[i]; }
Type &operator[]( const size_t i ) { return alc[i]; }
const Type &at( const size_t i ) const { return alc[i]; }
const Type &at( const size_t i ) const { return alc[i]; }
Type &at( const size_t i ) { return alc[i]; }
const Type &front() const { return alc[0]; }
Type &front() { return alc[0]; }

View File

@ -47,6 +47,12 @@ class MVKPixelFormats;
* of an MVKBaseObject subclass, which is true for all but static calling functions.
*/
MTLTextureType mvkMTLTextureTypeFromVkImageTypeObj(VkImageType vkImageType, uint32_t arraySize, bool isMultisample, MVKBaseObject* mvkObj);
#define mvkMTLTextureTypeFromVkImageType(vkImageType, arraySize, isMultisample) mvkMTLTextureTypeFromVkImageTypeObj(vkImageType, arraySize, isMultisample, this)
MTLTextureType mvkMTLTextureTypeFromVkImageViewTypeObj(VkImageViewType vkImageViewType, bool isMultisample, MVKBaseObject* mvkObj);
#define mvkMTLTextureTypeFromVkImageViewType(vkImageViewType, isMultisample) mvkMTLTextureTypeFromVkImageViewTypeObj(vkImageViewType, isMultisample, this)
MTLPrimitiveType mvkMTLPrimitiveTypeFromVkPrimitiveTopologyInObj(VkPrimitiveTopology vkTopology, MVKBaseObject* mvkObj);
#define mvkMTLPrimitiveTypeFromVkPrimitiveTopology(vkTopology) mvkMTLPrimitiveTypeFromVkPrimitiveTopologyInObj(vkTopology, this)

View File

@ -32,119 +32,133 @@ using namespace std;
#pragma mark Pixel formats
static MVKPixelFormats _platformPixelFormats;
static std::unique_ptr<MVKPixelFormats> _platformPixelFormats;
static MVKPixelFormats* getPlatformPixelFormats() {
if ( !_platformPixelFormats ) { _platformPixelFormats.reset(new MVKPixelFormats()); }
return _platformPixelFormats.get();
}
MVK_PUBLIC_SYMBOL bool mvkVkFormatIsSupported(VkFormat vkFormat) {
return _platformPixelFormats.isSupported(vkFormat);
return getPlatformPixelFormats()->isSupported(vkFormat);
}
MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsSupported(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.isSupported(mtlFormat);
return getPlatformPixelFormats()->isSupported(mtlFormat);
}
MVK_PUBLIC_SYMBOL MVKFormatType mvkFormatTypeFromVkFormat(VkFormat vkFormat) {
return _platformPixelFormats.getFormatType(vkFormat);
return getPlatformPixelFormats()->getFormatType(vkFormat);
}
MVK_PUBLIC_SYMBOL MVKFormatType mvkFormatTypeFromMTLPixelFormat(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.getFormatType(mtlFormat);
return getPlatformPixelFormats()->getFormatType(mtlFormat);
}
MVK_PUBLIC_SYMBOL MTLPixelFormat mvkMTLPixelFormatFromVkFormat(VkFormat vkFormat) {
return _platformPixelFormats.getMTLPixelFormat(vkFormat);
return getPlatformPixelFormats()->getMTLPixelFormat(vkFormat);
}
MVK_PUBLIC_SYMBOL VkFormat mvkVkFormatFromMTLPixelFormat(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.getVkFormat(mtlFormat);
return getPlatformPixelFormats()->getVkFormat(mtlFormat);
}
MVK_PUBLIC_SYMBOL uint32_t mvkVkFormatBytesPerBlock(VkFormat vkFormat) {
return _platformPixelFormats.getBytesPerBlock(vkFormat);
return getPlatformPixelFormats()->getBytesPerBlock(vkFormat);
}
MVK_PUBLIC_SYMBOL uint32_t mvkMTLPixelFormatBytesPerBlock(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.getBytesPerBlock(mtlFormat);
return getPlatformPixelFormats()->getBytesPerBlock(mtlFormat);
}
MVK_PUBLIC_SYMBOL VkExtent2D mvkVkFormatBlockTexelSize(VkFormat vkFormat) {
return _platformPixelFormats.getBlockTexelSize(vkFormat);
return getPlatformPixelFormats()->getBlockTexelSize(vkFormat);
}
MVK_PUBLIC_SYMBOL VkExtent2D mvkMTLPixelFormatBlockTexelSize(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.getBlockTexelSize(mtlFormat);
return getPlatformPixelFormats()->getBlockTexelSize(mtlFormat);
}
MVK_PUBLIC_SYMBOL float mvkVkFormatBytesPerTexel(VkFormat vkFormat) {
return _platformPixelFormats.getBytesPerTexel(vkFormat);
return getPlatformPixelFormats()->getBytesPerTexel(vkFormat);
}
MVK_PUBLIC_SYMBOL float mvkMTLPixelFormatBytesPerTexel(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.getBytesPerTexel(mtlFormat);
return getPlatformPixelFormats()->getBytesPerTexel(mtlFormat);
}
MVK_PUBLIC_SYMBOL size_t mvkVkFormatBytesPerRow(VkFormat vkFormat, uint32_t texelsPerRow) {
return _platformPixelFormats.getBytesPerRow(vkFormat, texelsPerRow);
return getPlatformPixelFormats()->getBytesPerRow(vkFormat, texelsPerRow);
}
MVK_PUBLIC_SYMBOL size_t mvkMTLPixelFormatBytesPerRow(MTLPixelFormat mtlFormat, uint32_t texelsPerRow) {
return _platformPixelFormats.getBytesPerRow(mtlFormat, texelsPerRow);
return getPlatformPixelFormats()->getBytesPerRow(mtlFormat, texelsPerRow);
}
MVK_PUBLIC_SYMBOL size_t mvkVkFormatBytesPerLayer(VkFormat vkFormat, size_t bytesPerRow, uint32_t texelRowsPerLayer) {
return _platformPixelFormats.getBytesPerLayer(vkFormat, bytesPerRow, texelRowsPerLayer);
return getPlatformPixelFormats()->getBytesPerLayer(vkFormat, bytesPerRow, texelRowsPerLayer);
}
MVK_PUBLIC_SYMBOL size_t mvkMTLPixelFormatBytesPerLayer(MTLPixelFormat mtlFormat, size_t bytesPerRow, uint32_t texelRowsPerLayer) {
return _platformPixelFormats.getBytesPerLayer(mtlFormat, bytesPerRow, texelRowsPerLayer);
return getPlatformPixelFormats()->getBytesPerLayer(mtlFormat, bytesPerRow, texelRowsPerLayer);
}
MVK_PUBLIC_SYMBOL VkFormatProperties mvkVkFormatProperties(VkFormat vkFormat) {
return _platformPixelFormats.getVkFormatProperties(vkFormat);
return getPlatformPixelFormats()->getVkFormatProperties(vkFormat);
}
MVK_PUBLIC_SYMBOL const char* mvkVkFormatName(VkFormat vkFormat) {
return _platformPixelFormats.getName(vkFormat);
return getPlatformPixelFormats()->getName(vkFormat);
}
MVK_PUBLIC_SYMBOL const char* mvkMTLPixelFormatName(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.getName(mtlFormat);
return getPlatformPixelFormats()->getName(mtlFormat);
}
MVK_PUBLIC_SYMBOL MTLVertexFormat mvkMTLVertexFormatFromVkFormat(VkFormat vkFormat) {
return _platformPixelFormats.getMTLVertexFormat(vkFormat);
return getPlatformPixelFormats()->getMTLVertexFormat(vkFormat);
}
MVK_PUBLIC_SYMBOL MTLClearColor mvkMTLClearColorFromVkClearValue(VkClearValue vkClearValue,
VkFormat vkFormat) {
return _platformPixelFormats.getMTLClearColor(vkClearValue, vkFormat);
return getPlatformPixelFormats()->getMTLClearColor(vkClearValue, vkFormat);
}
MVK_PUBLIC_SYMBOL double mvkMTLClearDepthFromVkClearValue(VkClearValue vkClearValue) {
return _platformPixelFormats.getMTLClearDepthValue(vkClearValue);
return getPlatformPixelFormats()->getMTLClearDepthValue(vkClearValue);
}
MVK_PUBLIC_SYMBOL uint32_t mvkMTLClearStencilFromVkClearValue(VkClearValue vkClearValue) {
return _platformPixelFormats.getMTLClearStencilValue(vkClearValue);
return getPlatformPixelFormats()->getMTLClearStencilValue(vkClearValue);
}
MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsDepthFormat(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.isDepthFormat(mtlFormat);
return getPlatformPixelFormats()->isDepthFormat(mtlFormat);
}
MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsStencilFormat(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.isStencilFormat(mtlFormat);
return getPlatformPixelFormats()->isStencilFormat(mtlFormat);
}
MVK_PUBLIC_SYMBOL bool mvkMTLPixelFormatIsPVRTCFormat(MTLPixelFormat mtlFormat) {
return _platformPixelFormats.isPVRTCFormat(mtlFormat);
return getPlatformPixelFormats()->isPVRTCFormat(mtlFormat);
}
#undef mvkMTLTextureTypeFromVkImageType
MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageType(VkImageType vkImageType,
uint32_t arraySize,
bool isMultisample) {
return mvkMTLTextureTypeFromVkImageTypeObj(vkImageType, arraySize, isMultisample, nullptr);
}
MTLTextureType mvkMTLTextureTypeFromVkImageTypeObj(VkImageType vkImageType,
uint32_t arraySize,
bool isMultisample,
MVKBaseObject* mvkObj) {
switch (vkImageType) {
case VK_IMAGE_TYPE_3D: return MTLTextureType3D;
case VK_IMAGE_TYPE_1D: return (mvkConfig().texture1DAs2D
case VK_IMAGE_TYPE_1D: return (mvkGetMVKConfig(mvkObj).texture1DAs2D
? mvkMTLTextureTypeFromVkImageType(VK_IMAGE_TYPE_2D, arraySize, isMultisample)
: (arraySize > 1 ? MTLTextureType1DArray : MTLTextureType1D));
case VK_IMAGE_TYPE_2D:
@ -170,14 +184,22 @@ MVK_PUBLIC_SYMBOL VkImageType mvkVkImageTypeFromMTLTextureType(MTLTextureType mt
return VK_IMAGE_TYPE_2D;
}
}
#undef mvkMTLTextureTypeFromVkImageViewType
MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageViewType(VkImageViewType vkImageViewType,
bool isMultisample) {
return mvkMTLTextureTypeFromVkImageViewTypeObj(vkImageViewType, isMultisample, nullptr);
}
MTLTextureType mvkMTLTextureTypeFromVkImageViewTypeObj(VkImageViewType vkImageViewType,
bool isMultisample,
MVKBaseObject* mvkObj) {
switch (vkImageViewType) {
case VK_IMAGE_VIEW_TYPE_3D: return MTLTextureType3D;
case VK_IMAGE_VIEW_TYPE_CUBE: return MTLTextureTypeCube;
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: return MTLTextureTypeCubeArray;
case VK_IMAGE_VIEW_TYPE_1D: return mvkConfig().texture1DAs2D ? mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D, isMultisample) : MTLTextureType1D;
case VK_IMAGE_VIEW_TYPE_1D_ARRAY: return mvkConfig().texture1DAs2D ? mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D_ARRAY, isMultisample) : MTLTextureType1DArray;
case VK_IMAGE_VIEW_TYPE_1D: return mvkGetMVKConfig(mvkObj).texture1DAs2D ? mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D, isMultisample) : MTLTextureType1D;
case VK_IMAGE_VIEW_TYPE_1D_ARRAY: return mvkGetMVKConfig(mvkObj).texture1DAs2D ? mvkMTLTextureTypeFromVkImageViewType(VK_IMAGE_VIEW_TYPE_2D_ARRAY, isMultisample) : MTLTextureType1DArray;
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
#if MVK_MACOS
@ -192,11 +214,11 @@ MVK_PUBLIC_SYMBOL MTLTextureType mvkMTLTextureTypeFromVkImageViewType(VkImageVie
}
MVK_PUBLIC_SYMBOL MTLTextureUsage mvkMTLTextureUsageFromVkImageUsageFlags(VkImageUsageFlags vkImageUsageFlags, MTLPixelFormat mtlPixFmt) {
return _platformPixelFormats.getMTLTextureUsage(vkImageUsageFlags, mtlPixFmt);
return getPlatformPixelFormats()->getMTLTextureUsage(vkImageUsageFlags, mtlPixFmt);
}
MVK_PUBLIC_SYMBOL VkImageUsageFlags mvkVkImageUsageFlagsFromMTLTextureUsage(MTLTextureUsage mtlUsage, MTLPixelFormat mtlFormat) {
return _platformPixelFormats.getVkImageUsageFlags(mtlUsage, mtlFormat);
return getPlatformPixelFormats()->getVkImageUsageFlags(mtlUsage, mtlFormat);
}
MVK_PUBLIC_SYMBOL uint32_t mvkSampleCountFromVkSampleCountFlagBits(VkSampleCountFlagBits vkSampleCountFlag) {
@ -584,23 +606,32 @@ MTLMultisampleStencilResolveFilter mvkMTLMultisampleStencilResolveFilterFromVkRe
#endif
MVK_PUBLIC_SYMBOL MTLViewport mvkMTLViewportFromVkViewport(VkViewport vkViewport) {
MTLViewport mtlViewport;
mtlViewport.originX = vkViewport.x;
mtlViewport.originY = vkViewport.y;
mtlViewport.width = vkViewport.width;
mtlViewport.height = vkViewport.height;
mtlViewport.znear = vkViewport.minDepth;
mtlViewport.zfar = vkViewport.maxDepth;
return mtlViewport;
return {
.originX = vkViewport.x,
.originY = vkViewport.y,
.width = vkViewport.width,
.height = vkViewport.height,
.znear = vkViewport.minDepth,
.zfar = vkViewport.maxDepth
};
}
MVK_PUBLIC_SYMBOL MTLScissorRect mvkMTLScissorRectFromVkRect2D(VkRect2D vkRect) {
MTLScissorRect mtlScissor;
mtlScissor.x = vkRect.offset.x;
mtlScissor.y = vkRect.offset.y;
mtlScissor.width = vkRect.extent.width;
mtlScissor.height = vkRect.extent.height;
return mtlScissor;
return {
.x = (NSUInteger)max(vkRect.offset.x, 0),
.y = (NSUInteger)max(vkRect.offset.y, 0),
.width = vkRect.extent.width,
.height = vkRect.extent.height
};
}
MVK_PUBLIC_SYMBOL VkRect2D mvkVkRect2DFromMTLScissorRect(MTLScissorRect mtlScissorRect) {
return {
.offset = { .x = (int32_t)mtlScissorRect.x,
.y = (int32_t)mtlScissorRect.y },
.extent = { .width = (uint32_t)mtlScissorRect.width,
.height = (uint32_t)mtlScissorRect.height }
};
}
MVK_PUBLIC_SYMBOL MTLCompareFunction mvkMTLCompareFunctionFromVkCompareOp(VkCompareOp vkOp) {
@ -728,40 +759,50 @@ MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode
}
}
MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages,
MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages,
bool placeBarrierBefore) {
// Although there are many combined render/compute/host stages in Vulkan, there are only two render
// stages in Metal. If the Vulkan stage did not map ONLY to a specific Metal render stage, then if the
// barrier is to be placed before the render stages, it should come before the vertex stage, otherwise
// if the barrier is to be placed after the render stages, it should come after the fragment stage.
if (placeBarrierBefore) {
bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT));
bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT));
return placeBeforeFragment ? MTLRenderStageFragment : MTLRenderStageVertex;
} else {
bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT));
bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT));
return placeAfterVertex ? MTLRenderStageVertex : MTLRenderStageFragment;
}
}
MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess) {
MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess) {
MTLBarrierScope mtlScope = MTLBarrierScope(0);
if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT) ) {
if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_2_INDEX_READ_BIT |
VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_2_UNIFORM_READ_BIT)) ) {
mtlScope |= MTLBarrierScopeBuffers;
}
if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) {
if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_SHADER_READ_BIT |
VK_ACCESS_2_SHADER_WRITE_BIT |
VK_ACCESS_2_MEMORY_READ_BIT |
VK_ACCESS_2_MEMORY_WRITE_BIT)) ) {
mtlScope |= MTLBarrierScopeBuffers | MTLBarrierScopeTextures;
}
#if MVK_MACOS
if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) {
if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_2_MEMORY_READ_BIT |
VK_ACCESS_2_MEMORY_WRITE_BIT)) ) {
mtlScope |= MTLBarrierScopeRenderTargets;
}
#endif

View File

@ -1466,7 +1466,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineWidth(
float lineWidth) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetLineWidth, commandBuffer, lineWidth);
MVKTraceVulkanCallEnd();
}
@ -1496,7 +1495,6 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBounds(
float maxDepthBounds) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
MVKTraceVulkanCallEnd();
}
@ -1564,13 +1562,14 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindIndexBuffer(
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindVertexBuffers(
VkCommandBuffer commandBuffer,
uint32_t startBinding,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets) {
MVKTraceVulkanCallStart();
MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, startBinding, bindingCount, pBuffers, pOffsets);
MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer,
firstBinding, bindingCount, pBuffers, pOffsets, nullptr, nullptr);
MVKTraceVulkanCallEnd();
}
@ -1964,7 +1963,7 @@ static void mvkCmdBeginRenderPass(
MVKAddCmdFrom5Thresholds(BeginRenderPass,
pRenderPassBegin->clearValueCount, 1, 2,
attachments.size, 0, 1, 2,
attachments.size(), 0, 1, 2,
commandBuffer,
pRenderPassBegin,
pSubpassBeginInfo,
@ -2517,8 +2516,8 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkWaitSemaphores(
#pragma mark Vulkan 1.3 calls
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering(
VkCommandBuffer commandBuffer,
const VkRenderingInfo* pRenderingInfo) {
VkCommandBuffer commandBuffer,
const VkRenderingInfo* pRenderingInfo) {
MVKTraceVulkanCallStart();
MVKAddCmdFrom3Thresholds(BeginRendering, pRenderingInfo->colorAttachmentCount,
@ -2526,98 +2525,340 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering(
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering(
VkCommandBuffer commandBuffer) {
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindVertexBuffers2(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes,
const VkDeviceSize* pStrides) {
MVKTraceVulkanCallStart();
MVKAddCmd(EndRendering, commandBuffer);
MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer,
firstBinding, bindingCount, pBuffers, pOffsets, pSizes, pStrides);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_STUB(vkCmdBindVertexBuffers2, void, VkCommandBuffer, uint32_t, uint32_t, const VkBuffer*, const VkDeviceSize*, const VkDeviceSize*, const VkDeviceSize*)
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBlitImage2(
VkCommandBuffer commandBuffer,
const VkBlitImageInfo2* pBlitImageInfo) {
MVKTraceVulkanCallStart();
VkCommandBuffer commandBuffer,
const VkBlitImageInfo2* pBlitImageInfo) {
MVKTraceVulkanCallStart();
MVKAddCmdFromThreshold(BlitImage, pBlitImageInfo->regionCount, 1, commandBuffer,
pBlitImageInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBuffer2(
VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2* pCopyBufferInfo) {
MVKTraceVulkanCallStart();
VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2* pCopyBufferInfo) {
MVKTraceVulkanCallStart();
MVKAddCmdFromThreshold(CopyBuffer, pCopyBufferInfo->regionCount, 1, commandBuffer, pCopyBufferInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBufferToImage2(
VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) {
MVKTraceVulkanCallStart();
VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) {
MVKTraceVulkanCallStart();
MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyBufferToImageInfo->regionCount, 1, 4, 8, commandBuffer,
pCopyBufferToImageInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImage2(
VkCommandBuffer commandBuffer,
const VkCopyImageInfo2* pCopyImageInfo) {
MVKTraceVulkanCallStart();
VkCommandBuffer commandBuffer,
const VkCopyImageInfo2* pCopyImageInfo) {
MVKTraceVulkanCallStart();
MVKAddCmdFromThreshold(CopyImage, pCopyImageInfo->regionCount, 1, commandBuffer,
pCopyImageInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImageToBuffer2(
VkCommandBuffer commandBuffer,
const VkCopyImageToBufferInfo2* pCopyImageInfo) {
MVKTraceVulkanCallStart();
VkCommandBuffer commandBuffer,
const VkCopyImageToBufferInfo2* pCopyImageInfo) {
MVKTraceVulkanCallStart();
MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyImageInfo->regionCount, 1, 4, 8, commandBuffer,
pCopyImageInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_STUB(vkCmdPipelineBarrier2, void, VkCommandBuffer, const VkDependencyInfo*)
MVK_PUBLIC_VULKAN_STUB(vkCmdResetEvent2, void, VkCommandBuffer, VkEvent, VkPipelineStageFlags2 stageMask)
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering(
VkCommandBuffer commandBuffer) {
MVKTraceVulkanCallStart();
MVKAddCmd(EndRendering, commandBuffer);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdPipelineBarrier2(
VkCommandBuffer commandBuffer,
const VkDependencyInfo* pDependencyInfo) {
MVKTraceVulkanCallStart();
uint32_t barrierCount = pDependencyInfo->memoryBarrierCount + pDependencyInfo->bufferMemoryBarrierCount + pDependencyInfo->imageMemoryBarrierCount;
MVKAddCmdFrom2Thresholds(PipelineBarrier, barrierCount, 1, 4, commandBuffer, pDependencyInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResetEvent2(
VkCommandBuffer commandBuffer,
VkEvent event,
VkPipelineStageFlags2 stageMask) {
MVKTraceVulkanCallStart();
MVKAddCmd(ResetEvent, commandBuffer, event, stageMask);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2(
VkCommandBuffer commandBuffer,
const VkResolveImageInfo2* pResolveImageInfo) {
MVKTraceVulkanCallStart();
VkCommandBuffer commandBuffer,
const VkResolveImageInfo2* pResolveImageInfo) {
MVKTraceVulkanCallStart();
MVKAddCmdFromThreshold(ResolveImage, pResolveImageInfo->regionCount, 1, commandBuffer,
pResolveImageInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_STUB(vkCmdSetCullMode, void, VkCommandBuffer, VkCullModeFlags)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthBiasEnable, void, VkCommandBuffer, VkBool32)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthBoundsTestEnable, void, VkCommandBuffer, VkBool32)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthCompareOp, void, VkCommandBuffer, VkCompareOp)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthTestEnable, void, VkCommandBuffer, VkBool32)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthWriteEnable, void, VkCommandBuffer, VkBool32)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetEvent2, void, VkCommandBuffer, VkEvent, const VkDependencyInfo*)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetFrontFace, void, VkCommandBuffer, VkFrontFace)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveRestartEnable, void, VkCommandBuffer, VkBool32)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveTopology, void, VkCommandBuffer, VkPrimitiveTopology)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetRasterizerDiscardEnable, void, VkCommandBuffer, VkBool32)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetScissorWithCount, void, VkCommandBuffer, uint32_t, const VkRect2D*)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilOp, void, VkCommandBuffer, VkStencilFaceFlags, VkStencilOp, VkStencilOp, VkStencilOp, VkCompareOp)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilTestEnable, void, VkCommandBuffer, VkBool32)
MVK_PUBLIC_VULKAN_STUB(vkCmdSetViewportWithCount, void, VkCommandBuffer, uint32_t, const VkViewport*)
MVK_PUBLIC_VULKAN_STUB(vkCmdWaitEvents2, void, VkCommandBuffer, uint32_t, const VkEvent*, const VkDependencyInfo*)
MVK_PUBLIC_VULKAN_STUB(vkCmdWriteTimestamp2, void, VkCommandBuffer, VkPipelineStageFlags2, VkQueryPool, uint32_t)
MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkCreatePrivateDataSlot, VkDevice, const VkPrivateDataSlotCreateInfo*, const VkAllocationCallbacks*, VkPrivateDataSlot*)
MVK_PUBLIC_VULKAN_STUB(vkDestroyPrivateDataSlot, void, VkDevice, VkPrivateDataSlot, const VkAllocationCallbacks*)
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetCullMode(
VkCommandBuffer commandBuffer,
VkCullModeFlags cullMode) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetCullMode, commandBuffer, cullMode);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBiasEnable(
VkCommandBuffer commandBuffer,
VkBool32 depthBiasEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetDepthBiasEnable, commandBuffer, depthBiasEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthBoundsTestEnable(
VkCommandBuffer commandBuffer,
VkBool32 depthBoundsTestEnable) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthCompareOp(
VkCommandBuffer commandBuffer,
VkCompareOp depthCompareOp) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetDepthCompareOp, commandBuffer, depthCompareOp);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthTestEnable(
VkCommandBuffer commandBuffer,
VkBool32 depthTestEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetDepthTestEnable, commandBuffer, depthTestEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthWriteEnable(
VkCommandBuffer commandBuffer,
VkBool32 depthWriteEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetDepthWriteEnable, commandBuffer, depthWriteEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetEvent2(
VkCommandBuffer commandBuffer,
VkEvent event,
const VkDependencyInfo* pDependencyInfo) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetEvent, commandBuffer, event, pDependencyInfo);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetFrontFace(
VkCommandBuffer commandBuffer,
VkFrontFace frontFace) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetFrontFace, commandBuffer, frontFace);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveRestartEnable(
VkCommandBuffer commandBuffer,
VkBool32 primitiveRestartEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetPrimitiveRestartEnable, commandBuffer, primitiveRestartEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPrimitiveTopology(
VkCommandBuffer commandBuffer,
VkPrimitiveTopology primitiveTopology) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetPrimitiveTopology, commandBuffer, primitiveTopology);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizerDiscardEnable(
VkCommandBuffer commandBuffer,
VkBool32 rasterizerDiscardEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetRasterizerDiscardEnable, commandBuffer, rasterizerDiscardEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetScissorWithCount(
VkCommandBuffer commandBuffer,
uint32_t scissorCount,
const VkRect2D* pScissors) {
MVKTraceVulkanCallStart();
MVKAddCmdFromThreshold(SetScissor, scissorCount, 1, commandBuffer, 0, scissorCount, pScissors);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilOp(
VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
VkStencilOp failOp,
VkStencilOp passOp,
VkStencilOp depthFailOp,
VkCompareOp compareOp) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetStencilOp, commandBuffer, faceMask, failOp, passOp, depthFailOp, compareOp);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetStencilTestEnable(
VkCommandBuffer commandBuffer,
VkBool32 stencilTestEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetStencilTestEnable, commandBuffer, stencilTestEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetViewportWithCount(
VkCommandBuffer commandBuffer,
uint32_t viewportCount,
const VkViewport* pViewports) {
MVKTraceVulkanCallStart();
MVKAddCmdFromThreshold(SetViewport, viewportCount, 1, commandBuffer, 0, viewportCount, pViewports);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWaitEvents2(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
const VkDependencyInfo* pDependencyInfos) {
MVKTraceVulkanCallStart();
MVKAddCmdFromThreshold(WaitEvents, eventCount, 1, commandBuffer, eventCount, pEvents, pDependencyInfos);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWriteTimestamp2(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags2 stage,
VkQueryPool queryPool,
uint32_t query) {
MVKTraceVulkanCallStart();
MVKAddCmd(WriteTimestamp, commandBuffer, stage, queryPool, query);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlot(
VkDevice device,
const VkPrivateDataSlotCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPrivateDataSlotEXT* pPrivateDataSlot) {
MVKTraceVulkanCallStart();
MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot);
MVKTraceVulkanCallEnd();
return rslt;
}
MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlot(
VkDevice device,
VkPrivateDataSlotEXT privateDataSlot,
const VkAllocationCallbacks* pAllocator) {
MVKTraceVulkanCallStart();
MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_STUB(vkGetDeviceBufferMemoryRequirements, void, VkDevice, const VkDeviceBufferMemoryRequirements*, VkMemoryRequirements2*)
MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, VkMemoryRequirements2*)
MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageSparseMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, uint32_t*, VkSparseImageMemoryRequirements2*)
MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkGetPhysicalDeviceToolProperties, VkPhysicalDevice, uint32_t*, VkPhysicalDeviceToolProperties*)
MVK_PUBLIC_VULKAN_STUB(vkGetPrivateData, void, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t*)
MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkQueueSubmit2, VkQueue, uint32_t, const VkSubmitInfo2*, VkFence)
MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkSetPrivateData, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t)
MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateData(
VkDevice device,
VkObjectType objectType,
uint64_t objectHandle,
VkPrivateDataSlotEXT privateDataSlot,
uint64_t* pData) {
MVKTraceVulkanCallStart();
MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot;
*pData = mvkPDS->getData(objectType, objectHandle);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2(
VkQueue queue,
uint32_t submitCount,
const VkSubmitInfo2* pSubmits,
VkFence fence) {
MVKTraceVulkanCallStart();
MVKQueue* mvkQ = MVKQueue::getMVKQueue(queue);
VkResult rslt = mvkQ->submit(submitCount, pSubmits, fence, kMVKCommandUseQueueSubmit);
MVKTraceVulkanCallEnd();
return rslt;
}
MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateData(
VkDevice device,
VkObjectType objectType,
uint64_t objectHandle,
VkPrivateDataSlotEXT privateDataSlot,
uint64_t data) {
MVKTraceVulkanCallStart();
MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot;
mvkPDS->setData(objectType, objectHandle, data);
MVKTraceVulkanCallEnd();
return VK_SUCCESS;
}
#pragma mark -
#pragma mark VK_KHR_bind_memory2 extension
@ -3102,6 +3343,17 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkGetPhysicalDeviceSurfaceFormats2KHR(
}
#pragma mark -
#pragma mark VK_KHR_synchronization2
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdPipelineBarrier2, KHR);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdResetEvent2, KHR);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetEvent2, KHR);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWaitEvents2, KHR);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWriteTimestamp2, KHR);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkQueueSubmit2, KHR);
#pragma mark -
#pragma mark VK_KHR_timeline_semaphore
@ -3355,6 +3607,233 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkSubmitDebugUtilsMessageEXT(
}
#pragma mark -
#pragma mark VK_EXT_extended_dynamic_state
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdBindVertexBuffers2, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetCullMode, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthBoundsTestEnable, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthCompareOp, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthTestEnable, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthWriteEnable, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetFrontFace, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetPrimitiveTopology, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetScissorWithCount, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetStencilOp, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetStencilTestEnable, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetViewportWithCount, EXT);
#pragma mark -
#pragma mark VK_EXT_extended_dynamic_state2
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetDepthBiasEnable, EXT);
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEXT(
VkCommandBuffer commandBuffer,
VkLogicOp logicOp) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPatchControlPointsEXT(
VkCommandBuffer commandBuffer,
uint32_t patchControlPoints) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetPatchControlPoints, commandBuffer, patchControlPoints);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetPrimitiveRestartEnable, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetRasterizerDiscardEnable, EXT);
#pragma mark -
#pragma mark VK_EXT_extended_dynamic_state3
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetAlphaToCoverageEnableEXT(
VkCommandBuffer commandBuffer,
VkBool32 alphaToCoverageEnable) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetAlphaToOneEnableEXT(
VkCommandBuffer commandBuffer,
VkBool32 alphaToOneEnable) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendAdvancedEXT(
VkCommandBuffer commandBuffer,
uint32_t firstAttachment,
uint32_t attachmentCount,
const VkColorBlendAdvancedEXT* pColorBlendAdvanced) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendEnableEXT(
VkCommandBuffer commandBuffer,
uint32_t firstAttachment,
uint32_t attachmentCount,
const VkBool32* pColorBlendEnables) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorBlendEquationEXT(
VkCommandBuffer commandBuffer,
uint32_t firstAttachment,
uint32_t attachmentCount,
const VkColorBlendEquationEXT* pColorBlendEquations) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetColorWriteMaskEXT(
VkCommandBuffer commandBuffer,
uint32_t firstAttachment,
uint32_t attachmentCount,
const VkColorComponentFlags* pColorWriteMasks) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetConservativeRasterizationModeEXT(
VkCommandBuffer commandBuffer,
VkConservativeRasterizationModeEXT conservativeRasterizationMode) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClampEnableEXT(
VkCommandBuffer commandBuffer,
VkBool32 depthClampEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetDepthClipEnable, commandBuffer, !depthClampEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClipEnableEXT(
VkCommandBuffer commandBuffer,
VkBool32 depthClipEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetDepthClipEnable, commandBuffer, depthClipEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetDepthClipNegativeOneToOneEXT(
VkCommandBuffer commandBuffer,
VkBool32 negativeOneToOne) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetExtraPrimitiveOverestimationSizeEXT(
VkCommandBuffer commandBuffer,
float extraPrimitiveOverestimationSize) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineRasterizationModeEXT(
VkCommandBuffer commandBuffer,
VkLineRasterizationModeEXT lineRasterizationMode) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLineStippleEnableEXT(
VkCommandBuffer commandBuffer,
VkBool32 stippledLineEnable) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetLogicOpEnableEXT(
VkCommandBuffer commandBuffer,
VkBool32 logicOpEnable) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetPolygonModeEXT(
VkCommandBuffer commandBuffer,
VkPolygonMode polygonMode) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetPolygonMode, commandBuffer, polygonMode);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetProvokingVertexModeEXT(
VkCommandBuffer commandBuffer,
VkProvokingVertexModeEXT provokingVertexMode) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizationSamplesEXT(
VkCommandBuffer commandBuffer,
VkSampleCountFlagBits rasterizationSamples) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetRasterizationStreamEXT(
VkCommandBuffer commandBuffer,
uint32_t rasterizationStream) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetSampleLocationsEnableEXT(
VkCommandBuffer commandBuffer,
VkBool32 sampleLocationsEnable) {
MVKTraceVulkanCallStart();
MVKAddCmd(SetSampleLocationsEnable, commandBuffer, sampleLocationsEnable);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetSampleMaskEXT(
VkCommandBuffer commandBuffer,
VkSampleCountFlagBits samples,
const VkSampleMask* pSampleMask) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetTessellationDomainOriginEXT(
VkCommandBuffer commandBuffer,
VkTessellationDomainOrigin domainOrigin) {
MVKTraceVulkanCallStart();
MVKTraceVulkanCallEnd();
}
#pragma mark -
#pragma mark VK_EXT_external_memory_host extension
@ -3390,6 +3869,26 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkSetHdrMetadataEXT(
}
#pragma mark -
#pragma mark VK_EXT_headless_surface extension
MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreateHeadlessSurfaceEXT(
VkInstance instance,
const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface) {
MVKTraceVulkanCallStart();
MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance);
MVKSurface* mvkSrfc = mvkInst->createSurface(pCreateInfo, pAllocator);
*pSurface = (VkSurfaceKHR)mvkSrfc;
VkResult rslt = mvkSrfc->getConfigurationResult();
if (rslt < 0) { *pSurface = VK_NULL_HANDLE; mvkInst->destroySurface(mvkSrfc, pAllocator); }
MVKTraceVulkanCallEnd();
return rslt;
}
#pragma mark -
#pragma mark VK_EXT_host_query_reset extension
@ -3433,56 +3932,12 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkExportMetalObjectsEXT(
#pragma mark -
#pragma mark VK_EXT_private_data extension
MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlotEXT(
VkDevice device,
const VkPrivateDataSlotCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPrivateDataSlotEXT* pPrivateDataSlot) {
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCreatePrivateDataSlot, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkDestroyPrivateDataSlot, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkGetPrivateData, EXT);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkSetPrivateData, EXT);
MVKTraceVulkanCallStart();
MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot);
MVKTraceVulkanCallEnd();
return rslt;
}
MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlotEXT(
VkDevice device,
VkPrivateDataSlotEXT privateDataSlot,
const VkAllocationCallbacks* pAllocator) {
MVKTraceVulkanCallStart();
MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator);
MVKTraceVulkanCallEnd();
}
MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateDataEXT(
VkDevice device,
VkObjectType objectType,
uint64_t objectHandle,
VkPrivateDataSlotEXT privateDataSlot,
uint64_t data) {
MVKTraceVulkanCallStart();
MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot;
mvkPDS->setData(objectType, objectHandle, data);
MVKTraceVulkanCallEnd();
return VK_SUCCESS;
}
MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateDataEXT(
VkDevice device,
VkObjectType objectType,
uint64_t objectHandle,
VkPrivateDataSlotEXT privateDataSlot,
uint64_t* pData) {
MVKTraceVulkanCallStart();
MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot;
*pData = mvkPDS->getData(objectType, objectHandle);
MVKTraceVulkanCallEnd();
}
#pragma mark -
#pragma mark VK_EXT_sample_locations extension
@ -3507,6 +3962,7 @@ void vkCmdSetSampleLocationsEXT(
MVKTraceVulkanCallEnd();
}
#pragma mark -
#pragma mark VK_GOOGLE_display_timing extension
@ -3535,12 +3991,14 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkGetPastPresentationTimingGOOGLE(
return rslt;
}
#pragma mark -
#pragma mark VK_AMD_draw_indirect_count
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdDrawIndexedIndirectCount, AMD);
MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdDrawIndirectCount, AMD);
#pragma mark -
#pragma mark iOS & macOS surface extensions

View File

@ -97,7 +97,7 @@
isEnabled = "NO">
</CommandLineArgument>
<CommandLineArgument
argument = "2.4"
argument = "3.1"
isEnabled = "NO">
</CommandLineArgument>
<CommandLineArgument

View File

@ -220,7 +220,9 @@ bool MoltenVKShaderConverterTool::convertSPIRV(const vector<uint32_t>& spv,
mslContext.options.shouldFlipVertexY = _shouldFlipVertexY;
mslContext.options.mslOptions.argument_buffers = _useMetalArgumentBuffers;
mslContext.options.mslOptions.force_active_argument_buffer_resources = _useMetalArgumentBuffers;
mslContext.options.mslOptions.pad_argument_buffer_resources = _useMetalArgumentBuffers;
mslContext.options.mslOptions.pad_argument_buffer_resources = false;
mslContext.options.mslOptions.argument_buffers_tier = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::ArgumentBuffersTier::Tier2;
mslContext.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0);
SPIRVToMSLConverter spvConverter;
spvConverter.setSPIRV(spv);
@ -424,7 +426,10 @@ MoltenVKShaderConverterTool::MoltenVKShaderConverterTool(int argc, const char* a
_quietMode = false;
_useMetalArgumentBuffers = false;
if (mvkOSVersionIsAtLeast(13.0)) {
if (mvkOSVersionIsAtLeast(14.0)) {
_mslVersionMajor = 3;
_mslVersionMinor = 1;
} else if (mvkOSVersionIsAtLeast(13.0)) {
_mslVersionMajor = 3;
_mslVersionMinor = 0;
} else if (mvkOSVersionIsAtLeast(12.0)) {

View File

@ -149,24 +149,17 @@ for which to build the external libraries. The platform choices include:
--maccat
--tvos
--tvossim
--visionos
--visionossim
The `visionos` and `visionossim` selections require Xcode 15+.
You can specify multiple of these selections. The result is a single `XCFramework`
for each external dependency library, with each `XCFramework` containing binaries for
each of the requested platforms.
The `--all` selection is the same as entering all of the other platform choices, except
`--visionos` and `--visionossim`, and will result in a single `XCFramework` for each
external dependency library, with each `XCFramework` containing binaries for all supported
platforms and simulators. The `--visionos` and `--visionossim` selections must be invoked
with a separate invocation of `fetchDependencies`, because those selections require
Xcode 15+, and will cause a multi-platform build on older versions of Xcode to abort.
The `--all` selection is the same as entering all of the other platform choices,
and will result in a single `XCFramework` for each external dependency library,
with each `XCFramework` containing binaries for all supported platforms and simulators.
Running `fetchDependencies` repeatedly with different platforms will accumulate
targets in the `XCFramework`.
Running `fetchDependencies` repeatedly with different platforms will accumulate targets
in the `XCFramework`, if the `--keep-cache` option is used on each invocation.
For more information about the external open-source libraries used by **MoltenVK**,
see the [`ExternalRevisions/README.md`](ExternalRevisions/README.md) document.
@ -263,8 +256,6 @@ from the command line. The following `make` targets are provided:
make maccat
make tvos
make tvossim
make visionos
make visionossim
make all-debug
make macos-debug
@ -273,15 +264,12 @@ from the command line. The following `make` targets are provided:
make maccat-debug
make tvos-debug
make tvossim-debug
make visionos-debug
make visionossim-debug
make clean
make install
- Running `make` repeatedly with different targets will accumulate binaries for these different targets.
- The `all` target executes all platform targets, except `visionos` and `visionossim`, as these require
Xcode 15+, and will abort a multi-platform build on older versions of Xcode.
- The `all` target executes all platform targets.
- The `all` target is the default target. Running `make` with no arguments is the same as running `make all`.
- The `*-debug` targets build the binaries using the **_Debug_** configuration.
- The `install` target will copy the most recently built `MoltenVK.xcframework` into the

View File

@ -1,5 +1,7 @@
#!/bin/bash
if [ "${SKIP_PACKAGING}" = "Y" ]; then exit 0; fi
. "${PROJECT_DIR}/Scripts/create_xcframework_func.sh"
export MVK_EXT_DIR="${PROJECT_DIR}/External"

View File

@ -1,5 +1,7 @@
#!/bin/bash
if [ "${SKIP_PACKAGING}" = "Y" ]; then exit 0; fi
set -e
export MVK_EXT_LIB_DST_PATH="${PROJECT_DIR}/External/build/"
@ -7,6 +9,12 @@ export MVK_EXT_LIB_DST_PATH="${PROJECT_DIR}/External/build/"
# Assign symlink to Latest
ln -sfn "${CONFIGURATION}" "${MVK_EXT_LIB_DST_PATH}/Latest"
# Remove the large Intermediates directory if no longer needed
if [ "${KEEP_CACHE}" != "Y" ]; then
echo Removing Intermediates library at "${MVK_EXT_LIB_DST_PATH}/Intermediates"
rm -rf "${MVK_EXT_LIB_DST_PATH}/Intermediates"
fi
# Clean MoltenVK to ensure the next MoltenVK build will use the latest external library versions.
make --quiet clean

View File

@ -113,7 +113,7 @@ export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(2 = VK_EXT_descriptor_
export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always)
export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4)
export MVK_CONFIG_PERFORMANCE_TRACKING=0
export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=2 #(2 = Device lifetime)
export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=3 #(2 = Device lifetime, 3 = Process lifetime)
# -------------- Operation --------------------
@ -122,8 +122,12 @@ start_time=${SECONDS}
"${cts_vk_dir}/deqp-vk" \
--deqp-archive-dir="${cts_vk_dir}/.." \
--deqp-log-filename="/dev/null" \
--deqp-log-images=disable \
--deqp-log-shader-sources=disable \
--deqp-shadercache=disable \
--deqp-log-decompiled-spirv=disable \
--deqp-log-flush=disable \
--deqp-caselist-file="${caselist_file}" \
&> "${results_file}"

Binary file not shown.

View File

@ -67,6 +67,12 @@
# --no-parallel-build
# Build the external libraries serially instead of in parallel. This is the default.
#
# --keep-cache
# Do not remove the External/build/Intermediates cache directory after building.
# Removing the Intermediates directory frees up significant disk space after the
# build, and is the default behaviour. Use this option if you intend to run this
# script repeatedly to incrementally build one platform at a time.
#
# --glslang-root path
# "path" specifies a directory path to a KhronosGroup/glslang repository.
# This repository does need to be built and the build directory must be in the
@ -117,6 +123,7 @@ V_HEADERS_ROOT=""
SPIRV_CROSS_ROOT=""
GLSLANG_ROOT=""
BLD_SPV_TLS=""
export KEEP_CACHE=""
while (( "$#" )); do
case "$1" in
@ -191,6 +198,10 @@ while (( "$#" )); do
XC_USE_BCKGND=""
shift 1
;;
--keep-cache)
KEEP_CACHE="Y"
shift 1
;;
-v)
XC_BUILD_VERBOSITY=""
shift 1
@ -410,7 +421,6 @@ function execute_xcodebuild_command () {
# 2 - Platform
# 3 - Destination (Optional. Defaults to same as platform)
function build_impl() {
BLD_SPECIFIED="Y"
XC_OS=${1}
XC_PLTFM=${2}
if [ "${3}" != "" ]; then
@ -442,7 +452,9 @@ function build_impl() {
# Select whether or not to run the build in parallel.
# 1 - OS
# 2 - platform
# 3 - Destination (Optional. Defaults to same as platform)
function build() {
BLD_SPECIFIED="Y"
if [ "$XC_USE_BCKGND" != "" ]; then
build_impl "${1}" "${2}" "${3}" &
else
@ -453,6 +465,7 @@ function build() {
EXT_DEPS=ExternalDependencies
XC_PROJ="${EXT_DEPS}.xcodeproj"
XC_DD_PATH="${EXT_DIR}/build"
export SKIP_PACKAGING="Y"
# Determine if xcpretty is present
XCPRETTY_PATH=$(command -v xcpretty 2> /dev/null || true) # ignore failures
@ -512,9 +525,10 @@ if [ "$XC_USE_BCKGND" != "" ]; then
fi
if [ "$BLD_SPECIFIED" != "" ]; then
# Build XCFrameworks, update latest symlink, and clean MoltenVK for rebuild
# Build XCFrameworks, update latest symlink, remove intermediates, and clean MoltenVK for rebuild
PROJECT_DIR="."
CONFIGURATION=${XC_CONFIG}
SKIP_PACKAGING=""
. "./Scripts/create_ext_lib_xcframeworks.sh"
. "./Scripts/package_ext_libs_finish.sh"
else