diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md
index 49881456..3b110afd 100644
--- a/Docs/MoltenVK_Runtime_UserGuide.md
+++ b/Docs/MoltenVK_Runtime_UserGuide.md
@@ -290,10 +290,11 @@ In addition to core *Vulkan* functionality, **MoltenVK**  also supports the foll
 - `VK_KHR_portability_subset`
 - `VK_KHR_push_descriptor`
 - `VK_KHR_relaxed_block_layout`
-- `VK_KHR_sampler_mirror_clamp_to_edge` *(macOS)*
+- `VK_KHR_sampler_mirror_clamp_to_edge` *(requires a Mac GPU or Apple family 7 GPU)*
 - `VK_KHR_sampler_ycbcr_conversion`
 - `VK_KHR_shader_draw_parameters`
 - `VK_KHR_shader_float16_int8`
+- `VK_KHR_shader_subgroup_extended_types` *(requires Metal 2.1 on Mac, or Metal 2.2 and Apple family 4 on iOS)*
 - `VK_KHR_storage_buffer_storage_class`
 - `VK_KHR_surface`
 - `VK_KHR_swapchain`
@@ -317,6 +318,7 @@ In addition to core *Vulkan* functionality, **MoltenVK**  also supports the foll
 - `VK_EXT_scalar_block_layout`
 - `VK_EXT_shader_stencil_export` *(requires Mac GPU family 2 or iOS GPU family 5)*
 - `VK_EXT_shader_viewport_index_layer`
+- `VK_EXT_subgroup_size_control` *(requires Metal 2.1 on Mac, or Metal 2.2 and Apple family 4 on iOS)*
 - `VK_EXT_swapchain_colorspace`
 - `VK_EXT_vertex_attribute_divisor`
 - `VK_EXT_texel_buffer_alignment` *(requires Metal 2.0)*
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 330df3bf..0d5b1896 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -19,10 +19,12 @@ MoltenVK 1.1.1
 Released TBD
 
 - Add support for extensions:
+	- `VK_KHR_sampler_mirror_clamp_to_edge` (iOS)
 	- `VK_KHR_timeline_semaphore`
 	- `VK_EXT_descriptor_indexing` (initial release limited to Metal Tier 1: 96/128 textures, 16 samplers)
 	- `VK_EXT_post_depth_coverage` (macOS)
 	- `VK_EXT_private_data`
+	- `VK_EXT_subgroup_size_control`
 	- `VK_EXT_texture_compression_astc_hdr`
 	- `VK_AMD_shader_image_load_store` (macOS)
 	- `VK_IMG_format_pvrtc` (macOS)
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 992c891d..47f20dec 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -616,7 +616,7 @@ typedef struct {
 	VkBool32 placementHeaps;					/**< If true, MTLHeap objects support placement of resources. */
 	VkDeviceSize pushConstantSizeAlignment;		/**< The alignment used internally when allocating memory for push constants. Must be PoT. */
 	uint32_t maxTextureLayers;					/**< The maximum number of layers in an array texture. */
-    uint32_t subgroupSize;			            /**< The number of threads in a SIMD-group. */
+    uint32_t maxSubgroupSize;			        /**< The maximum number of threads in a SIMD-group. */
 	VkDeviceSize vertexStrideAlignment;         /**< The alignment used for the stride of vertex attribute bindings. */
 	VkBool32 indirectTessellationDrawing;		/**< If true, tessellation draw calls support parameters held in a GPU buffer. */
 	VkBool32 nonUniformThreadgroups;			/**< If true, the device supports arbitrary-sized grids in compute workloads. */
@@ -634,6 +634,7 @@ typedef struct {
 	VkBool32 quadPermute;						/**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
 	VkBool32 simdPermute;						/**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
 	VkBool32 simdReduction;						/**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
+    uint32_t minSubgroupSize;			        /**< The minimum number of threads in a SIMD-group. */
 } MVKPhysicalDeviceMetalFeatures;
 
 /** MoltenVK performance of a particular type of activity. */
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 70535678..1b575c6d 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -53,6 +53,14 @@ using namespace std;
 #define supportsMTLFeatureSet(MFS)	[_mtlDevice supportsFeatureSet: MTLFeatureSet_ ##MFS]
 #define supportsMTLGPUFamily(GPUF)	([_mtlDevice respondsToSelector: @selector(supportsFamily:)] && [_mtlDevice supportsFamily: MTLGPUFamily ##GPUF])
 
+static const uint32_t kAMDVendorId = 0x1002;	// Vendor IDs below are compared against _properties.vendorID.
+static const uint32_t kAppleVendorId = 0x106b;	// Apple's PCI ID; also assigned to _properties.vendorID for Apple GPUs.
+static const uint32_t kIntelVendorId = 0x8086;
+static const uint32_t kNVVendorId = 0x10de;
+
+static const uint32_t kAMDRadeonRX5700XTDeviceId = 0x731f;	// Compared against _properties.deviceID; gets a minimum subgroup size of 32.
+static const uint32_t kAMDRadeonRX5500XTDeviceId = 0x7340;	// Compared against _properties.deviceID; gets a minimum subgroup size of 32.
+
 
 #pragma mark -
 #pragma mark MVKPhysicalDevice
@@ -192,6 +200,12 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
 				scalarLayoutFeatures->scalarBlockLayout = true;
 				break;
 			}
+			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {	// Fills in the subgroup size control feature flags.
+				auto* subgroupSizeFeatures = (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT*)next;
+				subgroupSizeFeatures->subgroupSizeControl = _metalFeatures.simdPermute || _metalFeatures.quadPermute;	// Enabled when either SIMD-group or quadgroup permutation is available.
+				subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute;	// Same condition as subgroupSizeControl.
+				break;
+			}
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
 				auto* texelBuffAlignFeatures = (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT*)next;
 				texelBuffAlignFeatures->texelBufferAlignment = _metalFeatures.texelBuffers && [_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)];
@@ -319,7 +333,7 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
 			}
             case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
                 auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next;
-                subgroupProps->subgroupSize = _metalFeatures.subgroupSize;
+                subgroupProps->subgroupSize = _metalFeatures.maxSubgroupSize;
                 subgroupProps->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
                 if (_features.tessellationShader) {
                     subgroupProps->supportedStages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
@@ -392,6 +406,14 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
 				robustness2Props->robustUniformBufferAccessSizeAlignment = 1;
 				break;
 			}
+			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {	// Fills in the subgroup size control limits.
+				auto* subgroupSizeProps = (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT*)next;
+				subgroupSizeProps->minSubgroupSize = _metalFeatures.minSubgroupSize;
+				subgroupSizeProps->maxSubgroupSize = _metalFeatures.maxSubgroupSize;
+				subgroupSizeProps->maxComputeWorkgroupSubgroups = _properties.limits.maxComputeWorkGroupInvocations / _metalFeatures.minSubgroupSize;	// The subgroup count is largest when every subgroup has the minimum size.
+				subgroupSizeProps->requiredSubgroupSizeStages = 0;	// No shader stage is reported as accepting a required subgroup size.
+				break;
+			}
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
 				auto* texelBuffAlignProps = (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT*)next;
 				// Save the 'next' pointer; we'll unintentionally overwrite it
@@ -1435,18 +1457,43 @@ void MVKPhysicalDevice::initMetalFeatures() {
         }
     }
 
-    _metalFeatures.subgroupSize = 1;
+    _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize = 1;
 #if MVK_MACOS
     if (_metalFeatures.simdPermute) {
-        static const uint32_t kAMDVendorId = 0x1002;
-        _metalFeatures.subgroupSize = (_properties.vendorID == kAMDVendorId) ? 64 : 32;
+        // Based on data from Sascha Willems' Vulkan Hardware Database.
+        // This would be a lot easier and less painful if MTLDevice had properties for this...
+        _metalFeatures.maxSubgroupSize = (_properties.vendorID == kAMDVendorId) ? 64 : 32;
+        switch (_properties.vendorID) {
+            case kIntelVendorId:
+                _metalFeatures.minSubgroupSize = 8;
+                break;
+            case kAMDVendorId:
+                switch (_properties.deviceID) {
+                    case kAMDRadeonRX5700XTDeviceId:
+                    case kAMDRadeonRX5500XTDeviceId:
+                        _metalFeatures.minSubgroupSize = 32;
+                        break;
+                    default:
+                        _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize;
+                        break;
+                }
+                break;
+            case kAppleVendorId:
+                // XXX Minimum thread execution width for Apple GPUs is unknown, but assumed to be 4. May be greater.
+                _metalFeatures.minSubgroupSize = 4;
+                break;
+            default:
+                _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize;
+                break;
+        }
     }
 #endif
 #if MVK_IOS
     if (_metalFeatures.simdPermute) {
-        _metalFeatures.subgroupSize = 32;
+        _metalFeatures.minSubgroupSize = 4;
+        _metalFeatures.maxSubgroupSize = 32;
     } else if (_metalFeatures.quadPermute) {
-        _metalFeatures.subgroupSize = 4;
+        _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize = 4;
     }
 #endif
 
@@ -1930,8 +1977,6 @@ void MVKPhysicalDevice::initLimits() {
     _properties.limits.lineWidthRange[1] = 1;
     _properties.limits.lineWidthGranularity = 1;
 
-    static const uint32_t kIntelVendorId = 0x8086;
-    static const uint32_t kNVVendorId = 0x10de;
     _properties.limits.standardSampleLocations = VK_TRUE;
     _properties.limits.strictLines = _properties.vendorID == kIntelVendorId || _properties.vendorID == kNVVendorId;
 
@@ -2038,7 +2083,6 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope
 
 void MVKPhysicalDevice::initGPUInfoProperties() {
 
-	static const uint32_t kIntelVendorId = 0x8086;
 	bool isFound = false;
 
 	bool isIntegrated = _mtlDevice.isLowPower;
@@ -2048,7 +2092,7 @@ void MVKPhysicalDevice::initGPUInfoProperties() {
 	if (supportsMTLGPUFamily(Apple5)) {
 		// This is an Apple GPU. It won't have a 'device-id' property, so fill it in
 		// like on iOS/tvOS.
-		_properties.vendorID = 0x106b;	// Apple's PCI ID
+		_properties.vendorID = kAppleVendorId;
 #if MVK_MACOS_APPLE_SILICON
 		if (supportsMTLGPUFamily(Apple7)) {
 			_properties.deviceID = 0xa140;
@@ -2130,7 +2174,7 @@ void MVKPhysicalDevice::initGPUInfoProperties() {
 		devID = coreCnt > 2 ? 0xa081 : 0xa080;
 	}
 
-	_properties.vendorID = 0x0000106b;	// Apple's PCI ID
+	_properties.vendorID = kAppleVendorId;
 	_properties.deviceID = devID;
 	_properties.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
 	strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
@@ -2147,7 +2191,7 @@ void MVKPhysicalDevice::initGPUInfoProperties() {
 		devID = 0xa101;
 	}
 
-  _properties.vendorID = 0x0000106b;  // Apple's PCI ID
+  _properties.vendorID = kAppleVendorId;
   _properties.deviceID = devID;
   _properties.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
   strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index cbe42663..7a160fa2 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -1000,6 +1000,7 @@ bool MVKGraphicsPipeline::addTessCtlShaderToPipeline(MTLComputePipelineDescripto
 	shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageTessCtl];
 	shaderContext.options.mslOptions.capture_output_to_buffer = true;
 	shaderContext.options.mslOptions.multi_patch_workgroup = true;
+	shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(_pTessCtlSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
 	addPrevStageOutputToShaderConverterContext(shaderContext, vtxOutputs);
 
 	MVKMTLFunction func = ((MVKShaderModule*)_pTessCtlSS->module)->getMTLFunction(&shaderContext, _pTessCtlSS->pSpecializationInfo, _pipelineCache);
@@ -1090,6 +1091,7 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto
 		shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageFragment];
 		shaderContext.options.entryPointName = _pFragmentSS->pName;
 		shaderContext.options.mslOptions.capture_output_to_buffer = false;
+		shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(_pFragmentSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
 		if (pCreateInfo->pMultisampleState) {
 			if (pCreateInfo->pMultisampleState->pSampleMask && pCreateInfo->pMultisampleState->pSampleMask[0] != 0xffffffff) {
 				shaderContext.options.mslOptions.additional_fixed_sample_mask = pCreateInfo->pMultisampleState->pSampleMask[0];
@@ -1484,7 +1486,6 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf
     shaderContext.options.mslOptions.multiview = mvkRendPass->isMultiview();
     shaderContext.options.mslOptions.multiview_layered_rendering = getDevice()->getPhysicalDevice()->canUseInstancingForMultiview();
     shaderContext.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT);
-    shaderContext.options.mslOptions.fixed_subgroup_size = _device->_pMetalFeatures->subgroupSize;
 #if MVK_MACOS
     shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
 #endif
@@ -1647,6 +1648,7 @@ MVKComputePipeline::MVKComputePipeline(MVKDevice* device,
 		MTLComputePipelineDescriptor* plDesc = [MTLComputePipelineDescriptor new];	// temp retain
 		plDesc.computeFunction = mtlFunc;
 		plDesc.maxTotalThreadsPerThreadgroup = _mtlThreadgroupSize.width * _mtlThreadgroupSize.height * _mtlThreadgroupSize.depth;
+		plDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = mvkIsAnyFlagEnabled(pCreateInfo->stage.flags, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
 
 		// Metal does not allow the name of the pipeline to be changed after it has been created,
 		// and we need to create the Metal pipeline immediately to provide error feedback to app.
@@ -1690,7 +1692,7 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI
 	shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
 	shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
 	shaderContext.options.mslOptions.texture_1D_as_2D = mvkTreatTexture1DAs2D();
-    shaderContext.options.mslOptions.fixed_subgroup_size = _device->_pMetalFeatures->subgroupSize;
+    shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
 #if MVK_MACOS
     shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
 #endif
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index 267e4f24..a35dbcdb 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -95,6 +95,7 @@ MVK_EXTENSION(EXT_robustness2, EXT_ROBUSTNESS_2, DEVICE)
 MVK_EXTENSION(EXT_scalar_block_layout, EXT_SCALAR_BLOCK_LAYOUT, DEVICE)
 MVK_EXTENSION(EXT_shader_stencil_export, EXT_SHADER_STENCIL_EXPORT, DEVICE)
 MVK_EXTENSION(EXT_shader_viewport_index_layer, EXT_SHADER_VIEWPORT_INDEX_LAYER, DEVICE)
+MVK_EXTENSION(EXT_subgroup_size_control, EXT_SUBGROUP_SIZE_CONTROL, DEVICE)
 MVK_EXTENSION(EXT_swapchain_colorspace, EXT_SWAPCHAIN_COLOR_SPACE, INSTANCE)
 MVK_EXTENSION(EXT_texel_buffer_alignment, EXT_TEXEL_BUFFER_ALIGNMENT, DEVICE)
 MVK_EXTENSION(EXT_texture_compression_astc_hdr, EXT_TEXTURE_COMPRESSION_ASTC_HDR, DEVICE)
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
index 0dc34002..0e0eba84 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
@@ -69,6 +69,7 @@ static bool mvkIsSupportedOnPlatform(VkExtensionProperties* pProperties) {
 	MVK_EXTENSION_MIN_OS(EXT_MEMORY_BUDGET,                  10.13, 11.0)
 	MVK_EXTENSION_MIN_OS(EXT_POST_DEPTH_COVERAGE,            10.16, 11.0)
 	MVK_EXTENSION_MIN_OS(EXT_SHADER_STENCIL_EXPORT,          10.14, 12.0)
+	MVK_EXTENSION_MIN_OS(EXT_SUBGROUP_SIZE_CONTROL,          10.14, 13.0)
 	MVK_EXTENSION_MIN_OS(EXT_TEXEL_BUFFER_ALIGNMENT,         10.13, 11.0)
 	MVK_EXTENSION_MIN_OS(EXT_TEXTURE_COMPRESSION_ASTC_HDR,   10.16, 13.0)
 	MVK_EXTENSION_MIN_OS(AMD_SHADER_TRINARY_MINMAX,          10.14, 12.0)