MVKPhysicalDevice: Correct subgroup properties.

On systems that do not support subgroup operations, the subgroup size is set to 1.

Make sure the subgroup size is fixed in the shader, at least until we
implement `VK_EXT_subgroup_size_control`.

According to the Metal feature set tables, SIMD-group reduction is only
supported on Mac family 2 GPUs and Apple family 7 GPUs. Previously, we
were exposing these on all Mac GPUs.

Quadgroup permutation is supported on all Apple GPUs starting from
family 4. We use the quadgroup functions for regular group non-uniform
ops as well, so these GPUs are considered to have a subgroup size of 4.
On Mac, it's a bit more complicated. The 2.1 tables say that all Mac
GPUs support quadgroup permutation, but the 3.0 and 4.0 tables say that
only family 2 supports quadgroup ops. I've allowed quad ops on family 1
for now.

Unfortunately, my testing shows that SIMD-group functions don't work in
fragment shaders on Mojave, so subgroup operations are not advertised
for the fragment stage until Metal 3 (macOS 10.15 / iOS 13).

Update SPIRV-Cross to pull in changes needed for all this.
This commit is contained in:
Chip Davis 2020-11-17 14:22:06 -06:00
parent ce85a96d80
commit 8e11c41c40
4 changed files with 62 additions and 21 deletions

View File

@ -1 +1 @@
b3c59263a0e8f921df15757c7b28407306a16830
3d16060c3243e9f7bfd027de6e2e27c348d1791e

View File

@ -631,6 +631,9 @@ typedef struct {
VkBool32 renderLinearTextures; /**< If true, linear textures are renderable. */
VkBool32 pullModelInterpolation; /**< If true, explicit interpolation functions are supported. */
VkBool32 samplerMirrorClampToEdge; /**< If true, the mirrored clamp to edge address mode is supported in samplers. */
VkBool32 quadPermute; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
VkBool32 simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
VkBool32 simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
} MVKPhysicalDeviceMetalFeatures;
/** MoltenVK performance of a particular type of activity. */

View File

@ -115,7 +115,7 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
auto* shaderSGTypesFeatures = (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures*)next;
shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.subgroupSize != 0;
shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.simdPermute || _metalFeatures.quadPermute;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
@ -317,28 +317,32 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
pushDescProps->maxPushDescriptors = _properties.limits.maxPerStageResources;
break;
}
#if MVK_MACOS
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES:
if (mvkOSVersionIsAtLeast(10.14)) {
auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next;
subgroupProps->subgroupSize = _metalFeatures.subgroupSize;
subgroupProps->supportedStages =
VK_SHADER_STAGE_COMPUTE_BIT |
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
VK_SHADER_STAGE_FRAGMENT_BIT;
subgroupProps->supportedOperations =
VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT |
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next;
subgroupProps->subgroupSize = _metalFeatures.subgroupSize;
subgroupProps->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
if (_features.tessellationShader) {
subgroupProps->supportedStages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
}
if (mvkOSVersionIsAtLeast(10.15, 13.0)) {
subgroupProps->supportedStages |= VK_SHADER_STAGE_FRAGMENT_BIT;
}
subgroupProps->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
if (_metalFeatures.simdPermute || _metalFeatures.quadPermute) {
subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_VOTE_BIT |
VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
// VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT;
subgroupProps->quadOperationsInAllStages = true;
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
}
if (_metalFeatures.simdReduction) {
subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
}
if (_metalFeatures.quadPermute) {
subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
}
subgroupProps->quadOperationsInAllStages = _metalFeatures.quadPermute;
break;
#endif
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
auto* timelineSem4Props = (VkPhysicalDeviceTimelineSemaphoreProperties*)next;
timelineSem4Props->maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max();
@ -1202,6 +1206,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
}
if (supportsMTLGPUFamily(Apple4)) {
_metalFeatures.nativeTextureSwizzle = true;
_metalFeatures.quadPermute = true;
}
}
@ -1291,9 +1296,11 @@ void MVKPhysicalDevice::initMetalFeatures() {
}
if (supportsMTLGPUFamily(Apple4)) {
_metalFeatures.nativeTextureSwizzle = true;
_metalFeatures.quadPermute = true;
}
if (supportsMTLGPUFamily(Apple6) ) {
_metalFeatures.astcHDRTextures = true;
_metalFeatures.simdPermute = true;
}
}
@ -1312,6 +1319,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
_metalFeatures.multisampleLayeredRendering = _metalFeatures.layeredRendering;
_metalFeatures.samplerClampToBorder = true;
_metalFeatures.samplerMirrorClampToEdge = true;
_metalFeatures.simdReduction = true;
}
}
#endif
@ -1359,6 +1367,8 @@ void MVKPhysicalDevice::initMetalFeatures() {
_metalFeatures.events = true;
_metalFeatures.memoryBarriers = true;
_metalFeatures.textureBuffers = true;
_metalFeatures.quadPermute = true;
_metalFeatures.simdPermute = true;
}
if (supportsMTLFeatureSet(macOS_GPUFamily2_v1)) {
@ -1366,6 +1376,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
_metalFeatures.stencilFeedback = true;
_metalFeatures.depthResolve = true;
_metalFeatures.stencilResolve = true;
_metalFeatures.simdReduction = true;
}
if ( mvkOSVersionIsAtLeast(10.15) ) {
@ -1424,12 +1435,20 @@ void MVKPhysicalDevice::initMetalFeatures() {
}
}
_metalFeatures.subgroupSize = 1;
#if MVK_MACOS
if (mvkOSVersionIsAtLeast(10.14)) {
if (_metalFeatures.simdPermute) {
static const uint32_t kAMDVendorId = 0x1002;
_metalFeatures.subgroupSize = (_properties.vendorID == kAMDVendorId) ? 64 : 32;
}
#endif
#if MVK_IOS
if (_metalFeatures.simdPermute) {
_metalFeatures.subgroupSize = 32;
} else if (_metalFeatures.quadPermute) {
_metalFeatures.subgroupSize = 4;
}
#endif
#define setMSLVersion(maj, min) \
_metalFeatures.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(maj, min);

View File

@ -1481,6 +1481,14 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf
shaderContext.options.mslOptions.multiview = mvkRendPass->isMultiview();
shaderContext.options.mslOptions.multiview_layered_rendering = getDevice()->getPhysicalDevice()->canUseInstancingForMultiview();
shaderContext.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT);
shaderContext.options.mslOptions.fixed_subgroup_size = _device->_pMetalFeatures->subgroupSize;
#if MVK_MACOS
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
#endif
#if MVK_IOS_OR_TVOS
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute;
shaderContext.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute;
#endif
shaderContext.options.tessPatchKind = reflectData.patchKind;
shaderContext.options.numTessControlPoints = reflectData.numControlPoints;
@ -1679,6 +1687,14 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI
shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
shaderContext.options.mslOptions.texture_1D_as_2D = mvkTreatTexture1DAs2D();
shaderContext.options.mslOptions.fixed_subgroup_size = _device->_pMetalFeatures->subgroupSize;
#if MVK_MACOS
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
#endif
#if MVK_IOS_OR_TVOS
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute;
shaderContext.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute;
#endif
MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
layout->populateShaderConverterContext(shaderContext);
@ -1954,6 +1970,7 @@ namespace SPIRV_CROSS_NAMESPACE {
opt.device_index,
opt.enable_frag_output_mask,
opt.additional_fixed_sample_mask,
opt.fixed_subgroup_size,
opt.enable_point_size_builtin,
opt.enable_frag_depth_builtin,
opt.enable_frag_stencil_ref_builtin,
@ -1981,6 +1998,8 @@ namespace SPIRV_CROSS_NAMESPACE {
opt.multi_patch_workgroup,
opt.vertex_for_tessellation,
opt.arrayed_subpass_input,
opt.ios_use_simdgroup_functions,
opt.emulate_subgroups,
opt.vertex_index_type);
}