MVKPhysicalDevice: Correct subgroup properties.
On systems that do not support subgroup operations, the subgroup size is set to 1. Make sure the subgroup size is fixed in the shader, at least until we implement `VK_EXT_subgroup_size_control`. According to the Metal feature set tables, SIMD-group reduction is only supported on Mac family 2 GPUs and Apple family 7 GPUs. Previously, we were exposing these operations on all Mac GPUs. Quadgroup permutation is supported on all Apple GPUs starting from family 4. We use quadgroup functions for regular group non-uniform ops as well, so GPUs that support only quadgroup permutation are considered to have a subgroup size of 4. On Mac, it's a bit more complicated. The 2.1 tables say that all Mac GPUs support quadgroup ops, but the 3.0 and 4.0 tables say that only family 2 supports them. I've allowed quad ops on family 1 for now. Unfortunately, my testing shows that SIMD-group functions don't work in fragment shaders on Mojave, so no fragment shader support until Metal 3. Update SPIRV-Cross to pull in changes needed for all this.
This commit is contained in:
parent
ce85a96d80
commit
8e11c41c40
@ -1 +1 @@
|
||||
b3c59263a0e8f921df15757c7b28407306a16830
|
||||
3d16060c3243e9f7bfd027de6e2e27c348d1791e
|
||||
|
@ -631,6 +631,9 @@ typedef struct {
|
||||
VkBool32 renderLinearTextures; /**< If true, linear textures are renderable. */
|
||||
VkBool32 pullModelInterpolation; /**< If true, explicit interpolation functions are supported. */
|
||||
VkBool32 samplerMirrorClampToEdge; /**< If true, the mirrored clamp to edge address mode is supported in samplers. */
|
||||
VkBool32 quadPermute; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
|
||||
VkBool32 simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
|
||||
VkBool32 simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
|
||||
} MVKPhysicalDeviceMetalFeatures;
|
||||
|
||||
/** MoltenVK performance of a particular type of activity. */
|
||||
|
@ -115,7 +115,7 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
|
||||
auto* shaderSGTypesFeatures = (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures*)next;
|
||||
shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.subgroupSize != 0;
|
||||
shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.simdPermute || _metalFeatures.quadPermute;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
|
||||
@ -317,28 +317,32 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
|
||||
pushDescProps->maxPushDescriptors = _properties.limits.maxPerStageResources;
|
||||
break;
|
||||
}
|
||||
#if MVK_MACOS
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES:
|
||||
if (mvkOSVersionIsAtLeast(10.14)) {
|
||||
auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next;
|
||||
subgroupProps->subgroupSize = _metalFeatures.subgroupSize;
|
||||
subgroupProps->supportedStages =
|
||||
VK_SHADER_STAGE_COMPUTE_BIT |
|
||||
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
subgroupProps->supportedOperations =
|
||||
VK_SUBGROUP_FEATURE_BASIC_BIT |
|
||||
VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
|
||||
auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next;
|
||||
subgroupProps->subgroupSize = _metalFeatures.subgroupSize;
|
||||
subgroupProps->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
if (_features.tessellationShader) {
|
||||
subgroupProps->supportedStages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
}
|
||||
if (mvkOSVersionIsAtLeast(10.15, 13.0)) {
|
||||
subgroupProps->supportedStages |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
}
|
||||
subgroupProps->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
|
||||
if (_metalFeatures.simdPermute || _metalFeatures.quadPermute) {
|
||||
subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
VK_SUBGROUP_FEATURE_BALLOT_BIT |
|
||||
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
|
||||
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
|
||||
// VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
|
||||
VK_SUBGROUP_FEATURE_QUAD_BIT;
|
||||
subgroupProps->quadOperationsInAllStages = true;
|
||||
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
|
||||
}
|
||||
if (_metalFeatures.simdReduction) {
|
||||
subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
|
||||
}
|
||||
if (_metalFeatures.quadPermute) {
|
||||
subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
|
||||
}
|
||||
subgroupProps->quadOperationsInAllStages = _metalFeatures.quadPermute;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
|
||||
auto* timelineSem4Props = (VkPhysicalDeviceTimelineSemaphoreProperties*)next;
|
||||
timelineSem4Props->maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max();
|
||||
@ -1202,6 +1206,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
}
|
||||
if (supportsMTLGPUFamily(Apple4)) {
|
||||
_metalFeatures.nativeTextureSwizzle = true;
|
||||
_metalFeatures.quadPermute = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1291,9 +1296,11 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
}
|
||||
if (supportsMTLGPUFamily(Apple4)) {
|
||||
_metalFeatures.nativeTextureSwizzle = true;
|
||||
_metalFeatures.quadPermute = true;
|
||||
}
|
||||
if (supportsMTLGPUFamily(Apple6) ) {
|
||||
_metalFeatures.astcHDRTextures = true;
|
||||
_metalFeatures.simdPermute = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1312,6 +1319,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
_metalFeatures.multisampleLayeredRendering = _metalFeatures.layeredRendering;
|
||||
_metalFeatures.samplerClampToBorder = true;
|
||||
_metalFeatures.samplerMirrorClampToEdge = true;
|
||||
_metalFeatures.simdReduction = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1359,6 +1367,8 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
_metalFeatures.events = true;
|
||||
_metalFeatures.memoryBarriers = true;
|
||||
_metalFeatures.textureBuffers = true;
|
||||
_metalFeatures.quadPermute = true;
|
||||
_metalFeatures.simdPermute = true;
|
||||
}
|
||||
|
||||
if (supportsMTLFeatureSet(macOS_GPUFamily2_v1)) {
|
||||
@ -1366,6 +1376,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
_metalFeatures.stencilFeedback = true;
|
||||
_metalFeatures.depthResolve = true;
|
||||
_metalFeatures.stencilResolve = true;
|
||||
_metalFeatures.simdReduction = true;
|
||||
}
|
||||
|
||||
if ( mvkOSVersionIsAtLeast(10.15) ) {
|
||||
@ -1424,12 +1435,20 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
}
|
||||
}
|
||||
|
||||
_metalFeatures.subgroupSize = 1;
|
||||
#if MVK_MACOS
|
||||
if (mvkOSVersionIsAtLeast(10.14)) {
|
||||
if (_metalFeatures.simdPermute) {
|
||||
static const uint32_t kAMDVendorId = 0x1002;
|
||||
_metalFeatures.subgroupSize = (_properties.vendorID == kAMDVendorId) ? 64 : 32;
|
||||
}
|
||||
#endif
|
||||
#if MVK_IOS
|
||||
if (_metalFeatures.simdPermute) {
|
||||
_metalFeatures.subgroupSize = 32;
|
||||
} else if (_metalFeatures.quadPermute) {
|
||||
_metalFeatures.subgroupSize = 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define setMSLVersion(maj, min) \
|
||||
_metalFeatures.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(maj, min);
|
||||
|
@ -1481,6 +1481,14 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf
|
||||
shaderContext.options.mslOptions.multiview = mvkRendPass->isMultiview();
|
||||
shaderContext.options.mslOptions.multiview_layered_rendering = getDevice()->getPhysicalDevice()->canUseInstancingForMultiview();
|
||||
shaderContext.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT);
|
||||
shaderContext.options.mslOptions.fixed_subgroup_size = _device->_pMetalFeatures->subgroupSize;
|
||||
#if MVK_MACOS
|
||||
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
|
||||
#endif
|
||||
#if MVK_IOS_OR_TVOS
|
||||
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute;
|
||||
shaderContext.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute;
|
||||
#endif
|
||||
|
||||
shaderContext.options.tessPatchKind = reflectData.patchKind;
|
||||
shaderContext.options.numTessControlPoints = reflectData.numControlPoints;
|
||||
@ -1679,6 +1687,14 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI
|
||||
shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
|
||||
shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
|
||||
shaderContext.options.mslOptions.texture_1D_as_2D = mvkTreatTexture1DAs2D();
|
||||
shaderContext.options.mslOptions.fixed_subgroup_size = _device->_pMetalFeatures->subgroupSize;
|
||||
#if MVK_MACOS
|
||||
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
|
||||
#endif
|
||||
#if MVK_IOS_OR_TVOS
|
||||
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute;
|
||||
shaderContext.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute;
|
||||
#endif
|
||||
|
||||
MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
|
||||
layout->populateShaderConverterContext(shaderContext);
|
||||
@ -1954,6 +1970,7 @@ namespace SPIRV_CROSS_NAMESPACE {
|
||||
opt.device_index,
|
||||
opt.enable_frag_output_mask,
|
||||
opt.additional_fixed_sample_mask,
|
||||
opt.fixed_subgroup_size,
|
||||
opt.enable_point_size_builtin,
|
||||
opt.enable_frag_depth_builtin,
|
||||
opt.enable_frag_stencil_ref_builtin,
|
||||
@ -1981,6 +1998,8 @@ namespace SPIRV_CROSS_NAMESPACE {
|
||||
opt.multi_patch_workgroup,
|
||||
opt.vertex_for_tessellation,
|
||||
opt.arrayed_subpass_input,
|
||||
opt.ios_use_simdgroup_functions,
|
||||
opt.emulate_subgroups,
|
||||
opt.vertex_index_type);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user