Fixes for VK_EXT_extended_dynamic_state.

- MVKPipeline only works around zero stride if the stride is static.
- Ensure dynamic vertex stride is not enabled on builds before Xcode 15.
- Add MVKRenderStateType::LineWidth to track all default options (unrelated).
This commit is contained in:
Bill Hollings 2023-10-16 17:24:03 -04:00
parent 61b8712178
commit d706ed0a63
3 changed files with 49 additions and 43 deletions

View File

@ -1711,7 +1711,11 @@ void MVKPhysicalDevice::initMetalFeatures() {
_metalFeatures.maxPerStageStorageTextureCount = 8; _metalFeatures.maxPerStageStorageTextureCount = 8;
_metalFeatures.vertexStrideAlignment = supportsMTLGPUFamily(Apple5) ? 1 : 4; _metalFeatures.vertexStrideAlignment = supportsMTLGPUFamily(Apple5) ? 1 : 4;
#if MVK_XCODE_15
// Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU.
_metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2)); _metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2));
#endif
// GPU-specific features // GPU-specific features
switch (_properties.vendorID) { switch (_properties.vendorID) {

View File

@ -232,6 +232,7 @@ enum MVKRenderStateType {
DepthTestEnable, DepthTestEnable,
DepthWriteEnable, DepthWriteEnable,
FrontFace, FrontFace,
LineWidth,
LogicOp, LogicOp,
LogicOpEnable, LogicOpEnable,
PatchControlPoints, PatchControlPoints,

View File

@ -560,6 +560,7 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) {
case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: return DepthTestEnable; case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: return DepthTestEnable;
case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: return DepthWriteEnable; case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: return DepthWriteEnable;
case VK_DYNAMIC_STATE_FRONT_FACE: return FrontFace; case VK_DYNAMIC_STATE_FRONT_FACE: return FrontFace;
case VK_DYNAMIC_STATE_LINE_WIDTH: return LineWidth;
case VK_DYNAMIC_STATE_LOGIC_OP_EXT: return LogicOp; case VK_DYNAMIC_STATE_LOGIC_OP_EXT: return LogicOp;
case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: return LogicOpEnable; case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT: return LogicOpEnable;
case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: return PatchControlPoints; case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: return PatchControlPoints;
@ -1366,18 +1367,16 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc,
const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i]; const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i];
if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) { if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) {
// Vulkan allows any stride, but Metal only allows multiples of 4. // Vulkan allows any stride, but Metal requires multiples of 4 on older GPUs.
// TODO: We could try to expand the buffer to the required alignment in that case. if (isVtxStrideStatic && (pVKVB->stride % _device->_pMetalFeatures->vertexStrideAlignment) != 0) {
VkDeviceSize mtlVtxStrideAlignment = _device->_pMetalFeatures->vertexStrideAlignment; setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", _device->_pMetalFeatures->vertexStrideAlignment));
if ((pVKVB->stride % mtlVtxStrideAlignment) != 0) {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", mtlVtxStrideAlignment));
return false; return false;
} }
maxBinding = max(pVKVB->binding, maxBinding); maxBinding = max(pVKVB->binding, maxBinding);
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
auto vbDesc = inputDesc.layouts[vbIdx]; auto vbDesc = inputDesc.layouts[vbIdx];
if (pVKVB->stride == 0) { if (isVtxStrideStatic && pVKVB->stride == 0) {
// Stride can't be 0, it will be set later to attributes' maximum offset + size // Stride can't be 0, it will be set later to attributes' maximum offset + size
// to prevent it from being larger than the underlying buffer permits. // to prevent it from being larger than the underlying buffer permits.
vbDesc.stride = 0; vbDesc.stride = 0;
@ -1418,52 +1417,54 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc,
if (shaderConfig.isShaderInputLocationUsed(pVKVA->location)) { if (shaderConfig.isShaderInputLocationUsed(pVKVA->location)) {
uint32_t vaBinding = pVKVA->binding; uint32_t vaBinding = pVKVA->binding;
uint32_t vaOffset = pVKVA->offset; uint32_t vaOffset = pVKVA->offset;
auto vaDesc = inputDesc.attributes[pVKVA->location];
auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format);
// Vulkan allows offsets to exceed the buffer stride, but Metal doesn't. // Vulkan allows offsets to exceed the buffer stride, but Metal doesn't.
// If this is the case, fetch a translated artificial buffer binding, using the same MTLBuffer, // If this is the case, fetch a translated artificial buffer binding, using the same MTLBuffer,
// but that is translated so that the reduced VA offset fits into the binding stride. // but that is translated so that the reduced VA offset fits into the binding stride.
const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions; if (isVtxStrideStatic) {
uint32_t attrSize = 0; const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions;
for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) { uint32_t attrSize = 0;
if (pVKVB->binding == pVKVA->binding) { for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) {
attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format); if (pVKVB->binding == pVKVA->binding) {
if (pVKVB->stride == 0) { attrSize = getPixelFormats()->getBytesPerBlock(pVKVA->format);
// The step is set to constant, but we need to change stride to be non-zero for metal. if (pVKVB->stride == 0) {
// Look for the maximum offset + size to set as the stride. // The step is set to constant, but we need to change stride to be non-zero for metal.
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); // Look for the maximum offset + size to set as the stride.
auto vbDesc = inputDesc.layouts[vbIdx]; uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
uint32_t strideLowBound = vaOffset + attrSize; auto vbDesc = inputDesc.layouts[vbIdx];
if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound; uint32_t strideLowBound = vaOffset + attrSize;
} else if (vaOffset && vaOffset + attrSize > pVKVB->stride) { if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound;
// Move vertex attribute offset into the stride. This vertex attribute may be } else if (vaOffset && vaOffset + attrSize > pVKVB->stride) {
// combined with other vertex attributes into the same translated buffer binding. // Move vertex attribute offset into the stride. This vertex attribute may be
// But if the reduced offset combined with the vertex attribute size still won't // combined with other vertex attributes into the same translated buffer binding.
// fit into the buffer binding stride, force the vertex attribute offset to zero, // But if the reduced offset combined with the vertex attribute size still won't
// effectively dedicating this vertex attribute to its own buffer binding. // fit into the buffer binding stride, force the vertex attribute offset to zero,
uint32_t origOffset = vaOffset; // effectively dedicating this vertex attribute to its own buffer binding.
vaOffset %= pVKVB->stride; uint32_t origOffset = vaOffset;
if (vaOffset + attrSize > pVKVB->stride) { vaOffset %= pVKVB->stride;
vaOffset = 0; if (vaOffset + attrSize > pVKVB->stride) {
vaOffset = 0;
}
vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding);
if (zeroDivisorBindings.count(pVKVB->binding)) {
zeroDivisorBindings.insert(vaBinding);
}
} }
vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding); break;
if (zeroDivisorBindings.count(pVKVB->binding)) {
zeroDivisorBindings.insert(vaBinding);
}
} }
break; }
if (pVKVB->stride && attrSize > pVKVB->stride) {
/* Metal does not support overlapping loads. Truncate format vector length to prevent an assertion
* and hope it's not used by the shader. */
MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride);
reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). Changing descriptor format from %s to %s.",
attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat));
mtlFormat = (decltype(vaDesc.format))newFormat;
} }
} }
auto vaDesc = inputDesc.attributes[pVKVA->location];
auto mtlFormat = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format);
if (pVKVB->stride && attrSize > pVKVB->stride) {
/* Metal does not support overlapping loads. Truncate format vector length to prevent an assertion
* and hope it's not used by the shader. */
MTLVertexFormat newFormat = mvkAdjustFormatVectorToSize((MTLVertexFormat)mtlFormat, pVKVB->stride);
reportError(VK_SUCCESS, "Found attribute with size (%u) larger than it's binding's stride (%u). Changing descriptor format from %s to %s.",
attrSize, pVKVB->stride, getPixelFormats()->getName((MTLVertexFormat)mtlFormat), getPixelFormats()->getName(newFormat));
mtlFormat = (decltype(vaDesc.format))newFormat;
}
vaDesc.format = mtlFormat; vaDesc.format = mtlFormat;
vaDesc.bufferIndex = (decltype(vaDesc.bufferIndex))getMetalBufferIndexForVertexAttributeBinding(vaBinding); vaDesc.bufferIndex = (decltype(vaDesc.bufferIndex))getMetalBufferIndexForVertexAttributeBinding(vaBinding);
vaDesc.offset = vaOffset; vaDesc.offset = vaOffset;