Merge pull request #2164 from js6i/native-atomics

Enable use of native texture atomics.
This commit is contained in:
Bill Hollings 2024-02-27 10:20:18 -05:00 committed by GitHub
commit b56c152a12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 49 additions and 19 deletions

View File

@ -359,6 +359,7 @@ typedef struct {
VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */ VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */
VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */ VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */
VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */ VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */
VkBool32 nativeTextureAtomics; /**< If true, atomic operations on textures are supported natively. */
} MVKPhysicalDeviceMetalFeatures; } MVKPhysicalDeviceMetalFeatures;

View File

@ -290,7 +290,11 @@ id<MTLTexture> MVKBufferView::getMTLTexture() {
MTLTextureUsage usage = MTLTextureUsageShaderRead; MTLTextureUsage usage = MTLTextureUsageShaderRead;
if ( mvkIsAnyFlagEnabled(_buffer->getUsage(), VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) ) { if ( mvkIsAnyFlagEnabled(_buffer->getUsage(), VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) ) {
usage |= MTLTextureUsageShaderWrite; usage |= MTLTextureUsageShaderWrite;
#if MVK_XCODE_15
if (getPhysicalDevice()->useNativeTextureAtomics())
usage |= MTLTextureUsageShaderAtomic;
#endif
} }
id<MTLBuffer> mtlBuff; id<MTLBuffer> mtlBuff;
VkDeviceSize mtlBuffOffset; VkDeviceSize mtlBuffOffset;

View File

@ -323,7 +323,7 @@ void MVKDescriptorSetLayoutBinding::push(MVKCommandEncoder* cmdEncoder,
if (_applyToStage[i]) { if (_applyToStage[i]) {
tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex; tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx; bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
} }
@ -348,7 +348,7 @@ void MVKDescriptorSetLayoutBinding::push(MVKCommandEncoder* cmdEncoder,
if (_applyToStage[i]) { if (_applyToStage[i]) {
tb.index = mtlIdxs.stages[i].textureIndex + rezIdx; tb.index = mtlIdxs.stages[i].textureIndex + rezIdx;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx; bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
} }
@ -440,7 +440,9 @@ void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTL
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite); addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); // Needed for atomic operations if (!getPhysicalDevice()->useNativeTextureAtomics()) { // Needed for emulated atomic operations
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
}
break; break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
@ -449,7 +451,9 @@ void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTL
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite); addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); // Needed for atomic operations if (!getPhysicalDevice()->useNativeTextureAtomics()) { // Needed for emulated atomic operations
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
}
break; break;
case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLER:
@ -664,7 +668,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
setResourceIndexOffset(textureIndex); setResourceIndexOffset(textureIndex);
setResourceIndexOffset(bufferIndex); if (!getPhysicalDevice()->useNativeTextureAtomics()) setResourceIndexOffset(bufferIndex);
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) { if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName())); _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
@ -927,7 +931,7 @@ void MVKImageDescriptor::bind(MVKCommandEncoder* cmdEncoder,
if (stages[i]) { if (stages[i]) {
tb.index = mtlIndexes.stages[i].textureIndex + elementIndex + planeIndex; tb.index = mtlIndexes.stages[i].textureIndex + elementIndex + planeIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !cmdEncoder->getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex + planeIndex; bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex + planeIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
} }
@ -958,7 +962,7 @@ void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderS
if (encodeUsage) { if (encodeUsage) {
rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
} }
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !mvkDSLBind->getPhysicalDevice()->useNativeTextureAtomics()) {
id<MTLTexture> mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture; id<MTLTexture> mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture;
id<MTLBuffer> mtlBuff = mtlTex.buffer; id<MTLBuffer> mtlBuff = mtlTex.buffer;
if (mtlBuff) { if (mtlBuff) {
@ -1226,7 +1230,7 @@ void MVKTexelBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder,
if (stages[i]) { if (stages[i]) {
tb.index = mtlIndexes.stages[i].textureIndex + elementIndex; tb.index = mtlIndexes.stages[i].textureIndex + elementIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !cmdEncoder->getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex; bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
} }
@ -1251,7 +1255,7 @@ void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEn
rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
} }
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !mvkDSLBind->getPhysicalDevice()->useNativeTextureAtomics()) {
id<MTLBuffer> mtlBuff = mtlTexture.buffer; id<MTLBuffer> mtlBuff = mtlTexture.buffer;
if (mtlBuff) { if (mtlBuff) {
if (encodeToArgBuffer) { if (encodeToArgBuffer) {

View File

@ -761,7 +761,8 @@ void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
mtlTexCnt += poolSize.descriptorCount; mtlTexCnt += poolSize.descriptorCount;
mtlBuffCnt += poolSize.descriptorCount; if (!getPhysicalDevice()->useNativeTextureAtomics())
mtlBuffCnt += poolSize.descriptorCount;
break; break;
case VK_DESCRIPTOR_TYPE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLER:

View File

@ -360,6 +360,9 @@ public:
return _metalFeatures.argumentBuffers && getMVKConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER; return _metalFeatures.argumentBuffers && getMVKConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER;
}; };
/**
 * Indicates whether atomic operations on textures are supported natively
 * and should be used, as recorded in the nativeTextureAtomics Metal feature flag.
 */
bool useNativeTextureAtomics() {
	return _metalFeatures.nativeTextureAtomics;
}
#pragma mark Construction #pragma mark Construction

View File

@ -1810,6 +1810,8 @@ void MVKPhysicalDevice::initMetalFeatures() {
#if MVK_XCODE_15 #if MVK_XCODE_15
// Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU. // Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU.
_metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2)); _metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2));
_metalFeatures.nativeTextureAtomics = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Metal3) || supportsMTLGPUFamily(Apple6) || supportsMTLGPUFamily(Mac2));
#endif #endif
// GPU-specific features // GPU-specific features
@ -3682,7 +3684,10 @@ void MVKDevice::getDescriptorVariableDescriptorCountLayoutSupport(const VkDescri
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
mtlTexCnt += pBind->descriptorCount; mtlTexCnt += pBind->descriptorCount;
mtlBuffCnt += pBind->descriptorCount;
// A companion buffer slot is only consumed when texture atomics are emulated;
// with native texture atomics the storage image/texel buffer needs just its texture slot.
if (!getPhysicalDevice()->useNativeTextureAtomics())
	mtlBuffCnt += pBind->descriptorCount;
maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt, maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt,
_pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt); _pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt);
break; break;

View File

@ -372,6 +372,7 @@ protected:
bool _isAliasable; bool _isAliasable;
bool _hasExtendedUsage; bool _hasExtendedUsage;
bool _hasMutableFormat; bool _hasMutableFormat;
bool _shouldSupportAtomics;
bool _isLinearForAtomics; bool _isLinearForAtomics;
}; };

View File

@ -861,7 +861,7 @@ MTLTextureUsage MVKImage::getMTLTextureUsage(MTLPixelFormat mtlPixFmt) {
needsReinterpretation = needsReinterpretation || !pixFmts->compatibleAsLinearOrSRGB(mtlPixFmt, viewFmt); needsReinterpretation = needsReinterpretation || !pixFmts->compatibleAsLinearOrSRGB(mtlPixFmt, viewFmt);
} }
MTLTextureUsage mtlUsage = pixFmts->getMTLTextureUsage(getCombinedUsage(), mtlPixFmt, _samples, _isLinear || _isLinearForAtomics, needsReinterpretation, _hasExtendedUsage); MTLTextureUsage mtlUsage = pixFmts->getMTLTextureUsage(getCombinedUsage(), mtlPixFmt, _samples, _isLinear || _isLinearForAtomics, needsReinterpretation, _hasExtendedUsage, _shouldSupportAtomics);
// Metal before 3.0 doesn't support 3D compressed textures, so we'll // Metal before 3.0 doesn't support 3D compressed textures, so we'll
// decompress the texture ourselves, and we need to be able to write to it. // decompress the texture ourselves, and we need to be able to write to it.
@ -935,10 +935,12 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV
// If this is a storage image of format R32_UINT or R32_SINT, or MUTABLE_FORMAT is set // If this is a storage image of format R32_UINT or R32_SINT, or MUTABLE_FORMAT is set
// and R32_UINT is in the set of possible view formats, then we must use a texel buffer, // and R32_UINT is in the set of possible view formats, then we must use a texel buffer,
// or image atomics won't work. // or image atomics won't work.
_isLinearForAtomics = (_arrayLayers == 1 && _mipLevels == 1 && getImageType() == VK_IMAGE_TYPE_2D && mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT) && _shouldSupportAtomics = mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT) && _mipLevels == 1 &&
((_vkFormat == VK_FORMAT_R32_UINT || _vkFormat == VK_FORMAT_R32_SINT) || ((_vkFormat == VK_FORMAT_R32_UINT || _vkFormat == VK_FORMAT_R32_SINT) ||
(_hasMutableFormat && pixFmts->getViewClass(_vkFormat) == MVKMTLViewClass::Color32 && (_hasMutableFormat && pixFmts->getViewClass(_vkFormat) == MVKMTLViewClass::Color32 && (getIsValidViewFormat(VK_FORMAT_R32_UINT) || getIsValidViewFormat(VK_FORMAT_R32_SINT))));
(getIsValidViewFormat(VK_FORMAT_R32_UINT) || getIsValidViewFormat(VK_FORMAT_R32_SINT)))));
if (_shouldSupportAtomics && !getPhysicalDevice()->useNativeTextureAtomics())
_isLinearForAtomics = _arrayLayers == 1 && getImageType() == VK_IMAGE_TYPE_2D;
_is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !_device->_pMetalFeatures->native3DCompressedTextures; _is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !_device->_pMetalFeatures->native3DCompressedTextures;
_isDepthStencilAttachment = (mvkAreAllFlagsEnabled(pCreateInfo->usage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) || _isDepthStencilAttachment = (mvkAreAllFlagsEnabled(pCreateInfo->usage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ||

View File

@ -388,7 +388,8 @@ public:
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
bool isLinear = false, bool isLinear = false,
bool needsReinterpretation = true, bool needsReinterpretation = true,
bool isExtended = false); bool isExtended = false,
bool supportAtomics = false);
/** Enumerates all formats that support the given features, calling a specified function for each one. */ /** Enumerates all formats that support the given features, calling a specified function for each one. */
void enumerateSupportedFormats(const VkFormatProperties3& properties, bool any, std::function<bool(VkFormat)> func); void enumerateSupportedFormats(const VkFormatProperties3& properties, bool any, std::function<bool(VkFormat)> func);

View File

@ -695,7 +695,8 @@ MTLTextureUsage MVKPixelFormats::getMTLTextureUsage(VkImageUsageFlags vkImageUsa
VkSampleCountFlagBits samples, VkSampleCountFlagBits samples,
bool isLinear, bool isLinear,
bool needsReinterpretation, bool needsReinterpretation,
bool isExtended) { bool isExtended,
bool supportAtomics) {
bool isDepthFmt = isDepthFormat(mtlFormat); bool isDepthFmt = isDepthFormat(mtlFormat);
bool isStencilFmt = isStencilFormat(mtlFormat); bool isStencilFmt = isStencilFormat(mtlFormat);
bool isCombinedDepthStencilFmt = isDepthFmt && isStencilFmt; bool isCombinedDepthStencilFmt = isDepthFmt && isStencilFmt;
@ -719,6 +720,13 @@ MTLTextureUsage MVKPixelFormats::getMTLTextureUsage(VkImageUsageFlags vkImageUsa
mvkEnableFlags(mtlUsage, MTLTextureUsageShaderWrite); mvkEnableFlags(mtlUsage, MTLTextureUsageShaderWrite);
} }
#if MVK_XCODE_15
if (supportAtomics) {
mvkEnableFlags(mtlUsage, MTLTextureUsageShaderAtomic);
}
#endif
#if MVK_MACOS #if MVK_MACOS
// Clearing a linear image may use shader writes. // Clearing a linear image may use shader writes.
if (mvkIsAnyFlagEnabled(vkImageUsageFlags, (VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && if (mvkIsAnyFlagEnabled(vkImageUsageFlags, (VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&