Enable use of native texture atomics.

This commit conditionally skips the emulated image atomics paths if native
texture atomics are available and a configuration option is set.

Apart from unlocking some potential performance benefits from not having to
force some textures to be linear, it also makes texture atomics work with
argument buffers.
This commit is contained in:
Jan Sikorski 2024-01-26 15:17:49 +01:00
parent 37361ccd55
commit c03893b5ad
10 changed files with 49 additions and 19 deletions

View File

@ -359,6 +359,7 @@ typedef struct {
VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */
VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */
VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */
VkBool32 nativeTextureAtomics; /**< If true, atomic operations on textures are supported natively. */
} MVKPhysicalDeviceMetalFeatures;

View File

@ -290,7 +290,11 @@ id<MTLTexture> MVKBufferView::getMTLTexture() {
MTLTextureUsage usage = MTLTextureUsageShaderRead;
if ( mvkIsAnyFlagEnabled(_buffer->getUsage(), VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) ) {
usage |= MTLTextureUsageShaderWrite;
usage |= MTLTextureUsageShaderWrite;
#if MVK_XCODE_15
if (getPhysicalDevice()->useNativeTextureAtomics())
usage |= MTLTextureUsageShaderAtomic;
#endif
}
id<MTLBuffer> mtlBuff;
VkDeviceSize mtlBuffOffset;

View File

@ -323,7 +323,7 @@ void MVKDescriptorSetLayoutBinding::push(MVKCommandEncoder* cmdEncoder,
if (_applyToStage[i]) {
tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
}
@ -348,7 +348,7 @@ void MVKDescriptorSetLayoutBinding::push(MVKCommandEncoder* cmdEncoder,
if (_applyToStage[i]) {
tb.index = mtlIdxs.stages[i].textureIndex + rezIdx;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
}
@ -440,7 +440,9 @@ void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTL
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); // Needed for atomic operations
if (!getPhysicalDevice()->useNativeTextureAtomics()) { // Needed for emulated atomic operations
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
@ -449,7 +451,9 @@ void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTL
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); // Needed for atomic operations
if (!getPhysicalDevice()->useNativeTextureAtomics()) { // Needed for emulated atomic operations
addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
@ -664,7 +668,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
setResourceIndexOffset(textureIndex);
setResourceIndexOffset(bufferIndex);
if (!getPhysicalDevice()->useNativeTextureAtomics()) setResourceIndexOffset(bufferIndex);
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
@ -927,7 +931,7 @@ void MVKImageDescriptor::bind(MVKCommandEncoder* cmdEncoder,
if (stages[i]) {
tb.index = mtlIndexes.stages[i].textureIndex + elementIndex + planeIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !cmdEncoder->getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex + planeIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
}
@ -958,7 +962,7 @@ void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderS
if (encodeUsage) {
rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
}
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !mvkDSLBind->getPhysicalDevice()->useNativeTextureAtomics()) {
id<MTLTexture> mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture;
id<MTLBuffer> mtlBuff = mtlTex.buffer;
if (mtlBuff) {
@ -1226,7 +1230,7 @@ void MVKTexelBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder,
if (stages[i]) {
tb.index = mtlIndexes.stages[i].textureIndex + elementIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb);
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !cmdEncoder->getPhysicalDevice()->useNativeTextureAtomics()) {
bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex;
BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb);
}
@ -1251,7 +1255,7 @@ void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEn
rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
}
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !mvkDSLBind->getPhysicalDevice()->useNativeTextureAtomics()) {
id<MTLBuffer> mtlBuff = mtlTexture.buffer;
if (mtlBuff) {
if (encodeToArgBuffer) {

View File

@ -761,7 +761,8 @@ void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
mtlTexCnt += poolSize.descriptorCount;
mtlBuffCnt += poolSize.descriptorCount;
if (!getPhysicalDevice()->useNativeTextureAtomics())
mtlBuffCnt += poolSize.descriptorCount;
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:

View File

@ -360,6 +360,9 @@ public:
return _metalFeatures.argumentBuffers && getMVKConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER;
};
/** Returns whether native texture atomics are supported and should be used. */
bool useNativeTextureAtomics() { return _metalFeatures.nativeTextureAtomics; }
#pragma mark Construction

View File

@ -1810,6 +1810,8 @@ void MVKPhysicalDevice::initMetalFeatures() {
#if MVK_XCODE_15
// Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU.
_metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2));
_metalFeatures.nativeTextureAtomics = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Metal3) || supportsMTLGPUFamily(Apple6) || supportsMTLGPUFamily(Mac2));
#endif
// GPU-specific features
@ -3682,7 +3684,10 @@ void MVKDevice::getDescriptorVariableDescriptorCountLayoutSupport(const VkDescri
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
mtlTexCnt += pBind->descriptorCount;
mtlBuffCnt += pBind->descriptorCount;
if (getPhysicalDevice()->useNativeTextureAtomics())
mtlBuffCnt += pBind->descriptorCount;
maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt,
_pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt);
break;

View File

@ -372,6 +372,7 @@ protected:
bool _isAliasable;
bool _hasExtendedUsage;
bool _hasMutableFormat;
bool _shouldSupportAtomics;
bool _isLinearForAtomics;
};

View File

@ -861,7 +861,7 @@ MTLTextureUsage MVKImage::getMTLTextureUsage(MTLPixelFormat mtlPixFmt) {
needsReinterpretation = needsReinterpretation || !pixFmts->compatibleAsLinearOrSRGB(mtlPixFmt, viewFmt);
}
MTLTextureUsage mtlUsage = pixFmts->getMTLTextureUsage(getCombinedUsage(), mtlPixFmt, _samples, _isLinear || _isLinearForAtomics, needsReinterpretation, _hasExtendedUsage);
MTLTextureUsage mtlUsage = pixFmts->getMTLTextureUsage(getCombinedUsage(), mtlPixFmt, _samples, _isLinear || _isLinearForAtomics, needsReinterpretation, _hasExtendedUsage, _shouldSupportAtomics);
// Metal before 3.0 doesn't support 3D compressed textures, so we'll
// decompress the texture ourselves, and we need to be able to write to it.
@ -935,10 +935,12 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV
// If this is a storage image of format R32_UINT or R32_SINT, or MUTABLE_FORMAT is set
// and R32_UINT is in the set of possible view formats, then we must use a texel buffer,
// or image atomics won't work.
_isLinearForAtomics = (_arrayLayers == 1 && _mipLevels == 1 && getImageType() == VK_IMAGE_TYPE_2D && mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT) &&
((_vkFormat == VK_FORMAT_R32_UINT || _vkFormat == VK_FORMAT_R32_SINT) ||
(_hasMutableFormat && pixFmts->getViewClass(_vkFormat) == MVKMTLViewClass::Color32 &&
(getIsValidViewFormat(VK_FORMAT_R32_UINT) || getIsValidViewFormat(VK_FORMAT_R32_SINT)))));
_shouldSupportAtomics = mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT) && _mipLevels == 1 &&
((_vkFormat == VK_FORMAT_R32_UINT || _vkFormat == VK_FORMAT_R32_SINT) ||
(_hasMutableFormat && pixFmts->getViewClass(_vkFormat) == MVKMTLViewClass::Color32 && (getIsValidViewFormat(VK_FORMAT_R32_UINT) || getIsValidViewFormat(VK_FORMAT_R32_SINT))));
if (_shouldSupportAtomics && !getPhysicalDevice()->useNativeTextureAtomics())
_isLinearForAtomics = _arrayLayers == 1 && getImageType() == VK_IMAGE_TYPE_2D;
_is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !_device->_pMetalFeatures->native3DCompressedTextures;
_isDepthStencilAttachment = (mvkAreAllFlagsEnabled(pCreateInfo->usage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ||

View File

@ -388,7 +388,8 @@ public:
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
bool isLinear = false,
bool needsReinterpretation = true,
bool isExtended = false);
bool isExtended = false,
bool supportAtomics = false);
/** Enumerates all formats that support the given features, calling a specified function for each one. */
void enumerateSupportedFormats(const VkFormatProperties3& properties, bool any, std::function<bool(VkFormat)> func);

View File

@ -695,7 +695,8 @@ MTLTextureUsage MVKPixelFormats::getMTLTextureUsage(VkImageUsageFlags vkImageUsa
VkSampleCountFlagBits samples,
bool isLinear,
bool needsReinterpretation,
bool isExtended) {
bool isExtended,
bool supportAtomics) {
bool isDepthFmt = isDepthFormat(mtlFormat);
bool isStencilFmt = isStencilFormat(mtlFormat);
bool isCombinedDepthStencilFmt = isDepthFmt && isStencilFmt;
@ -719,6 +720,13 @@ MTLTextureUsage MVKPixelFormats::getMTLTextureUsage(VkImageUsageFlags vkImageUsa
mvkEnableFlags(mtlUsage, MTLTextureUsageShaderWrite);
}
#if MVK_XCODE_15
if (supportAtomics) {
mvkEnableFlags(mtlUsage, MTLTextureUsageShaderAtomic);
}
#endif
#if MVK_MACOS
// Clearing a linear image may use shader writes.
if (mvkIsAnyFlagEnabled(vkImageUsageFlags, (VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&