diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 4a6ae35e..5ff8b626 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -359,6 +359,7 @@ typedef struct { VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */ VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. */ VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */ + VkBool32 nativeTextureAtomics; /**< If true, atomic operations on textures are supported natively. */ } MVKPhysicalDeviceMetalFeatures; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm index 7eeb17e2..a8a9b38b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm @@ -290,7 +290,11 @@ id MVKBufferView::getMTLTexture() { MTLTextureUsage usage = MTLTextureUsageShaderRead; if ( mvkIsAnyFlagEnabled(_buffer->getUsage(), VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) ) { - usage |= MTLTextureUsageShaderWrite; + usage |= MTLTextureUsageShaderWrite; +#if MVK_XCODE_15 + if (getPhysicalDevice()->useNativeTextureAtomics()) + usage |= MTLTextureUsageShaderAtomic; +#endif } id mtlBuff; VkDeviceSize mtlBuffOffset; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index efc5dbcb..65056a55 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -323,7 +323,7 @@ void MVKDescriptorSetLayoutBinding::push(MVKCommandEncoder* cmdEncoder, if (_applyToStage[i]) { tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); - if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + if (_info.descriptorType == 
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !getPhysicalDevice()->useNativeTextureAtomics()) { bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); } @@ -348,7 +348,7 @@ void MVKDescriptorSetLayoutBinding::push(MVKCommandEncoder* cmdEncoder, if (_applyToStage[i]) { tb.index = mtlIdxs.stages[i].textureIndex + rezIdx; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); - if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { + if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !getPhysicalDevice()->useNativeTextureAtomics()) { bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); } @@ -440,7 +440,9 @@ void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArrayuseNativeTextureAtomics()) { // Needed for emulated atomic operations + addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); + } break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: @@ -449,7 +451,9 @@ void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArrayuseNativeTextureAtomics()) { // Needed for emulated atomic operations + addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); + } break; case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -664,7 +668,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: setResourceIndexOffset(textureIndex); - setResourceIndexOffset(bufferIndex); + if (!getPhysicalDevice()->useNativeTextureAtomics()) setResourceIndexOffset(bufferIndex); if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) { _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does 
not support arrays of textures.", _device->getName())); @@ -927,7 +931,7 @@ void MVKImageDescriptor::bind(MVKCommandEncoder* cmdEncoder, if (stages[i]) { tb.index = mtlIndexes.stages[i].textureIndex + elementIndex + planeIndex; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !cmdEncoder->getPhysicalDevice()->useNativeTextureAtomics()) { bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex + planeIndex; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); } @@ -958,7 +962,7 @@ void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderS if (encodeUsage) { rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && !mvkDSLBind->getPhysicalDevice()->useNativeTextureAtomics()) { id mtlTex = mtlTexture.parentTexture ? 
mtlTexture.parentTexture : mtlTexture; id mtlBuff = mtlTex.buffer; if (mtlBuff) { @@ -1226,7 +1230,7 @@ void MVKTexelBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder, if (stages[i]) { tb.index = mtlIndexes.stages[i].textureIndex + elementIndex; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindTexture, pipelineBindPoint, i, tb); - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !cmdEncoder->getPhysicalDevice()->useNativeTextureAtomics()) { bb.index = mtlIndexes.stages[i].bufferIndex + elementIndex; BIND_GRAPHICS_OR_COMPUTE(cmdEncoder, bindBuffer, pipelineBindPoint, i, bb); } @@ -1251,7 +1255,7 @@ void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEn rezEncState->encodeResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages()); } - if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { + if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER && !mvkDSLBind->getPhysicalDevice()->useNativeTextureAtomics()) { id mtlBuff = mtlTexture.buffer; if (mtlBuff) { if (encodeToArgBuffer) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index 824de5b0..2b05095b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -761,7 +761,8 @@ void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: mtlTexCnt += poolSize.descriptorCount; - mtlBuffCnt += poolSize.descriptorCount; + if (!getPhysicalDevice()->useNativeTextureAtomics()) + mtlBuffCnt += poolSize.descriptorCount; break; case VK_DESCRIPTOR_TYPE_SAMPLER: diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 6747bfa4..0102fa1a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ 
b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -360,6 +360,9 @@ public: return _metalFeatures.argumentBuffers && getMVKConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER; }; + /** Returns whether native texture atomics are supported and should be used. */ + bool useNativeTextureAtomics() { return _metalFeatures.nativeTextureAtomics; } + #pragma mark Construction diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index ef7fbcbe..f275106f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1810,6 +1810,8 @@ void MVKPhysicalDevice::initMetalFeatures() { #if MVK_XCODE_15 // Dynamic vertex stride needs to have everything aligned - compiled with support for vertex stride calls, and supported by both runtime OS and GPU. _metalFeatures.dynamicVertexStride = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Apple4) || supportsMTLGPUFamily(Mac2)); + + _metalFeatures.nativeTextureAtomics = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0) && (supportsMTLGPUFamily(Metal3) || supportsMTLGPUFamily(Apple6) || supportsMTLGPUFamily(Mac2)); #endif // GPU-specific features @@ -3682,7 +3684,10 @@ void MVKDevice::getDescriptorVariableDescriptorCountLayoutSupport(const VkDescri case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: mtlTexCnt += pBind->descriptorCount; - mtlBuffCnt += pBind->descriptorCount; + + if (!getPhysicalDevice()->useNativeTextureAtomics()) // Companion buffer is only consumed when texture atomics are emulated + mtlBuffCnt += pBind->descriptorCount; + maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt, _pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt); break; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 9cc6a282..7a032f6b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -372,6 +372,7 @@ protected: bool _isAliasable; bool _hasExtendedUsage; 
bool _hasMutableFormat; + bool _shouldSupportAtomics; bool _isLinearForAtomics; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 5c4e06f9..bb53a0dd 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -861,7 +861,7 @@ MTLTextureUsage MVKImage::getMTLTextureUsage(MTLPixelFormat mtlPixFmt) { needsReinterpretation = needsReinterpretation || !pixFmts->compatibleAsLinearOrSRGB(mtlPixFmt, viewFmt); } - MTLTextureUsage mtlUsage = pixFmts->getMTLTextureUsage(getCombinedUsage(), mtlPixFmt, _samples, _isLinear || _isLinearForAtomics, needsReinterpretation, _hasExtendedUsage); + MTLTextureUsage mtlUsage = pixFmts->getMTLTextureUsage(getCombinedUsage(), mtlPixFmt, _samples, _isLinear || _isLinearForAtomics, needsReinterpretation, _hasExtendedUsage, _shouldSupportAtomics); // Metal before 3.0 doesn't support 3D compressed textures, so we'll // decompress the texture ourselves, and we need to be able to write to it. @@ -935,10 +935,12 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV // If this is a storage image of format R32_UINT or R32_SINT, or MUTABLE_FORMAT is set // and R32_UINT is in the set of possible view formats, then we must use a texel buffer, // or image atomics won't work. 
- _isLinearForAtomics = (_arrayLayers == 1 && _mipLevels == 1 && getImageType() == VK_IMAGE_TYPE_2D && mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT) && - ((_vkFormat == VK_FORMAT_R32_UINT || _vkFormat == VK_FORMAT_R32_SINT) || - (_hasMutableFormat && pixFmts->getViewClass(_vkFormat) == MVKMTLViewClass::Color32 && - (getIsValidViewFormat(VK_FORMAT_R32_UINT) || getIsValidViewFormat(VK_FORMAT_R32_SINT))))); + _shouldSupportAtomics = mvkIsAnyFlagEnabled(getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT) && _mipLevels == 1 && + ((_vkFormat == VK_FORMAT_R32_UINT || _vkFormat == VK_FORMAT_R32_SINT) || + (_hasMutableFormat && pixFmts->getViewClass(_vkFormat) == MVKMTLViewClass::Color32 && (getIsValidViewFormat(VK_FORMAT_R32_UINT) || getIsValidViewFormat(VK_FORMAT_R32_SINT)))); + + // Assign unconditionally so the flag is never left indeterminate; the linear texel-buffer fallback is only for emulated atomics. + _isLinearForAtomics = _shouldSupportAtomics && !getPhysicalDevice()->useNativeTextureAtomics() && _arrayLayers == 1 && getImageType() == VK_IMAGE_TYPE_2D; _is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !_device->_pMetalFeatures->native3DCompressedTextures; _isDepthStencilAttachment = (mvkAreAllFlagsEnabled(pCreateInfo->usage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) || diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h index 9ebe3a36..db9cc9d4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.h @@ -388,7 +388,8 @@ public: VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, bool isLinear = false, bool needsReinterpretation = true, - bool isExtended = false); + bool isExtended = false, + bool supportAtomics = false); /** Enumerates all formats that support the given features, calling a specified function for each one. 
*/ void enumerateSupportedFormats(const VkFormatProperties3& properties, bool any, std::function func); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm index 1cfeac51..cf96c2ad 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm @@ -695,7 +695,8 @@ MTLTextureUsage MVKPixelFormats::getMTLTextureUsage(VkImageUsageFlags vkImageUsa VkSampleCountFlagBits samples, bool isLinear, bool needsReinterpretation, - bool isExtended) { + bool isExtended, + bool supportAtomics) { bool isDepthFmt = isDepthFormat(mtlFormat); bool isStencilFmt = isStencilFormat(mtlFormat); bool isCombinedDepthStencilFmt = isDepthFmt && isStencilFmt; @@ -719,6 +720,13 @@ MTLTextureUsage MVKPixelFormats::getMTLTextureUsage(VkImageUsageFlags vkImageUsa mvkEnableFlags(mtlUsage, MTLTextureUsageShaderWrite); } + +#if MVK_XCODE_15 + if (supportAtomics) { + mvkEnableFlags(mtlUsage, MTLTextureUsageShaderAtomic); + } +#endif + #if MVK_MACOS // Clearing a linear image may use shader writes. if (mvkIsAnyFlagEnabled(vkImageUsageFlags, (VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&