From e1baea9a958acca73c7fa9f4eda0404f3d53c2f9 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 1 May 2024 09:10:59 -0400 Subject: [PATCH] Refactor public MVKDevice content into MVKDeviceTrackingMixin functions. This is a non-functional code-maintenance change. Previously, MVKDevice contained a significant amount of publicly exposed internal content. This patch adds functions to MVKDeviceTrackingMixin to better encapsulate, consolidate & streamline access to this content. - Make MVKDeviceTrackingMixin a friend of MVKDevice & MVKPhysicalDevice. - Hide public MVKDevice content behind MVKDeviceTrackingMixin functions. - Remove similar MVKDevice content pointers from MVKCommandEncoder. - MVKDevice remove getPhysicalDevice(), getPixelFormats() & getMTLDevice(), to focus access through MVKDeviceTrackingMixin. - Move performance tracking functions to MVKDeviceTrackingMixin to remove need to reference MVKDevice multiple times when marking performance values. - Subclass MVKQueueSubmission, MVKMetalCompiler, MVKShaderLibrary, and MVKShaderLibraryCache from MVKBaseDeviceObject to make use of these changes. --- MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm | 92 +++++----- MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm | 10 +- MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm | 32 ++-- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 12 -- .../MoltenVK/Commands/MVKCommandBuffer.mm | 33 ++-- .../Commands/MVKCommandEncoderState.mm | 23 +-- .../Commands/MVKCommandEncodingPool.mm | 6 +- .../Commands/MVKCommandResourceFactory.mm | 22 +-- .../Commands/MVKMTLBufferAllocation.mm | 6 +- MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm | 27 +-- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 11 +- .../MoltenVK/GPUObjects/MVKDescriptorSet.mm | 17 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 161 ++++++++---------- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 89 +++++----- .../MoltenVK/GPUObjects/MVKDeviceMemory.mm | 20 +-- .../MoltenVK/GPUObjects/MVKFramebuffer.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 63 +++---- MoltenVK/MoltenVK/GPUObjects/MVKInstance.h | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h | 6 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 100 +++++------ MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm | 9 +- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 39 ++--- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm | 12 +- .../MoltenVK/GPUObjects/MVKShaderModule.h | 10 +- .../MoltenVK/GPUObjects/MVKShaderModule.mm | 82 ++++----- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 9 +- MoltenVK/MoltenVK/GPUObjects/MVKSync.h | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKSync.mm | 13 +- 29 files changed, 459 insertions(+), 459 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index 714bddbe..ede27d21 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -97,7 +97,7 @@ VkResult MVKCmdDraw::setContent(MVKCommandBuffer* cmdBuff, _firstInstance = firstInstance; // Validate - if ((_firstInstance != 0) && !(cmdBuff->getDevice()->_pMetalFeatures->baseVertexInstanceDrawing)) { + if ((_firstInstance != 0) && !(cmdBuff->getMetalFeatures().baseVertexInstanceDrawing)) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDraw(): The current device does not support drawing with a non-zero base instance."); } @@ -149,6 +149,8 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->restartMetalRenderPassIfNeeded(); auto* pipeline = cmdEncoder->getGraphicsPipeline(); + auto& mtlFeats = cmdEncoder->getMetalFeatures(); + auto& dvcLimits = cmdEncoder->getDeviceProperties().limits; // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -187,7 +189,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { case kMVKGraphicsStageVertex: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * dvcLimits.maxVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer offset: vtxOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]]; @@ -197,7 +199,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { // _firstInstance * stride, since that is the expected behaviour for a divisor of 0. cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); id vtxState = pipeline->getTessVertexStageState(); - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_vertexCount, _instanceCount, 1) threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)]; @@ -217,13 +219,13 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { case kMVKGraphicsStageTessControl: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer offset: tcOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]]; } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer offset: tcPatchOutBuff->_offset atIndex: pipeline->getTessCtlPatchOutputBufferIndex()]; @@ -244,11 +246,11 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth; NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); - while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) { + while (wgSize > dvcLimits.maxComputeWorkGroupSize[0]) { sgSize >>= 1; wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); } - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1) threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)]; @@ -297,7 +299,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1; uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); - if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { + if (mtlFeats.baseVertexInstanceDrawing) { [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType() vertexStart: _firstVertex vertexCount: _vertexCount @@ -332,11 +334,11 @@ VkResult MVKCmdDrawIndexed::setContent(MVKCommandBuffer* cmdBuff, _firstInstance = firstInstance; // Validate - MVKDevice* mvkDvc = cmdBuff->getDevice(); - if ((_firstInstance != 0) && !(mvkDvc->_pMetalFeatures->baseVertexInstanceDrawing)) { + auto& mtlFeats = cmdBuff->getMetalFeatures(); + if ((_firstInstance != 0) && !(mtlFeats.baseVertexInstanceDrawing)) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexed(): The current device does not support drawing with a non-zero base instance."); } - if ((_vertexOffset != 0) && !(mvkDvc->_pMetalFeatures->baseVertexInstanceDrawing)) { + if ((_vertexOffset != 0) && !(mtlFeats.baseVertexInstanceDrawing)) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexed(): The current device does not support drawing with a non-zero base vertex."); } @@ -373,6 +375,8 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->restartMetalRenderPassIfNeeded(); auto* pipeline = cmdEncoder->getGraphicsPipeline(); + auto& mtlFeats = cmdEncoder->getMetalFeatures(); + auto& dvcLimits = cmdEncoder->getDeviceProperties().limits; // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -414,7 +418,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { case kMVKGraphicsStageVertex: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * dvcLimits.maxVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer offset: vtxOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]]; @@ -427,7 +431,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { // _firstInstance * stride, since that is the expected behaviour for a divisor of 0. cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); id vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State(); - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_indexCount, _instanceCount, 1) threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)]; @@ -447,13 +451,13 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { case kMVKGraphicsStageTessControl: { mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer offset: tcOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]]; } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true); [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer offset: tcPatchOutBuff->_offset atIndex: pipeline->getTessCtlPatchOutputBufferIndex()]; @@ -475,11 +479,11 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { // an index buffer here. NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth; NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); - while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) { + while (wgSize > dvcLimits.maxComputeWorkGroupSize[0]) { sgSize >>= 1; wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); } - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1) threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)]; @@ -530,7 +534,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1; uint32_t instanceCount = _instanceCount * viewCount; cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); - if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { + if (mtlFeats.baseVertexInstanceDrawing) { [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType() indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType @@ -575,11 +579,11 @@ VkResult MVKCmdDrawIndirect::setContent(MVKCommandBuffer* cmdBuff, _drawCount = drawCount; // Validate - MVKDevice* mvkDvc = cmdBuff->getDevice(); - if ( !mvkDvc->_pMetalFeatures->indirectDrawing ) { + auto& mtlFeats = cmdBuff->getMetalFeatures(); + if ( !mtlFeats.indirectDrawing ) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndirect(): The current device does not support indirect drawing."); } - if (cmdBuff->_lastTessellationPipeline && !mvkDvc->_pMetalFeatures->indirectTessellationDrawing) { + if (cmdBuff->_lastTessellationPipeline && !mtlFeats.indirectTessellationDrawing) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndirect(): The current device does not support indirect tessellated drawing."); } @@ -623,7 +627,7 @@ void MVKCmdDrawIndirect::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) { [mtlConvertEncoder setBuffer: ibb.mtlBuffer offset: ibb.offset atIndex: 4]; - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (cmdEncoder->getMetalFeatures().nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1) threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; @@ -650,6 +654,8 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->restartMetalRenderPassIfNeeded(); auto* pipeline = cmdEncoder->getGraphicsPipeline(); + auto& mtlFeats = cmdEncoder->getMetalFeatures(); + auto& dvcLimits = cmdEncoder->getDeviceProperties().limits; // Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead. if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) { @@ -691,10 +697,10 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); VkDeviceSize indirectSize = (2 * sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount; - if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) { + if (mtlFeats.mslVersion >= 20100) { indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount; } - paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); + paramsIncr = std::max((size_t)dvcLimits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; @@ -702,20 +708,20 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true); mtlParmBuffOfst = tcParamsBuff->_offset; if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * dvcLimits.maxVertexOutputComponents, true); } if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true); } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true); } tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true); vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth; NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth; tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); - while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) { + while (tcWorkgroupSize > dvcLimits.maxComputeWorkGroupSize[0]) { sgSize >>= 1; tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); } @@ -776,7 +782,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { &tcWorkgroupSize, sizeof(tcWorkgroupSize), 8); - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1) threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; @@ -818,7 +824,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { &viewCount, sizeof(viewCount), 4); - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1) threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; @@ -895,7 +901,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { break; case kMVKGraphicsStageRasterization: if (pipeline->isTessellationPipeline()) { - if (cmdEncoder->getDevice()->_pMetalFeatures->indirectTessellationDrawing) { + if (mtlFeats.indirectTessellationDrawing) { if (pipeline->needsTessCtlOutputBuffer()) { [cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer offset: tcOutBuff->_offset @@ -978,11 +984,11 @@ VkResult MVKCmdDrawIndexedIndirect::setContent(MVKCommandBuffer* cmdBuff, _directCmdFirstInstance = directCmdFirstInstance; // Validate - MVKDevice* mvkDvc = cmdBuff->getDevice(); - if ( !mvkDvc->_pMetalFeatures->indirectDrawing ) { + auto& mtlFeats = cmdBuff->getMetalFeatures(); + if ( !mtlFeats.indirectDrawing ) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexedIndirect(): The current device does not support indirect drawing."); } - if (cmdBuff->_lastTessellationPipeline && !mvkDvc->_pMetalFeatures->indirectTessellationDrawing) { + if (cmdBuff->_lastTessellationPipeline && !mtlFeats.indirectTessellationDrawing) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexedIndirect(): The current device does not support indirect tessellated drawing."); } @@ -1001,6 +1007,8 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI MVKIndexMTLBufferBinding ibb = ibbOrig; MVKIndexMTLBufferBinding ibbTriFan = ibb; auto* pipeline = cmdEncoder->getGraphicsPipeline(); + auto& mtlFeats = cmdEncoder->getMetalFeatures(); + auto& dvcLimits = cmdEncoder->getDeviceProperties().limits; MVKVertexAdjustments vtxAdjmts; vtxAdjmts.mtlIndexType = ibb.mtlIndexType; @@ -1040,10 +1048,10 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI vertexCount = kMVKMaxDrawIndirectVertexCount; patchCount = mvkCeilingDivide(vertexCount, inControlPointCount); VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount; - if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) { + if (mtlFeats.mslVersion >= 20100) { indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount; } - paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); + paramsIncr = std::max((size_t)dvcLimits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; @@ -1051,13 +1059,13 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true); mtlParmBuffOfst = tcParamsBuff->_offset; if (pipeline->needsVertexOutputBuffer()) { - vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); + vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * dvcLimits.maxVertexOutputComponents, true); } if (pipeline->needsTessCtlOutputBuffer()) { - tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true); + tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true); } if (pipeline->needsTessCtlPatchOutputBuffer()) { - tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true); + tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true); } tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true); vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true); @@ -1068,7 +1076,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth; tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); - while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) { + while (tcWorkgroupSize > dvcLimits.maxComputeWorkGroupSize[0]) { sgSize >>= 1; tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); } @@ -1200,7 +1208,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI [mtlConvertEncoder setBuffer: ibbTriFan.mtlBuffer offset: ibbTriFan.offset atIndex: 7]; - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1) threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; @@ -1282,7 +1290,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI break; case kMVKGraphicsStageRasterization: if (pipeline->isTessellationPipeline()) { - if (cmdEncoder->getDevice()->_pMetalFeatures->indirectTessellationDrawing) { + if (mtlFeats.indirectTessellationDrawing) { if (pipeline->needsTessCtlOutputBuffer()) { [cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer offset: tcOutBuff->_offset diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm index 0818adf1..697cca24 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm @@ -110,11 +110,13 @@ VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, template void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { + + auto& mtlFeats = cmdEncoder->getMetalFeatures(); #if MVK_MACOS // Calls below invoke MTLBlitCommandEncoder so must apply this first. // Check if pipeline barriers are available and we are in a renderpass. - if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) { + if (mtlFeats.memoryBarriers && cmdEncoder->_mtlRenderEncoder) { for (auto& b : _barriers) { MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.srcStageMask, false); MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.dstStageMask, true); @@ -161,7 +163,7 @@ void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) { // into separate Metal renderpasses. Since this is a potentially expensive operation, // verify that at least one attachment is being used both as an input and render attachment // by checking for a VK_IMAGE_LAYOUT_GENERAL layout. - if (cmdEncoder->_mtlRenderEncoder && cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) { + if (cmdEncoder->_mtlRenderEncoder && mtlFeats.tileBasedDeferredRendering) { bool needsRenderpassRestart = false; for (auto& b : _barriers) { if (b.type == MVKPipelineBarrier::Image && b.newLayout == VK_IMAGE_LAYOUT_GENERAL) { @@ -388,7 +390,7 @@ VkResult MVKCmdPushDescriptorSet::setContent(MVKCommandBuffer* cmdBuff, _pipelineLayout->retain(); // Add the descriptor writes - MVKDevice* mvkDvc = cmdBuff->getDevice(); + auto& enabledExtns = cmdBuff->getEnabledExtensions(); clearDescriptorWrites(); // Clear for reuse _descriptorWrites.reserve(descriptorWriteCount); for (uint32_t dwIdx = 0; dwIdx < descriptorWriteCount; dwIdx++) { @@ -410,7 +412,7 @@ VkResult MVKCmdPushDescriptorSet::setContent(MVKCommandBuffer* cmdBuff, std::copy_n(descWrite.pTexelBufferView, descWrite.descriptorCount, pNewTexelBufferView); descWrite.pTexelBufferView = pNewTexelBufferView; } - if (mvkDvc->_enabledExtensions.vk_EXT_inline_uniform_block.enabled) { + if (enabledExtns.vk_EXT_inline_uniform_block.enabled) { const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock = nullptr; for (const auto* next = (VkBaseInStructure*)descWrite.pNext; next; next = next->pNext) { switch (next->sType) { diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index a99d4f69..7b8a6817 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -319,7 +319,7 @@ VkResult MVKCmdBlitImage::setContent(MVKCommandBuffer* cmdBuff, _filter = filter; - bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _dstImage->getIsLinear(); + bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getMetalFeatures().renderLinearTextures && _dstImage->getIsLinear(); _vkImageBlits.clear(); // Clear for reuse for (uint32_t rIdx = 0; rIdx < regionCount; rIdx++) { @@ -350,7 +350,7 @@ VkResult MVKCmdBlitImage::setContent(MVKCommandBuffer* cmdBuff, _filter = pBlitImageInfo->filter; - bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _dstImage->getIsLinear(); + bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getMetalFeatures().renderLinearTextures && _dstImage->getIsLinear(); _vkImageBlits.clear(); // Clear for reuse _vkImageBlits.reserve(pBlitImageInfo->regionCount); @@ -457,6 +457,7 @@ void MVKCmdBlitImage::populateVertices(MVKVertexPosTex* vertices, const VkIma template void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) { + auto& mtlFeats = cmdEncoder->getMetalFeatures(); size_t vkIBCnt = _vkImageBlits.size(); VkImageCopy vkImageCopies[vkIBCnt]; MVKImageBlitRender mvkBlitRenders[vkIBCnt]; @@ -507,7 +508,7 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com id srcMTLTex = _srcImage->getMTLTexture(srcPlaneIndex); id dstMTLTex = _dstImage->getMTLTexture(dstPlaneIndex); if (blitCnt && srcMTLTex && dstMTLTex) { - if (cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle && + if (mtlFeats.nativeTextureSwizzle && _srcImage->needsSwizzle()) { // Use a view that has a swizzle on it. srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat @@ -564,7 +565,7 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter); blitKey.srcAspect = mvkIBR.region.srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); blitKey.dstSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_dstImage->getSampleCount()); - if (!cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle && + if (!mtlFeats.nativeTextureSwizzle && _srcImage->needsSwizzle()) { VkComponentMapping vkMapping = _srcImage->getPixelFormats()->getVkComponentMapping(_srcImage->getVkFormat()); blitKey.srcSwizzleR = vkMapping.r; @@ -581,7 +582,7 @@ void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com mtlDepthAttDesc.level = mvkIBR.region.dstSubresource.mipLevel; mtlStencilAttDesc.level = mvkIBR.region.dstSubresource.mipLevel; - bool isLayeredBlit = blitKey.dstSampleCount > 1 ? cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering : cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering; + bool isLayeredBlit = blitKey.dstSampleCount > 1 ? mtlFeats.multisampleLayeredRendering : mtlFeats.layeredRendering; uint32_t layCnt = mvkIBR.region.srcSubresource.layerCount; if (_dstImage->getMTLTextureType() == MTLTextureType3D) { @@ -761,13 +762,14 @@ inline VkResult MVKCmdResolveImage::validate(MVKCommandBuffer* cmdBuff, const template void MVKCmdResolveImage::encode(MVKCommandEncoder* cmdEncoder) { + auto& mtlFeats = cmdEncoder->getMetalFeatures(); size_t vkIRCnt = _vkImageResolves.size(); VkImageBlit expansionRegions[vkIRCnt]; VkImageCopy copyRegions[vkIRCnt]; // If we can do layered rendering to a multisample texture, I can resolve all the layers at once. uint32_t layerCnt = 0; - if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) { + if (mtlFeats.multisampleLayeredRendering) { layerCnt = (uint32_t)_vkImageResolves.size(); } else { for (VkImageResolve2& vkIR : _vkImageResolves) { layerCnt += vkIR.dstSubresource.layerCount; } @@ -820,7 +822,7 @@ void MVKCmdResolveImage::encode(MVKCommandEncoder* cmdEncoder) { // direct resolve, but that of the DESTINATION if we need a temporary transfer image. mtlResolveSlices[sliceCnt].dstSubresource = vkIR.dstSubresource; mtlResolveSlices[sliceCnt].srcSubresource = needXfrImage ? vkIR.dstSubresource : vkIR.srcSubresource; - if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) { + if (mtlFeats.multisampleLayeredRendering) { sliceCnt++; } else { uint32_t layCnt = vkIR.dstSubresource.layerCount; @@ -961,7 +963,7 @@ void MVKCmdCopyBuffer::encode(MVKCommandEncoder* cmdEncoder) { id dstMTLBuff = _dstBuffer->getMTLBuffer(); NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset(); - VkDeviceSize buffAlign = cmdEncoder->getDevice()->_pMetalFeatures->mtlCopyBufferAlignment; + VkDeviceSize buffAlign = cmdEncoder->getMetalFeatures().mtlCopyBufferAlignment; for (const auto& cpyRgn : _bufferCopyRegions) { const bool useComputeCopy = buffAlign > 1 && (cpyRgn.srcOffset % buffAlign != 0 || @@ -1149,7 +1151,7 @@ void MVKCmdBufferImageCopy::encode(MVKCommandEncoder* cmdEncoder) { // If we're copying to mip level 0, we can skip the copy and just decode // directly into the image. Otherwise, we need to use an intermediate buffer. if (_toImage && _image->getIsCompressed() && mtlTexture.textureType == MTLTextureType3D && - !cmdEncoder->getDevice()->_pMetalFeatures->native3DCompressedTextures) { + !cmdEncoder->getMetalFeatures().native3DCompressedTextures) { MVKCmdCopyBufferToImageInfo info; info.srcRowStride = bytesPerRow & 0xffffffff; @@ -1496,7 +1498,7 @@ void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { // Apple GPUs do not support rendering/writing to an attachment and then reading from // that attachment within a single Metal renderpass. So, if any of the attachments just // cleared is an input attachment, we need to restart into separate Metal renderpasses. - if (cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) { + if (cmdEncoder->getMetalFeatures().tileBasedDeferredRendering) { bool needsRenderpassRestart = false; for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) { @@ -1562,7 +1564,7 @@ VkResult MVKCmdClearImage::setContent(MVKCommandBuffer* cmdBuff, // Validate MVKMTLFmtCaps mtlFmtCaps = cmdBuff->getPixelFormats()->getCapabilities(_image->getMTLPixelFormat(planeIndex)); - bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _image->getIsLinear(); + bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getMetalFeatures().renderLinearTextures && _image->getIsLinear(); uint32_t reqCap = isDS ? kMVKMTLFmtCapsDSAtt : (isDestUnwritableLinear ? kMVKMTLFmtCapsWrite : kMVKMTLFmtCapsColorAtt); if (!mvkAreAllFlagsEnabled(mtlFmtCaps, reqCap)) { return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Format %s cannot be cleared on this device.", (isDS ? "DepthStencil" : "Color"), cmdBuff->getPixelFormats()->getName(_image->getVkFormat())); @@ -1588,6 +1590,7 @@ void MVKCmdClearImage::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->endCurrentMetalEncoding(); + auto& mtlFeats = cmdEncoder->getMetalFeatures(); MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); for (auto& srRange : _subresourceRanges) { uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(srRange.aspectMask); @@ -1595,7 +1598,7 @@ void MVKCmdClearImage::encode(MVKCommandEncoder* cmdEncoder) { if ( !imgMTLTex ) { continue; } #if MVK_MACOS - if ( _image->getIsLinear() && !cmdEncoder->getDevice()->_pMetalFeatures->renderLinearTextures ) { + if (_image->getIsLinear() && !mtlFeats.renderLinearTextures) { // These images cannot be rendered. Instead, use a compute shader. // Luckily for us, linear images only have one mip and one array layer under Metal. assert( !isDS ); @@ -1608,7 +1611,7 @@ void MVKCmdClearImage::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->setComputeBytes(mtlComputeEnc, &_clearValue, sizeof(_clearValue), 0); MTLSize gridSize = mvkMTLSizeFromVkExtent3D(_image->getExtent3D()); MTLSize tgSize = MTLSizeMake(mtlClearState.threadExecutionWidth, 1, 1); - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (mtlFeats.nonUniformThreadgroups) { [mtlComputeEnc dispatchThreads: gridSize threadsPerThreadgroup: tgSize]; } else { MTLSize tgCount = MTLSizeMake(gridSize.width / tgSize.width, gridSize.height, gridSize.depth); @@ -1681,8 +1684,7 @@ void MVKCmdClearImage::encode(MVKCommandEncoder* cmdEncoder) { } // If we can do layered rendering, I can clear all the layers at once. - if (cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering && - (_image->getSampleCount() == VK_SAMPLE_COUNT_1_BIT || cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering)) { + if (mtlFeats.layeredRendering && (mtlFeats.multisampleLayeredRendering || _image->getSampleCount() == VK_SAMPLE_COUNT_1_BIT)) { if (is3D) { mtlRPCADesc.depthPlane = layerStart; mtlRPDADesc.depthPlane = layerStart; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 6bcea53f..a3d190f7 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -421,18 +421,6 @@ public: /** Context for tracking information across multiple encodings. */ MVKCommandEncodingContext* _pEncodingContext; - /** A reference to the Metal features supported by the device. */ - const MVKPhysicalDeviceMetalFeatures* _pDeviceMetalFeatures; - - /** A reference to the Vulkan features supported by the device. */ - const VkPhysicalDeviceFeatures* _pDeviceFeatures; - - /** Pointer to the properties of the device. */ - const VkPhysicalDeviceProperties* _pDeviceProperties; - - /** Pointer to the memory properties of the device. */ - const VkPhysicalDeviceMemoryProperties* _pDeviceMemoryProperties; - /** The command buffer whose commands are being encoded. */ MVKCommandBuffer* _cmdBuffer; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 031830aa..55aabe7e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -320,7 +320,7 @@ void MVKCommandBuffer::recordExecuteCommands(MVKArrayRef // Track whether a stage-based timestamp command has been added, so we know // to update the timestamp command fence when ending a Metal command encoder. void MVKCommandBuffer::recordTimestampCommand() { - _hasStageCounterTimestampCommand = mvkIsAnyFlagEnabled(_device->_pMetalFeatures->counterSamplingPoints, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE); + _hasStageCounterTimestampCommand = mvkIsAnyFlagEnabled(getMetalFeatures().counterSamplingPoints, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE); } @@ -340,14 +340,13 @@ void MVKCommandBuffer::recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline) { // because that would include app time between command submissions. void MVKCommandEncoder::encode(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { - MVKDevice* mvkDev = getDevice(); - uint64_t startTime = mvkDev->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); beginEncoding(mtlCmdBuff, pEncodingContext); encodeCommands(_cmdBuffer->_head); endEncoding(); - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); + addPerformanceInterval(getPerformanceStats().queue.commandBufferEncoding, startTime); } void MVKCommandEncoder::beginEncoding(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { @@ -494,9 +493,8 @@ void MVKCommandEncoder::setSubpass(MVKCommand* subpassCmd, _renderSubpassIndex = subpassIndex; _multiviewPassIndex = 0; - _canUseLayeredRendering = (_device->_pMetalFeatures->layeredRendering && - (_device->_pMetalFeatures->multisampleLayeredRendering || - (getSubpass()->getSampleCount() == VK_SAMPLE_COUNT_1_BIT))); + auto& mtlFeats = getMetalFeatures(); + _canUseLayeredRendering = mtlFeats.layeredRendering && (mtlFeats.multisampleLayeredRendering || getSubpass()->getSampleCount() == VK_SAMPLE_COUNT_1_BIT); beginMetalRenderPass(cmdUse); } @@ -539,7 +537,7 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { isRestart); if (_cmdBuffer->_needsVisibilityResultMTLBuffer) { if ( !_pEncodingContext->visibilityResultBuffer ) { - _pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true); + _pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(getMetalFeatures().maxQueryBufferSize, true, true); } mtlRPDesc.visibilityResultBuffer = _pEncodingContext->visibilityResultBuffer->_mtlBuffer; } @@ -577,7 +575,7 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) { // If programmable sample positions are supported, set them into the render pass descriptor. // If no custom sample positions are established, size will be zero, // and Metal will default to using default sample postions. - if (_pDeviceMetalFeatures->programmableSamplePositions) { + if (getMetalFeatures().programmableSamplePositions) { auto sampPosns = _renderingState.getSamplePositions(); [mtlRPDesc setSamplePositions: sampPosns.data() count: sampPosns.size()]; } @@ -892,7 +890,8 @@ void MVKCommandEncoder::setVertexBytes(id mtlEncoder, NSUInteger length, uint32_t mtlBuffIndex, bool descOverride) { - if (_pDeviceMetalFeatures->dynamicMTLBufferSize && length <= _pDeviceMetalFeatures->dynamicMTLBufferSize) { + auto& mtlFeats = getMetalFeatures(); + if (mtlFeats.dynamicMTLBufferSize && length <= mtlFeats.dynamicMTLBufferSize) { [mtlEncoder setVertexBytes: bytes length: length atIndex: mtlBuffIndex]; } else { const MVKMTLBufferAllocation* mtlBuffAlloc = copyToTempMTLBufferAllocation(bytes, length); @@ -905,7 +904,7 @@ void MVKCommandEncoder::setVertexBytes(id mtlEncoder, } void MVKCommandEncoder::encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride) { - if (_device->_pMetalFeatures->dynamicVertexStride) { + if (getMetalFeatures().dynamicVertexStride) { #if MVK_XCODE_15 NSUInteger mtlStride = isDynamicStride ? b.stride : MTLAttributeStrideStatic; if (b.isInline) { @@ -945,7 +944,8 @@ void MVKCommandEncoder::setFragmentBytes(id mtlEncoder, NSUInteger length, uint32_t mtlBuffIndex, bool descOverride) { - if (_pDeviceMetalFeatures->dynamicMTLBufferSize && length <= _pDeviceMetalFeatures->dynamicMTLBufferSize) { + auto& mtlFeats = getMetalFeatures(); + if (mtlFeats.dynamicMTLBufferSize && length <= mtlFeats.dynamicMTLBufferSize) { [mtlEncoder setFragmentBytes: bytes length: length atIndex: mtlBuffIndex]; } else { const MVKMTLBufferAllocation* mtlBuffAlloc = copyToTempMTLBufferAllocation(bytes, length); @@ -962,7 +962,8 @@ void MVKCommandEncoder::setComputeBytes(id mtlEncoder, NSUInteger length, uint32_t mtlBuffIndex, bool descOverride) { - if (_pDeviceMetalFeatures->dynamicMTLBufferSize && length <= _pDeviceMetalFeatures->dynamicMTLBufferSize) { + auto& mtlFeats = getMetalFeatures(); + if (mtlFeats.dynamicMTLBufferSize && length <= mtlFeats.dynamicMTLBufferSize) { [mtlEncoder setBytes: bytes length: length atIndex: mtlBuffIndex]; } else { const MVKMTLBufferAllocation* mtlBuffAlloc = copyToTempMTLBufferAllocation(bytes, length); @@ -1035,7 +1036,7 @@ void MVKCommandEncoder::markTimestamp(MVKTimestampQueryPool* pQueryPool, uint32_ addActivatedQueries(pQueryPool, query, queryCount); if (pQueryPool->hasMTLCounterBuffer()) { - MVKCounterSamplingFlags sampPts = _device->_pMetalFeatures->counterSamplingPoints; + MVKCounterSamplingFlags sampPts = getMetalFeatures().counterSamplingPoints; for (uint32_t qOfst = 0; qOfst < queryCount; qOfst++) { if (mvkIsAnyFlagEnabled(sampPts, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE)) { _timestampStageCounterQueries.push_back({ pQueryPool, query + qOfst }); @@ -1155,10 +1156,6 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer, _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT), _prefillStyle(prefillStyle){ - _pDeviceFeatures = &_device->_enabledFeatures; - _pDeviceMetalFeatures = _device->_pMetalFeatures; - _pDeviceProperties = _device->_pProperties; - _pDeviceMemoryProperties = _device->_pMemoryProperties; _pActivatedQueries = nullptr; _mtlCmdBuffer = nil; _mtlRenderEncoder = nil; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 405b7833..9f2318b2 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -69,7 +69,7 @@ void MVKPushConstantsCommandEncoderState:: setPushConstants(uint32_t offset, MVK // MSL structs can have a larger size than the equivalent C struct due to MSL alignment needs. // Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size. // Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader. - size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment; + size_t pcSizeAlign = _cmdEncoder->getMetalFeatures().pushConstantSizeAlignment; size_t pcSize = pushConstants.size(); size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign); mvkEnsureSize(_pushConstants, pcBuffSize); @@ -369,7 +369,7 @@ void MVKRenderingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFla void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef viewports, uint32_t firstViewport, bool isDynamic) { - uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; + uint32_t maxViewports = _cmdEncoder->getDeviceProperties().limits.maxViewports; if (firstViewport >= maxViewports) { return; } MVKMTLViewports mtlViewports = isDynamic ? _mtlViewports[StateScope::Dynamic] : _mtlViewports[StateScope::Static]; @@ -384,7 +384,7 @@ void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef scissors, uint32_t firstScissor, bool isDynamic) { - uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; + uint32_t maxScissors = _cmdEncoder->getDeviceProperties().limits.maxViewports; if (firstScissor >= maxScissors) { return; } MVKMTLScissors mtlScissors = isDynamic ? _mtlScissors[StateScope::Dynamic] : _mtlScissors[StateScope::Static]; @@ -538,6 +538,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { if (stage != kMVKGraphicsStageRasterization) { return; } auto& rendEnc = _cmdEncoder->_mtlRenderEncoder; + auto& enabledFeats = _cmdEncoder->getEnabledFeatures(); if (isDirty(PolygonMode)) { [rendEnc setTriangleFillMode: getMTLContent(PolygonMode)]; } if (isDirty(CullMode)) { [rendEnc setCullMode: getMTLContent(CullMode)]; } @@ -566,13 +567,13 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { [rendEnc setDepthBias: 0 slopeScale: 0 clamp: 0]; } } - if (isDirty(DepthClipEnable) && _cmdEncoder->_pDeviceFeatures->depthClamp) { + if (isDirty(DepthClipEnable) && enabledFeats.depthClamp) { [rendEnc setDepthClipMode: getMTLContent(DepthClipEnable)]; } #if MVK_USE_METAL_PRIVATE_API if (getMVKConfig().useMetalPrivateAPI && (isDirty(DepthBoundsTestEnable) || isDirty(DepthBounds)) && - _cmdEncoder->_pDeviceFeatures->depthBounds) { + enabledFeats.depthBounds) { if (getMTLContent(DepthBoundsTestEnable)) { auto& db = getMTLContent(DepthBounds); [(id)_cmdEncoder->_mtlRenderEncoder setDepthBoundsTestAMD: YES @@ -603,7 +604,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { if (isDirty(Viewports)) { auto& mtlViewports = getMTLContent(Viewports); - if (_cmdEncoder->_pDeviceFeatures->multiViewport) { + if (enabledFeats.multiViewport) { #if MVK_MACOS_OR_IOS [rendEnc setViewports: mtlViewports.viewports count: mtlViewports.viewportCount]; #endif @@ -623,7 +624,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) { mtlScissors.scissors[sIdx] = shouldDiscard ? zeroRect : _cmdEncoder->clipToRenderArea(mtlScissors.scissors[sIdx]); } - if (_cmdEncoder->_pDeviceFeatures->multiViewport) { + if (enabledFeats.multiViewport) { #if MVK_MACOS_OR_IOS [rendEnc setScissorRects: mtlScissors.scissors count: mtlScissors.scissorCount]; #endif @@ -949,7 +950,7 @@ void MVKGraphicsResourcesCommandEncoderState::markDirty() { void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) { auto* pipeline = _cmdEncoder->getGraphicsPipeline(); - bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle; + bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getMetalFeatures().nativeTextureSwizzle; bool forTessellation = pipeline->isTessellationPipeline(); bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride); @@ -1369,12 +1370,12 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() { // In most cases, a MTLCommandBuffer corresponds to a Vulkan command submit (VkSubmitInfo), // and so the error text is framed in terms of the Vulkan submit. void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) { - if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) { - bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT); + if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->getMetalFeatures().maxQueryBufferSize) { + bool shouldCount = _cmdEncoder->getEnabledFeatures().occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT); _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean; _mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset); } else { - reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize / kMVKQuerySlotSizeInBytes); + reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->getMetalFeatures().maxQueryBufferSize / kMVKQuerySlotSizeInBytes); _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled; _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset -= kMVKQuerySlotSizeInBytes; } diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm index b7b9ec65..fa33d89a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm @@ -174,9 +174,9 @@ void MVKCommandEncodingPool::clear() { #pragma mark Construction MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool), - _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true), - _privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true, false, MTLStorageModePrivate), - _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true, MTLStorageModePrivate) { + _mtlBufferAllocator(commandPool->getDevice(), commandPool->getMetalFeatures().maxMTLBufferSize, true), + _privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getMetalFeatures().maxMTLBufferSize, true, false, MTLStorageModePrivate), + _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getMetalFeatures().maxQueryBufferSize, true, true, MTLStorageModePrivate) { } MVKCommandEncodingPool::~MVKCommandEncodingPool() { diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index 74597fdd..1452c63b 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -32,7 +32,7 @@ using namespace std; id MVKCommandResourceFactory::newCmdBlitImageMTLRenderPipelineState(MVKRPSKeyBlitImg& blitKey, MVKVulkanAPIDeviceObject* owner) { - bool isLayeredBlit = blitKey.dstSampleCount > 1 ? _device->_pMetalFeatures->multisampleLayeredRendering : _device->_pMetalFeatures->layeredRendering; + bool isLayeredBlit = blitKey.dstSampleCount > 1 ? getMetalFeatures().multisampleLayeredRendering : getMetalFeatures().layeredRendering; id vtxFunc = newFunctionNamed(isLayeredBlit ? "vtxCmdBlitImageLayered" : "vtxCmdBlitImage"); // temp retain id fragFunc = newBlitFragFunction(blitKey); // temp retain MTLRenderPipelineDescriptor* plDesc = [MTLRenderPipelineDescriptor new]; // temp retain @@ -188,7 +188,7 @@ static void getSwizzleString(char swizzleStr[4], VkComponentMapping vkMapping) { id MVKCommandResourceFactory::newBlitFragFunction(MVKRPSKeyBlitImg& blitKey) { @autoreleasepool { - bool isLayeredBlit = blitKey.dstSampleCount > 1 ? _device->_pMetalFeatures->multisampleLayeredRendering : _device->_pMetalFeatures->layeredRendering; + bool isLayeredBlit = blitKey.dstSampleCount > 1 ? getMetalFeatures().multisampleLayeredRendering : getMetalFeatures().layeredRendering; NSString* typeStr = getMTLFormatTypeString(blitKey.getSrcMTLPixelFormat()); bool isArrayType = blitKey.isSrcArrayType(); @@ -228,7 +228,7 @@ id MVKCommandResourceFactory::newBlitFragFunction(MVKRPSKeyBlitImg& } NSString* sliceArg = isArrayType ? (isLayeredBlit ? @", subRez.slice + varyings.v_layer" : @", subRez.slice") : @""; NSString* srcFilter = isLinearFilter ? @"linear" : @"nearest"; - if (!getDevice()->_pMetalFeatures->nativeTextureSwizzle) { + if (!getMetalFeatures().nativeTextureSwizzle) { getSwizzleString(swizzleArg, blitKey.getSrcSwizzle()); } @@ -620,11 +620,11 @@ id MVKCommandResourceFactory::newAccumulateOcclusionQue // Returns the retained MTLFunction with the name. // The caller is responsible for releasing the returned function object. id MVKCommandResourceFactory::newFunctionNamed(const char* funcName) { - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); NSString* nsFuncName = [[NSString alloc] initWithUTF8String: funcName]; // temp retained id mtlFunc = [_mtlLibrary newFunctionWithName: nsFuncName]; // retained [nsFuncName release]; // temp release - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.functionRetrieval, startTime); return mtlFunc; } @@ -633,20 +633,20 @@ id MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode, id mtlFunc = nil; NSError* err = nil; - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); id mtlLib = [getMTLDevice() newLibraryWithSource: mslSrcCode options: getDevice()->getMTLCompileOptions() error: &err]; // temp retain - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompile, startTime); if (err) { reportError(VK_ERROR_INITIALIZATION_FAILED, "Could not compile support shader from MSL source (Error code %li):\n%s\n%s", (long)err.code, mslSrcCode.UTF8String, err.localizedDescription.UTF8String); } else { - startTime = _device->getPerformanceTimestamp(); + startTime = getPerformanceTimestamp(); mtlFunc = [mtlLib newFunctionWithName: funcName]; - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.functionRetrieval, startTime); } [mtlLib release]; // temp release @@ -685,12 +685,12 @@ MVKCommandResourceFactory::MVKCommandResourceFactory(MVKDevice* device) : MVKBas void MVKCommandResourceFactory::initMTLLibrary() { @autoreleasepool { NSError* err = nil; - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); _mtlLibrary = [getMTLDevice() newLibraryWithSource: _MVKStaticCmdShaderSource options: getDevice()->getMTLCompileOptions() error: &err]; // retained MVKAssert( !err, "Could not compile command shaders (Error code %li):\n%s", (long)err.code, err.localizedDescription.UTF8String); - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompile, startTime); } } diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm index 96ad7b3a..f0d8017c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm @@ -45,7 +45,7 @@ MVKMTLBufferAllocation* MVKMTLBufferAllocationPool::newObject() { // Adds a new MTLBuffer to the buffer pool and resets the next offset to the start of it void MVKMTLBufferAllocationPool::addMTLBuffer() { MTLResourceOptions mbOpts = (_mtlStorageMode << MTLResourceStorageModeShift) | MTLResourceCPUCacheModeDefaultCache; - _mtlBuffers.push_back({ [_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts], 0 }); + _mtlBuffers.push_back({ [getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts], 0 }); _nextOffset = 0; } @@ -120,7 +120,7 @@ MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger MVKAssert(length <= _maxAllocationLength, "This MVKMTLBufferAllocator has been configured to dispense MVKMTLBufferRegions no larger than %lu bytes.", (unsigned long)_maxAllocationLength); // Can't allocate a segment smaller than the minimum MTLBuffer alignment. - length = std::max(length, _device->_pMetalFeatures->mtlBufferAlignment); + length = std::max(length, getMetalFeatures().mtlBufferAlignment); // Convert max length to the next power-of-two exponent to use as a lookup NSUInteger p2Exp = mvkPowerOfTwoExponent(length); @@ -128,7 +128,7 @@ MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger } MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated, MTLStorageMode mtlStorageMode) : MVKBaseDeviceObject(device) { - _maxAllocationLength = std::max(maxRegionLength, _device->_pMetalFeatures->mtlBufferAlignment); + _maxAllocationLength = std::max(maxRegionLength, getMetalFeatures().mtlBufferAlignment); _isThreadSafe = makeThreadSafe; // Convert max length to the next power-of-two exponent diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm index 8898f08e..cb173ed3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm @@ -42,8 +42,9 @@ void MVKBuffer::propagateDebugName() { #pragma mark Resource memory VkResult MVKBuffer::getMemoryRequirements(VkMemoryRequirements* pMemoryRequirements) { - if (_device->_pMetalFeatures->placementHeaps) { - MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: getByteCount() options: MTLResourceStorageModePrivate]; + if (getMetalFeatures().placementHeaps) { + MTLSizeAndAlign sizeAndAlign = [getMTLDevice() heapBufferSizeAndAlignWithLength: getByteCount() + options: MTLResourceStorageModePrivate]; pMemoryRequirements->size = sizeAndAlign.size; pMemoryRequirements->alignment = sizeAndAlign.align; } else { @@ -82,7 +83,7 @@ VkResult MVKBuffer::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOf #if MVK_MACOS if (_deviceMemory) { _isHostCoherentTexelBuffer = (!isUnifiedMemoryGPU() && - !_device->_pMetalFeatures->sharedLinearTextures && + !getMetalFeatures().sharedLinearTextures && _deviceMemory->isMemoryHostCoherent() && mvkIsAnyFlagEnabled(_usage, (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT))); @@ -199,8 +200,8 @@ id MVKBuffer::getMTLBufferCache() { lock_guard lock(_lock); if (_mtlBufferCache) { return _mtlBufferCache; } - _mtlBufferCache = [_device->getMTLDevice() newBufferWithLength: getByteCount() - options: MTLResourceStorageModeManaged]; // retained + _mtlBufferCache = [getMTLDevice() newBufferWithLength: getByteCount() + options: MTLResourceStorageModeManaged]; // retained flushToDevice(_deviceMemoryOffset, _byteCount); } #endif @@ -217,7 +218,7 @@ uint64_t MVKBuffer::getMTLBufferGPUAddress() { #pragma mark Construction MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) : MVKResource(device), _usage(pCreateInfo->usage) { - _byteAlignment = _device->_pMetalFeatures->mtlBufferAlignment; + _byteAlignment = getMetalFeatures().mtlBufferAlignment; _byteCount = pCreateInfo->size; for (const auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) { @@ -284,7 +285,8 @@ void MVKBufferView::propagateDebugName() { #pragma mark Metal id MVKBufferView::getMTLTexture() { - if ( !_mtlTexture && _mtlPixelFormat && _device->_pMetalFeatures->texelBuffers) { + auto& mtlFeats = getMetalFeatures(); + if ( !_mtlTexture && _mtlPixelFormat && mtlFeats.texelBuffers) { // Lock and check again in case another thread has created the texture. lock_guard lock(_lock); @@ -300,7 +302,7 @@ id MVKBufferView::getMTLTexture() { } id mtlBuff; VkDeviceSize mtlBuffOffset; - if ( !_device->_pMetalFeatures->sharedLinearTextures && _buffer->isMemoryHostCoherent() ) { + if ( !mtlFeats.sharedLinearTextures && _buffer->isMemoryHostCoherent() ) { mtlBuff = _buffer->getMTLBufferCache(); mtlBuffOffset = _offset; } else { @@ -308,7 +310,7 @@ id MVKBufferView::getMTLTexture() { mtlBuffOffset = _buffer->getMTLBufferOffset() + _offset; } MTLTextureDescriptor* mtlTexDesc; - if ( _device->_pMetalFeatures->textureBuffers ) { + if ( mtlFeats.textureBuffers ) { mtlTexDesc = [MTLTextureDescriptor textureBufferDescriptorWithPixelFormat: _mtlPixelFormat width: _textureSize.width resourceOptions: (mtlBuff.cpuCacheMode << MTLResourceCPUCacheModeShift) | (mtlBuff.storageMode << MTLResourceStorageModeShift) @@ -347,10 +349,11 @@ MVKBufferView::MVKBufferView(MVKDevice* device, const VkBufferViewCreateInfo* pC if (byteCount == VK_WHOLE_SIZE) { byteCount = _buffer->getByteCount() - pCreateInfo->offset; } // Remaining bytes in buffer size_t blockCount = byteCount / bytesPerBlock; - if ( !_device->_pMetalFeatures->textureBuffers ) { + auto& mtlFeats = getMetalFeatures(); + if ( !mtlFeats.textureBuffers ) { // But Metal requires the texture to be a 2D texture. Determine the number of 2D rows we need and their width. // Multiple rows will automatically align with PoT max texture dimension, but need to align upwards if less than full single row. - size_t maxBlocksPerRow = _device->_pMetalFeatures->maxTextureDimension / fmtBlockSize.width; + size_t maxBlocksPerRow = mtlFeats.maxTextureDimension / fmtBlockSize.width; size_t blocksPerRow = min(blockCount, maxBlocksPerRow); _mtlBytesPerRow = mvkAlignByteCount(blocksPerRow * bytesPerBlock, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this)); @@ -367,7 +370,7 @@ MVKBufferView::MVKBufferView(MVKDevice* device, const VkBufferViewCreateInfo* pC _mtlBytesPerRow = mvkAlignByteCount(byteCount, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this)); } - if ( !_device->_pMetalFeatures->texelBuffers ) { + if ( !mtlFeats.texelBuffers ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Texel buffers are not supported on this device.")); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index 65056a55..d2fa7763 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -619,6 +619,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri } \ } while(false) + auto& mtlFeats = getMetalFeatures(); MVKShaderStageResourceBinding& bindIdxs = _mtlResourceIndexOffsets.stages[stage]; MVKShaderStageResourceBinding& dslCnts = _layout->_mtlResourceCounts.stages[stage]; @@ -627,7 +628,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri case VK_DESCRIPTOR_TYPE_SAMPLER: setResourceIndexOffset(samplerIndex); - if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfSamplers) { + if (pBinding->descriptorCount > 1 && !mtlFeats.arrayOfSamplers) { _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName())); } break; @@ -637,10 +638,10 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri setResourceIndexOffset(samplerIndex); if (pBinding->descriptorCount > 1) { - if ( !_device->_pMetalFeatures->arrayOfTextures ) { + if ( !mtlFeats.arrayOfTextures ) { _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName())); } - if ( !_device->_pMetalFeatures->arrayOfSamplers ) { + if ( !mtlFeats.arrayOfSamplers ) { _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName())); } } @@ -660,7 +661,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: setResourceIndexOffset(textureIndex); - if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) { + if (pBinding->descriptorCount > 1 && !mtlFeats.arrayOfTextures) { _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName())); } break; @@ -670,7 +671,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri setResourceIndexOffset(textureIndex); if (!getPhysicalDevice()->useNativeTextureAtomics()) setResourceIndexOffset(bufferIndex); - if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) { + if (pBinding->descriptorCount > 1 && !mtlFeats.arrayOfTextures) { _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName())); } break; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index 2b05095b..856bccee 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -98,7 +98,9 @@ void MVKDescriptorSetLayout::pushDescriptorSet(MVKCommandEncoder* cmdEncoder, if (!_isPushDescriptorLayout) return; if (!cmdEncoder) { clearConfigurationResult(); } - for (const VkWriteDescriptorSet& descWrite : descriptorWrites) { + + auto& enabledExtns = getEnabledExtensions(); + for (const VkWriteDescriptorSet& descWrite : descriptorWrites) { uint32_t dstBinding = descWrite.dstBinding; uint32_t dstArrayElement = descWrite.dstArrayElement; uint32_t descriptorCount = descWrite.descriptorCount; @@ -106,7 +108,7 @@ void MVKDescriptorSetLayout::pushDescriptorSet(MVKCommandEncoder* cmdEncoder, const VkDescriptorBufferInfo* pBufferInfo = descWrite.pBufferInfo; const VkBufferView* pTexelBufferView = descWrite.pTexelBufferView; const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock = nullptr; - if (_device->_enabledExtensions.vk_EXT_inline_uniform_block.enabled) { + if (enabledExtns.vk_EXT_inline_uniform_block.enabled) { for (const auto* next = (VkBaseInStructure*)descWrite.pNext; next; next = next->pNext) { switch (next->sType) { case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT: { @@ -489,7 +491,7 @@ VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL VkResult rslt = VK_ERROR_OUT_OF_POOL_MEMORY; NSUInteger mtlArgBuffAllocSize = mvkDSL->getMTLArgumentEncoder().mtlArgumentEncoderSize; NSUInteger mtlArgBuffAlignedSize = mvkAlignByteCount(mtlArgBuffAllocSize, - getDevice()->_pMetalFeatures->mtlBufferAlignment); + getMetalFeatures().mtlBufferAlignment); size_t dsCnt = _descriptorSetAvailablility.size(); _descriptorSetAvailablility.enumerateEnabledBits(true, [&](size_t dsIdx) { @@ -713,7 +715,7 @@ MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device, const VkDescriptorPoolCr _hasPooledDescriptors(getMVKConfig().preallocateDescriptors), // Set this first! Accessed by MVKDescriptorSet constructor and getPoolSize() in following lines. _descriptorSets(pCreateInfo->maxSets, MVKDescriptorSet(this)), _descriptorSetAvailablility(pCreateInfo->maxSets, true), - _inlineBlockMTLBufferAllocator(device, device->_pMetalFeatures->dynamicMTLBufferSize, true), + _inlineBlockMTLBufferAllocator(device, getMetalFeatures().dynamicMTLBufferSize, true), _uniformBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)), _storageBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)), _uniformBufferDynamicDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)), @@ -735,6 +737,7 @@ void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo if ( !isUsingDescriptorSetMetalArgumentBuffers() ) { return; } + auto& mtlFeats = getMetalFeatures(); @autoreleasepool { NSUInteger mtlBuffCnt = 0; NSUInteger mtlTexCnt = 0; @@ -803,14 +806,14 @@ void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo // the alignment of each descriptor set Metal argument buffer offset. NSUInteger overheadPerDescSet = (2 * getMetalArgumentBufferResourceStorageSize(1, 1, 1) - getMetalArgumentBufferResourceStorageSize(2, 2, 2) + - _device->_pMetalFeatures->mtlBufferAlignment); + mtlFeats.mtlBufferAlignment); // Measure the size of an argument buffer that would hold all of the resources // managed in this pool, then add any overhead for all the descriptor sets. NSUInteger metalArgBuffSize = getMetalArgumentBufferResourceStorageSize(mtlBuffCnt, mtlTexCnt, mtlSampCnt); metalArgBuffSize += (overheadPerDescSet * (pCreateInfo->maxSets - 1)); // metalArgBuffSize already includes overhead for one descriptor set if (metalArgBuffSize) { - NSUInteger maxMTLBuffSize = _device->_pMetalFeatures->maxMTLBufferSize; + NSUInteger maxMTLBuffSize = mtlFeats.maxMTLBufferSize; if (metalArgBuffSize > maxMTLBuffSize) { setConfigurationResult(reportError(VK_ERROR_FRAGMENTATION, "vkCreateDescriptorPool(): The requested descriptor storage of %d MB is larger than the maximum descriptor storage of %d MB per VkDescriptorPool.", (uint32_t)(metalArgBuffSize / MEBI), (uint32_t)(maxMTLBuffSize / MEBI))); metalArgBuffSize = maxMTLBuffSize; @@ -902,7 +905,7 @@ void mvkUpdateDescriptorSets(uint32_t writeCount, MVKDescriptorSet* dstSet = (MVKDescriptorSet*)pDescWrite->dstSet; const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock = nullptr; - if (dstSet->getDevice()->_enabledExtensions.vk_EXT_inline_uniform_block.enabled) { + if (dstSet->getEnabledExtensions().vk_EXT_inline_uniform_block.enabled) { for (const auto* next = (VkBaseInStructure*)pDescWrite->pNext; next; next = next->pNext) { switch (next->sType) { case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT: { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 673f74c5..c41a7a2f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -474,16 +474,10 @@ public: VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT; } /** Returns a pointer to the Vulkan instance. */ - MVKInstance* getInstance() override { return _physicalDevice->getInstance(); } - - /** Returns the physical device underlying this logical device. */ - MVKPhysicalDevice* getPhysicalDevice() { return _physicalDevice; } - - /** Returns info about the pixel format supported by the physical device. */ - MVKPixelFormats* getPixelFormats() { return &_physicalDevice->_pixelFormats; } + MVKInstance* getInstance() override { return _physicalDevice->_mvkInstance; } /** Returns the name of this device. */ - const char* getName() { return _pProperties->deviceName; } + const char* getName() { return _physicalDevice->_properties.deviceName; } /** Returns the common resource factory for creating command resources. */ MVKCommandResourceFactory* getCommandResourceFactory() { return _commandResourceFactory; } @@ -698,44 +692,6 @@ public: MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); - /** - * If performance is being tracked, returns a monotonic timestamp value for use performance timestamping. - * The returned value corresponds to the number of CPU "ticks" since the app was initialized. - * - * Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds() - * to determine the number of nanoseconds or milliseconds between the two calls. - */ - uint64_t getPerformanceTimestamp() { return _isPerformanceTracking ? mvkGetTimestamp() : 0; } - - /** - * If performance is being tracked, adds the performance for an activity with a duration interval - * between the start and end times, measured in milliseconds, to the given performance statistics. - * - * If endTime is zero or not supplied, the current time is used. - */ - void addPerformanceInterval(MVKPerformanceTracker& perfTracker, - uint64_t startTime, uint64_t endTime = 0) { - if (_isPerformanceTracking) { - updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime)); - } - }; - - /** - * If performance is being tracked, adds the performance for an activity - * with a kilobyte count, to the given performance statistics. - */ - void addPerformanceByteCount(MVKPerformanceTracker& perfTracker, uint64_t byteCount) { - if (_isPerformanceTracking) { - updateActivityPerformance(perfTracker, double(byteCount / KIBI)); - } - }; - - /** Updates the given performance statistic. */ - void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); - - /** Populates the specified statistics structure from the current activity performance statistics. */ - void getPerformanceStatistics(MVKPerformanceStatistics* pPerf); - /** Invalidates the memory regions. */ VkResult invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); @@ -748,15 +704,15 @@ public: /** Returns the number of views to be rendered in the given multiview pass. */ uint32_t getViewCountInMetalPass(uint32_t viewMask, uint32_t passIdx) const; + /** Populates the specified statistics structure from the current activity performance statistics. */ + void getPerformanceStatistics(MVKPerformanceStatistics* pPerf); + /** Log all performance statistics. */ void logPerformanceSummary(); #pragma mark Metal - /** Returns the underlying Metal device. */ - id getMTLDevice() { return _physicalDevice->getMTLDevice(); } - /** Returns whether this device is using Metal argument buffers. */ bool isUsingMetalArgumentBuffers() { return _isUsingMetalArgumentBuffers; }; @@ -837,37 +793,6 @@ public: void getMetalObjects(VkExportMetalObjectsInfoEXT* pMetalObjectsInfo); -#pragma mark Properties directly accessible - - /** The list of Vulkan extensions, indicating whether each has been enabled by the app for this device. */ - MVKExtensionList _enabledExtensions; - - /** Device features available and enabled. */ - VkPhysicalDeviceFeatures _enabledFeatures; - - // List of extended device feature enabling structures, as public member variables. -#define MVK_DEVICE_FEATURE(structName, enumName, flagCount) \ - VkPhysicalDevice##structName##Features _enabled##structName##Features; -#define MVK_DEVICE_FEATURE_EXTN(structName, enumName, extnSfx, flagCount) \ - VkPhysicalDevice##structName##Features##extnSfx _enabled##structName##Features; -#include "MVKDeviceFeatureStructs.def" - - /** VkPhysicalDeviceVulkan12Features entries that did not originate in a prior extension available and enabled. */ - MVKPhysicalDeviceVulkan12FeaturesNoExt _enabledVulkan12FeaturesNoExt; - - /** Pointer to the Metal-specific features of the underlying physical device. */ - const MVKPhysicalDeviceMetalFeatures* _pMetalFeatures; - - /** Pointer to the properties of the underlying physical device. */ - const VkPhysicalDeviceProperties* _pProperties; - - /** Pointer to the memory properties of the underlying physical device. */ - const VkPhysicalDeviceMemoryProperties* _pMemoryProperties; - - /** Performance statistics. */ - MVKPerformanceStatistics _performanceStatistics; - - #pragma mark Construction /** Constructs an instance on the specified physical device. */ @@ -905,6 +830,7 @@ protected: template void enableFeatures(S* pEnabled, const S* pRequested, const S* pAvailable, uint32_t count); template void enableFeatures(S* pRequested, VkBool32* pEnabledBools, const VkBool32* pRequestedBools, const VkBool32* pAvailableBools, uint32_t count); void enableExtensions(const VkDeviceCreateInfo* pCreateInfo); + void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); const char* getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); MVKActivityPerformanceValueType getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); @@ -915,6 +841,18 @@ protected: VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport); MVKPhysicalDevice* _physicalDevice = nullptr; + MVKExtensionList _enabledExtensions; + VkPhysicalDeviceFeatures _enabledFeatures; + MVKPhysicalDeviceVulkan12FeaturesNoExt _enabledVulkan12FeaturesNoExt; + + // List of extended device feature enabling structures, as member variables. +#define MVK_DEVICE_FEATURE(structName, enumName, flagCount) \ + VkPhysicalDevice##structName##Features _enabled##structName##Features; +#define MVK_DEVICE_FEATURE_EXTN(structName, enumName, extnSfx, flagCount) \ + VkPhysicalDevice##structName##Features##extnSfx _enabled##structName##Features; +#include "MVKDeviceFeatureStructs.def" + + MVKPerformanceStatistics _performanceStats; MVKCommandResourceFactory* _commandResourceFactory = nullptr; MVKSmallVector, kMVKQueueFamilyCount> _queuesByQueueFamilyIndex; MVKSmallVector _resources; @@ -956,28 +894,75 @@ public: MVKDevice* getDevice() { return _device; } /** Returns the physical device underlying this logical device. */ - MVKPhysicalDevice* getPhysicalDevice() { return _device->getPhysicalDevice(); } + MVKPhysicalDevice* getPhysicalDevice() { return _device->_physicalDevice; } /** Returns the underlying Metal device. */ - id getMTLDevice() { return _device->getMTLDevice(); } + id getMTLDevice() { return _device->_physicalDevice->_mtlDevice; } /** Returns whether the GPU is a unified memory device. */ - bool isUnifiedMemoryGPU() { return getPhysicalDevice()->_hasUnifiedMemory; } + bool isUnifiedMemoryGPU() { return _device->_physicalDevice->_hasUnifiedMemory; } /** Returns whether the GPU is Apple Silicon. */ - bool isAppleGPU() { return getPhysicalDevice()->_isAppleGPU; } + bool isAppleGPU() { return _device->_physicalDevice->_isAppleGPU; } /** Returns info about the pixel format supported by the physical device. */ - MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); } + MVKPixelFormats* getPixelFormats() { return &_device->_physicalDevice->_pixelFormats; } /** Returns whether this device is using Metal argument buffers. */ - bool isUsingMetalArgumentBuffers() { return _device->isUsingMetalArgumentBuffers(); }; + bool isUsingMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers; }; /** Returns whether this device is using one Metal argument buffer for each descriptor set, on multiple pipeline and pipeline stages. */ - bool isUsingDescriptorSetMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && _device->_pMetalFeatures->descriptorSetArgumentBuffers; }; + bool isUsingDescriptorSetMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers && getMetalFeatures().descriptorSetArgumentBuffers; }; /** Returns whether this device is using one Metal argument buffer for each descriptor set-pipeline-stage combination. */ - bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; }; + bool isUsingPipelineStageMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers && !getMetalFeatures().descriptorSetArgumentBuffers; }; + + /** The list of Vulkan extensions, indicating whether each has been enabled by the app for this device. */ + MVKExtensionList& getEnabledExtensions() { return _device->_enabledExtensions; } + + /** Device features available and enabled. */ + VkPhysicalDeviceFeatures& getEnabledFeatures() { return _device->_enabledFeatures; } + + // List of extended device feature enabling structures, as getEnabledXXXFeatures() functions. +#define MVK_DEVICE_FEATURE(structName, enumName, flagCount) \ + VkPhysicalDevice##structName##Features& getEnabled##structName##Features() { return _device->_enabled##structName##Features; } +#define MVK_DEVICE_FEATURE_EXTN(structName, enumName, extnSfx, flagCount) \ + VkPhysicalDevice##structName##Features##extnSfx& getEnabled##structName##Features() { return _device->_enabled##structName##Features; } +#include "MVKDeviceFeatureStructs.def" + + /** Pointer to the Metal-specific features of the underlying physical device. */ + const MVKPhysicalDeviceMetalFeatures& getMetalFeatures() { return _device->_physicalDevice->_metalFeatures; } + + /** Pointer to the properties of the underlying physical device. */ + const VkPhysicalDeviceProperties& getDeviceProperties() { return _device->_physicalDevice->_properties; } + + /** Pointer to the memory properties of the underlying physical device. */ + const VkPhysicalDeviceMemoryProperties& getDeviceMemoryProperties() { return _device->_physicalDevice->_memoryProperties; } + + /** Performance statistics. */ + MVKPerformanceStatistics& getPerformanceStats() { return _device->_performanceStats; } + + /** + * If performance is being tracked, returns a monotonic timestamp value for use performance timestamping. + * The returned value corresponds to the number of CPU "ticks" since the app was initialized. + * + * Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds() + * to determine the number of nanoseconds or milliseconds between the two calls. + */ + uint64_t getPerformanceTimestamp() { return _device->_isPerformanceTracking ? mvkGetTimestamp() : 0; } + + /** + * If performance is being tracked, adds the performance for an activity with a duration interval + * between the start and end times, measured in milliseconds, to the given performance statistics. + * + * If endTime is zero or not supplied, the current time is used. + * If addAlways is true, the duration is tracked even if performance tracking is disabled. + */ + void addPerformanceInterval(MVKPerformanceTracker& perfTracker, uint64_t startTime, uint64_t endTime = 0, bool addAlways = false) { + if (_device->_isPerformanceTracking || addAlways) { + _device->updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime)); + } + }; /** Constructs an instance for the specified device. */ MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 6529f7b4..40de0432 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1353,7 +1353,7 @@ VkResult MVKPhysicalDevice::getSurfaceSupport(uint32_t queueFamilyIndex, // Check whether this is a headless device bool isHeadless = false; #if MVK_MACOS - isHeadless = getMTLDevice().isHeadless; + isHeadless = _mtlDevice.isHeadless; #endif // If this device is headless, the surface must be headless. @@ -3743,42 +3743,42 @@ void MVKDevice::getDescriptorVariableDescriptorCountLayoutSupport(const VkDescri if (bindIdx == varBindingIdx) { requestedCount = std::max(pBind->descriptorCount, 1u); } else { + auto& mtlFeats = _physicalDevice->_metalFeatures; switch (pBind->descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: mtlBuffCnt += pBind->descriptorCount; - maxVarDescCount = _pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt; + maxVarDescCount = mtlFeats.maxPerStageBufferCount - mtlBuffCnt; break; case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: - maxVarDescCount = (uint32_t)min(_pMetalFeatures->maxMTLBufferSize, numeric_limits::max()); + maxVarDescCount = (uint32_t)min(mtlFeats.maxMTLBufferSize, numeric_limits::max()); break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: mtlTexCnt += pBind->descriptorCount; - maxVarDescCount = _pMetalFeatures->maxPerStageTextureCount - mtlTexCnt; + maxVarDescCount = mtlFeats.maxPerStageTextureCount - mtlTexCnt; break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: mtlTexCnt += pBind->descriptorCount; - - if (getPhysicalDevice()->useNativeTextureAtomics()) + if (_physicalDevice->useNativeTextureAtomics()) { mtlBuffCnt += pBind->descriptorCount; - - maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt, - _pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt); + } + maxVarDescCount = min(mtlFeats.maxPerStageTextureCount - mtlTexCnt, + mtlFeats.maxPerStageBufferCount - mtlBuffCnt); break; case VK_DESCRIPTOR_TYPE_SAMPLER: mtlSampCnt += pBind->descriptorCount; - maxVarDescCount = _pMetalFeatures->maxPerStageSamplerCount - mtlSampCnt; + maxVarDescCount = mtlFeats.maxPerStageSamplerCount - mtlSampCnt; break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: mtlTexCnt += pBind->descriptorCount; mtlSampCnt += pBind->descriptorCount; - maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt, - _pMetalFeatures->maxPerStageSamplerCount - mtlSampCnt); + maxVarDescCount = min(mtlFeats.maxPerStageTextureCount - mtlTexCnt, + mtlFeats.maxPerStageSamplerCount - mtlSampCnt); break; default: break; @@ -3838,7 +3838,7 @@ void MVKDevice::getCalibratedTimestamps(uint32_t timestampCount, uint64_t cpuStart, cpuEnd; cpuStart = mvkGetContinuousNanoseconds(); - [getMTLDevice() sampleTimestamps: &cpuStamp gpuTimestamp: &gpuStamp]; + [_physicalDevice->_mtlDevice sampleTimestamps: &cpuStamp gpuTimestamp: &gpuStamp]; // Sample again to calculate the maximum deviation. Note that the // -[MTLDevice sampleTimestamps:gpuTimestamp:] method guarantees that CPU // timestamps are in nanoseconds. We don't want to call the method again, @@ -3887,8 +3887,9 @@ uint32_t MVKDevice::getVulkanMemoryTypeIndex(MTLStorageMode mtlStorageMode) { break; } - for (uint32_t mtIdx = 0; mtIdx < _pMemoryProperties->memoryTypeCount; mtIdx++) { - if (_pMemoryProperties->memoryTypes[mtIdx].propertyFlags == vkMemFlags) { return mtIdx; } + auto& memProps = _physicalDevice->_memoryProperties; + for (uint32_t mtIdx = 0; mtIdx < memProps.memoryTypeCount; mtIdx++) { + if (memProps.memoryTypes[mtIdx].propertyFlags == vkMemFlags) { return mtIdx; } } MVKAssert(false, "Could not find memory type corresponding to VkMemoryPropertyFlags %d", vkMemFlags); return 0; @@ -4010,7 +4011,7 @@ MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInf } if (pTypeCreateInfo && pTypeCreateInfo->semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE) { - if (_pMetalFeatures->events) { + if (_physicalDevice->_metalFeatures.events) { return new MVKTimelineSemaphoreMTLEvent(this, pCreateInfo, pTypeCreateInfo, pExportInfo, pImportInfo); } else { return new MVKTimelineSemaphoreEmulated(this, pCreateInfo, pTypeCreateInfo, pExportInfo, pImportInfo); @@ -4055,7 +4056,7 @@ MVKEvent* MVKDevice::createEvent(const VkEventCreateInfo* pCreateInfo, } } - if (_pMetalFeatures->events) { + if (_physicalDevice->_metalFeatures.events) { return new MVKEventNative(this, pCreateInfo, pExportInfo, pImportInfo); } else { return new MVKEventEmulated(this, pCreateInfo, pExportInfo, pImportInfo); @@ -4432,15 +4433,15 @@ void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, doubl activity.average = total / activity.count; if (_isPerformanceTracking && getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { - logActivityInline(activity, _performanceStatistics); + logActivityInline(activity, _performanceStats); } } void MVKDevice::logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { - if (getActivityPerformanceValueType(activity, _performanceStatistics) == MVKActivityPerformanceValueTypeByteCount) { - logActivityByteCount(activity, _performanceStatistics, true); + if (getActivityPerformanceValueType(activity, _performanceStats) == MVKActivityPerformanceValueTypeByteCount) { + logActivityByteCount(activity, _performanceStats, true); } else { - logActivityDuration(activity, _performanceStatistics, true); + logActivityDuration(activity, _performanceStats, true); } } void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { @@ -4538,11 +4539,12 @@ MVKActivityPerformanceValueType MVKDevice::getActivityPerformanceValueType(MVKPe } void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) { - addPerformanceByteCount(_performanceStatistics.device.gpuMemoryAllocated, - _physicalDevice->getCurrentAllocatedSize()); - + if (_isPerformanceTracking) { + updateActivityPerformance(_performanceStats.device.gpuMemoryAllocated, + double(_physicalDevice->getCurrentAllocatedSize() / KIBI)); + } lock_guard lock(_perfLock); - if (pPerf) { *pPerf = _performanceStatistics; } + if (pPerf) { *pPerf = _performanceStats; } } VkResult MVKDevice::invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges) { @@ -4619,13 +4621,13 @@ uint32_t MVKDevice::getViewCountInMetalPass(uint32_t viewMask, uint32_t passIdx) #pragma mark Metal uint32_t MVKDevice::getMetalBufferIndexForVertexAttributeBinding(uint32_t binding) { - return ((_pMetalFeatures->maxPerStageBufferCount - 1) - binding); + return ((_physicalDevice->_metalFeatures.maxPerStageBufferCount - 1) - binding); } VkDeviceSize MVKDevice::getVkFormatTexelBufferAlignment(VkFormat format, MVKBaseObject* mvkObj) { VkDeviceSize deviceAlignment = 0; - id mtlDev = getMTLDevice(); - MVKPixelFormats* mvkPixFmts = getPixelFormats(); + id mtlDev = _physicalDevice->_mtlDevice; + MVKPixelFormats* mvkPixFmts = &_physicalDevice->_pixelFormats; if ([mtlDev respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]) { MTLPixelFormat mtlPixFmt = mvkPixFmts->getMTLPixelFormat(format); if (mvkPixFmts->getChromaSubsamplingPlaneCount(format) >= 2) { @@ -4635,7 +4637,7 @@ VkDeviceSize MVKDevice::getVkFormatTexelBufferAlignment(VkFormat format, MVKBase } deviceAlignment = [mtlDev minimumLinearTextureAlignmentForPixelFormat: mtlPixFmt]; } - return deviceAlignment ? deviceAlignment : _pProperties->limits.minTexelBufferOffsetAlignment; + return deviceAlignment ? deviceAlignment : _physicalDevice->_properties.limits.minTexelBufferOffsetAlignment; } id MVKDevice::getGlobalVisibilityResultMTLBuffer() { @@ -4649,7 +4651,7 @@ uint32_t MVKDevice::expandVisibilityResultMTLBuffer(uint32_t queryCount) { // Ensure we don't overflow the maximum number of queries _globalVisibilityQueryCount += queryCount; VkDeviceSize reqBuffLen = (VkDeviceSize)_globalVisibilityQueryCount * kMVKQuerySlotSizeInBytes; - VkDeviceSize maxBuffLen = _pMetalFeatures->maxQueryBufferSize; + VkDeviceSize maxBuffLen = _physicalDevice->_metalFeatures.maxQueryBufferSize; VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen); _globalVisibilityQueryCount = uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes); @@ -4657,10 +4659,10 @@ uint32_t MVKDevice::expandVisibilityResultMTLBuffer(uint32_t queryCount) { reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): A maximum of %d total queries are available on this device in its current configuration. See the API notes for the MVKConfiguration.supportLargeQueryPools configuration parameter for more info.", _globalVisibilityQueryCount); } - NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _pMetalFeatures->mtlBufferAlignment); + NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _physicalDevice->_metalFeatures.mtlBufferAlignment); MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache; [_globalVisibilityResultMTLBuffer release]; - _globalVisibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained + _globalVisibilityResultMTLBuffer = [_physicalDevice->_mtlDevice newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained return _globalVisibilityQueryCount - queryCount; // Might be lower than requested if an overflow occurred } @@ -4674,7 +4676,7 @@ id MVKDevice::getDefaultMTLSamplerState() { @autoreleasepool { MTLSamplerDescriptor* mtlSampDesc = [[MTLSamplerDescriptor new] autorelease]; mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers(); - _defaultMTLSamplerState = [getMTLDevice() newSamplerStateWithDescriptor: mtlSampDesc]; // retained + _defaultMTLSamplerState = [_physicalDevice->_mtlDevice newSamplerStateWithDescriptor: mtlSampDesc]; // retained } } } @@ -4688,7 +4690,7 @@ id MVKDevice::getDummyBlitMTLBuffer() { lock_guard lock(_rezLock); if ( !_dummyBlitMTLBuffer ) { @autoreleasepool { - _dummyBlitMTLBuffer = [getMTLDevice() newBufferWithLength: 1 options: MTLResourceStorageModePrivate]; + _dummyBlitMTLBuffer = [_physicalDevice->_mtlDevice newBufferWithLength: 1 options: MTLResourceStorageModePrivate]; } } } @@ -4697,7 +4699,7 @@ id MVKDevice::getDummyBlitMTLBuffer() { MTLCompileOptions* MVKDevice::getMTLCompileOptions(bool requestFastMath, bool preserveInvariance) { MTLCompileOptions* mtlCompOpt = [MTLCompileOptions new]; - mtlCompOpt.languageVersion = _pMetalFeatures->mslVersionEnum; + mtlCompOpt.languageVersion = _physicalDevice->_metalFeatures.mslVersionEnum; mtlCompOpt.fastMathEnabled = (getMVKConfig().fastMathEnabled == MVK_CONFIG_FAST_MATH_ALWAYS || (getMVKConfig().fastMathEnabled == MVK_CONFIG_FAST_MATH_ON_DEMAND && requestFastMath)); #if MVK_XCODE_12 @@ -4781,7 +4783,7 @@ void MVKDevice::getMetalObjects(VkExportMetalObjectsInfoEXT* pMetalObjectsInfo) switch (next->sType) { case VK_STRUCTURE_TYPE_EXPORT_METAL_DEVICE_INFO_EXT: { auto* pDvcInfo = (VkExportMetalDeviceInfoEXT*)next; - pDvcInfo->mtlDevice = getMTLDevice(); + pDvcInfo->mtlDevice = _physicalDevice->_mtlDevice; break; } case VK_STRUCTURE_TYPE_EXPORT_METAL_COMMAND_QUEUE_INFO_EXT: { @@ -4859,8 +4861,9 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo // In a multi-GPU system, if we are using the high-power GPU and want the window system // to also use that GPU to avoid copying content between GPUs, force the window system // to use the high-power GPU by calling the MTLCreateSystemDefaultDevice() function. + id mtlDev = _physicalDevice->_mtlDevice; if (_enabledExtensions.vk_KHR_swapchain.enabled && getMVKConfig().switchSystemGPU && - !(_physicalDevice->_mtlDevice.isLowPower || _physicalDevice->_mtlDevice.isHeadless) ) { + !(mtlDev.isLowPower || mtlDev.isHeadless) ) { MTLCreateSystemDefaultDevice(); } #endif @@ -4875,18 +4878,18 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo _commandResourceFactory = new MVKCommandResourceFactory(this); - startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice()); + startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, _physicalDevice->_mtlDevice); MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s", getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); } // Perf stats that last the duration of the app process. -static MVKPerformanceStatistics _processPerformanceStatistics = {}; +static MVKPerformanceStatistics _processPerformanceStats = {}; void MVKDevice::initPerformanceTracking() { _isPerformanceTracking = getMVKConfig().performanceTracking; - _performanceStatistics = _processPerformanceStatistics; + _performanceStats = _processPerformanceStats; } void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo) { @@ -4910,10 +4913,6 @@ void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDe else _physicalDevice = physicalDevice; - _pMetalFeatures = _physicalDevice->getMetalFeatures(); - _pProperties = &_physicalDevice->_properties; - _pMemoryProperties = &_physicalDevice->_memoryProperties; - switch (_physicalDevice->_vkSemaphoreStyle) { case MVKSemaphoreStyleUseMTLEvent: MVKLogInfo("Vulkan semaphores using MTLEvent."); @@ -5191,7 +5190,7 @@ MVKDevice::~MVKDevice() { } else if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE) { MVKLogInfo("Process activity performance summary:"); logPerformanceSummary(); - _processPerformanceStatistics = _performanceStatistics; + _processPerformanceStats = _performanceStats; } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm index 447026d2..1ec51848 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm @@ -139,7 +139,7 @@ VkResult MVKDeviceMemory::addBuffer(MVKBuffer* mvkBuff) { } if (!ensureMTLBuffer() ) { - return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind a VkBuffer to a VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a VkDeviceMemory that supports a VkBuffer is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize); + return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind a VkBuffer to a VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a VkDeviceMemory that supports a VkBuffer is %llu bytes.", _allocationSize, getMetalFeatures().maxMTLBufferSize); } // In the dedicated case, we already saved the buffer we're going to use. @@ -183,7 +183,7 @@ bool MVKDeviceMemory::ensureMTLHeap() { if (_isHostMemImported) { return true; } // Don't bother if we don't have placement heaps. - if (!getDevice()->_pMetalFeatures->placementHeaps) { return true; } + if (!getMetalFeatures().placementHeaps) { return true; } // Can't create MTLHeaps of zero size. if (_allocationSize == 0) { return true; } @@ -206,7 +206,7 @@ bool MVKDeviceMemory::ensureMTLHeap() { // to untracked, since Vulkan uses explicit barriers anyway. heapDesc.hazardTrackingMode = MTLHazardTrackingModeTracked; heapDesc.size = _allocationSize; - _mtlHeap = [_device->getMTLDevice() newHeapWithDescriptor: heapDesc]; // retained + _mtlHeap = [getMTLDevice() newHeapWithDescriptor: heapDesc]; // retained [heapDesc release]; if (!_mtlHeap) { return false; } @@ -221,9 +221,9 @@ bool MVKDeviceMemory::ensureMTLBuffer() { if (_mtlBuffer) { return true; } - NSUInteger memLen = mvkAlignByteCount(_allocationSize, _device->_pMetalFeatures->mtlBufferAlignment); + NSUInteger memLen = mvkAlignByteCount(_allocationSize, getMetalFeatures().mtlBufferAlignment); - if (memLen > _device->_pMetalFeatures->maxMTLBufferSize) { return false; } + if (memLen > getMetalFeatures().maxMTLBufferSize) { return false; } // If host memory was already allocated, it is copied into the new MTLBuffer, and then released. if (_mtlHeap) { @@ -258,7 +258,7 @@ bool MVKDeviceMemory::ensureHostMemory() { if (_pMemory) { return true; } if ( !_pHostMemory) { - size_t memAlign = _device->_pMetalFeatures->mtlBufferAlignment; + size_t memAlign = getMetalFeatures().mtlBufferAlignment; NSUInteger memLen = mvkAlignByteCount(_allocationSize, memAlign); int err = posix_memalign(&_pHostMemory, memAlign, memLen); if (err) { return false; } @@ -284,7 +284,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device, const VkAllocationCallbacks* pAllocator) : MVKVulkanAPIDeviceObject(device) { // Set Metal memory parameters _vkMemAllocFlags = 0; - _vkMemPropFlags = _device->_pMemoryProperties->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags; + _vkMemPropFlags = getDeviceMemoryProperties().memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags; _mtlStorageMode = getPhysicalDevice()->getMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags); _mtlCPUCacheMode = mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(_vkMemPropFlags); @@ -359,13 +359,13 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device, if (!((MVKImage*)dedicatedImage)->_isLinear) { setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Host-coherent VkDeviceMemory objects cannot be associated with optimal-tiling images.")); } else { - if (!_device->_pMetalFeatures->sharedLinearTextures) { + if (!getMetalFeatures().sharedLinearTextures) { // Need to use the managed mode for images. _mtlStorageMode = MTLStorageModeManaged; } // Nonetheless, we need a buffer to be able to map the memory at will. if (!ensureMTLBuffer() ) { - setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize)); + setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, getMetalFeatures().maxMTLBufferSize)); } } } @@ -392,7 +392,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device, // If memory was imported, a MTLBuffer must be created on it. // Or if a MTLBuffer will be exported, ensure it exists. if ((isMemoryHostCoherent() || _isHostMemImported || willExportMTLBuffer) && !ensureMTLBuffer() ) { - setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent or exportable VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize)); + setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent or exportable VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, getMetalFeatures().maxMTLBufferSize)); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKFramebuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKFramebuffer.mm index f1ece6ae..688c01bb 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKFramebuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKFramebuffer.mm @@ -37,7 +37,7 @@ id MVKFramebuffer::getDummyAttachmentMTLTexture(MVKRenderSubpass* su MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: MTLPixelFormatR8Unorm width: fbExtent.width height: fbExtent.height mipmapped: NO]; if (subpass->isMultiview()) { #if MVK_MACOS_OR_IOS - if (sampleCount > 1 && getDevice()->_pMetalFeatures->multisampleLayeredRendering) { + if (sampleCount > 1 && getMetalFeatures().multisampleLayeredRendering) { mtlTexDesc.textureType = MTLTextureType2DMultisampleArray; mtlTexDesc.sampleCount = sampleCount; } else { @@ -49,7 +49,7 @@ id MVKFramebuffer::getDummyAttachmentMTLTexture(MVKRenderSubpass* su mtlTexDesc.arrayLength = subpass->getViewCountInMetalPass(passIdx); } else if (fbLayerCount > 1) { #if MVK_MACOS - if (sampleCount > 1 && getDevice()->_pMetalFeatures->multisampleLayeredRendering) { + if (sampleCount > 1 && getMetalFeatures().multisampleLayeredRendering) { mtlTexDesc.textureType = MTLTextureType2DMultisampleArray; mtlTexDesc.sampleCount = sampleCount; } else { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 2a5f22bf..ce8276f7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -146,7 +146,7 @@ void MVKImagePlane::initSubresources(const VkImageCreateInfo* pCreateInfo) { VkDeviceSize offset = 0; if (_planeIndex > 0 && _image->getMemoryBindingCount() == 1) { - if (!_image->_isLinear && !_image->_isLinearForAtomics && _image->getDevice()->_pMetalFeatures->placementHeaps) { + if (!_image->_isLinear && !_image->_isLinearForAtomics && _image->getMetalFeatures().placementHeaps) { // For textures allocated directly on the heap, we need to obey the size and alignment // requirements reported by the device. MTLTextureDescriptor* mtlTexDesc = _image->_planes[_planeIndex-1]->newMTLTextureDescriptor(); // temp retain @@ -387,16 +387,17 @@ VkResult MVKImageMemoryBinding::getMemoryRequirements(VkMemoryRequirements* pMem } VkResult MVKImageMemoryBinding::getMemoryRequirements(const void*, VkMemoryRequirements2* pMemoryRequirements) { + auto& mtlFeats = getMetalFeatures(); pMemoryRequirements->sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; for (auto* next = (VkBaseOutStructure*)pMemoryRequirements->pNext; next; next = next->pNext) { switch (next->sType) { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { auto* dedicatedReqs = (VkMemoryDedicatedRequirements*)next; bool writable = mvkIsAnyFlagEnabled(_image->getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); - bool canUseTexelBuffer = _device->_pMetalFeatures->texelBuffers && _image->_isLinear && !_image->getIsCompressed(); + bool canUseTexelBuffer = mtlFeats.texelBuffers && _image->_isLinear && !_image->getIsCompressed(); dedicatedReqs->requiresDedicatedAllocation = _requiresDedicatedMemoryAllocation; dedicatedReqs->prefersDedicatedAllocation = (dedicatedReqs->requiresDedicatedAllocation || - (!canUseTexelBuffer && (writable || !_device->_pMetalFeatures->placementHeaps))); + (!canUseTexelBuffer && (writable || !mtlFeats.placementHeaps))); break; } default: @@ -411,13 +412,14 @@ VkResult MVKImageMemoryBinding::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDevi if (_deviceMemory) { _deviceMemory->removeImageMemoryBinding(this); } MVKResource::bindDeviceMemory(mvkMem, memOffset); - bool usesTexelBuffer = _device->_pMetalFeatures->texelBuffers && _deviceMemory; // Texel buffers available - usesTexelBuffer = usesTexelBuffer && (isMemoryHostAccessible() || _device->_pMetalFeatures->placementHeaps) && _image->_isLinear && !_image->getIsCompressed(); // Applicable memory layout + auto& mtlFeats = getMetalFeatures(); + bool usesTexelBuffer = mtlFeats.texelBuffers && _deviceMemory; // Texel buffers available + usesTexelBuffer = usesTexelBuffer && (isMemoryHostAccessible() || mtlFeats.placementHeaps) && _image->_isLinear && !_image->getIsCompressed(); // Applicable memory layout // macOS before 10.15.5 cannot use shared memory for texel buffers. - usesTexelBuffer = usesTexelBuffer && (_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent()); + usesTexelBuffer = usesTexelBuffer && (mtlFeats.sharedLinearTextures || !isMemoryHostCoherent()); - if (_image->_isLinearForAtomics || (usesTexelBuffer && _device->_pMetalFeatures->placementHeaps)) { + if (_image->_isLinearForAtomics || (usesTexelBuffer && mtlFeats.placementHeaps)) { if (usesTexelBuffer && _deviceMemory->ensureMTLBuffer()) { _mtlTexelBuffer = _deviceMemory->_mtlBuffer; _mtlTexelBufferOffset = getDeviceMemoryOffset(); @@ -471,7 +473,7 @@ bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS return ( !isUnifiedMemoryGPU() && (barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) && - isMemoryHostAccessible() && (!_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent())); + isMemoryHostAccessible() && (!getMetalFeatures().sharedLinearTextures || !isMemoryHostCoherent())); #else return false; #endif @@ -953,7 +955,7 @@ VkResult MVKImage::setMTLTexture(uint8_t planeIndex, id mtlTexture) _usage = getPixelFormats()->getVkImageUsageFlags(mtlTexture.usage, mtlTexture.pixelFormat); _stencilUsage = _usage; - if (_device->_pMetalFeatures->ioSurfaces) { + if (getMetalFeatures().ioSurfaces) { _ioSurface = mtlTexture.iosurface; if (_ioSurface) { CFRetain(_ioSurface); } } @@ -976,7 +978,7 @@ VkResult MVKImage::useIOSurface(IOSurfaceRef ioSurface) { // Don't recreate existing. But special case of incoming nil if already nil means create a new IOSurface. if (ioSurface && _ioSurface == ioSurface) { return VK_SUCCESS; } - if (!_device->_pMetalFeatures->ioSurfaces) { return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkUseIOSurfaceMVK() : IOSurfaces are not supported on this platform."); } + if (!getMetalFeatures().ioSurfaces) { return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkUseIOSurfaceMVK() : IOSurfaces are not supported on this platform."); } #if MVK_SUPPORT_IOSURFACE_BOOL @@ -1053,7 +1055,7 @@ MTLStorageMode MVKImage::getMTLStorageMode() { #if MVK_MACOS // For macOS prior to 10.15.5, textures cannot use Shared storage mode, so change to Managed storage mode. // All Apple GPUs support shared linear textures, so this only applies to other GPUs. - if (stgMode == MTLStorageModeShared && !_device->_pMetalFeatures->sharedLinearTextures) { + if (stgMode == MTLStorageModeShared && !getMetalFeatures().sharedLinearTextures) { stgMode = MTLStorageModeManaged; } #endif @@ -1144,6 +1146,7 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV _mipLevels = validateMipLevels(pCreateInfo, isAttachment); _isLinear = validateLinear(pCreateInfo, isAttachment); + auto& mtlFeats = getMetalFeatures(); MVKPixelFormats* pixFmts = getPixelFormats(); _vkFormat = pCreateInfo->format; _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT); @@ -1159,10 +1162,10 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV _isLinearForAtomics = _shouldSupportAtomics && !getPhysicalDevice()->useNativeTextureAtomics() && _arrayLayers == 1 && getImageType() == VK_IMAGE_TYPE_2D; - _is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !_device->_pMetalFeatures->native3DCompressedTextures; + _is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !mtlFeats.native3DCompressedTextures; _isDepthStencilAttachment = (mvkAreAllFlagsEnabled(pCreateInfo->usage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) || mvkAreAllFlagsEnabled(pixFmts->getVkFormatProperties3(pCreateInfo->format).optimalTilingFeatures, VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT)); - _canSupportMTLTextureView = !_isDepthStencilAttachment || _device->_pMetalFeatures->stencilViews; + _canSupportMTLTextureView = !_isDepthStencilAttachment || mtlFeats.stencilViews; _rowByteAlignment = _isLinear || _isLinearForAtomics ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(pixFmts->getBytesPerBlock(pCreateInfo->format)); VkExtent2D blockTexelSizeOfPlane[3]; @@ -1188,21 +1191,21 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV } _planes[planeIndex]->initSubresources(pCreateInfo); MVKImageMemoryBinding* memoryBinding = _planes[planeIndex]->getMemoryBinding(); - if (!_isLinear && !_isLinearForAtomics && _device->_pMetalFeatures->placementHeaps) { + if (!_isLinear && !_isLinearForAtomics && mtlFeats.placementHeaps) { MTLTextureDescriptor* mtlTexDesc = _planes[planeIndex]->newMTLTextureDescriptor(); // temp retain - MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapTextureSizeAndAlignWithDescriptor: mtlTexDesc]; + MTLSizeAndAlign sizeAndAlign = [getMTLDevice() heapTextureSizeAndAlignWithDescriptor: mtlTexDesc]; [mtlTexDesc release]; // Textures allocated on heaps must be aligned to the alignment reported here, // so make sure there's enough space to hold all the planes after alignment. memoryBinding->_byteCount = mvkAlignByteRef(memoryBinding->_byteCount, sizeAndAlign.align) + sizeAndAlign.size; memoryBinding->_byteAlignment = std::max(memoryBinding->_byteAlignment, (VkDeviceSize)sizeAndAlign.align); - } else if (_isLinearForAtomics && _device->_pMetalFeatures->placementHeaps) { + } else if (_isLinearForAtomics && mtlFeats.placementHeaps) { NSUInteger bufferLength = 0; for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) { VkExtent3D mipExtent = getExtent3D(planeIndex, mipLvl); bufferLength += getBytesPerLayer(planeIndex, mipLvl) * mipExtent.depth * _arrayLayers; } - MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: bufferLength options: MTLResourceStorageModePrivate]; + MTLSizeAndAlign sizeAndAlign = [getMTLDevice() heapBufferSizeAndAlignWithLength: bufferLength options: MTLResourceStorageModePrivate]; memoryBinding->_byteCount += sizeAndAlign.size; memoryBinding->_byteAlignment = std::max(std::max(memoryBinding->_byteAlignment, _rowByteAlignment), (VkDeviceSize)sizeAndAlign.align); } else { @@ -1269,7 +1272,7 @@ VkSampleCountFlagBits MVKImage::validateSamples(const VkImageCreateInfo* pCreate validSamples = VK_SAMPLE_COUNT_1_BIT; } - if (pCreateInfo->arrayLayers > 1 && !_device->_pMetalFeatures->multisampleArrayTextures ) { + if (pCreateInfo->arrayLayers > 1 && !getMetalFeatures().multisampleArrayTextures ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : This device does not support multisampled array textures. Setting sample count to 1.")); validSamples = VK_SAMPLE_COUNT_1_BIT; } @@ -1513,9 +1516,9 @@ id MVKPresentableSwapchainImage::getCAMetalDrawable() { bool hasInvalidFormat = false; uint32_t attemptCnt = _swapchain->getImageCount(); // Attempt a resonable number of times for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); _mtlDrawable = [_swapchain->getCAMetalLayer().nextDrawable retain]; // retained - _device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + addPerformanceInterval(getPerformanceStats().queue.retrieveCAMetalDrawable, startTime); hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat; if (hasInvalidFormat) { releaseMetalDrawable(); } } @@ -1629,7 +1632,7 @@ void MVKPresentableSwapchainImage::addPresentedHandler(id mtlDr void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) { retain(); _swapchain->beginPresentation(presentInfo); - _presentationStartTime = getDevice()->getPerformanceTimestamp(); + _presentationStartTime = getPerformanceTimestamp(); } void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo, @@ -1646,7 +1649,7 @@ void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& pr // If I have become detached from the swapchain, it means the swapchain, and possibly the // VkDevice, have been destroyed by the time of this callback, so do not reference them. lock_guard lock(_detachmentLock); - if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } + if (_device) { addPerformanceInterval(getPerformanceStats().queue.presentSwapchains, _presentationStartTime); } if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } } @@ -2124,7 +2127,7 @@ VkResult MVKImageViewPlane::initSwizzledMTLPixelFormat(const VkImageViewCreateIn // Enable either native or shader swizzling, depending on what is available, preferring native, and return whether successful. bool MVKImageViewPlane::enableSwizzling() { - _useNativeSwizzle = _device->_pMetalFeatures->nativeTextureSwizzle; + _useNativeSwizzle = getMetalFeatures().nativeTextureSwizzle; _useShaderSwizzle = !_useNativeSwizzle && getMVKConfig().fullImageViewSwizzle; return _useNativeSwizzle || _useShaderSwizzle; } @@ -2261,15 +2264,16 @@ MVKImageView::MVKImageView(MVKDevice* device, const VkImageViewCreateInfo* pCrea _subresourceRange.layerCount = _image->getLayerCount() - _subresourceRange.baseArrayLayer; } + auto& mtlFeats = getMetalFeatures(); bool isAttachment = mvkIsAnyFlagEnabled(_usage, (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)); if (isAttachment && _subresourceRange.layerCount > 1) { - if ( !_device->_pMetalFeatures->layeredRendering ) { + if ( !mtlFeats.layeredRendering ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImageView() : This device does not support rendering to array (layered) attachments.")); } - if (_image->getSampleCount() != VK_SAMPLE_COUNT_1_BIT && !_device->_pMetalFeatures->multisampleLayeredRendering ) { + if (_image->getSampleCount() != VK_SAMPLE_COUNT_1_BIT && !mtlFeats.multisampleLayeredRendering ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImageView() : This device does not support rendering to multisampled array (layered) attachments.")); } } @@ -2414,8 +2418,9 @@ bool MVKSampler::getConstexprSampler(mvk::MSLResourceBinding& resourceBinding) { // Ensure available Metal features. MTLSamplerAddressMode MVKSampler::getMTLSamplerAddressMode(VkSamplerAddressMode vkMode) { - if ((vkMode == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER && !_device->_pMetalFeatures->samplerClampToBorder) || - (vkMode == VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE && !_device->_pMetalFeatures->samplerMirrorClampToEdge)) { + auto& mtlFeats = getMetalFeatures(); + if ((vkMode == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER && !mtlFeats.samplerClampToBorder) || + (vkMode == VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE && !mtlFeats.samplerMirrorClampToEdge)) { return MTLSamplerAddressModeClampToZero; } return mvkMTLSamplerAddressModeFromVkSamplerAddressMode(vkMode); @@ -2448,7 +2453,7 @@ MTLSamplerDescriptor* MVKSampler::newMTLSamplerDescriptor(const VkSamplerCreateI mtlSampDesc.lodMinClamp = pCreateInfo->minLod; mtlSampDesc.lodMaxClamp = pCreateInfo->maxLod; mtlSampDesc.maxAnisotropy = (pCreateInfo->anisotropyEnable - ? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, _device->_pProperties->limits.maxSamplerAnisotropy) + ? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, getDeviceProperties().limits.maxSamplerAnisotropy) : 1); mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates; mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers(); @@ -2483,7 +2488,7 @@ MVKSampler::MVKSampler(MVKDevice* device, const VkSamplerCreateInfo* pCreateInfo } } - _requiresConstExprSampler = (pCreateInfo->compareEnable && !_device->_pMetalFeatures->depthSampleCompare) || _ycbcrConversion; + _requiresConstExprSampler = (pCreateInfo->compareEnable && !getMetalFeatures().depthSampleCompare) || _ycbcrConversion; @autoreleasepool { auto mtlDev = getMTLDevice(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h index 6492f386..b8295d2a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h @@ -159,9 +159,6 @@ public: /** Returns whether debug callbacks are being used. */ bool hasDebugCallbacks() { return _hasDebugReportCallbacks || _hasDebugUtilsMessengers; } - /** The list of Vulkan extensions, indicating whether each has been enabled by the app. */ - const MVKExtensionList _enabledExtensions; - #pragma mark Object Creation @@ -198,6 +195,7 @@ protected: void logVersions(); VkResult verifyLayers(uint32_t count, const char* const* names); + MVKExtensionList _enabledExtensions; MVKConfiguration _mvkConfig; VkApplicationInfo _appInfo; MVKSmallVector _physicalDevices; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 11ce6165..44fe65d1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -164,7 +164,7 @@ public: /** Returns whether the pipeline creation fail if a pipeline compile is required. */ bool shouldFailOnPipelineCompileRequired() { - return (_device->_enabledPipelineCreationCacheControlFeatures.pipelineCreationCacheControl && + return (getEnabledPipelineCreationCacheControlFeatures().pipelineCreationCacheControl && mvkIsAnyFlagEnabled(_flags, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)); } @@ -590,7 +590,7 @@ public: MVKRenderPipelineCompiler(MVKVulkanAPIDeviceObject* owner) : MVKMetalCompiler(owner) { _compilerType = "Render pipeline"; - _pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.pipelineCompile; + _pPerformanceTracker = &getPerformanceStats().shaderCompilation.pipelineCompile; } ~MVKRenderPipelineCompiler() override; @@ -635,7 +635,7 @@ public: MVKComputePipelineCompiler(MVKVulkanAPIDeviceObject* owner, const char* compilerType = nullptr) : MVKMetalCompiler(owner) { _compilerType = compilerType ? compilerType : "Compute pipeline"; - _pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.pipelineCompile; + _pPerformanceTracker = &getPerformanceStats().shaderCompilation.pipelineCompile; } ~MVKComputePipelineCompiler() override; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 02170359..4b77291c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -602,7 +602,7 @@ void MVKGraphicsPipeline::initDynamicState(const VkGraphicsPipelineCreateInfo* p // Some dynamic states have other restrictions switch (dynStateType) { case VertexStride: - isDynamic = _device->_pMetalFeatures->dynamicVertexStride; + isDynamic = getMetalFeatures().dynamicVertexStride; if ( !isDynamic ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device and platform does not support VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE (macOS 14.0 or iOS/tvOS 17.0, plus either Apple4 or Mac2 GPU).")); } break; default: @@ -1216,7 +1216,7 @@ bool MVKGraphicsPipeline::addTessCtlShaderToPipeline(MTLComputePipelineDescripto shaderConfig.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageTessCtl]; shaderConfig.options.mslOptions.capture_output_to_buffer = true; shaderConfig.options.mslOptions.multi_patch_workgroup = true; - shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pTessCtlSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize; + shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pTessCtlSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : getMetalFeatures().maxSubgroupSize; addPrevStageOutputToShaderConversionConfig(shaderConfig, vtxOutputs); addNextStageInputToShaderConversionConfig(shaderConfig, teInputs); @@ -1318,6 +1318,7 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto SPIRVShaderOutputs& shaderOutputs, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB) { + auto& mtlFeats = getMetalFeatures(); if (pFragmentSS) { shaderConfig.options.entryPointStage = spv::ExecutionModelFragment; shaderConfig.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageFragment]; @@ -1326,9 +1327,9 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto shaderConfig.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageFragment]; shaderConfig.options.entryPointName = pFragmentSS->pName; shaderConfig.options.mslOptions.capture_output_to_buffer = false; - shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pFragmentSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize; + shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pFragmentSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : mtlFeats.maxSubgroupSize; shaderConfig.options.mslOptions.check_discarded_frag_stores = true; - if (_device->_pMetalFeatures->needsSampleDrefLodArrayWorkaround) { + if (mtlFeats.needsSampleDrefLodArrayWorkaround) { shaderConfig.options.mslOptions.sample_dref_lod_array_as_grad = true; } if (_isRasterizing && pCreateInfo->pMultisampleState) { // Must ignore allowed bad pMultisampleState pointer if rasterization disabled @@ -1407,8 +1408,8 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc, if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) { // Vulkan allows any stride, but Metal requires multiples of 4 on older GPUs. - if (isVtxStrideStatic && (pVKVB->stride % _device->_pMetalFeatures->vertexStrideAlignment) != 0) { - setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", _device->_pMetalFeatures->vertexStrideAlignment)); + if (isVtxStrideStatic && (pVKVB->stride % getMetalFeatures().vertexStrideAlignment) != 0) { + setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", getMetalFeatures().vertexStrideAlignment)); return false; } @@ -1599,7 +1600,7 @@ void MVKGraphicsPipeline::addTessellationToPipeline(MTLRenderPipelineDescriptor* } } - plDesc.maxTessellationFactor = _device->_pProperties->limits.maxTessellationGenerationLevel; + plDesc.maxTessellationFactor = getDeviceProperties().limits.maxTessellationGenerationLevel; plDesc.tessellationFactorFormat = MTLTessellationFactorFormatHalf; // FIXME Use Float when it becomes available plDesc.tessellationFactorStepFunction = MTLTessellationFactorStepFunctionPerPatch; plDesc.tessellationOutputWindingOrder = mvkMTLWindingFromSpvExecutionMode(reflectData.windingOrder); @@ -1676,7 +1677,7 @@ void MVKGraphicsPipeline::addFragmentOutputToPipeline(MTLRenderPipelineDescripto // In Vulkan, it's perfectly valid to render without any attachments. In Metal, if that // isn't supported, and we have no attachments, then we have to add a dummy attachment. - if (!getDevice()->_pMetalFeatures->renderWithoutAttachments && + if (!getMetalFeatures().renderWithoutAttachments && !caCnt && !pRendInfo->depthAttachmentFormat && !pRendInfo->stencilAttachmentFormat) { MTLRenderPipelineColorAttachmentDescriptor* colorDesc = plDesc.colorAttachments[0]; @@ -1721,16 +1722,17 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu } } - shaderConfig.options.mslOptions.msl_version = _device->_pMetalFeatures->mslVersion; - shaderConfig.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension; + auto& mtlFeats = getMetalFeatures(); + shaderConfig.options.mslOptions.msl_version = mtlFeats.mslVersion; + shaderConfig.options.mslOptions.texel_buffer_texture_width = mtlFeats.maxTextureDimension; shaderConfig.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this); - shaderConfig.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers; + shaderConfig.options.mslOptions.texture_buffer_native = mtlFeats.textureBuffers; bool useMetalArgBuff = isUsingMetalArgumentBuffers(); shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff; shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; - shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = _device->_pMetalFeatures->needsCubeGradWorkaround; + shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = mtlFeats.needsCubeGradWorkaround; MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout; layout->populateShaderConversionConfig(shaderConfig); @@ -1775,24 +1777,24 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu } } - shaderConfig.options.mslOptions.ios_support_base_vertex_instance = getDevice()->_pMetalFeatures->baseVertexInstanceDrawing; + shaderConfig.options.mslOptions.ios_support_base_vertex_instance = mtlFeats.baseVertexInstanceDrawing; shaderConfig.options.mslOptions.texture_1D_as_2D = getMVKConfig().texture1DAs2D; shaderConfig.options.mslOptions.enable_point_size_builtin = isRenderingPoints(pCreateInfo) || reflectData.pointMode; shaderConfig.options.mslOptions.enable_frag_depth_builtin = pixFmts->isDepthFormat(pixFmts->getMTLPixelFormat(pRendInfo->depthAttachmentFormat)); shaderConfig.options.mslOptions.enable_frag_stencil_ref_builtin = pixFmts->isStencilFormat(pixFmts->getMTLPixelFormat(pRendInfo->stencilAttachmentFormat)); shaderConfig.options.shouldFlipVertexY = getMVKConfig().shaderConversionFlipVertexY; - shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle; + shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !mtlFeats.nativeTextureSwizzle; shaderConfig.options.mslOptions.tess_domain_origin_lower_left = pTessDomainOriginState && pTessDomainOriginState->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; shaderConfig.options.mslOptions.multiview = mvkIsMultiview(pRendInfo->viewMask); shaderConfig.options.mslOptions.multiview_layered_rendering = getPhysicalDevice()->canUseInstancingForMultiview(); shaderConfig.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT); shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); #if MVK_MACOS - shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute; + shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.simdPermute; #endif #if MVK_IOS_OR_TVOS - shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute; - shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute; + shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.quadPermute; + shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!mtlFeats.simdPermute; #endif shaderConfig.options.tessPatchKind = reflectData.patchKind; @@ -2136,16 +2138,17 @@ MVKComputePipeline::MVKComputePipeline(MVKDevice* device, pPipelineFB->duration = mvkGetElapsedNanoseconds(pipelineStart); } - if (_needsSwizzleBuffer && _swizzleBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) { + auto& mtlFeats = getMetalFeatures(); + if (_needsSwizzleBuffer && _swizzleBufferIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) { setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires swizzle buffer, but there is no free slot to pass it.")); } - if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) { + if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) { setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires buffer size buffer, but there is no free slot to pass it.")); } - if (_needsDynamicOffsetBuffer && _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) { + if (_needsDynamicOffsetBuffer && _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) { setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dynamic offset buffer, but there is no free slot to pass it.")); } - if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) { + if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) { setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dispatch base buffer, but there is no free slot to pass it.")); } } @@ -2157,17 +2160,18 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI const VkPipelineShaderStageCreateInfo* pSS = &pCreateInfo->stage; if ( !mvkAreAllFlagsEnabled(pSS->stage, VK_SHADER_STAGE_COMPUTE_BIT) ) { return MVKMTLFunctionNull; } + auto& mtlFeats = getMetalFeatures(); SPIRVToMSLConversionConfiguration shaderConfig; shaderConfig.options.entryPointName = pCreateInfo->stage.pName; shaderConfig.options.entryPointStage = spv::ExecutionModelGLCompute; - shaderConfig.options.mslOptions.msl_version = _device->_pMetalFeatures->mslVersion; - shaderConfig.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension; + shaderConfig.options.mslOptions.msl_version = mtlFeats.mslVersion; + shaderConfig.options.mslOptions.texel_buffer_texture_width = mtlFeats.maxTextureDimension; shaderConfig.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this); - shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle; - shaderConfig.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers; + shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !mtlFeats.nativeTextureSwizzle; + shaderConfig.options.mslOptions.texture_buffer_native = mtlFeats.textureBuffers; shaderConfig.options.mslOptions.dispatch_base = _allowsDispatchBase; shaderConfig.options.mslOptions.texture_1D_as_2D = getMVKConfig().texture1DAs2D; - shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize; + shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : mtlFeats.maxSubgroupSize; bool useMetalArgBuff = isUsingMetalArgumentBuffers(); shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff; @@ -2175,11 +2179,11 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff; #if MVK_MACOS - shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute; + shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.simdPermute; #endif #if MVK_IOS_OR_TVOS - shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute; - shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute; + shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.quadPermute; + shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!mtlFeats.simdPermute; #endif MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout; @@ -2224,7 +2228,7 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI } uint32_t MVKComputePipeline::getImplicitBufferIndex(uint32_t bufferIndexOffset) { - return _device->_pMetalFeatures->maxPerStageBufferCount - (bufferIndexOffset + 1); + return getMetalFeatures().maxPerStageBufferCount - (bufferIndexOffset + 1); } bool MVKComputePipeline::usesPhysicalStorageBufferAddressesCapability(MVKShaderStage stage) { @@ -2367,19 +2371,19 @@ VkResult MVKPipelineCache::writeDataImpl(size_t* pDataSize, void* pData) { void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) { #if MVK_USE_CEREAL MVKPerformanceTracker& perfTracker = isCounting - ? _device->_performanceStatistics.pipelineCache.sizePipelineCache - : _device->_performanceStatistics.pipelineCache.writePipelineCache; + ? getPerformanceStats().pipelineCache.sizePipelineCache + : getPerformanceStats().pipelineCache.writePipelineCache; uint32_t cacheEntryType; cereal::BinaryOutputArchive writer(outstream); // Write the data header...after ensuring correct byte-order. - const VkPhysicalDeviceProperties* pDevProps = _device->_pProperties; + auto& devProps = getDeviceProperties(); writer(NSSwapHostIntToLittle(kDataHeaderSize)); writer(NSSwapHostIntToLittle(VK_PIPELINE_CACHE_HEADER_VERSION_ONE)); - writer(NSSwapHostIntToLittle(pDevProps->vendorID)); - writer(NSSwapHostIntToLittle(pDevProps->deviceID)); - writer(pDevProps->pipelineCacheUUID); + writer(NSSwapHostIntToLittle(devProps.vendorID)); + writer(NSSwapHostIntToLittle(devProps.deviceID)); + writer(devProps.pipelineCacheUUID); // Shader libraries // Output a cache entry for each shader library, including the shader module key in each entry. @@ -2388,13 +2392,13 @@ void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) { MVKShaderModuleKey smKey = scPair.first; MVKShaderCacheIterator cacheIter(scPair.second); while (cacheIter.next()) { - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); writer(cacheEntryType); writer(smKey); writer(cacheIter.getShaderConversionConfig()); writer(cacheIter.getShaderConversionResultInfo()); writer(cacheIter.getCompressedMSL()); - _device->addPerformanceInterval(perfTracker, startTime); + addPerformanceInterval(perfTracker, startTime); } } @@ -2425,7 +2429,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) { // Read the data header...and ensure correct byte-order. uint32_t hdrComponent; uint8_t pcUUID[VK_UUID_SIZE]; - const VkPhysicalDeviceProperties* pDevProps = _device->_pProperties; + auto& dvcProps = getDeviceProperties(); reader(hdrComponent); // Header size if (NSSwapLittleIntToHost(hdrComponent) != kDataHeaderSize) { return; } @@ -2434,20 +2438,20 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) { if (NSSwapLittleIntToHost(hdrComponent) != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) { return; } reader(hdrComponent); // Vendor ID - if (NSSwapLittleIntToHost(hdrComponent) != pDevProps->vendorID) { return; } + if (NSSwapLittleIntToHost(hdrComponent) != dvcProps.vendorID) { return; } reader(hdrComponent); // Device ID - if (NSSwapLittleIntToHost(hdrComponent) != pDevProps->deviceID) { return; } + if (NSSwapLittleIntToHost(hdrComponent) != dvcProps.deviceID) { return; } reader(pcUUID); // Pipeline cache UUID - if ( !mvkAreEqual(pcUUID, pDevProps->pipelineCacheUUID, VK_UUID_SIZE) ) { return; } + if ( !mvkAreEqual(pcUUID, dvcProps.pipelineCacheUUID, VK_UUID_SIZE) ) { return; } bool done = false; while ( !done ) { reader(cacheEntryType); switch (cacheEntryType) { case MVKPipelineCacheEntryTypeShaderLibrary: { - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); MVKShaderModuleKey smKey; reader(smKey); @@ -2463,7 +2467,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) { // Add the shader library to the staging cache. MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey); - _device->addPerformanceInterval(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime); + addPerformanceInterval(getPerformanceStats().pipelineCache.readPipelineCache, startTime); slCache->addShaderLibrary(&shaderConversionConfig, resultInfo, compressedMSL); break; @@ -2726,7 +2730,7 @@ void serialize(Archive & archive, MVKCompressor& comp) { MVKPipelineCache::MVKPipelineCache(MVKDevice* device, const VkPipelineCacheCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device), - _isExternallySynchronized(device->_enabledPipelineCreationCacheControlFeatures.pipelineCreationCacheControl && + _isExternallySynchronized(getEnabledPipelineCreationCacheControlFeatures().pipelineCreationCacheControl && mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT)) { readData(pCreateInfo); @@ -2745,7 +2749,7 @@ id MVKRenderPipelineCompiler::newMTLRenderPipelineState( unique_lock lock(_completionLock); compile(lock, ^{ - auto mtlDev = _owner->getMTLDevice(); + auto mtlDev = getMTLDevice(); @synchronized (mtlDev) { [mtlDev newRenderPipelineStateWithDescriptor: mtlRPLDesc completionHandler: ^(id ps, NSError* error) { @@ -2779,7 +2783,7 @@ id MVKComputePipelineCompiler::newMTLComputePipelineSta unique_lock lock(_completionLock); compile(lock, ^{ - auto mtlDev = _owner->getMTLDevice(); + auto mtlDev = getMTLDevice(); @synchronized (mtlDev) { [mtlDev newComputePipelineStateWithFunction: mtlFunction completionHandler: ^(id ps, NSError* error) { @@ -2796,7 +2800,7 @@ id MVKComputePipelineCompiler::newMTLComputePipelineSta unique_lock lock(_completionLock); compile(lock, ^{ - auto mtlDev = _owner->getMTLDevice(); + auto mtlDev = getMTLDevice(); @synchronized (mtlDev) { [mtlDev newComputePipelineStateWithDescriptor: plDesc options: MTLPipelineOptionNone diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm index 4e6c89aa..42685760 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm @@ -293,7 +293,7 @@ void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer* // In multiview passes, one query is used for each view. NSUInteger queryCount = cmdBuffer->getViewCount(); NSUInteger offset = getVisibilityResultOffset(query); - NSUInteger maxOffset = getDevice()->_pMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes * queryCount; + NSUInteger maxOffset = getMetalFeatures().maxQueryBufferSize - kMVKQuerySlotSizeInBytes * queryCount; if (offset > maxOffset) { cmdBuffer->setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The query offset value %lu is larger than the maximum offset value %lu available on this device.", offset, maxOffset)); } @@ -311,8 +311,9 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device, _queryIndexOffset = 0; // Ensure we don't overflow the maximum number of queries + auto& mtlFeats = getMetalFeatures(); VkDeviceSize reqBuffLen = (VkDeviceSize)pCreateInfo->queryCount * kMVKQuerySlotSizeInBytes; - VkDeviceSize maxBuffLen = _device->_pMetalFeatures->maxQueryBufferSize; + VkDeviceSize maxBuffLen = mtlFeats.maxQueryBufferSize; VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen); if (reqBuffLen > maxBuffLen) { @@ -321,7 +322,7 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device, uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes)); } - NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment); + NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, mtlFeats.mtlBufferAlignment); MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache; _visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained @@ -463,7 +464,7 @@ MVKTimestampQueryPool::MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoo MVKPipelineStatisticsQueryPool::MVKPipelineStatisticsQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo) : MVKGPUCounterQueryPool(device, pCreateInfo) { - if ( !_device->_enabledFeatures.pipelineStatisticsQuery ) { + if ( !getEnabledFeatures().pipelineStatisticsQuery ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateQueryPool: VK_QUERY_TYPE_PIPELINE_STATISTICS is not supported.")); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index 33254634..3f72538d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -184,7 +184,7 @@ public: } MVKSemaphoreSubmitInfo; /** This is an abstract class for an operation that can be submitted to an MVKQueue. */ -class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { +class MVKQueueSubmission : public MVKBaseDeviceObject, public MVKConfigurableMixin { public: diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index db424d2e..79aee679 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -154,8 +154,7 @@ VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) { id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) { id mtlCmdBuff = nil; - MVKDevice* mvkDev = getDevice(); - uint64_t startTime = mvkDev->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); #if MVK_XCODE_12 if ([_mtlQueue respondsToSelector: @selector(commandBufferWithDescriptor:)]) { MTLCommandBufferDescriptor* mtlCmdBuffDesc = [MTLCommandBufferDescriptor new]; // temp retain @@ -172,7 +171,7 @@ id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool re } else { mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences]; } - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); + addPerformanceInterval(getPerformanceStats().queue.retrieveMTLCommandBuffer, startTime); NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse); setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { handleMTLCommandBufferError(mtlCB); }]; @@ -412,11 +411,12 @@ MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(VkCommandBuffer commandBu MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreInfoCount, - const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) { - _queue = queue; - _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) : + MVKBaseDeviceObject(queue->getDevice()), + _queue(queue) { - _creationTime = getDevice()->getPerformanceTimestamp(); // call getDevice() only after _queue is defined + _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + _creationTime = getPerformanceTimestamp(); _waitSemaphores.reserve(waitSemaphoreInfoCount); for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) { @@ -427,11 +427,12 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, const VkSemaphore* pWaitSemaphores, - const VkPipelineStageFlags* pWaitDstStageMask) { - _queue = queue; - _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + const VkPipelineStageFlags* pWaitDstStageMask) : + MVKBaseDeviceObject(queue->getDevice()), + _queue(queue) { - _creationTime = getDevice()->getPerformanceTimestamp(); // call getDevice() only after _queue is defined + _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + _creationTime = getPerformanceTimestamp(); _waitSemaphores.reserve(waitSemaphoreCount); for (uint32_t i = 0; i < waitSemaphoreCount; i++) { @@ -455,8 +456,7 @@ VkResult MVKQueueCommandBufferSubmission::execute() { for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); } // Wait time from an async vkQueueSubmit() call to starting submit and encoding of the command buffers - MVKDevice* mvkDev = getDevice(); - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, _creationTime); + addPerformanceInterval(_queue->getPerformanceStats().queue.waitSubmitCommandBuffers, _creationTime); // Submit each command buffer. submitCommandBuffers(); @@ -518,10 +518,9 @@ VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool sign id mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer; _activeMTLCommandBuffer = nil; - MVKDevice* mvkDev = getDevice(); - uint64_t startTime = mvkDev->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); + addPerformanceInterval(getPerformanceStats().queue.mtlCommandBufferExecution, startTime); if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; @@ -638,12 +637,11 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() { template void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { - MVKDevice* mvkDev = getDevice(); - uint64_t startTime = mvkDev->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); for (auto& cbInfo : _cmdBuffers) { cbInfo.commandBuffer->submit(this, &_encodingContext); } - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); + addPerformanceInterval(getPerformanceStats().queue.submitCommandBuffers, startTime); } template @@ -699,8 +697,7 @@ VkResult MVKQueuePresentSurfaceSubmission::execute() { } // Wait time from an async vkQueuePresentKHR() call to starting presentation of the swapchains - MVKDevice* mvkDev = getDevice(); - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, _creationTime); + addPerformanceInterval(getPerformanceStats().queue.waitPresentSwapchains, _creationTime); for (int i = 0; i < _presentInfo.size(); i++ ) { setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i])); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 6fabf293..1ff74a48 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -263,7 +263,7 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* // Vulkan supports rendering without attachments, but older Metal does not. // If Metal does not support rendering without attachments, create a dummy attachment to pass Metal validation. if (caUsedCnt == 0 && depthRPAttIdx == VK_ATTACHMENT_UNUSED && stencilRPAttIdx == VK_ATTACHMENT_UNUSED) { - if (_renderPass->getDevice()->_pMetalFeatures->renderWithoutAttachments) { + if (_renderPass->getMetalFeatures().renderWithoutAttachments) { mtlRPDesc.defaultRasterSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_defaultSampleCount); } else { MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPDesc.colorAttachments[0]; @@ -282,7 +282,7 @@ void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder, MVKArrayRef attachments, bool storeOverride) { if (!cmdEncoder->_mtlRenderEncoder) { return; } - if (!_renderPass->getDevice()->_pMetalFeatures->deferredStoreActions) { return; } + if (!_renderPass->getMetalFeatures().deferredStoreActions) { return; } MVKPixelFormats* pixFmts = _renderPass->getPixelFormats(); uint32_t caCnt = getColorAttachmentCount(); @@ -414,7 +414,7 @@ void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEnco [mtlComputeEnc setTexture: caImgView->getMTLTexture() atIndex: 1]; MTLSize gridSize = mvkMTLSizeFromVkExtent3D(raImgView->getExtent3D()); MTLSize tgSize = MTLSizeMake(mtlRslvState.threadExecutionWidth, 1, 1); - if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { + if (cmdEncoder->getMetalFeatures().nonUniformThreadgroups) { [mtlComputeEnc dispatchThreads: gridSize threadsPerThreadgroup: tgSize]; } else { MTLSize tgCount = MTLSizeMake(gridSize.width / tgSize.width, gridSize.height, gridSize.depth); @@ -676,7 +676,7 @@ bool MVKAttachmentDescription::populateMTLRenderPassAttachmentDescriptor(MTLRend // If the device supports late-specified store actions, we'll use those, and then set them later. // That way, if we wind up doing a tessellated draw, we can set the store action to store then, // and then when the render pass actually ends, we can use the true store action. - if ( _renderPass->getDevice()->_pMetalFeatures->deferredStoreActions ) { + if (_renderPass->getMetalFeatures().deferredStoreActions) { mtlAttDesc.storeAction = MTLStoreActionUnknown; } else { // For a combined depth-stencil format in an attachment with VK_IMAGE_ASPECT_STENCIL_BIT, @@ -777,7 +777,7 @@ MTLStoreAction MVKAttachmentDescription::getMTLStoreAction(MVKRenderSubpass* sub } // If a resolve attachment exists, this attachment must resolve once complete. - if (hasResolveAttachment && canResolveFormat && !_renderPass->getDevice()->_pMetalFeatures->combinedStoreResolveAction) { + if (hasResolveAttachment && canResolveFormat && !_renderPass->getMetalFeatures().combinedStoreResolveAction) { return MTLStoreActionMultisampleResolve; } // Memoryless can't be stored. @@ -926,7 +926,7 @@ MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency2& spDep, co viewOffset(spDep.viewOffset) {} VkExtent2D MVKRenderPass::getRenderAreaGranularity() { - if (_device->_pMetalFeatures->tileBasedDeferredRendering) { + if (getMetalFeatures().tileBasedDeferredRendering) { // This is the tile area. // FIXME: We really ought to use MTLRenderCommandEncoder.tile{Width,Height}, but that requires // creating a command buffer. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h index 45927e4c..eab6a43b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h @@ -60,7 +60,7 @@ private: const MVKMTLFunction MVKMTLFunctionNull(nil, SPIRVToMSLConversionResultInfo(), MTLSizeMake(1, 1, 1)); /** Wraps a single MTLLibrary. */ -class MVKShaderLibrary : public MVKBaseObject { +class MVKShaderLibrary : public MVKBaseDeviceObject { public: @@ -128,7 +128,7 @@ protected: #pragma mark MVKShaderLibraryCache /** Represents a cache of shader libraries for one shader module. */ -class MVKShaderLibraryCache : public MVKBaseObject { +class MVKShaderLibraryCache : public MVKBaseDeviceObject { public: @@ -149,7 +149,7 @@ public: bool* pWasAdded, VkPipelineCreationFeedback* pShaderFeedback, uint64_t startTime = 0); - MVKShaderLibraryCache(MVKVulkanAPIDeviceObject* owner) : _owner(owner) {}; + MVKShaderLibraryCache(MVKVulkanAPIDeviceObject* owner) : MVKBaseDeviceObject(owner->getDevice()), _owner(owner) {}; ~MVKShaderLibraryCache() override; @@ -273,7 +273,7 @@ public: MVKShaderLibraryCompiler(MVKVulkanAPIDeviceObject* owner) : MVKMetalCompiler(owner) { _compilerType = "Shader library"; - _pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.mslCompile; + _pPerformanceTracker = &getPerformanceStats().shaderCompilation.mslCompile; } ~MVKShaderLibraryCompiler() override; @@ -311,7 +311,7 @@ public: MVKFunctionSpecializer(MVKVulkanAPIDeviceObject* owner) : MVKMetalCompiler(owner) { _compilerType = "Function specialization"; - _pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.functionSpecialization; + _pPerformanceTracker = &getPerformanceStats().shaderCompilation.functionSpecialization; } ~MVKFunctionSpecializer() override; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index 0d02baae..da62a2e8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -75,14 +75,13 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe if ( !_mtlLibrary ) { return MVKMTLFunctionNull; } - @synchronized (_owner->getMTLDevice()) { + @synchronized (getMTLDevice()) { @autoreleasepool { NSString* mtlFuncName = @(_shaderConversionResultInfo.entryPoint.mtlFunctionName.c_str()); - MVKDevice* mvkDev = _owner->getDevice(); - uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : mvkDev->getPerformanceTimestamp(); + uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : getPerformanceTimestamp(); id mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease]; - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.functionRetrieval, startTime); if (pShaderFeedback) { if (mtlFunc) { mvkEnableFlags(pShaderFeedback->flags, VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT); @@ -94,7 +93,7 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe // If the Metal device supports shader specialization, and the Metal function expects to be specialized, // populate Metal function constant values from the Vulkan specialization info, and compile a specialized // Metal function, otherwise simply use the unspecialized Metal function. - if (mvkDev->_pMetalFeatures->shaderSpecialization) { + if (getMetalFeatures().shaderSpecialization) { NSArray* mtlFCs = mtlFunc.functionConstantsDictionary.allValues; if (mtlFCs.count > 0) { // The Metal shader contains function constants and expects to be specialized. @@ -155,22 +154,23 @@ void MVKShaderLibrary::setWorkgroupSize(uint32_t x, uint32_t y, uint32_t z) { // Sets the cached MSL source code, after first compressing it. void MVKShaderLibrary::compressMSL(const string& msl) { - MVKDevice* mvkDev = _owner->getDevice(); - uint64_t startTime = mvkDev->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); _compressedMSL.compress(msl, getMVKConfig().shaderSourceCompressionAlgorithm); - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompress, startTime); } // Decompresses the cached MSL into the string. void MVKShaderLibrary::decompressMSL(string& msl) { - MVKDevice* mvkDev = _owner->getDevice(); - uint64_t startTime = mvkDev->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); _compressedMSL.decompress(msl); - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.mslDecompress, startTime); } MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, - const SPIRVToMSLConversionResult& conversionResult) : _owner(owner) { + const SPIRVToMSLConversionResult& conversionResult) : + MVKBaseDeviceObject(owner->getDevice()), + _owner(owner) { + _shaderConversionResultInfo = conversionResult.resultInfo; compressMSL(conversionResult.msl); compileLibrary(conversionResult.msl); @@ -178,7 +178,10 @@ MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, const SPIRVToMSLConversionResultInfo& resultInfo, - const MVKCompressor compressedMSL) : _owner(owner) { + const MVKCompressor compressedMSL) : + MVKBaseDeviceObject(owner->getDevice()), + _owner(owner) { + _shaderConversionResultInfo = resultInfo; _compressedMSL = compressedMSL; string msl; @@ -196,24 +199,28 @@ void MVKShaderLibrary::compileLibrary(const string& msl) { MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, const void* mslCompiledCodeData, - size_t mslCompiledCodeLength) : _owner(owner) { - MVKDevice* mvkDev = _owner->getDevice(); - uint64_t startTime = mvkDev->getPerformanceTimestamp(); + size_t mslCompiledCodeLength) : + MVKBaseDeviceObject(owner->getDevice()), + _owner(owner) { + + uint64_t startTime = getPerformanceTimestamp(); @autoreleasepool { dispatch_data_t shdrData = dispatch_data_create(mslCompiledCodeData, mslCompiledCodeLength, NULL, DISPATCH_DATA_DESTRUCTOR_DEFAULT); NSError* err = nil; - _mtlLibrary = [mvkDev->getMTLDevice() newLibraryWithData: shdrData error: &err]; // retained + _mtlLibrary = [getMTLDevice() newLibraryWithData: shdrData error: &err]; // retained handleCompilationError(err, "Compiled shader module creation"); [shdrData release]; } - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.mslLoad, startTime); } -MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) { - _owner = other._owner; +MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) : + MVKBaseDeviceObject(other._device), + _owner(other._owner) { + _mtlLibrary = [other._mtlLibrary retain]; _shaderConversionResultInfo = other._shaderConversionResultInfo; _compressedMSL = other._compressedMSL; @@ -284,8 +291,7 @@ MVKShaderLibrary* MVKShaderLibraryCache::findShaderLibrary(SPIRVToMSLConversionC for (auto& slPair : _shaderLibraries) { if (slPair.first.matches(*pShaderConfig)) { pShaderConfig->alignWith(slPair.first); - MVKDevice* mvkDev = _owner->getDevice(); - mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.shaderLibraryFromCache, startTime); if (pShaderFeedback) { pShaderFeedback->duration += mvkGetElapsedNanoseconds(startTime); } @@ -337,7 +343,7 @@ MVKMTLFunction MVKShaderModule::getMTLFunction(SPIRVToMSLConversionConfiguration VkPipelineCreationFeedback* pShaderFeedback) { MVKShaderLibrary* mvkLib = _directMSLLibrary; if ( !mvkLib ) { - uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : _device->getPerformanceTimestamp(); + uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : getPerformanceTimestamp(); MVKPipelineCache* pipelineCache = pipeline->getPipelineCache(); if (pipelineCache) { mvkLib = pipelineCache->getShaderLibrary(pShaderConfig, this, pipeline, pShaderFeedback, startTime); @@ -363,9 +369,9 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig, if ( !_spvConverter.hasSPIRV() && _glslConverter.hasGLSL() ) { GLSLToSPIRVConversionResult glslConversionResult; - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); bool wasConverted = _glslConverter.convert(getMVKGLSLConversionShaderStage(pShaderConfig), glslConversionResult, shouldLogCode, false); - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.glslToSPRIV, startTime); if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", glslConversionResult.resultLog.c_str()); } @@ -376,9 +382,9 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig, shouldLogEstimatedGLSL = false; } - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); bool wasConverted = _spvConverter.convert(*pShaderConfig, conversionResult, shouldLogCode, shouldLogCode, shouldLogEstimatedGLSL); - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.spirvToMSL, startTime); const char* dumpDir = getMVKConfig().shaderDumpDir; if (dumpDir && *dumpDir) { @@ -472,9 +478,9 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, case kMVKMagicNumberSPIRVCode: { // SPIR-V code size_t spvCount = (codeSize + 3) >> 2; // Round up if byte length not exactly on uint32_t boundary - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); codeHash = mvkHash(pCreateInfo->pCode, spvCount); - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime); _spvConverter.setSPIRV(pCreateInfo->pCode, spvCount); @@ -485,10 +491,10 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, char* pMSLCode = (char*)(uintptr_t(pCreateInfo->pCode) + hdrSize); size_t mslCodeLen = codeSize - hdrSize; - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime); SPIRVToMSLConversionResult conversionResult; conversionResult.msl = pMSLCode; @@ -501,23 +507,23 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device, char* pMSLCode = (char*)(uintptr_t(pCreateInfo->pCode) + hdrSize); size_t mslCodeLen = codeSize - hdrSize; - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime); _directMSLLibrary = new MVKShaderLibrary(this, (void*)(pMSLCode), mslCodeLen); break; } default: // Could be GLSL source code - if (_device->_enabledExtensions.vk_NV_glsl_shader.enabled) { + if (getEnabledExtensions().vk_NV_glsl_shader.enabled) { const char* pGLSL = (char*)pCreateInfo->pCode; size_t glslLen = codeSize - 1; - uint64_t startTime = _device->getPerformanceTimestamp(); + uint64_t startTime = getPerformanceTimestamp(); codeHash = mvkHash(pGLSL, codeSize); - _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime); _glslConverter.setGLSL(pGLSL, glslLen); } else { @@ -542,9 +548,9 @@ id MVKShaderLibraryCompiler::newMTLLibrary(NSString* mslSourceCode, unique_lock lock(_completionLock); compile(lock, ^{ - auto mtlDev = _owner->getMTLDevice(); + auto mtlDev = getMTLDevice(); @synchronized (mtlDev) { - auto mtlCompileOptions = _owner->getDevice()->getMTLCompileOptions(shaderConversionResults.entryPoint.supportsFastMath, + auto mtlCompileOptions = getDevice()->getMTLCompileOptions(shaderConversionResults.entryPoint.supportsFastMath, shaderConversionResults.isPositionInvariant); MVKLogInfoIf(getMVKConfig().debugMode, "Compiling Metal shader%s.", mtlCompileOptions.fastMathEnabled ? " with FastMath enabled" : ""); [mtlDev newLibraryWithSource: mslSourceCode diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index 855b7986..973f5b0b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -171,7 +171,7 @@ void MVKSwapchain::markFrameInterval() { if (prevFrameTime == 0) { return; } // First frame starts at first presentation - _device->updateActivityPerformance(_device->_performanceStatistics.queue.frameInterval, mvkGetElapsedMilliseconds(prevFrameTime, _lastFrameTime)); + addPerformanceInterval(getPerformanceStats().queue.frameInterval, prevFrameTime, _lastFrameTime, true); auto& mvkCfg = getMVKConfig(); bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; @@ -179,7 +179,7 @@ void MVKSwapchain::markFrameInterval() { _currentPerfLogFrameCount = 0; MVKLogInfo("Performance statistics reporting every: %d frames, avg FPS: %.2f, elapsed time: %.3f seconds:", mvkCfg.performanceLoggingFrameCount, - (1000.0 / _device->_performanceStatistics.queue.frameInterval.average), + (1000.0 / getPerformanceStats().queue.frameInterval.average), mvkGetElapsedMilliseconds() / 1000.0); if (getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) { _device->logPerformanceSummary(); @@ -419,9 +419,10 @@ MVKSwapchain::MVKSwapchain(MVKDevice* device, const VkSwapchainCreateInfoKHR* pC } } + auto& mtlFeats = getMetalFeatures(); uint32_t imgCnt = mvkClamp(pCreateInfo->minImageCount, - _device->_pMetalFeatures->minSwapchainImageCount, - _device->_pMetalFeatures->maxSwapchainImageCount); + mtlFeats.minSwapchainImageCount, + mtlFeats.maxSwapchainImageCount); initCAMetalLayer(pCreateInfo, pScalingInfo, imgCnt); initSurfaceImages(pCreateInfo, imgCnt); // After initCAMetalLayer() } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h index c9f1c722..11de708c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h @@ -605,7 +605,7 @@ VkResult mvkWaitSemaphores(MVKDevice* device, * * Instances of this class are one-shot, and can only be used for a single compilation. */ -class MVKMetalCompiler : public MVKBaseObject { +class MVKMetalCompiler : public MVKBaseDeviceObject { public: @@ -618,7 +618,7 @@ public: #pragma mark Construction - MVKMetalCompiler(MVKVulkanAPIDeviceObject* owner) : _owner(owner) {} + MVKMetalCompiler(MVKVulkanAPIDeviceObject* owner) : MVKBaseDeviceObject(owner->getDevice()), _owner(owner) {} ~MVKMetalCompiler() override; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm index 35532b31..29d367a5 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm @@ -141,10 +141,10 @@ MVKSemaphoreMTLEvent::MVKSemaphoreMTLEvent(MVKDevice* device, _mtlEvent = [pImportInfo->mtlSharedEvent retain]; // retained _mtlEventValue = pImportInfo->mtlSharedEvent.signaledValue + 1; } else if (pExportInfo && pExportInfo->exportObjectType == VK_EXPORT_METAL_OBJECT_TYPE_METAL_SHARED_EVENT_BIT_EXT) { - _mtlEvent = [device->getMTLDevice() newSharedEvent]; //retained + _mtlEvent = [getMTLDevice() newSharedEvent]; //retained _mtlEventValue = ((id)_mtlEvent).signaledValue + 1; } else { - _mtlEvent = [device->getMTLDevice() newEvent]; //retained + _mtlEvent = [getMTLDevice() newEvent]; //retained _mtlEventValue = 1; } } @@ -241,7 +241,7 @@ MVKTimelineSemaphoreMTLEvent::MVKTimelineSemaphoreMTLEvent(MVKDevice* device, // Import or create a Metal event _mtlEvent = (pImportInfo && pImportInfo->mtlSharedEvent ? [pImportInfo->mtlSharedEvent retain] - : [device->getMTLDevice() newSharedEvent]); //retained + : [getMTLDevice() newSharedEvent]); //retained if (pTypeCreateInfo) { _mtlEvent.signaledValue = pTypeCreateInfo->initialValue; @@ -423,7 +423,7 @@ MVKEventNative::MVKEventNative(MVKDevice* device, // Import or create a Metal event _mtlEvent = (pImportInfo ? [pImportInfo->mtlSharedEvent retain] - : [device->getMTLDevice() newSharedEvent]); //retained + : [getMTLDevice() newSharedEvent]); //retained } MVKEventNative::~MVKEventNative() { @@ -570,8 +570,7 @@ VkResult mvkWaitSemaphores(MVKDevice* device, void MVKMetalCompiler::compile(unique_lock& lock, dispatch_block_t block) { MVKAssert( _startTime == 0, "%s compile occurred already in this instance. Instances of %s should only be used for a single compile activity.", _compilerType.c_str(), getClassName().c_str()); - MVKDevice* mvkDev = _owner->getDevice(); - _startTime = mvkDev->getPerformanceTimestamp(); + _startTime = getPerformanceTimestamp(); dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ @autoreleasepool { block(); } }); @@ -588,7 +587,7 @@ void MVKMetalCompiler::compile(unique_lock& lock, dispatch_block_t block) if (_compileError) { handleError(); } - mvkDev->addPerformanceInterval(*_pPerformanceTracker, _startTime); + addPerformanceInterval(*_pPerformanceTracker, _startTime); } void MVKMetalCompiler::handleError() {