diff --git a/Common/MVKCommonEnvironment.h b/Common/MVKCommonEnvironment.h index 4fe27b4f..d1531261 100644 --- a/Common/MVKCommonEnvironment.h +++ b/Common/MVKCommonEnvironment.h @@ -75,6 +75,9 @@ extern "C" { /** Directive to identify public symbols. */ #define MVK_PUBLIC_SYMBOL __attribute__((visibility("default"))) +/** Directive to make a public alias of another symbol. */ +#define MVK_PUBLIC_ALIAS(ALIAS, TARGET) asm(".globl _" #ALIAS "\n\t_" #ALIAS " = _" #TARGET) + #ifdef __cplusplus } diff --git a/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/project.pbxproj b/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/project.pbxproj index a573e925..793b789d 100644 --- a/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/project.pbxproj +++ b/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/project.pbxproj @@ -537,7 +537,11 @@ 29B97313FDCFA39411CA2CEA /* Project object */ = { isa = PBXProject; attributes = { +<<<<<<< HEAD LastUpgradeCheck = 1200; +======= + LastUpgradeCheck = 1170; +>>>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa }; buildConfigurationList = C01FCF4E08A954540054247B /* Build configuration list for PBXProject "API-Samples" */; compatibilityVersion = "Xcode 8.0"; diff --git a/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/xcshareddata/xcschemes/API-Samples-iOS.xcscheme b/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/xcshareddata/xcschemes/API-Samples-iOS.xcscheme index 6dfcf408..72c7418e 100644 --- a/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/xcshareddata/xcschemes/API-Samples-iOS.xcscheme +++ b/Demos/LunarG-VulkanSamples/API-Samples/API-Samples.xcodeproj/xcshareddata/xcschemes/API-Samples-iOS.xcscheme @@ -1,6 +1,10 @@ >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa version = "2.0"> >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa version = "2.0"> >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa }; buildConfigurationList = C01FCF4E08A954540054247B /* Build 
configuration list for PBXProject "Cube" */; compatibilityVersion = "Xcode 8.0"; diff --git a/Demos/LunarG-VulkanSamples/Cube/Cube.xcodeproj/xcshareddata/xcschemes/Cube-iOS.xcscheme b/Demos/LunarG-VulkanSamples/Cube/Cube.xcodeproj/xcshareddata/xcschemes/Cube-iOS.xcscheme index 90782be8..8b26c54d 100644 --- a/Demos/LunarG-VulkanSamples/Cube/Cube.xcodeproj/xcshareddata/xcschemes/Cube-iOS.xcscheme +++ b/Demos/LunarG-VulkanSamples/Cube/Cube.xcodeproj/xcshareddata/xcschemes/Cube-iOS.xcscheme @@ -1,6 +1,10 @@ >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa version = "2.0"> >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa version = "2.0"> >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa version = "2.0"> >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa }; buildConfigurationList = C01FCF4E08A954540054247B /* Build configuration list for PBXProject "Hologram" */; compatibilityVersion = "Xcode 8.0"; diff --git a/Demos/LunarG-VulkanSamples/Hologram/Hologram.xcodeproj/xcshareddata/xcschemes/Hologram-iOS.xcscheme b/Demos/LunarG-VulkanSamples/Hologram/Hologram.xcodeproj/xcshareddata/xcschemes/Hologram-iOS.xcscheme index b045bc2d..50ff9d4f 100644 --- a/Demos/LunarG-VulkanSamples/Hologram/Hologram.xcodeproj/xcshareddata/xcschemes/Hologram-iOS.xcscheme +++ b/Demos/LunarG-VulkanSamples/Hologram/Hologram.xcodeproj/xcshareddata/xcschemes/Hologram-iOS.xcscheme @@ -1,6 +1,10 @@ >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa version = "2.0"> >>>>>> d1353632775b86abd4a527c9f6114dda2d4405fa version = "2.0"> encodeStoreActions(true); mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents); @@ -243,17 +242,20 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->_graphicsResourcesState.beginMetalRenderPass(); 
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass(); } else { + MVKRenderSubpass* subpass = cmdEncoder->getSubpass(); + uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1; + uint32_t instanceCount = _instanceCount * viewCount; if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType vertexStart: _firstVertex vertexCount: _vertexCount - instanceCount: _instanceCount + instanceCount: instanceCount baseInstance: _firstInstance]; } else { [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType vertexStart: _firstVertex vertexCount: _vertexCount - instanceCount: _instanceCount]; + instanceCount: instanceCount]; } } break; @@ -328,7 +330,6 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { switch (stage) { case kMVKGraphicsStageVertex: { - cmdEncoder->encodeStoreActions(true); mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents); @@ -440,13 +441,16 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->_graphicsResourcesState.beginMetalRenderPass(); cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass(); } else { + MVKRenderSubpass* subpass = cmdEncoder->getSubpass(); + uint32_t viewCount = subpass->isMultiview() ? 
subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1; + uint32_t instanceCount = _instanceCount * viewCount; if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) { [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType indexCount: _indexCount indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer indexBufferOffset: idxBuffOffset - instanceCount: _instanceCount + instanceCount: instanceCount baseVertex: _vertexOffset baseInstance: _firstInstance]; } else { @@ -455,7 +459,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) { indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer indexBufferOffset: idxBuffOffset - instanceCount: _instanceCount]; + instanceCount: instanceCount]; } } break; @@ -499,11 +503,13 @@ static const uint32_t kMVKDrawIndirectVertexCountUpperBound = 131072; void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + bool needsInstanceAdjustment = cmdEncoder->getSubpass()->isMultiview() && + cmdEncoder->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview(); // The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats. // We have to convert from the drawPrimitives:... format to them. // While we're at it, we can create the temporary output buffers once and reuse them // for each draw. 
- const MVKMTLBufferAllocation* tcIndirectBuff = nullptr; + const MVKMTLBufferAllocation* tempIndirectBuff = nullptr; const MVKMTLBufferAllocation* tcParamsBuff = nullptr; const MVKMTLBufferAllocation* vtxOutBuff = nullptr; const MVKMTLBufferAllocation* tcOutBuff = nullptr; @@ -513,7 +519,8 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { uint32_t inControlPointCount = 0, outControlPointCount = 0; VkDeviceSize paramsIncr = 0; - VkDeviceSize mtlTCIndBuffOfst = 0; + id mtlIndBuff = _mtlIndirectBuffer; + VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset; VkDeviceSize mtlParmBuffOfst = 0; NSUInteger vtxThreadExecWidth = 0; NSUInteger tcWorkgroupSize = 0; @@ -533,8 +540,9 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { } paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; - tcIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); - mtlTCIndBuffOfst = tcIndirectBuff->_offset; + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + mtlIndBuff = tempIndirectBuff->_mtlBuffer; + mtlIndBuffOfst = tempIndirectBuff->_offset; tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize); mtlParmBuffOfst = tcParamsBuff->_offset; if (pipeline->needsVertexOutputBuffer()) { @@ -555,31 +563,35 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { sgSize >>= 1; tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); } + } else if (needsInstanceAdjustment) { + // In this case, we need to adjust the instance count for the views being drawn. 
+ VkDeviceSize indirectSize = sizeof(MTLDrawPrimitivesIndirectArguments) * _drawCount; + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + mtlIndBuff = tempIndirectBuff->_mtlBuffer; + mtlIndBuffOfst = tempIndirectBuff->_offset; } MVKPiplineStages stages; pipeline->getStages(stages); - VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset; - for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) { for (uint32_t s : stages) { auto stage = MVKGraphicsStage(s); id mtlTessCtlEncoder = nil; - if (drawIdx == 0 && stage == kMVKGraphicsStageVertex) { + if (drawIdx == 0 && stage == kMVKGraphicsStageVertex && pipeline->isTessellationPipeline()) { // We need the indirect buffers now. This must be done before finalizing // draw state, or the pipeline will get overridden. This is a good time // to do it, since it will require switching to compute anyway. Do it all // at once to get it over with. cmdEncoder->encodeStoreActions(true); mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); - id mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectConvertBuffersMTLComputePipelineState(false); + id mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(false); [mtlTessCtlEncoder setComputePipelineState: mtlConvertState]; [mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer offset: _mtlIndirectBufferOffset atIndex: 0]; - [mtlTessCtlEncoder setBuffer: tcIndirectBuff->_mtlBuffer - offset: tcIndirectBuff->_offset + [mtlTessCtlEncoder setBuffer: tempIndirectBuff->_mtlBuffer + offset: tempIndirectBuff->_offset atIndex: 1]; [mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer offset: tcParamsBuff->_offset @@ -617,6 +629,45 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1) threadsPerThreadgroup: 
MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; } + } else if (drawIdx == 0 && needsInstanceAdjustment) { + // Similarly, for multiview, we need to adjust the instance count now. + // Unfortunately, this requires switching to compute. + // TODO: Consider using tile shaders to avoid this cost. + cmdEncoder->encodeStoreActions(true); + id mtlConvertEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseMultiviewInstanceCountAdjust); + id mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(false); + uint32_t viewCount; + [mtlConvertEncoder setComputePipelineState: mtlConvertState]; + [mtlConvertEncoder setBuffer: _mtlIndirectBuffer + offset: _mtlIndirectBufferOffset + atIndex: 0]; + [mtlConvertEncoder setBuffer: tempIndirectBuff->_mtlBuffer + offset: tempIndirectBuff->_offset + atIndex: 1]; + cmdEncoder->setComputeBytes(mtlConvertEncoder, + &_mtlIndirectBufferStride, + sizeof(_mtlIndirectBufferStride), + 2); + cmdEncoder->setComputeBytes(mtlConvertEncoder, + &_drawCount, + sizeof(_drawCount), + 3); + viewCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()); + cmdEncoder->setComputeBytes(mtlConvertEncoder, + &viewCount, + sizeof(viewCount), + 4); + if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { +#if MVK_MACOS_OR_IOS + [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1) + threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; +#endif + } else { + [mtlConvertEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1) + threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; + } + // Switch back to rendering now, since we don't have compute stages to run anyway. 
+ cmdEncoder->beginMetalRenderPass(true); } cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal @@ -625,7 +676,6 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { switch (stage) { case kMVKGraphicsStageVertex: - cmdEncoder->encodeStoreActions(true); mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer @@ -635,14 +685,14 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { // We must assume we can read up to the maximum number of vertices. [mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)]; if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) { - [mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst]; - mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments); + [mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: mtlIndBuff + indirectBufferOffset: mtlIndBuffOfst]; + mtlIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments); } - [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst + [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff + indirectBufferOffset: mtlIndBuffOfst threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)]; - mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); + mtlIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); // Mark pipeline, resources, and tess control push constants as dirty // so I apply them during the next stage. 
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass(); @@ -674,10 +724,10 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { offset: vtxOutBuff->_offset atIndex: kMVKTessCtlInputBufferIndex]; } - [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst + [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff + indirectBufferOffset: mtlIndBuffOfst threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)]; - mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); + mtlIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); // Running this stage prematurely ended the render pass, so we have to start it up again. // TODO: On iOS, maybe we could use a tile shader to avoid this. cmdEncoder->beginMetalRenderPass(true); @@ -705,22 +755,22 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) { [cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount patchIndexBuffer: nil patchIndexBufferOffset: 0 - indirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst]; + indirectBuffer: mtlIndBuff + indirectBufferOffset: mtlIndBuffOfst]; #endif } - mtlTCIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments); - // Mark pipeline, resources, and tess control push constants as dirty + mtlIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments); + // Mark pipeline, resources, and vertex push constants as dirty // so I apply them during the next stage. 
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass(); cmdEncoder->_graphicsResourcesState.beginMetalRenderPass(); cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass(); } else { [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType - indirectBuffer: _mtlIndirectBuffer + indirectBuffer: mtlIndBuff indirectBufferOffset: mtlIndBuffOfst]; - mtlIndBuffOfst += _mtlIndirectBufferStride; + mtlIndBuffOfst += needsInstanceAdjustment ? sizeof(MTLDrawPrimitivesIndirectArguments) : _mtlIndirectBufferStride; } break; } @@ -759,11 +809,13 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { MVKIndexMTLBufferBinding& ibb = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding; auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline(); + bool needsInstanceAdjustment = cmdEncoder->getSubpass()->isMultiview() && + cmdEncoder->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview(); // The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats. // We have to convert from the drawIndexedPrimitives:... format to them. // While we're at it, we can create the temporary output buffers once and reuse them // for each draw. 
- const MVKMTLBufferAllocation* tcIndirectBuff = nullptr; + const MVKMTLBufferAllocation* tempIndirectBuff = nullptr; const MVKMTLBufferAllocation* tcParamsBuff = nullptr; const MVKMTLBufferAllocation* vtxOutBuff = nullptr; const MVKMTLBufferAllocation* tcOutBuff = nullptr; @@ -774,7 +826,9 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { uint32_t inControlPointCount = 0, outControlPointCount = 0; VkDeviceSize paramsIncr = 0; - VkDeviceSize mtlTCIndBuffOfst = 0; + id mtlIndBuff = _mtlIndirectBuffer; + VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset; + VkDeviceSize mtlTempIndBuffOfst = _mtlIndirectBufferOffset; VkDeviceSize mtlParmBuffOfst = 0; NSUInteger vtxThreadExecWidth = 0; NSUInteger tcWorkgroupSize = 0; @@ -794,9 +848,10 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { } paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; - tcIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); - mtlTCIndBuffOfst = tcIndirectBuff->_offset; - tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize); + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + mtlIndBuff = tempIndirectBuff->_mtlBuffer; + mtlTempIndBuffOfst = tempIndirectBuff->_offset; + tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize); mtlParmBuffOfst = tcParamsBuff->_offset; if (pipeline->needsVertexOutputBuffer()) { vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents); @@ -820,18 +875,22 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { sgSize >>= 1; tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); } + } else if (needsInstanceAdjustment) { + // In this case, we need to adjust the instance count for the views being drawn. 
+ VkDeviceSize indirectSize = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * _drawCount; + tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize); + mtlIndBuff = tempIndirectBuff->_mtlBuffer; + mtlTempIndBuffOfst = tempIndirectBuff->_offset; } MVKPiplineStages stages; pipeline->getStages(stages); - VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset; - for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) { for (uint32_t s : stages) { auto stage = MVKGraphicsStage(s); id mtlTessCtlEncoder = nil; - if (stage == kMVKGraphicsStageVertex) { + if (stage == kMVKGraphicsStageVertex && pipeline->isTessellationPipeline()) { cmdEncoder->encodeStoreActions(true); mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); // We need the indirect buffers now. This must be done before finalizing @@ -839,13 +898,13 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { // to do it, since it will require switching to compute anyway. Do it all // at once to get it over with. 
if (drawIdx == 0) { - id mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectConvertBuffersMTLComputePipelineState(true); + id mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(true); [mtlTessCtlEncoder setComputePipelineState: mtlConvertState]; [mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer offset: _mtlIndirectBufferOffset atIndex: 0]; - [mtlTessCtlEncoder setBuffer: tcIndirectBuff->_mtlBuffer - offset: tcIndirectBuff->_offset + [mtlTessCtlEncoder setBuffer: tempIndirectBuff->_mtlBuffer + offset: tempIndirectBuff->_offset atIndex: 1]; [mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer offset: tcParamsBuff->_offset @@ -891,10 +950,50 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { [mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer offset: mtlIndBuffOfst atIndex: 2]; - [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst + [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff + indirectBufferOffset: mtlTempIndBuffOfst threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)]; mtlIndBuffOfst += sizeof(MTLDrawIndexedPrimitivesIndirectArguments); + } else if (drawIdx == 0 && needsInstanceAdjustment) { + // Similarly, for multiview, we need to adjust the instance count now. + // Unfortunately, this requires switching to compute. Luckily, we don't also + // have to copy the index buffer. + // TODO: Consider using tile shaders to avoid this cost. 
+ cmdEncoder->encodeStoreActions(true); + id mtlConvertEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseMultiviewInstanceCountAdjust); + id mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(true); + uint32_t viewCount; + [mtlConvertEncoder setComputePipelineState: mtlConvertState]; + [mtlConvertEncoder setBuffer: _mtlIndirectBuffer + offset: _mtlIndirectBufferOffset + atIndex: 0]; + [mtlConvertEncoder setBuffer: tempIndirectBuff->_mtlBuffer + offset: tempIndirectBuff->_offset + atIndex: 1]; + cmdEncoder->setComputeBytes(mtlConvertEncoder, + &_mtlIndirectBufferStride, + sizeof(_mtlIndirectBufferStride), + 2); + cmdEncoder->setComputeBytes(mtlConvertEncoder, + &_drawCount, + sizeof(_drawCount), + 3); + viewCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()); + cmdEncoder->setComputeBytes(mtlConvertEncoder, + &viewCount, + sizeof(viewCount), + 4); + if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { +#if MVK_MACOS_OR_IOS + [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1) + threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; +#endif + } else { + [mtlConvertEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1) + threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)]; + } + // Switch back to rendering now, since we don't have compute stages to run anyway. 
+ cmdEncoder->beginMetalRenderPass(true); } cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal @@ -903,7 +1002,6 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { switch (stage) { case kMVKGraphicsStageVertex: - cmdEncoder->encodeStoreActions(true); mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer @@ -915,14 +1013,14 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]]; [mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)]; if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) { - [mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst]; - mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments); + [mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: mtlIndBuff + indirectBufferOffset: mtlTempIndBuffOfst]; + mtlTempIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments); } - [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst + [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff + indirectBufferOffset: mtlTempIndBuffOfst threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)]; - mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); + mtlTempIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); // Mark pipeline, resources, and tess control push constants as dirty // so I apply them during the next stage. 
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass(); @@ -954,10 +1052,10 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { offset: vtxOutBuff->_offset atIndex: kMVKTessCtlInputBufferIndex]; } - [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst + [mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff + indirectBufferOffset: mtlTempIndBuffOfst threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)]; - mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); + mtlTempIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments); // Running this stage prematurely ended the render pass, so we have to start it up again. // TODO: On iOS, maybe we could use a tile shader to avoid this. cmdEncoder->beginMetalRenderPass(true); @@ -985,12 +1083,12 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { [cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount patchIndexBuffer: nil patchIndexBufferOffset: 0 - indirectBuffer: tcIndirectBuff->_mtlBuffer - indirectBufferOffset: mtlTCIndBuffOfst]; + indirectBuffer: mtlIndBuff + indirectBufferOffset: mtlTempIndBuffOfst]; #endif } - mtlTCIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments); + mtlTempIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments); // Mark pipeline, resources, and tess control push constants as dirty // so I apply them during the next stage. cmdEncoder->_graphicsPipelineState.beginMetalRenderPass(); @@ -1001,9 +1099,9 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) { indexType: (MTLIndexType)ibb.mtlIndexType indexBuffer: ibb.mtlBuffer indexBufferOffset: ibb.offset - indirectBuffer: _mtlIndirectBuffer - indirectBufferOffset: mtlIndBuffOfst]; - mtlIndBuffOfst += _mtlIndirectBufferStride; + indirectBuffer: mtlIndBuff + indirectBufferOffset: mtlTempIndBuffOfst]; + mtlTempIndBuffOfst += needsInstanceAdjustment ? 
sizeof(MTLDrawIndexedPrimitivesIndirectArguments) : _mtlIndirectBufferStride; } break; } diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm index f5360acb..b8de9319 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm @@ -52,7 +52,13 @@ VkResult MVKCmdBeginQuery::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdBeginQuery::encode(MVKCommandEncoder* cmdEncoder) { - _queryPool->beginQuery(_query, _flags, cmdEncoder); + // In a multiview render pass, multiple queries are produced, one for each view. + // Therefore, when encoding, we must offset the query by the number of views already + // drawn in all previous Metal passes. + uint32_t query = _query; + if (cmdEncoder->getMultiviewPassIndex() > 0) + query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1); + _queryPool->beginQuery(query, _flags, cmdEncoder); } @@ -60,7 +66,10 @@ void MVKCmdBeginQuery::encode(MVKCommandEncoder* cmdEncoder) { #pragma mark MVKCmdEndQuery void MVKCmdEndQuery::encode(MVKCommandEncoder* cmdEncoder) { - _queryPool->endQuery(_query, cmdEncoder); + uint32_t query = _query; + if (cmdEncoder->getMultiviewPassIndex() > 0) + query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1); + _queryPool->endQuery(query, cmdEncoder); } @@ -80,7 +89,10 @@ VkResult MVKCmdWriteTimestamp::setContent(MVKCommandBuffer* cmdBuff, } void MVKCmdWriteTimestamp::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->markTimestamp(_queryPool, _query); + uint32_t query = _query; + if (cmdEncoder->getMultiviewPassIndex() > 0) + query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1); + cmdEncoder->markTimestamp(_queryPool, query); } diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h index 76573a6b..1dd2ea5a 100644 --- 
a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h @@ -28,6 +28,31 @@ class MVKRenderPass; class MVKFramebuffer; +#pragma mark - +#pragma mark MVKCmdBeginRenderPassBase + +/** + * Abstract base class of MVKCmdBeginRenderPass. + * Contains all pieces that are independent of the templated portions. + */ +class MVKCmdBeginRenderPassBase : public MVKCommand { + +public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + + inline MVKRenderPass* getRenderPass() { return _renderPass; } + +protected: + + MVKRenderPass* _renderPass; + MVKFramebuffer* _framebuffer; + VkRect2D _renderArea; + VkSubpassContents _contents; +}; + + #pragma mark - #pragma mark MVKCmdBeginRenderPass @@ -36,12 +61,15 @@ class MVKFramebuffer; * Template class to balance vector pre-allocations between very common low counts and fewer larger counts. */ template -class MVKCmdBeginRenderPass : public MVKCommand { +class MVKCmdBeginRenderPass : public MVKCmdBeginRenderPassBase { public: VkResult setContent(MVKCommandBuffer* cmdBuff, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); void encode(MVKCommandEncoder* cmdEncoder) override; @@ -49,10 +77,6 @@ protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; MVKSmallVector _clearValues; - MVKRenderPass* _renderPass; - MVKFramebuffer* _framebuffer; - VkRect2D _renderArea; - VkSubpassContents _contents; }; // Concrete template class implementations. 
@@ -70,6 +94,9 @@ class MVKCmdNextSubpass : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff, VkSubpassContents contents); + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkSubpassBeginInfo* pSubpassBeginInfo, + const VkSubpassEndInfo* pSubpassEndInfo); void encode(MVKCommandEncoder* cmdEncoder) override; @@ -88,6 +115,8 @@ class MVKCmdEndRenderPass : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff); + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkSubpassEndInfo* pSubpassEndInfo); void encode(MVKCommandEncoder* cmdEncoder) override; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm index a0666c68..2e1e5ad1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm @@ -25,6 +25,21 @@ #include "mvk_datatypes.hpp" +#pragma mark - +#pragma mark MVKCmdBeginRenderPassBase + +VkResult MVKCmdBeginRenderPassBase::setContent(MVKCommandBuffer* cmdBuff, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) { + _contents = contents; + _renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass; + _framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer; + _renderArea = pRenderPassBegin->renderArea; + + return VK_SUCCESS; +} + + #pragma mark - #pragma mark MVKCmdBeginRenderPass @@ -32,10 +47,7 @@ template VkResult MVKCmdBeginRenderPass::setContent(MVKCommandBuffer* cmdBuff, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents) { - _contents = contents; - _renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass; - _framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer; - _renderArea = pRenderPassBegin->renderArea; + MVKCmdBeginRenderPassBase::setContent(cmdBuff, pRenderPassBegin, contents); // Add clear values uint32_t cvCnt = pRenderPassBegin->clearValueCount; @@ -48,10 +60,17 @@ VkResult MVKCmdBeginRenderPass::setContent(MVKCommandBuffer* 
cmdBuff, return VK_SUCCESS; } +template +VkResult MVKCmdBeginRenderPass::setContent(MVKCommandBuffer* cmdBuff, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo) { + return setContent(cmdBuff, pRenderPassBegin, pSubpassBeginInfo->contents); +} + template void MVKCmdBeginRenderPass::encode(MVKCommandEncoder* cmdEncoder) { // MVKLogDebug("Encoding vkCmdBeginRenderPass(). Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds()); - cmdEncoder->beginRenderpass(_contents, _renderPass, _framebuffer, _renderArea, _clearValues.contents()); + cmdEncoder->beginRenderpass(this, _contents, _renderPass, _framebuffer, _renderArea, _clearValues.contents()); } template class MVKCmdBeginRenderPass<1>; @@ -69,8 +88,17 @@ VkResult MVKCmdNextSubpass::setContent(MVKCommandBuffer* cmdBuff, return VK_SUCCESS; } +VkResult MVKCmdNextSubpass::setContent(MVKCommandBuffer* cmdBuff, + const VkSubpassBeginInfo* pBeginSubpassInfo, + const VkSubpassEndInfo* pEndSubpassInfo) { + return setContent(cmdBuff, pBeginSubpassInfo->contents); +} + void MVKCmdNextSubpass::encode(MVKCommandEncoder* cmdEncoder) { - cmdEncoder->beginNextSubpass(_contents); + if (cmdEncoder->getMultiviewPassIndex() + 1 < cmdEncoder->getSubpass()->getMultiviewMetalPassCount()) + cmdEncoder->beginNextMultiviewPass(); + else + cmdEncoder->beginNextSubpass(this, _contents); } @@ -81,9 +109,17 @@ VkResult MVKCmdEndRenderPass::setContent(MVKCommandBuffer* cmdBuff) { return VK_SUCCESS; } +VkResult MVKCmdEndRenderPass::setContent(MVKCommandBuffer* cmdBuff, + const VkSubpassEndInfo* pEndSubpassInfo) { + return VK_SUCCESS; +} + void MVKCmdEndRenderPass::encode(MVKCommandEncoder* cmdEncoder) { // MVKLogDebug("Encoding vkCmdEndRenderPass(). 
Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds()); - cmdEncoder->endRenderpass(); + if (cmdEncoder->getMultiviewPassIndex() + 1 < cmdEncoder->getSubpass()->getMultiviewMetalPassCount()) + cmdEncoder->beginNextMultiviewPass(); + else + cmdEncoder->endRenderpass(); } @@ -100,6 +136,7 @@ VkResult MVKCmdExecuteCommands::setContent(MVKCommandBuffer* cmdBuff, for (uint32_t cbIdx = 0; cbIdx < commandBuffersCount; cbIdx++) { _secondaryCommandBuffers.push_back(MVKCommandBuffer::getMVKCommandBuffer(pCommandBuffers[cbIdx])); } + cmdBuff->recordExecuteCommands(_secondaryCommandBuffers.contents()); return VK_SUCCESS; } diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h index 7e9d9113..4bc8b115 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h @@ -254,10 +254,12 @@ public: void encode(MVKCommandEncoder* cmdEncoder) override; protected: - uint32_t getVertexCount(); - void populateVertices(simd::float4* vertices, float attWidth, float attHeight); - uint32_t populateVertices(simd::float4* vertices, uint32_t startVertex, - VkClearRect& clearRect, float attWidth, float attHeight); + uint32_t getVertexCount(MVKCommandEncoder* cmdEncoder); + void populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices, + float attWidth, float attHeight); + uint32_t populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices, + uint32_t startVertex, VkClearRect& clearRect, + float attWidth, float attHeight); virtual VkClearValue& getClearValue(uint32_t attIdx) = 0; virtual void setClearValue(uint32_t attIdx, const VkClearValue& clearValue) = 0; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 84d56bcd..f494748e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -124,11 +124,18 @@ void MVKCmdCopyImage::encode(MVKCommandEncoder* cmdEncoder, 
MVKCommandUse com // Extent is provided in source texels. If the source is compressed but the // destination is not, each destination pixel will consume an entire source block, // so we must downscale the destination extent by the size of the source block. + // Likewise if the destination is compressed and source is not, each source pixel + // will map to a block of pixels in the destination texture, and we need to + // adjust destination's extent accordingly. VkExtent3D dstExtent = vkIC.extent; if (isSrcCompressed && !isDstCompressed) { VkExtent2D srcBlockExtent = pixFmts->getBlockTexelSize(srcMTLPixFmt); dstExtent.width /= srcBlockExtent.width; dstExtent.height /= srcBlockExtent.height; + } else if (!isSrcCompressed && isDstCompressed) { + VkExtent2D dstBlockExtent = pixFmts->getBlockTexelSize(dstMTLPixFmt); + dstExtent.width *= dstBlockExtent.width; + dstExtent.height *= dstBlockExtent.height; } auto& dstCpy = vkDstCopies[copyIdx]; dstCpy.bufferOffset = tmpBuffSize; @@ -948,27 +955,34 @@ VkResult MVKCmdClearAttachments::setContent(MVKCommandBuffer* cmdBuff, // Returns the total number of vertices needed to clear all layers of all rectangles. template -uint32_t MVKCmdClearAttachments::getVertexCount() { +uint32_t MVKCmdClearAttachments::getVertexCount(MVKCommandEncoder* cmdEncoder) { uint32_t vtxCnt = 0; - for (auto& rect : _clearRects) { - vtxCnt += 6 * rect.layerCount; + if (cmdEncoder->getSubpass()->isMultiview()) { + // In this case, all the layer counts will be one. We want to use the number of views in the current multiview pass. + vtxCnt = (uint32_t)_clearRects.size() * cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) * 6; + } else { + for (auto& rect : _clearRects) { + vtxCnt += 6 * rect.layerCount; + } } return vtxCnt; } // Populates the vertices for all clear rectangles within an attachment of the specified size. 
template -void MVKCmdClearAttachments::populateVertices(simd::float4* vertices, float attWidth, float attHeight) { +void MVKCmdClearAttachments::populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices, + float attWidth, float attHeight) { uint32_t vtxIdx = 0; for (auto& rect : _clearRects) { - vtxIdx = populateVertices(vertices, vtxIdx, rect, attWidth, attHeight); + vtxIdx = populateVertices(cmdEncoder, vertices, vtxIdx, rect, attWidth, attHeight); } } // Populates the vertices, starting at the vertex, from the specified rectangle within // an attachment of the specified size. Returns the next vertex that needs to be populated. template -uint32_t MVKCmdClearAttachments::populateVertices(simd::float4* vertices, +uint32_t MVKCmdClearAttachments::populateVertices(MVKCommandEncoder* cmdEncoder, + simd::float4* vertices, uint32_t startVertex, VkClearRect& clearRect, float attWidth, @@ -990,8 +1004,17 @@ uint32_t MVKCmdClearAttachments::populateVertices(simd::float4* vertices, simd::float4 vtx; uint32_t vtxIdx = startVertex; - uint32_t startLayer = clearRect.baseArrayLayer; - uint32_t endLayer = startLayer + clearRect.layerCount; + uint32_t startLayer, endLayer; + if (cmdEncoder->getSubpass()->isMultiview()) { + // In a multiview pass, the baseArrayLayer will be 0 and the layerCount will be 1. + // Use the view count instead. We already set the base slice properly in the + // MTLRenderPassDescriptor, so we don't need to offset the starting layer. 
+ startLayer = 0; + endLayer = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()); + } else { + startLayer = clearRect.baseArrayLayer; + endLayer = startLayer + clearRect.layerCount; + } for (uint32_t layer = startLayer; layer < endLayer; layer++) { vtx.z = 0.0; @@ -1032,12 +1055,12 @@ uint32_t MVKCmdClearAttachments::populateVertices(simd::float4* vertices, template void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { - uint32_t vtxCnt = getVertexCount(); + uint32_t vtxCnt = getVertexCount(cmdEncoder); simd::float4 vertices[vtxCnt]; simd::float4 clearColors[kMVKClearAttachmentCount]; VkExtent2D fbExtent = cmdEncoder->_framebuffer->getExtent2D(); - populateVertices(vertices, fbExtent.width, fbExtent.height); + populateVertices(cmdEncoder, vertices, fbExtent.width, fbExtent.height); MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); MVKRenderSubpass* subpass = cmdEncoder->getSubpass(); @@ -1045,7 +1068,10 @@ void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) { // Populate the render pipeline state attachment key with info from the subpass and framebuffer. 
_rpsKey.mtlSampleCount = mvkSampleCountFromVkSampleCountFlagBits(subpass->getSampleCount()); - if (cmdEncoder->_canUseLayeredRendering && cmdEncoder->_framebuffer->getLayerCount() > 1) { _rpsKey.enableLayeredRendering(); } + if (cmdEncoder->_canUseLayeredRendering && + (cmdEncoder->_framebuffer->getLayerCount() > 1 || cmdEncoder->getSubpass()->isMultiview())) { + _rpsKey.enableLayeredRendering(); + } uint32_t caCnt = subpass->getColorAttachmentCount(); for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index a1957ea6..9002d59e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -33,6 +33,8 @@ class MVKQueue; class MVKQueueCommandBufferSubmission; class MVKCommandEncoder; class MVKCommandEncodingPool; +class MVKCmdBeginRenderPassBase; +class MVKCmdNextSubpass; class MVKRenderPass; class MVKFramebuffer; class MVKRenderSubpass; @@ -95,6 +97,8 @@ public: */ id _initialVisibilityResultMTLBuffer; + /** Called when a MVKCmdExecuteCommands is added to this command buffer. 
*/ + void recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers); #pragma mark Tessellation constituent command management @@ -105,6 +109,24 @@ public: MVKCmdBindPipeline* _lastTessellationPipeline; +#pragma mark Multiview render pass command management + + /** Update the last recorded multiview render pass */ + void recordBeginRenderPass(MVKCmdBeginRenderPassBase* mvkBeginRenderPass); + + /** Update the last recorded multiview subpass */ + void recordNextSubpass(); + + /** Forget the last recorded multiview render pass */ + void recordEndRenderPass(); + + /** The most recent recorded multiview render subpass */ + MVKRenderSubpass* _lastMultiviewSubpass; + + /** Returns the currently active multiview render subpass, even for secondary command buffers */ + MVKRenderSubpass* getLastMultiviewSubpass(); + + #pragma mark Construction MVKCommandBuffer(MVKDevice* device) : MVKDeviceTrackingMixin(device) {} @@ -249,14 +271,18 @@ public: void encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer); /** Begins a render pass and establishes initial draw state. */ - void beginRenderpass(VkSubpassContents subpassContents, + void beginRenderpass(MVKCommand* passCmd, + VkSubpassContents subpassContents, MVKRenderPass* renderPass, MVKFramebuffer* framebuffer, VkRect2D& renderArea, MVKArrayRef clearValues); /** Begins the next render subpass. */ - void beginNextSubpass(VkSubpassContents renderpassContents); + void beginNextSubpass(MVKCommand* subpassCmd, VkSubpassContents renderpassContents); + + /** Begins the next multiview Metal render pass. */ + void beginNextMultiviewPass(); /** Begins a Metal render pass for the current render subpass. */ void beginMetalRenderPass(bool loadOverride = false); @@ -267,6 +293,9 @@ public: /** Returns the render subpass that is currently active. */ MVKRenderSubpass* getSubpass(); + /** Returns the index of the currently active multiview subpass, or zero if the current render pass is not multiview. 
*/ + uint32_t getMultiviewPassIndex(); + /** Binds a pipeline to a bind point. */ void bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline); @@ -428,14 +457,16 @@ public: protected: void addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query); void finishQueries(); - void setSubpass(VkSubpassContents subpassContents, uint32_t subpassIndex); + void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex); void clearRenderArea(); const MVKMTLBufferAllocation* copyToTempMTLBufferAllocation(const void* bytes, NSUInteger length); NSString* getMTLRenderCommandEncoderName(); VkSubpassContents _subpassContents; MVKRenderPass* _renderPass; + MVKCommand* _lastMultiviewPassCmd; uint32_t _renderSubpassIndex; + uint32_t _multiviewPassIndex; VkRect2D _renderArea; MVKActivatedQueries* _pActivatedQueries; MVKSmallVector _clearValues; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index ef1cf0d9..24b65a40 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -26,6 +26,7 @@ #include "MVKLogging.h" #include "MTLRenderPassDescriptor+MoltenVK.h" #include "MVKCmdDraw.h" +#include "MVKCmdRenderPass.h" using namespace std; @@ -76,6 +77,7 @@ VkResult MVKCommandBuffer::reset(VkCommandBufferResetFlags flags) { _commandCount = 0; _initialVisibilityResultMTLBuffer = nil; // not retained _lastTessellationPipeline = nullptr; + _lastMultiviewSubpass = nullptr; setConfigurationResult(VK_NOT_READY); if (mvkAreAllFlagsEnabled(flags, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT)) { @@ -193,6 +195,19 @@ MVKCommandBuffer::~MVKCommandBuffer() { reset(0); } +// If the initial visibility result buffer has not been set, promote the first visibility result buffer +// found among any of the secondary command buffers, to support the case where a render pass is started in +// the primary command buffer but the visibility query is 
started inside one of the secondary command buffers. +void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers) { + if (_initialVisibilityResultMTLBuffer == nil) { + for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) { + if (cmdBuff->_initialVisibilityResultMTLBuffer) { + _initialVisibilityResultMTLBuffer = cmdBuff->_initialVisibilityResultMTLBuffer; + break; + } + } + } +} #pragma mark - #pragma mark Tessellation constituent command management @@ -202,12 +217,41 @@ void MVKCommandBuffer::recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline) { } +#pragma mark - +#pragma mark Multiview render pass command management + +void MVKCommandBuffer::recordBeginRenderPass(MVKCmdBeginRenderPassBase* mvkBeginRenderPass) { + MVKRenderPass* mvkRendPass = mvkBeginRenderPass->getRenderPass(); + _lastMultiviewSubpass = mvkRendPass->isMultiview() ? mvkRendPass->getSubpass(0) : nullptr; +} + +void MVKCommandBuffer::recordNextSubpass() { + if (_lastMultiviewSubpass) { + _lastMultiviewSubpass = _lastMultiviewSubpass->getRenderPass()->getSubpass(_lastMultiviewSubpass->getSubpassIndex() + 1); + } +} + +void MVKCommandBuffer::recordEndRenderPass() { + _lastMultiviewSubpass = nullptr; +} + +MVKRenderSubpass* MVKCommandBuffer::getLastMultiviewSubpass() { + if (_doesContinueRenderPass) { + MVKRenderSubpass* subpass = ((MVKRenderPass*)_secondaryInheritanceInfo.renderPass)->getSubpass(_secondaryInheritanceInfo.subpass); + if (subpass->isMultiview()) { return subpass; } + } + return _lastMultiviewSubpass; +} + + #pragma mark - #pragma mark MVKCommandEncoder void MVKCommandEncoder::encode(id mtlCmdBuff) { + _renderPass = nullptr; _subpassContents = VK_SUBPASS_CONTENTS_INLINE; _renderSubpassIndex = 0; + _multiviewPassIndex = 0; _canUseLayeredRendering = false; _mtlCmdBuffer = mtlCmdBuff; // not retained @@ -216,8 +260,15 @@ void MVKCommandEncoder::encode(id mtlCmdBuff) { MVKCommand* cmd = _cmdBuffer->_head; while (cmd) { + uint32_t prevMVPassIdx = 
_multiviewPassIndex; cmd->encode(this); - cmd = cmd->_next; + if (_multiviewPassIndex > prevMVPassIdx) { + // This means we're in a multiview render pass, and we moved on to the + // next view group. Re-encode all commands in the subpass again for this group. + cmd = _lastMultiviewPassCmd->_next; + } else { + cmd = cmd->_next; + } } endCurrentMetalEncoding(); @@ -232,7 +283,8 @@ void MVKCommandEncoder::encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer) { } } -void MVKCommandEncoder::beginRenderpass(VkSubpassContents subpassContents, +void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd, + VkSubpassContents subpassContents, MVKRenderPass* renderPass, MVKFramebuffer* framebuffer, VkRect2D& renderArea, @@ -243,19 +295,23 @@ void MVKCommandEncoder::beginRenderpass(VkSubpassContents subpassContents, _isRenderingEntireAttachment = (mvkVkOffset2DsAreEqual(_renderArea.offset, {0,0}) && mvkVkExtent2DsAreEqual(_renderArea.extent, _framebuffer->getExtent2D())); _clearValues.assign(clearValues.begin(), clearValues.end()); - setSubpass(subpassContents, 0); + setSubpass(passCmd, subpassContents, 0); } -void MVKCommandEncoder::beginNextSubpass(VkSubpassContents contents) { - setSubpass(contents, _renderSubpassIndex + 1); +void MVKCommandEncoder::beginNextSubpass(MVKCommand* subpassCmd, VkSubpassContents contents) { + setSubpass(subpassCmd, contents, _renderSubpassIndex + 1); } // Sets the current render subpass to the subpass with the specified index. 
-void MVKCommandEncoder::setSubpass(VkSubpassContents subpassContents, uint32_t subpassIndex) { +void MVKCommandEncoder::setSubpass(MVKCommand* subpassCmd, + VkSubpassContents subpassContents, + uint32_t subpassIndex) { encodeStoreActions(); + _lastMultiviewPassCmd = subpassCmd; _subpassContents = subpassContents; _renderSubpassIndex = subpassIndex; + _multiviewPassIndex = 0; _canUseLayeredRendering = (_device->_pMetalFeatures->layeredRendering && (_device->_pMetalFeatures->multisampleLayeredRendering || @@ -264,20 +320,34 @@ void MVKCommandEncoder::setSubpass(VkSubpassContents subpassContents, uint32_t s beginMetalRenderPass(); } +void MVKCommandEncoder::beginNextMultiviewPass() { + encodeStoreActions(); + _multiviewPassIndex++; + beginMetalRenderPass(); +} + +uint32_t MVKCommandEncoder::getMultiviewPassIndex() { return _multiviewPassIndex; } + // Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder. void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) { endCurrentMetalEncoding(); MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor]; - getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride); + getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride); mtlRPDesc.visibilityResultBuffer = _occlusionQueryState.getVisibilityResultMTLBuffer(); VkExtent2D fbExtent = _framebuffer->getExtent2D(); mtlRPDesc.renderTargetWidthMVK = min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width); mtlRPDesc.renderTargetHeightMVK = min(_renderArea.offset.y + _renderArea.extent.height, fbExtent.height); if (_canUseLayeredRendering) { - mtlRPDesc.renderTargetArrayLengthMVK = _framebuffer->getLayerCount(); + if (getSubpass()->isMultiview()) { + // In the case of a multiview pass, the framebuffer 
layer count will be one. + // We need to use the view count for this multiview pass. + mtlRPDesc.renderTargetArrayLengthMVK = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex); + } else { + mtlRPDesc.renderTargetArrayLengthMVK = _framebuffer->getLayerCount(); + } } _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; // not retained @@ -361,6 +431,10 @@ VkRect2D MVKCommandEncoder::clipToRenderArea(VkRect2D scissor) { } void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) { + if (stage == kMVKGraphicsStageVertex) { + // Must happen before switching encoders. + encodeStoreActions(true); + } _graphicsPipelineState.encode(stage); // Must do first..it sets others _graphicsResourcesState.encode(stage); _viewportState.encode(stage); @@ -386,16 +460,36 @@ void MVKCommandEncoder::clearRenderArea() { if (clearAttCnt == 0) { return; } - VkClearRect clearRect; - clearRect.rect = _renderArea; - clearRect.baseArrayLayer = 0; - clearRect.layerCount = _framebuffer->getLayerCount(); + if (!getSubpass()->isMultiview()) { + VkClearRect clearRect; + clearRect.rect = _renderArea; + clearRect.baseArrayLayer = 0; + clearRect.layerCount = _framebuffer->getLayerCount(); - // Create and execute a temporary clear attachments command. - // To be threadsafe...do NOT acquire and return the command from the pool. - MVKCmdClearMultiAttachments<1> cmd; - cmd.setContent(_cmdBuffer, clearAttCnt, clearAtts.data(), 1, &clearRect); - cmd.encode(this); + // Create and execute a temporary clear attachments command. + // To be threadsafe...do NOT acquire and return the command from the pool. + MVKCmdClearMultiAttachments<1> cmd; + cmd.setContent(_cmdBuffer, clearAttCnt, clearAtts.data(), 1, &clearRect); + cmd.encode(this); + } else { + // For multiview, it is possible that some attachments need different layers cleared. + // In that case, we'll have to clear them individually. 
:/ + for (auto& clearAtt : clearAtts) { + MVKSmallVector clearRects; + getSubpass()->populateMultiviewClearRects(clearRects, this, clearAtt.colorAttachment, clearAtt.aspectMask); + // Create and execute a temporary clear attachments command. + // To be threadsafe...do NOT acquire and return the command from the pool. + if (clearRects.size() == 1) { + MVKCmdClearSingleAttachment<1> cmd; + cmd.setContent(_cmdBuffer, 1, &clearAtt, (uint32_t)clearRects.size(), clearRects.data()); + cmd.encode(this); + } else { + MVKCmdClearSingleAttachment<4> cmd; + cmd.setContent(_cmdBuffer, 1, &clearAtt, (uint32_t)clearRects.size(), clearRects.data()); + cmd.encode(this); + } + } + } } void MVKCommandEncoder::finalizeDispatchState() { @@ -559,7 +653,13 @@ void MVKCommandEncoder::markTimestamp(MVKQueryPool* pQueryPool, uint32_t query) // Marks the specified query as activated void MVKCommandEncoder::addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query) { if ( !_pActivatedQueries ) { _pActivatedQueries = new MVKActivatedQueries(); } - (*_pActivatedQueries)[pQueryPool].push_back(query); + uint32_t endQuery = query + 1; + if (_renderPass && getSubpass()->isMultiview()) { + endQuery = query + getSubpass()->getViewCountInMetalPass(_multiviewPassIndex); + } + while (query < endQuery) { + (*_pActivatedQueries)[pQueryPool].push_back(query++); + } } // Register a command buffer completion handler that finishes each activated query. 
@@ -653,6 +753,7 @@ NSString* mvkMTLComputeCommandEncoderLabel(MVKCommandUse cmdUse) { case kMVKCommandUseCopyImageToBuffer: return @"vkCmdCopyImageToBuffer ComputeEncoder"; case kMVKCommandUseFillBuffer: return @"vkCmdFillBuffer ComputeEncoder"; case kMVKCommandUseTessellationVertexTessCtl: return @"vkCmdDraw (vertex and tess control stages) ComputeEncoder"; + case kMVKCommandUseMultiviewInstanceCountAdjust: return @"vkCmdDraw (multiview instance count adjustment) ComputeEncoder"; case kMVKCommandUseCopyQueryPoolResults:return @"vkCmdCopyQueryPoolResults ComputeEncoder"; default: return @"Unknown Use ComputeEncoder"; } diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 660c74e0..0c6cd714 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -427,6 +427,7 @@ protected: MVKMTLBufferBinding swizzleBufferBinding; MVKMTLBufferBinding bufferSizeBufferBinding; + MVKMTLBufferBinding viewRangeBufferBinding; bool areBufferBindingsDirty = false; bool areTextureBindingsDirty = false; @@ -446,6 +447,7 @@ protected: areSamplerStateBindingsDirty = false; swizzleBufferBinding.isDirty = false; bufferSizeBufferBinding.isDirty = false; + viewRangeBufferBinding.isDirty = false; needsSwizzle = false; } @@ -493,6 +495,11 @@ public: bool needTessEvalSizeBuffer, bool needFragmentSizeBuffer); + /** Sets the current view range buffer state. 
*/ + void bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding, + bool needVertexViewBuffer, + bool needFragmentViewBuffer); + void encodeBindings(MVKShaderStage stage, const char* pStageName, bool fullImageViewSwizzle, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 47be2d9d..450ccaf1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -557,6 +557,18 @@ void MVKGraphicsResourcesCommandEncoderState::bindBufferSizeBuffer(const MVKShad _shaderStageResourceBindings[kMVKShaderStageFragment].bufferSizeBufferBinding.isDirty = needFragmentSizeBuffer; } +void MVKGraphicsResourcesCommandEncoderState::bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding, + bool needVertexViewBuffer, + bool needFragmentViewBuffer) { + for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) { + _shaderStageResourceBindings[i].viewRangeBufferBinding.index = binding.stages[i]; + } + _shaderStageResourceBindings[kMVKShaderStageVertex].viewRangeBufferBinding.isDirty = needVertexViewBuffer; + _shaderStageResourceBindings[kMVKShaderStageTessCtl].viewRangeBufferBinding.isDirty = false; + _shaderStageResourceBindings[kMVKShaderStageTessEval].viewRangeBufferBinding.isDirty = false; + _shaderStageResourceBindings[kMVKShaderStageFragment].viewRangeBufferBinding.isDirty = needFragmentViewBuffer; +} + void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stage, const char* pStageName, bool fullImageViewSwizzle, @@ -587,6 +599,13 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag bindImplicitBuffer(_cmdEncoder, shaderStage.bufferSizeBufferBinding, shaderStage.bufferSizes.contents()); } + if (shaderStage.viewRangeBufferBinding.isDirty) { + MVKSmallVector viewRange; + 
viewRange.push_back(_cmdEncoder->getSubpass()->getFirstViewIndexInMetalPass(_cmdEncoder->getMultiviewPassIndex())); + viewRange.push_back(_cmdEncoder->getSubpass()->getViewCountInMetalPass(_cmdEncoder->getMultiviewPassIndex())); + bindImplicitBuffer(_cmdEncoder, shaderStage.viewRangeBufferBinding, viewRange.contents()); + } + encodeBinding(shaderStage.textureBindings, shaderStage.areTextureBindingsDirty, bindTexture); encodeBinding(shaderStage.samplerStateBindings, shaderStage.areSamplerStateBindingsDirty, bindSampler); } diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h index fc17e59f..52e47043 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h @@ -112,8 +112,11 @@ public: /** Returns a MTLComputePipelineState for decompressing a buffer into a 3D image. */ id getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff); + /** Returns a MTLComputePipelineState for converting an indirect buffer for use in a multiview draw. */ + id getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed); + /** Returns a MTLComputePipelineState for converting an indirect buffer for use in a tessellated draw. */ - id getCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed); + id getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed); /** Returns a MTLComputePipelineState for copying an index buffer for use in an indirect tessellated draw. 
*/ id getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type); @@ -149,7 +152,8 @@ protected: id _mtlCopyBufferBytesComputePipelineState = nil; id _mtlFillBufferComputePipelineState = nil; id _mtlCopyBufferToImage3DDecompressComputePipelineState[2] = {nil, nil}; - id _mtlDrawIndirectConvertBuffersComputePipelineState[2] = {nil, nil}; + id _mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[2] = {nil, nil}; + id _mtlDrawIndirectTessConvertBuffersComputePipelineState[2] = {nil, nil}; id _mtlDrawIndexedCopyIndexBufferComputePipelineState[2] = {nil, nil}; id _mtlCopyQueryPoolResultsComputePipelineState = nil; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm index 19d2c90b..da0e661f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm @@ -106,8 +106,12 @@ id MVKCommandEncodingPool::getCmdCopyBufferToImage3DDec MVK_ENC_REZ_ACCESS(_mtlCopyBufferToImage3DDecompressComputePipelineState[needsTempBuff ? 1 : 0], newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff, _commandPool)); } -id MVKCommandEncodingPool::getCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed) { - MVK_ENC_REZ_ACCESS(_mtlDrawIndirectConvertBuffersComputePipelineState[indexed ? 1 : 0], newCmdDrawIndirectConvertBuffersMTLComputePipelineState(indexed, _commandPool)); +id MVKCommandEncodingPool::getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed) { + MVK_ENC_REZ_ACCESS(_mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[indexed ? 1 : 0], newCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(indexed, _commandPool)); +} + +id MVKCommandEncodingPool::getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed) { + MVK_ENC_REZ_ACCESS(_mtlDrawIndirectTessConvertBuffersComputePipelineState[indexed ? 
1 : 0], newCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(indexed, _commandPool)); } id MVKCommandEncodingPool::getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type) { @@ -179,10 +183,15 @@ void MVKCommandEncodingPool::destroyMetalResources() { _mtlCopyBufferToImage3DDecompressComputePipelineState[0] = nil; _mtlCopyBufferToImage3DDecompressComputePipelineState[1] = nil; - [_mtlDrawIndirectConvertBuffersComputePipelineState[0] release]; - [_mtlDrawIndirectConvertBuffersComputePipelineState[1] release]; - _mtlDrawIndirectConvertBuffersComputePipelineState[0] = nil; - _mtlDrawIndirectConvertBuffersComputePipelineState[1] = nil; + [_mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[0] release]; + [_mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[1] release]; + _mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[0] = nil; + _mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[1] = nil; + + [_mtlDrawIndirectTessConvertBuffersComputePipelineState[0] release]; + [_mtlDrawIndirectTessConvertBuffersComputePipelineState[1] release]; + _mtlDrawIndirectTessConvertBuffersComputePipelineState[0] = nil; + _mtlDrawIndirectTessConvertBuffersComputePipelineState[1] = nil; [_mtlDrawIndexedCopyIndexBufferComputePipelineState[0] release]; [_mtlDrawIndexedCopyIndexBufferComputePipelineState[1] release]; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h index c74508f8..124f6d9f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h @@ -170,17 +170,41 @@ struct MTLStageInRegionIndirectArguments { }; \n\ #endif \n\ \n\ +kernel void cmdDrawIndirectMultiviewConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\ + device MTLDrawPrimitivesIndirectArguments* destBuff [[buffer(1)]],\n\ + constant uint32_t& 
srcStride [[buffer(2)]], \n\ + constant uint32_t& drawCount [[buffer(3)]], \n\ + constant uint32_t& viewCount [[buffer(4)]], \n\ + uint idx [[thread_position_in_grid]]) { \n\ + if (idx >= drawCount) { return; } \n\ + const device auto& src = *reinterpret_cast(srcBuff + idx * srcStride);\n\ + destBuff[idx] = src; \n\ + destBuff[idx].instanceCount *= viewCount; \n\ +} \n\ + \n\ +kernel void cmdDrawIndexedIndirectMultiviewConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\ + device MTLDrawIndexedPrimitivesIndirectArguments* destBuff [[buffer(1)]],\n\ + constant uint32_t& srcStride [[buffer(2)]], \n\ + constant uint32_t& drawCount [[buffer(3)]], \n\ + constant uint32_t& viewCount [[buffer(4)]], \n\ + uint idx [[thread_position_in_grid]]) { \n\ + if (idx >= drawCount) { return; } \n\ + const device auto& src = *reinterpret_cast(srcBuff + idx * srcStride);\n\ + destBuff[idx] = src; \n\ + destBuff[idx].instanceCount *= viewCount; \n\ +} \n\ + \n\ #if __METAL_VERSION__ >= 120 \n\ -kernel void cmdDrawIndirectConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\ - device char* destBuff [[buffer(1)]], \n\ - device char* paramsBuff [[buffer(2)]], \n\ - constant uint32_t& srcStride [[buffer(3)]], \n\ - constant uint32_t& inControlPointCount [[buffer(4)]], \n\ - constant uint32_t& outControlPointCount [[buffer(5)]], \n\ - constant uint32_t& drawCount [[buffer(6)]], \n\ - constant uint32_t& vtxThreadExecWidth [[buffer(7)]], \n\ - constant uint32_t& tcWorkgroupSize [[buffer(8)]], \n\ - uint idx [[thread_position_in_grid]]) { \n\ +kernel void cmdDrawIndirectTessConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\ + device char* destBuff [[buffer(1)]], \n\ + device char* paramsBuff [[buffer(2)]], \n\ + constant uint32_t& srcStride [[buffer(3)]], \n\ + constant uint32_t& inControlPointCount [[buffer(4)]], \n\ + constant uint32_t& outControlPointCount [[buffer(5)]], \n\ + constant uint32_t& drawCount [[buffer(6)]], \n\ + constant uint32_t& vtxThreadExecWidth 
[[buffer(7)]], \n\ + constant uint32_t& tcWorkgroupSize [[buffer(8)]], \n\ + uint idx [[thread_position_in_grid]]) { \n\ if (idx >= drawCount) { return; } \n\ const device auto& src = *reinterpret_cast(srcBuff + idx * srcStride);\n\ device char* dest; \n\ diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h index 213f1248..7672fedb 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h @@ -421,9 +421,13 @@ public: id newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf, MVKVulkanAPIDeviceObject* owner); + /** Returns a new MTLComputePipelineState for converting an indirect buffer for use in a multiview draw. */ + id newCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed, + MVKVulkanAPIDeviceObject* owner); + /** Returns a new MTLComputePipelineState for converting an indirect buffer for use in a tessellated draw. */ - id newCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed, - MVKVulkanAPIDeviceObject* owner); + id newCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed, + MVKVulkanAPIDeviceObject* owner); /** Returns a new MTLComputePipelineState for copying an index buffer for use in a tessellated draw. 
*/ id newCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index a92b86e1..a616a642 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -417,11 +417,18 @@ id MVKCommandResourceFactory::newCmdCopyBufferToImage3D : "cmdCopyBufferToImage3DDecompressDXTn", owner); } -id MVKCommandResourceFactory::newCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed, - MVKVulkanAPIDeviceObject* owner) { +id MVKCommandResourceFactory::newCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed, + MVKVulkanAPIDeviceObject* owner) { return newMTLComputePipelineState(indexed - ? "cmdDrawIndexedIndirectConvertBuffers" - : "cmdDrawIndirectConvertBuffers", owner); + ? "cmdDrawIndexedIndirectMultiviewConvertBuffers" + : "cmdDrawIndirectMultiviewConvertBuffers", owner); +} + +id MVKCommandResourceFactory::newCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed, + MVKVulkanAPIDeviceObject* owner) { + return newMTLComputePipelineState(indexed + ? 
"cmdDrawIndexedIndirectTessConvertBuffers" + : "cmdDrawIndirectTessConvertBuffers", owner); } id MVKCommandResourceFactory::newCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type, diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index ed4896f7..a7aa0d7c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -17,6 +17,7 @@ */ #include "MVKDescriptorSet.h" +#include "MVKInstance.h" #include "MVKOSExtensions.h" @@ -554,7 +555,8 @@ VkResult MVKDescriptorPool::allocateDescriptorSets(uint32_t count, const VkDescriptorSetLayout* pSetLayouts, VkDescriptorSet* pDescriptorSets) { if (_allocatedSets.size() + count > _maxSets) { - if (_device->_enabledExtensions.vk_KHR_maintenance1.enabled) { + if (_device->_enabledExtensions.vk_KHR_maintenance1.enabled || + _device->getInstance()->getAPIVersion() >= VK_API_VERSION_1_1) { return VK_ERROR_OUT_OF_POOL_MEMORY; // Failure is an acceptable test...don't log as error. 
} else { return reportError(VK_ERROR_INITIALIZATION_FAILED, "The maximum number of descriptor sets that can be allocated by this descriptor pool is %d.", _maxSets); @@ -576,8 +578,9 @@ VkResult MVKDescriptorPool::allocateDescriptorSets(uint32_t count, VkResult MVKDescriptorPool::freeDescriptorSets(uint32_t count, const VkDescriptorSet* pDescriptorSets) { for (uint32_t dsIdx = 0; dsIdx < count; dsIdx++) { MVKDescriptorSet* mvkDS = (MVKDescriptorSet*)pDescriptorSets[dsIdx]; - freeDescriptorSet(mvkDS); - _allocatedSets.erase(mvkDS); + if (_allocatedSets.erase(mvkDS)) { + freeDescriptorSet(mvkDS); + } } return VK_SUCCESS; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 08718f32..23d74f5d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -137,6 +137,14 @@ public: void getExternalBufferProperties(const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, VkExternalBufferProperties* pExternalBufferProperties); + /** Populates the external fence properties supported on this device. */ + void getExternalFenceProperties(const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo, + VkExternalFenceProperties* pExternalFenceProperties); + + /** Populates the external semaphore properties supported on this device. */ + void getExternalSemaphoreProperties(const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo, + VkExternalSemaphoreProperties* pExternalSemaphoreProperties); + #pragma mark Surfaces /** @@ -297,6 +305,9 @@ public: /** Populates the specified structure with the Metal-specific features of this device. */ inline const MVKPhysicalDeviceMetalFeatures* getMetalFeatures() { return &_metalFeatures; } + /** Returns whether or not vertex instancing can be used to implement multiview. 
*/ + inline bool canUseInstancingForMultiview() { return _metalFeatures.layeredRendering && _metalFeatures.deferredStoreActions; } + /** Returns the underlying Metal device. */ inline id getMTLDevice() { return _mtlDevice; } @@ -415,6 +426,9 @@ public: /** Returns the queue at the specified index within the specified family. */ MVKQueue* getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex); + /** Returns the queue described by the specified structure. */ + MVKQueue* getQueue(const VkDeviceQueueInfo2* queueInfo); + /** Retrieves the queue at the lowest queue and queue family indices used by the app. */ MVKQueue* getAnyQueue(); @@ -549,6 +563,8 @@ public: MVKRenderPass* createRenderPass(const VkRenderPassCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator); + MVKRenderPass* createRenderPass(const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator); void destroyRenderPass(MVKRenderPass* mvkRP, const VkAllocationCallbacks* pAllocator); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 123eb9f9..5aaf4189 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -91,6 +91,28 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { f16Features->shaderInt8 = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { + auto* multiviewFeatures = (VkPhysicalDeviceMultiviewFeatures*)next; + multiviewFeatures->multiview = true; + multiviewFeatures->multiviewGeometryShader = false; + multiviewFeatures->multiviewTessellationShader = false; // FIXME + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { + auto* protectedMemFeatures = (VkPhysicalDeviceProtectedMemoryFeatures*)next; + protectedMemFeatures->protectedMemory = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { + auto* samplerYcbcrConvFeatures = 
(VkPhysicalDeviceSamplerYcbcrConversionFeatures*)next; + samplerYcbcrConvFeatures->samplerYcbcrConversion = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { + auto* shaderDrawParamsFeatures = (VkPhysicalDeviceShaderDrawParametersFeatures*)next; + shaderDrawParamsFeatures->shaderDrawParameters = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: { auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*)next; uboLayoutFeatures->uniformBufferStandardLayout = true; @@ -151,11 +173,6 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { portabilityFeatures->samplerMipLodBias = false; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { - auto* samplerYcbcrConvFeatures = (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)next; - samplerYcbcrConvFeatures->samplerYcbcrConversion = true; - break; - } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: { auto* shaderIntFuncsFeatures = (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL*)next; shaderIntFuncsFeatures->shaderIntegerFunctions2 = true; @@ -182,9 +199,19 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { properties->properties = _properties; for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) { switch ((uint32_t)next->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { - auto* pointClipProps = (VkPhysicalDevicePointClippingProperties*)next; - pointClipProps->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: { + auto* physicalDeviceDriverProps = (VkPhysicalDeviceDriverPropertiesKHR*)next; + strcpy(physicalDeviceDriverProps->driverName, "MoltenVK"); + strcpy(physicalDeviceDriverProps->driverInfo, 
mvkGetMoltenVKVersionString(MVK_VERSION).c_str()); + physicalDeviceDriverProps->driverID = VK_DRIVER_ID_MOLTENVK; + physicalDeviceDriverProps->conformanceVersion.major = 0; + physicalDeviceDriverProps->conformanceVersion.minor = 0; + physicalDeviceDriverProps->conformanceVersion.subminor = 0; + physicalDeviceDriverProps->conformanceVersion.patch = 0; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { + populate((VkPhysicalDeviceIDProperties*)next); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { @@ -193,51 +220,31 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { maint3Props->maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { + auto* multiviewProps = (VkPhysicalDeviceMultiviewProperties*)next; + multiviewProps->maxMultiviewViewCount = 32; + if (canUseInstancingForMultiview()) { + multiviewProps->maxMultiviewInstanceIndex = std::numeric_limits::max() / 32; + } else { + multiviewProps->maxMultiviewInstanceIndex = std::numeric_limits::max(); + } + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { + auto* pointClipProps = (VkPhysicalDevicePointClippingProperties*)next; + pointClipProps->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { + auto* protectedMemProps = (VkPhysicalDeviceProtectedMemoryProperties*)next; + protectedMemProps->protectedNoFault = false; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { auto* pushDescProps = (VkPhysicalDevicePushDescriptorPropertiesKHR*)next; pushDescProps->maxPushDescriptors = _properties.limits.maxPerStageResources; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: { - auto* robustness2Props = (VkPhysicalDeviceRobustness2PropertiesEXT*)next; - // This isn't implemented yet, 
but when it is, I expect that we'll wind up - // doing it manually. - robustness2Props->robustStorageBufferAccessSizeAlignment = 1; - robustness2Props->robustUniformBufferAccessSizeAlignment = 1; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: { - auto* texelBuffAlignProps = (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT*)next; - // Save the 'next' pointer; we'll unintentionally overwrite it - // on the next line. Put it back when we're done. - void* savedNext = texelBuffAlignProps->pNext; - *texelBuffAlignProps = _texelBuffAlignProperties; - texelBuffAlignProps->pNext = savedNext; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { - auto* divisorProps = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT*)next; - divisorProps->maxVertexAttribDivisor = kMVKUndefinedLargeUInt32; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { - populate((VkPhysicalDeviceIDProperties*)next); - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_EXTX: { - auto* portabilityProps = (VkPhysicalDevicePortabilitySubsetPropertiesEXTX*)next; - portabilityProps->minVertexInputBindingStrideAlignment = (uint32_t)_metalFeatures.vertexStrideAlignment; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: { - auto* inlineUniformBlockProps = (VkPhysicalDeviceInlineUniformBlockPropertiesEXT*)next; - inlineUniformBlockProps->maxInlineUniformBlockSize = _metalFeatures.dynamicMTLBufferSize; - inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks = _properties.limits.maxPerStageDescriptorUniformBuffers; - inlineUniformBlockProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = _properties.limits.maxPerStageDescriptorUniformBuffers; - inlineUniformBlockProps->maxDescriptorSetInlineUniformBlocks = _properties.limits.maxDescriptorSetUniformBuffers; - 
inlineUniformBlockProps->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = _properties.limits.maxDescriptorSetUniformBuffers; - break; - } #if MVK_MACOS case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: if (mvkOSVersionIsAtLeast(10.14)) { @@ -260,15 +267,40 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { } break; #endif - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: { - auto* physicalDeviceDriverProps = (VkPhysicalDeviceDriverPropertiesKHR*)next; - strcpy(physicalDeviceDriverProps->driverName, "MoltenVK"); - strcpy(physicalDeviceDriverProps->driverInfo, mvkGetMoltenVKVersionString(MVK_VERSION).c_str()); - physicalDeviceDriverProps->driverID = VK_DRIVER_ID_MOLTENVK; - physicalDeviceDriverProps->conformanceVersion.major = 0; - physicalDeviceDriverProps->conformanceVersion.minor = 0; - physicalDeviceDriverProps->conformanceVersion.subminor = 0; - physicalDeviceDriverProps->conformanceVersion.patch = 0; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: { + auto* inlineUniformBlockProps = (VkPhysicalDeviceInlineUniformBlockPropertiesEXT*)next; + inlineUniformBlockProps->maxInlineUniformBlockSize = _metalFeatures.dynamicMTLBufferSize; + inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks = _properties.limits.maxPerStageDescriptorUniformBuffers; + inlineUniformBlockProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = _properties.limits.maxPerStageDescriptorUniformBuffers; + inlineUniformBlockProps->maxDescriptorSetInlineUniformBlocks = _properties.limits.maxDescriptorSetUniformBuffers; + inlineUniformBlockProps->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = _properties.limits.maxDescriptorSetUniformBuffers; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: { + auto* robustness2Props = (VkPhysicalDeviceRobustness2PropertiesEXT*)next; + // This isn't implemented yet, but when it is, I expect that we'll wind up + // doing 
it manually. + robustness2Props->robustStorageBufferAccessSizeAlignment = 1; + robustness2Props->robustUniformBufferAccessSizeAlignment = 1; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: { + auto* texelBuffAlignProps = (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT*)next; + // Save the 'next' pointer; we'll unintentionally overwrite it + // on the next line. Put it back when we're done. + void* savedNext = texelBuffAlignProps->pNext; + *texelBuffAlignProps = _texelBuffAlignProperties; + texelBuffAlignProps->pNext = savedNext; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { + auto* divisorProps = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT*)next; + divisorProps->maxVertexAttribDivisor = kMVKUndefinedLargeUInt32; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_EXTX: { + auto* portabilityProps = (VkPhysicalDevicePortabilitySubsetPropertiesEXTX*)next; + portabilityProps->minVertexInputBindingStrideAlignment = (uint32_t)_metalFeatures.vertexStrideAlignment; break; } default: @@ -577,6 +609,24 @@ VkExternalMemoryProperties& MVKPhysicalDevice::getExternalImageProperties(VkExte } } +static const VkExternalFenceProperties _emptyExtFenceProps = {VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES, nullptr, 0, 0, 0}; + +void MVKPhysicalDevice::getExternalFenceProperties(const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo, + VkExternalFenceProperties* pExternalFenceProperties) { + void* next = pExternalFenceProperties->pNext; + *pExternalFenceProperties = _emptyExtFenceProps; + pExternalFenceProperties->pNext = next; +} + +static const VkExternalSemaphoreProperties _emptyExtSemProps = {VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES, nullptr, 0, 0, 0}; + +void MVKPhysicalDevice::getExternalSemaphoreProperties(const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo, + VkExternalSemaphoreProperties* 
pExternalSemaphoreProperties) { + void* next = pExternalSemaphoreProperties->pNext; + *pExternalSemaphoreProperties = _emptyExtSemProps; + pExternalSemaphoreProperties->pNext = next; +} + #pragma mark Surfaces @@ -2335,10 +2385,11 @@ MVKPhysicalDevice::~MVKPhysicalDevice() { // Returns core device commands and enabled extension device commands. PFN_vkVoidFunction MVKDevice::getProcAddr(const char* pName) { MVKEntryPoint* pMVKPA = _physicalDevice->_mvkInstance->getEntryPoint(pName); + uint32_t apiVersion = _physicalDevice->_mvkInstance->_appInfo.apiVersion; - bool isSupported = (pMVKPA && // Command exists and... - pMVKPA->isDevice && // ...is a device command and... - pMVKPA->isEnabled(_enabledExtensions)); // ...is a core or enabled extension command. + bool isSupported = (pMVKPA && // Command exists and... + pMVKPA->isDevice && // ...is a device command and... + pMVKPA->isEnabled(apiVersion, _enabledExtensions)); // ...is a core or enabled extension command. return isSupported ? pMVKPA->functionPointer : nullptr; } @@ -2347,6 +2398,10 @@ MVKQueue* MVKDevice::getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) { return _queuesByQueueFamilyIndex[queueFamilyIndex][queueIndex]; } +MVKQueue* MVKDevice::getQueue(const VkDeviceQueueInfo2* queueInfo) { + return _queuesByQueueFamilyIndex[queueInfo->queueFamilyIndex][queueInfo->queueIndex]; +} + MVKQueue* MVKDevice::getAnyQueue() { for (auto& queues : _queuesByQueueFamilyIndex) { for (MVKQueue* q : queues) { @@ -2741,6 +2796,11 @@ MVKRenderPass* MVKDevice::createRenderPass(const VkRenderPassCreateInfo* pCreate return new MVKRenderPass(this, pCreateInfo); } +MVKRenderPass* MVKDevice::createRenderPass(const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator) { + return new MVKRenderPass(this, pCreateInfo); +} + void MVKDevice::destroyRenderPass(MVKRenderPass* mvkRP, const VkAllocationCallbacks* pAllocator) { if (mvkRP) { mvkRP->destroy(); } diff --git 
a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h index 61956917..3ff1a952 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h @@ -37,13 +37,15 @@ class MVKDebugUtilsMessenger; /** Tracks info about entry point function pointer addresses. */ typedef struct { PFN_vkVoidFunction functionPointer; + uint32_t apiVersion; const char* ext1Name; const char* ext2Name; bool isDevice; bool isCore() { return !ext1Name && !ext2Name; } - bool isEnabled(const MVKExtensionList& extList) { - return isCore() || extList.isEnabled(ext1Name) || extList.isEnabled(ext2Name); + bool isEnabled(uint32_t enabledVersion, const MVKExtensionList& extList) { + return (isCore() && MVK_VULKAN_API_VERSION_CONFORM(enabledVersion) >= apiVersion) || + extList.isEnabled(ext1Name) || extList.isEnabled(ext2Name); } } MVKEntryPoint; @@ -65,6 +67,9 @@ public: /** Returns a pointer to the Vulkan instance. */ MVKInstance* getInstance() override { return this; } + /** Returns the maximum version of Vulkan the application supports. */ + inline uint32_t getAPIVersion() { return _appInfo.apiVersion; } + /** Returns a pointer to the layer manager. */ inline MVKLayerManager* getLayerManager() { return MVKLayerManager::globalManager(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index 48c6c488..e8d42d6a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -39,9 +39,9 @@ MVKEntryPoint* MVKInstance::getEntryPoint(const char* pName) { PFN_vkVoidFunction MVKInstance::getProcAddr(const char* pName) { MVKEntryPoint* pMVKPA = getEntryPoint(pName); - bool isSupported = (pMVKPA && // Command exists and... - (pMVKPA->isDevice || // ...is a device command or... - pMVKPA->isEnabled(_enabledExtensions))); // ...is a core or enabled extension command. + bool isSupported = (pMVKPA && // Command exists and... 
+ (pMVKPA->isDevice || // ...is a device command or... + pMVKPA->isEnabled(_appInfo.apiVersion, _enabledExtensions))); // ...is a core or enabled extension command. return isSupported ? pMVKPA->functionPointer : nullptr; } @@ -336,8 +336,8 @@ MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExte initDebugCallbacks(pCreateInfo); // Do before any creation activities - _appInfo.apiVersion = MVK_VULKAN_API_VERSION; // Default mvkSetOrClear(&_appInfo, pCreateInfo->pApplicationInfo); + if (_appInfo.apiVersion == 0) { _appInfo.apiVersion = VK_API_VERSION_1_0; } // Default initProcAddrs(); // Init function pointers initConfig(); @@ -349,18 +349,6 @@ MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExte getDriverLayer()->getSupportedInstanceExtensions())); logVersions(); // Log the MoltenVK and Vulkan versions - // If we only support Vulkan 1.0, we must report an error if a larger Vulkan version is requested. - // If we support Vulkan 1.1 or better, per spec, we never report an error. - if ((MVK_VULKAN_API_VERSION_CONFORM(MVK_VULKAN_API_VERSION) < - MVK_VULKAN_API_VERSION_CONFORM(VK_API_VERSION_1_1)) && - (MVK_VULKAN_API_VERSION_CONFORM(MVK_VULKAN_API_VERSION) < - MVK_VULKAN_API_VERSION_CONFORM(_appInfo.apiVersion))) { - setConfigurationResult(reportError(VK_ERROR_INCOMPATIBLE_DRIVER, - "Request for Vulkan version %s is not compatible with supported version %s.", - mvkGetVulkanVersionString(_appInfo.apiVersion).c_str(), - mvkGetVulkanVersionString(MVK_VULKAN_API_VERSION).c_str())); - } - // Populate the array of physical GPU devices. // This effort creates a number of autoreleased instances of Metal // and other Obj-C classes, so wrap it all in an autorelease pool. 
@@ -403,16 +391,19 @@ void MVKInstance::initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo) { } } -#define ADD_ENTRY_POINT(func, ext1, ext2, isDev) _entryPoints[""#func] = { (PFN_vkVoidFunction)&func, ext1, ext2, isDev } +#define ADD_ENTRY_POINT(func, api, ext1, ext2, isDev) _entryPoints[""#func] = { (PFN_vkVoidFunction)&func, api, ext1, ext2, isDev } -#define ADD_INST_ENTRY_POINT(func) ADD_ENTRY_POINT(func, nullptr, nullptr, false) -#define ADD_DVC_ENTRY_POINT(func) ADD_ENTRY_POINT(func, nullptr, nullptr, true) +#define ADD_INST_ENTRY_POINT(func) ADD_ENTRY_POINT(func, VK_API_VERSION_1_0, nullptr, nullptr, false) +#define ADD_DVC_ENTRY_POINT(func) ADD_ENTRY_POINT(func, VK_API_VERSION_1_0, nullptr, nullptr, true) -#define ADD_INST_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, VK_ ##EXT ##_EXTENSION_NAME, nullptr, false) -#define ADD_DVC_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, VK_ ##EXT ##_EXTENSION_NAME, nullptr, true) +#define ADD_INST_1_1_ENTRY_POINT(func) ADD_ENTRY_POINT(func, VK_API_VERSION_1_1, nullptr, nullptr, false) +#define ADD_DVC_1_1_ENTRY_POINT(func) ADD_ENTRY_POINT(func, VK_API_VERSION_1_1, nullptr, nullptr, true) -#define ADD_INST_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_ ##EXT1 ##_EXTENSION_NAME, VK_ ##EXT2 ##_EXTENSION_NAME, false) -#define ADD_DVC_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, VK_ ##EXT1 ##_EXTENSION_NAME, VK_ ##EXT2 ##_EXTENSION_NAME, true) +#define ADD_INST_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, VK_ ##EXT ##_EXTENSION_NAME, nullptr, false) +#define ADD_DVC_EXT_ENTRY_POINT(func, EXT) ADD_ENTRY_POINT(func, 0, VK_ ##EXT ##_EXTENSION_NAME, nullptr, true) + +#define ADD_INST_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_ ##EXT1 ##_EXTENSION_NAME, VK_ ##EXT2 ##_EXTENSION_NAME, false) +#define ADD_DVC_EXT2_ENTRY_POINT(func, EXT1, EXT2) ADD_ENTRY_POINT(func, 0, VK_ ##EXT1 ##_EXTENSION_NAME, VK_ ##EXT2 ##_EXTENSION_NAME, true) // Initializes the function pointer 
map. void MVKInstance::initProcAddrs() { @@ -432,6 +423,18 @@ void MVKInstance::initProcAddrs() { ADD_INST_ENTRY_POINT(vkEnumerateDeviceLayerProperties); ADD_INST_ENTRY_POINT(vkGetPhysicalDeviceSparseImageFormatProperties); + ADD_INST_1_1_ENTRY_POINT(vkEnumeratePhysicalDeviceGroups); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceFeatures2); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceProperties2); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceFormatProperties2); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceImageFormatProperties2); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceQueueFamilyProperties2); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceMemoryProperties2); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceSparseImageFormatProperties2); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceExternalFenceProperties); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceExternalBufferProperties); + ADD_INST_1_1_ENTRY_POINT(vkGetPhysicalDeviceExternalSemaphoreProperties); + // Device functions: ADD_DVC_ENTRY_POINT(vkGetDeviceProcAddr); ADD_DVC_ENTRY_POINT(vkDestroyDevice); @@ -555,8 +558,28 @@ void MVKInstance::initProcAddrs() { ADD_DVC_ENTRY_POINT(vkCmdEndRenderPass); ADD_DVC_ENTRY_POINT(vkCmdExecuteCommands); + ADD_DVC_1_1_ENTRY_POINT(vkGetDeviceQueue2); + ADD_DVC_1_1_ENTRY_POINT(vkBindBufferMemory2); + ADD_DVC_1_1_ENTRY_POINT(vkBindImageMemory2); + ADD_DVC_1_1_ENTRY_POINT(vkGetBufferMemoryRequirements2); + ADD_DVC_1_1_ENTRY_POINT(vkGetImageMemoryRequirements2); + ADD_DVC_1_1_ENTRY_POINT(vkGetImageSparseMemoryRequirements2); + ADD_DVC_1_1_ENTRY_POINT(vkGetDeviceGroupPeerMemoryFeatures); + ADD_DVC_1_1_ENTRY_POINT(vkCreateDescriptorUpdateTemplate); + ADD_DVC_1_1_ENTRY_POINT(vkDestroyDescriptorUpdateTemplate); + ADD_DVC_1_1_ENTRY_POINT(vkUpdateDescriptorSetWithTemplate); + ADD_DVC_1_1_ENTRY_POINT(vkGetDescriptorSetLayoutSupport); + ADD_DVC_1_1_ENTRY_POINT(vkCreateSamplerYcbcrConversion); + ADD_DVC_1_1_ENTRY_POINT(vkDestroySamplerYcbcrConversion); + 
ADD_DVC_1_1_ENTRY_POINT(vkTrimCommandPool); + ADD_DVC_1_1_ENTRY_POINT(vkCmdSetDeviceMask); + ADD_DVC_1_1_ENTRY_POINT(vkCmdDispatchBase); + // Instance extension functions: ADD_INST_EXT_ENTRY_POINT(vkEnumeratePhysicalDeviceGroupsKHR, KHR_DEVICE_GROUP_CREATION); + ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceExternalFencePropertiesKHR, KHR_EXTERNAL_FENCE_CAPABILITIES); + ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceExternalBufferPropertiesKHR, KHR_EXTERNAL_MEMORY_CAPABILITIES); + ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceExternalSemaphorePropertiesKHR, KHR_EXTERNAL_SEMAPHORE_CAPABILITIES); ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceFeatures2KHR, KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2); ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceProperties2KHR, KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2); ADD_INST_EXT_ENTRY_POINT(vkGetPhysicalDeviceFormatProperties2KHR, KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2); @@ -609,6 +632,10 @@ void MVKInstance::initProcAddrs() { // Device extension functions: ADD_DVC_EXT_ENTRY_POINT(vkBindBufferMemory2KHR, KHR_BIND_MEMORY_2); ADD_DVC_EXT_ENTRY_POINT(vkBindImageMemory2KHR, KHR_BIND_MEMORY_2); + ADD_DVC_EXT_ENTRY_POINT(vkCreateRenderPass2KHR, KHR_CREATE_RENDERPASS_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdBeginRenderPass2KHR, KHR_CREATE_RENDERPASS_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdNextSubpass2KHR, KHR_CREATE_RENDERPASS_2); + ADD_DVC_EXT_ENTRY_POINT(vkCmdEndRenderPass2KHR, KHR_CREATE_RENDERPASS_2); ADD_DVC_EXT_ENTRY_POINT(vkCreateDescriptorUpdateTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE); ADD_DVC_EXT_ENTRY_POINT(vkDestroyDescriptorUpdateTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE); ADD_DVC_EXT_ENTRY_POINT(vkUpdateDescriptorSetWithTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 37e98085..9b9b40a0 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -25,6 +25,7 @@ #include "MVKSmallVector.h" #include 
#include +#include #include #include @@ -78,6 +79,9 @@ public: /** Returns the current buffer size buffer bindings. */ const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; } + /** Returns the current view range buffer binding for multiview draws. */ + const MVKShaderImplicitRezBinding& getViewRangeBufferIndex() { return _viewRangeBufferIndex; } + /** Returns the current indirect parameter buffer bindings. */ const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; } @@ -113,6 +117,7 @@ protected: MVKShaderResourceBinding _pushConstantsMTLResourceIndexes; MVKShaderImplicitRezBinding _swizzleBufferIndex; MVKShaderImplicitRezBinding _bufferSizeBufferIndex; + MVKShaderImplicitRezBinding _viewRangeBufferIndex; MVKShaderImplicitRezBinding _indirectParamsIndex; MVKShaderImplicitRezBinding _outputBufferIndex; uint32_t _tessCtlPatchOutputBufferIndex = 0; @@ -282,6 +287,7 @@ protected: bool addFragmentShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput); template bool addVertexInputToPipeline(T* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, const SPIRVToMSLConversionConfiguration& shaderContext); + void adjustVertexInputForMultiview(MTLVertexDescriptor* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, uint32_t viewCount, uint32_t oldViewCount = 1); void addTessellationToPipeline(MTLRenderPipelineDescriptor* plDesc, const SPIRVTessReflectionData& reflectData, const VkPipelineTessellationStateCreateInfo* pTS); void addFragmentOutputToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo); bool isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo); @@ -309,6 +315,7 @@ protected: id _mtlTessVertexStageIndex32State = nil; id _mtlTessControlStageState = nil; id _mtlPipelineState = nil; + 
std::unordered_map> _multiviewMTLPipelineStates; MTLCullMode _mtlCullMode; MTLWinding _mtlFrontWinding; MTLTriangleFillMode _mtlFillMode; @@ -317,6 +324,7 @@ float _blendConstants[4] = { 0.0, 0.0, 0.0, 1.0 }; uint32_t _outputControlPointCount; + MVKShaderImplicitRezBinding _viewRangeBufferIndex; MVKShaderImplicitRezBinding _outputBufferIndex; uint32_t _tessCtlPatchOutputBufferIndex = 0; uint32_t _tessCtlLevelBufferIndex = 0; @@ -325,6 +333,7 @@ bool _hasDepthStencilInfo; bool _needsVertexSwizzleBuffer = false; bool _needsVertexBufferSizeBuffer = false; + bool _needsVertexViewRangeBuffer = false; bool _needsVertexOutputBuffer = false; bool _needsTessCtlSwizzleBuffer = false; bool _needsTessCtlBufferSizeBuffer = false; @@ -335,6 +344,7 @@ bool _needsTessEvalBufferSizeBuffer = false; bool _needsFragmentSwizzleBuffer = false; bool _needsFragmentBufferSizeBuffer = false; + bool _needsFragmentViewRangeBuffer = false; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 253c9c41..ce836c14 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -149,6 +149,10 @@ MVKPipelineLayout::MVKPipelineLayout(MVKDevice* device, _tessCtlLevelBufferIndex = _tessCtlPatchOutputBufferIndex + 1; } } + // Since we currently can't use multiview with tessellation or geometry shaders, + // to conserve the number of buffer bindings, use the same bindings for the + // view range buffer as for the indirect parameters buffer. + _viewRangeBufferIndex = _indirectParamsIndex; } MVKPipelineLayout::~MVKPipelineLayout() { @@ -232,7 +236,11 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage) if ( !_mtlPipelineState ) { return; } // Abort if pipeline could not be created. 
// Render pipeline state - [mtlCmdEnc setRenderPipelineState: _mtlPipelineState]; + if (cmdEncoder->getSubpass()->isMultiview() && !isTessellationPipeline() && !_multiviewMTLPipelineStates.empty()) { + [mtlCmdEnc setRenderPipelineState: _multiviewMTLPipelineStates[cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex())]]; + } else { + [mtlCmdEnc setRenderPipelineState: _mtlPipelineState]; + } // Depth stencil state if (_hasDepthStencilInfo) { @@ -263,6 +271,7 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage) } cmdEncoder->_graphicsResourcesState.bindSwizzleBuffer(_swizzleBufferIndex, _needsVertexSwizzleBuffer, _needsTessCtlSwizzleBuffer, _needsTessEvalSwizzleBuffer, _needsFragmentSwizzleBuffer); cmdEncoder->_graphicsResourcesState.bindBufferSizeBuffer(_bufferSizeBufferIndex, _needsVertexBufferSizeBuffer, _needsTessCtlBufferSizeBuffer, _needsTessEvalBufferSizeBuffer, _needsFragmentBufferSizeBuffer); + cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer); } bool MVKGraphicsPipeline::supportsDynamicState(VkDynamicState state) { @@ -468,7 +477,35 @@ void MVKGraphicsPipeline::initMTLRenderPipelineState(const VkGraphicsPipelineCre if (!isTessellationPipeline()) { MTLRenderPipelineDescriptor* plDesc = newMTLRenderPipelineDescriptor(pCreateInfo, reflectData); // temp retain if (plDesc) { - getOrCompilePipeline(plDesc, _mtlPipelineState); + MVKRenderPass* mvkRendPass = (MVKRenderPass*)pCreateInfo->renderPass; + MVKRenderSubpass* mvkSubpass = mvkRendPass->getSubpass(pCreateInfo->subpass); + if (mvkSubpass->isMultiview()) { + // We need to adjust the step rate for per-instance attributes to account for the + // extra instances needed to render all views. But, there's a problem: vertex input + // descriptions are static pipeline state. 
If we need multiple passes, and some have + // different numbers of views to render than others, then the step rate must be different + // for these passes. We'll need to make a pipeline for every pass view count we can see + // in the render pass. This really sucks. + std::unordered_set viewCounts; + for (uint32_t passIdx = 0; passIdx < mvkSubpass->getMultiviewMetalPassCount(); ++passIdx) { + viewCounts.insert(mvkSubpass->getViewCountInMetalPass(passIdx)); + } + auto count = viewCounts.cbegin(); + adjustVertexInputForMultiview(plDesc.vertexDescriptor, pCreateInfo->pVertexInputState, *count); + getOrCompilePipeline(plDesc, _mtlPipelineState); + if (viewCounts.size() > 1) { + _multiviewMTLPipelineStates[*count] = _mtlPipelineState; + uint32_t oldCount = *count++; + for (auto last = viewCounts.cend(); count != last; ++count) { + if (_multiviewMTLPipelineStates.count(*count)) { continue; } + adjustVertexInputForMultiview(plDesc.vertexDescriptor, pCreateInfo->pVertexInputState, *count, oldCount); + getOrCompilePipeline(plDesc, _multiviewMTLPipelineStates[*count]); + oldCount = *count; + } + } + } else { + getOrCompilePipeline(plDesc, _mtlPipelineState); + } } [plDesc release]; // temp release } else { @@ -816,8 +853,9 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor* shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageVertex]; shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageVertex]; shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex]; - shaderContext.options.mslOptions.capture_output_to_buffer = isTessellationPipeline(); - shaderContext.options.mslOptions.disable_rasterization = isTessellationPipeline() || (pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->rasterizerDiscardEnable)); + shaderContext.options.mslOptions.view_mask_buffer_index = 
_viewRangeBufferIndex.stages[kMVKShaderStageVertex]; + shaderContext.options.mslOptions.capture_output_to_buffer = false; + shaderContext.options.mslOptions.disable_rasterization = pCreateInfo->pRasterizationState && pCreateInfo->pRasterizationState->rasterizerDiscardEnable; addVertexInputToShaderConverterContext(shaderContext, pCreateInfo); MVKMTLFunction func = ((MVKShaderModule*)_pVertexSS->module)->getMTLFunction(&shaderContext, _pVertexSS->pSpecializationInfo, _pipelineCache); @@ -832,6 +870,7 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor* plDesc.rasterizationEnabled = !funcRslts.isRasterizationDisabled; _needsVertexSwizzleBuffer = funcRslts.needsSwizzleBuffer; _needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer; + _needsVertexViewRangeBuffer = funcRslts.needsViewRangeBuffer; _needsVertexOutputBuffer = funcRslts.needsOutputBuffer; // If we need the swizzle buffer and there's no place to put it, we're in serious trouble. @@ -849,6 +888,9 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor* if (!verifyImplicitBuffer(_needsVertexOutputBuffer, _indirectParamsIndex, kMVKShaderStageVertex, "indirect parameters", vbCnt)) { return false; } + if (!verifyImplicitBuffer(_needsVertexViewRangeBuffer, _viewRangeBufferIndex, kMVKShaderStageVertex, "view range", vbCnt)) { + return false; + } return true; } @@ -1006,6 +1048,7 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto shaderContext.options.entryPointStage = spv::ExecutionModelFragment; shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageFragment]; shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageFragment]; + shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageFragment]; shaderContext.options.entryPointName = _pFragmentSS->pName; 
shaderContext.options.mslOptions.capture_output_to_buffer = false; if (pCreateInfo->pMultisampleState && pCreateInfo->pMultisampleState->pSampleMask && pCreateInfo->pMultisampleState->pSampleMask[0] != 0xffffffff) { @@ -1024,12 +1067,16 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto auto& funcRslts = func.shaderConversionResults; _needsFragmentSwizzleBuffer = funcRslts.needsSwizzleBuffer; _needsFragmentBufferSizeBuffer = funcRslts.needsBufferSizeBuffer; + _needsFragmentViewRangeBuffer = funcRslts.needsViewRangeBuffer; if (!verifyImplicitBuffer(_needsFragmentSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageFragment, "swizzle", 0)) { return false; } if (!verifyImplicitBuffer(_needsFragmentBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageFragment, "buffer size", 0)) { return false; } + if (!verifyImplicitBuffer(_needsFragmentViewRangeBuffer, _viewRangeBufferIndex, kMVKShaderStageFragment, "view range", 0)) { + return false; + } } return true; } @@ -1182,6 +1229,24 @@ template bool MVKGraphicsPipeline::addVertexInputToPipelinevertexBindingDescriptionCount; + const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions; + for (uint32_t i = 0; i < vbCnt; ++i, ++pVKVB) { + uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding); + if (inputDesc.layouts[vbIdx].stepFunction == MTLVertexStepFunctionPerInstance) { + inputDesc.layouts[vbIdx].stepRate = inputDesc.layouts[vbIdx].stepRate / oldViewCount * viewCount; + for (auto& xltdBind : _translatedVertexBindings) { + if (xltdBind.binding == pVKVB->binding) { + uint32_t vbXltdIdx = getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding); + inputDesc.layouts[vbXltdIdx].stepRate = inputDesc.layouts[vbXltdIdx].stepRate / oldViewCount * viewCount; + } + } + } + } +} + // Returns a translated binding for the existing binding and translation offset, creating it if needed. 
uint32_t MVKGraphicsPipeline::getTranslatedVertexBinding(uint32_t binding, uint32_t translationOffset, uint32_t maxBinding) { // See if a translated binding already exists (for example if more than one VA needs the same translation). @@ -1323,6 +1388,7 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf _outputBufferIndex = layout->getOutputBufferIndex(); _tessCtlPatchOutputBufferIndex = layout->getTessCtlPatchOutputBufferIndex(); _tessCtlLevelBufferIndex = layout->getTessCtlLevelBufferIndex(); + _viewRangeBufferIndex = layout->getViewRangeBufferIndex(); MVKRenderPass* mvkRendPass = (MVKRenderPass*)pCreateInfo->renderPass; MVKRenderSubpass* mvkRenderSubpass = mvkRendPass->getSubpass(pCreateInfo->subpass); @@ -1345,6 +1411,9 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf shaderContext.options.shouldFlipVertexY = _device->_pMVKConfig->shaderConversionFlipVertexY; shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle; shaderContext.options.mslOptions.tess_domain_origin_lower_left = pTessDomainOriginState && pTessDomainOriginState->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; + shaderContext.options.mslOptions.multiview = mvkRendPass->isMultiview(); + shaderContext.options.mslOptions.multiview_layered_rendering = getDevice()->getPhysicalDevice()->canUseInstancingForMultiview(); + shaderContext.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT); shaderContext.options.tessPatchKind = reflectData.patchKind; shaderContext.options.numTessControlPoints = reflectData.numControlPoints; @@ -1481,7 +1550,7 @@ MVKComputePipeline::MVKComputePipeline(MVKDevice* device, const VkComputePipelineCreateInfo* pCreateInfo) : MVKPipeline(device, pipelineCache, (MVKPipelineLayout*)pCreateInfo->layout, parent) { - _allowsDispatchBase = 
mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE); // sic; drafters forgot the 'BIT' suffix + _allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT); MVKMTLFunction func = getMTLFunction(pCreateInfo); _mtlThreadgroupSize = func.threadGroupSize; @@ -1815,6 +1884,7 @@ namespace SPIRV_CROSS_NAMESPACE { opt.swizzle_texture_samples, opt.tess_domain_origin_lower_left, opt.multiview, + opt.multiview_layered_rendering, opt.view_index_from_device_index, opt.dispatch_base, opt.texture_1D_as_2D, @@ -1942,7 +2012,8 @@ namespace mvk { scr.needsPatchOutputBuffer, scr.needsBufferSizeBuffer, scr.needsInputThreadgroupMem, - scr.needsDispatchBaseBuffer); + scr.needsDispatchBaseBuffer, + scr.needsViewRangeBuffer); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm index e0d89d49..68db119a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm @@ -18,6 +18,7 @@ #include "MVKQueryPool.h" #include "MVKBuffer.h" +#include "MVKRenderPass.h" #include "MVKCommandBuffer.h" #include "MVKCommandEncodingPool.h" #include "MVKOSExtensions.h" @@ -30,8 +31,11 @@ using namespace std; #pragma mark MVKQueryPool void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) { + uint32_t queryCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()); lock_guard lock(_availabilityLock); - _availability[query] = DeviceAvailable; + for (uint32_t i = query; i < query + queryCount; ++i) { + _availability[i] = DeviceAvailable; + } lock_guard copyLock(_deferredCopiesLock); if (!_deferredCopies.empty()) { // Partition by readiness. 
@@ -287,7 +291,12 @@ void MVKOcclusionQueryPool::encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer* cmdBuffer) { NSUInteger offset = getVisibilityResultOffset(query); - NSUInteger maxOffset = getDevice()->_pMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes; + NSUInteger queryCount = 1; + if (cmdBuffer->getLastMultiviewSubpass()) { + // In multiview passes, one query is used for each view. + queryCount = cmdBuffer->getLastMultiviewSubpass()->getViewCount(); + } + NSUInteger maxOffset = getDevice()->_pMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes * queryCount; if (offset > maxOffset) { cmdBuffer->setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The query offset value %lu is larger than the maximum offset value %lu available on this device.", offset, maxOffset)); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index a0b4bc72..f8decdac 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -46,6 +46,12 @@ public: /** Returns the Vulkan API opaque object controlling this object. */ MVKVulkanAPIObject* getVulkanAPIObject() override; + /** Returns the parent render pass of this subpass. */ + inline MVKRenderPass* getRenderPass() { return _renderPass; } + + /** Returns the index of this subpass in its parent render pass. */ + inline uint32_t getSubpassIndex() { return _subpassIndex; } + /** Returns the number of color attachments, which may be zero for depth-only rendering. */ inline uint32_t getColorAttachmentCount() { return uint32_t(_colorAttachments.size()); } @@ -61,11 +67,31 @@ public: /** Returns the Vulkan sample count of the attachments used in this subpass. */ VkSampleCountFlagBits getSampleCount(); + /** Returns whether or not this is a multiview subpass. 
*/ + bool isMultiview() const { return _viewMask != 0; } + + /** Returns the total number of views to be rendered. */ + inline uint32_t getViewCount() const { return __builtin_popcount(_viewMask); } + + /** Returns the number of Metal render passes needed to render all views. */ + uint32_t getMultiviewMetalPassCount() const; + + /** Returns the first view to be rendered in the given multiview pass. */ + uint32_t getFirstViewIndexInMetalPass(uint32_t passIdx) const; + + /** Returns the number of views to be rendered in the given multiview pass. */ + uint32_t getViewCountInMetalPass(uint32_t passIdx) const; + + /** Returns the number of views to be rendered in all multiview passes up to the given one. */ + uint32_t getViewCountUpToMetalPass(uint32_t passIdx) const; + /** * Populates the specified Metal MTLRenderPassDescriptor with content from this - * instance, the specified framebuffer, and the specified array of clear values. + * instance, the specified framebuffer, and the specified array of clear values + * for the specified multiview pass. */ void populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, + uint32_t passIdx, MVKFramebuffer* framebuffer, const MVKArrayRef& clearValues, bool isRenderingEntireAttachment, @@ -78,26 +104,42 @@ public: void populateClearAttachments(MVKClearAttachments& clearAtts, const MVKArrayRef& clearValues); + /** + * Populates the specified vector with VkClearRects for clearing views of a specified multiview + * attachment on first use, when the render area is smaller than the full framebuffer size + * and/or not all views used in this subpass need to be cleared. + */ + void populateMultiviewClearRects(MVKSmallVector& clearRects, + MVKCommandEncoder* cmdEncoder, + uint32_t caIdx, VkImageAspectFlags aspectMask); + /** If a render encoder is active, sets the store actions for all attachments to it. 
*/ void encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, bool storeOverride = false); /** Constructs an instance for the specified parent renderpass. */ - MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo); + MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo, + const VkRenderPassInputAttachmentAspectCreateInfo* pInputAspects, + uint32_t viewMask); + + /** Constructs an instance for the specified parent renderpass. */ + MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription2* pCreateInfo); private: friend class MVKRenderPass; friend class MVKRenderPassAttachment; + uint32_t getViewMaskGroupForMetalPass(uint32_t passIdx); MVKMTLFmtCaps getRequiredFormatCapabilitiesForAttachmentAt(uint32_t rpAttIdx); MVKRenderPass* _renderPass; uint32_t _subpassIndex; - MVKSmallVector _inputAttachments; - MVKSmallVector _colorAttachments; - MVKSmallVector _resolveAttachments; + uint32_t _viewMask; + MVKSmallVector _inputAttachments; + MVKSmallVector _colorAttachments; + MVKSmallVector _resolveAttachments; MVKSmallVector _preserveAttachments; - VkAttachmentReference _depthStencilAttachment; + VkAttachmentReference2 _depthStencilAttachment; id _mtlDummyTex = nil; }; @@ -139,6 +181,9 @@ public: bool isStencil, bool storeOverride = false); + /** Populates the specified vector with VkClearRects for clearing views of a multiview attachment on first use. */ + void populateMultiviewClearRects(MVKSmallVector& clearRects, MVKCommandEncoder* cmdEncoder); + /** Returns whether this attachment should be cleared in the subpass. */ bool shouldUseClearAttachment(MVKRenderSubpass* subpass); @@ -146,18 +191,27 @@ public: MVKRenderPassAttachment(MVKRenderPass* renderPass, const VkAttachmentDescription* pCreateInfo); + /** Constructs an instance for the specified parent renderpass. 
*/ + MVKRenderPassAttachment(MVKRenderPass* renderPass, + const VkAttachmentDescription2* pCreateInfo); + protected: + bool isFirstUseOfAttachment(MVKRenderSubpass* subpass); + bool isLastUseOfAttachment(MVKRenderSubpass* subpass); MTLStoreAction getMTLStoreAction(MVKRenderSubpass* subpass, bool isRenderingEntireAttachment, bool hasResolveAttachment, bool isStencil, bool storeOverride); + void validateFormat(); - VkAttachmentDescription _info; + VkAttachmentDescription2 _info; MVKRenderPass* _renderPass; uint32_t _attachmentIndex; uint32_t _firstUseSubpassIdx; uint32_t _lastUseSubpassIdx; + MVKSmallVector _firstUseViewMasks; + MVKSmallVector _lastUseViewMasks; }; @@ -181,9 +235,15 @@ public: /** Returns the format of the color attachment at the specified index. */ MVKRenderSubpass* getSubpass(uint32_t subpassIndex); + /** Returns whether or not this render pass is a multiview render pass. */ + bool isMultiview() const; + /** Constructs an instance for the specified device. */ MVKRenderPass(MVKDevice* device, const VkRenderPassCreateInfo* pCreateInfo); + /** Constructs an instance for the specified device. 
*/ + MVKRenderPass(MVKDevice* device, const VkRenderPassCreateInfo2* pCreateInfo); + protected: friend class MVKRenderSubpass; friend class MVKRenderPassAttachment; @@ -192,7 +252,7 @@ protected: MVKSmallVector _attachments; MVKSmallVector _subpasses; - MVKSmallVector _subpassDependencies; + MVKSmallVector _subpassDependencies; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 9d84d9f5..c3eeb6a3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -21,6 +21,7 @@ #include "MVKCommandBuffer.h" #include "MVKFoundation.h" #include "mvk_datatypes.hpp" +#include using namespace std; @@ -67,7 +68,109 @@ VkSampleCountFlagBits MVKRenderSubpass::getSampleCount() { return VK_SAMPLE_COUNT_1_BIT; } +// Extract the first view, number of views, and the portion of the mask to be rendered from +// the lowest clump of set bits in a view mask. +static uint32_t getNextViewMaskGroup(uint32_t viewMask, uint32_t* startView, uint32_t* viewCount, uint32_t *groupMask = nullptr) { + // First, find the first set bit. This is the start of the next clump of views to be rendered. + // n.b. ffs(3) returns a 1-based index. This actually bit me during development of this feature. + int pos = ffs(viewMask) - 1; + int end = pos; + if (groupMask) { *groupMask = 0; } + // Now we'll step through the bits one at a time until we find a bit that isn't set. + // This is one past the end of the next clump. Clear the bits as we go, so we can use + // ffs(3) again on the next clump. + // TODO: Find a way to make this faster. + while (viewMask & (1 << end)) { + if (groupMask) { *groupMask |= viewMask & (1 << end); } + viewMask &= ~(1 << (end++)); + } + if (startView) { *startView = pos; } + if (viewCount) { *viewCount = end - pos; } + return viewMask; +} + +// Get the portion of the view mask that will be rendered in the specified Metal render pass. 
+uint32_t MVKRenderSubpass::getViewMaskGroupForMetalPass(uint32_t passIdx) { + if (!_viewMask) { return 0; } + assert(passIdx < getMultiviewMetalPassCount()); + if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) { + return 1 << getFirstViewIndexInMetalPass(passIdx); + } + uint32_t mask = _viewMask, groupMask = 0; + for (uint32_t i = 0; i <= passIdx; ++i) { + mask = getNextViewMaskGroup(mask, nullptr, nullptr, &groupMask); + } + return groupMask; +} + +uint32_t MVKRenderSubpass::getMultiviewMetalPassCount() const { + if (!_viewMask) { return 0; } + if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) { + // If we can't use instanced drawing for this, we'll have to unroll the render pass. + return __builtin_popcount(_viewMask); + } + uint32_t mask = _viewMask; + uint32_t count; + // Step through each clump until there are no more clumps. I'll know this has + // happened when the mask becomes 0, since getNextViewMaskGroup() clears each group of bits + // as it finds them, and returns the remainder of the mask. 
+ for (count = 0; mask != 0; ++count) { + mask = getNextViewMaskGroup(mask, nullptr, nullptr); + } + return count; +} + +uint32_t MVKRenderSubpass::getFirstViewIndexInMetalPass(uint32_t passIdx) const { + if (!_viewMask) { return 0; } + assert(passIdx < getMultiviewMetalPassCount()); + uint32_t mask = _viewMask; + uint32_t startView = 0, viewCount = 0; + if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) { + for (uint32_t i = 0; mask != 0; ++i) { + mask = getNextViewMaskGroup(mask, &startView, &viewCount); + while (passIdx-- > 0 && viewCount-- > 0) { + startView++; + } + } + } else { + for (uint32_t i = 0; i <= passIdx; ++i) { + mask = getNextViewMaskGroup(mask, &startView, nullptr); + } + } + return startView; +} + +uint32_t MVKRenderSubpass::getViewCountInMetalPass(uint32_t passIdx) const { + if (!_viewMask) { return 0; } + assert(passIdx < getMultiviewMetalPassCount()); + if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) { + return 1; + } + uint32_t mask = _viewMask; + uint32_t viewCount = 0; + for (uint32_t i = 0; i <= passIdx; ++i) { + mask = getNextViewMaskGroup(mask, nullptr, &viewCount); + } + return viewCount; +} + +uint32_t MVKRenderSubpass::getViewCountUpToMetalPass(uint32_t passIdx) const { + if (!_viewMask) { return 0; } + if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) { + return passIdx+1; + } + uint32_t mask = _viewMask; + uint32_t totalViewCount = 0; + for (uint32_t i = 0; i <= passIdx; ++i) { + uint32_t viewCount; + mask = getNextViewMaskGroup(mask, nullptr, &viewCount); + totalViewCount += viewCount; + } + return totalViewCount; +} + void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, + uint32_t passIdx, MVKFramebuffer* framebuffer, const MVKArrayRef& clearValues, bool isRenderingEntireAttachment, @@ -89,6 +192,15 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* bool 
hasResolveAttachment = (rslvRPAttIdx != VK_ATTACHMENT_UNUSED); if (hasResolveAttachment) { framebuffer->getAttachment(rslvRPAttIdx)->populateMTLRenderPassAttachmentDescriptorResolve(mtlColorAttDesc); + // In a multiview render pass, we need to override the starting layer to ensure + // only the enabled views are loaded. + if (isMultiview()) { + uint32_t startView = getFirstViewIndexInMetalPass(passIdx); + if (mtlColorAttDesc.resolveTexture.textureType == MTLTextureType3D) + mtlColorAttDesc.resolveDepthPlane += startView; + else + mtlColorAttDesc.resolveSlice += startView; + } } // Configure the color attachment @@ -100,6 +212,13 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* loadOverride)) { mtlColorAttDesc.clearColor = pixFmts->getMTLClearColor(clearValues[clrRPAttIdx], clrMVKRPAtt->getFormat()); } + if (isMultiview()) { + uint32_t startView = getFirstViewIndexInMetalPass(passIdx); + if (mtlColorAttDesc.texture.textureType == MTLTextureType3D) + mtlColorAttDesc.depthPlane += startView; + else + mtlColorAttDesc.slice += startView; + } } } @@ -119,6 +238,9 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* loadOverride)) { mtlDepthAttDesc.clearDepth = pixFmts->getMTLClearDepthValue(clearValues[dsRPAttIdx]); } + if (isMultiview()) { + mtlDepthAttDesc.slice += getFirstViewIndexInMetalPass(passIdx); + } } if (pixFmts->isStencilFormat(mtlDSFormat)) { MTLRenderPassStencilAttachmentDescriptor* mtlStencilAttDesc = mtlRPDesc.stencilAttachment; @@ -129,6 +251,9 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* loadOverride)) { mtlStencilAttDesc.clearStencil = pixFmts->getMTLClearStencilValue(clearValues[dsRPAttIdx]); } + if (isMultiview()) { + mtlStencilAttDesc.slice += getFirstViewIndexInMetalPass(passIdx); + } } } @@ -145,7 +270,10 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* // Add a dummy attachment so this passes validation. 
VkExtent2D fbExtent = framebuffer->getExtent2D(); MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: MTLPixelFormatR8Unorm width: fbExtent.width height: fbExtent.height mipmapped: NO]; - if (framebuffer->getLayerCount() > 1) { + if (isMultiview()) { + mtlTexDesc.textureType = MTLTextureType2DArray; + mtlTexDesc.arrayLength = getViewCountInMetalPass(passIdx); + } else if (framebuffer->getLayerCount() > 1) { mtlTexDesc.textureType = MTLTextureType2DArray; mtlTexDesc.arrayLength = framebuffer->getLayerCount(); } @@ -222,6 +350,24 @@ void MVKRenderSubpass::populateClearAttachments(MVKClearAttachments& clearAtts, } } +void MVKRenderSubpass::populateMultiviewClearRects(MVKSmallVector& clearRects, + MVKCommandEncoder* cmdEncoder, + uint32_t caIdx, VkImageAspectFlags aspectMask) { + uint32_t attIdx; + assert(this == cmdEncoder->getSubpass()); + if (mvkIsAnyFlagEnabled(aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + attIdx = _depthStencilAttachment.attachment; + if (attIdx != VK_ATTACHMENT_UNUSED) { + _renderPass->_attachments[attIdx].populateMultiviewClearRects(clearRects, cmdEncoder); + } + return; + } + attIdx = _colorAttachments[caIdx].attachment; + if (attIdx != VK_ATTACHMENT_UNUSED) { + _renderPass->_attachments[attIdx].populateMultiviewClearRects(clearRects, cmdEncoder); + } +} + // Returns the format capabilities required by this render subpass. // It is possible for a subpass to use a single framebuffer attachment for multiple purposes. // For example, a subpass may use a color or depth attachment as an input attachment as well. 
@@ -253,9 +399,60 @@ MVKMTLFmtCaps MVKRenderSubpass::getRequiredFormatCapabilitiesForAttachmentAt(uin } MVKRenderSubpass::MVKRenderSubpass(MVKRenderPass* renderPass, - const VkSubpassDescription* pCreateInfo) { + const VkSubpassDescription* pCreateInfo, + const VkRenderPassInputAttachmentAspectCreateInfo* pInputAspects, + uint32_t viewMask) { _renderPass = renderPass; _subpassIndex = (uint32_t)_renderPass->_subpasses.size(); + _viewMask = viewMask; + + // Add attachments + _inputAttachments.reserve(pCreateInfo->inputAttachmentCount); + for (uint32_t i = 0; i < pCreateInfo->inputAttachmentCount; i++) { + const VkAttachmentReference& att = pCreateInfo->pInputAttachments[i]; + _inputAttachments.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, att.attachment, att.layout, 0}); + } + if (pInputAspects && pInputAspects->aspectReferenceCount) { + for (uint32_t i = 0; i < pInputAspects->aspectReferenceCount; i++) { + const VkInputAttachmentAspectReference& aspectRef = pInputAspects->pAspectReferences[i]; + if (aspectRef.subpass == _subpassIndex) { + _inputAttachments[aspectRef.inputAttachmentIndex].aspectMask = aspectRef.aspectMask; + } + } + } + + _colorAttachments.reserve(pCreateInfo->colorAttachmentCount); + for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) { + const VkAttachmentReference& att = pCreateInfo->pColorAttachments[i]; + _colorAttachments.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, att.attachment, att.layout, 0}); + } + + if (pCreateInfo->pResolveAttachments) { + _resolveAttachments.reserve(pCreateInfo->colorAttachmentCount); + for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) { + const VkAttachmentReference& att = pCreateInfo->pResolveAttachments[i]; + _resolveAttachments.push_back({VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, nullptr, att.attachment, att.layout, 0}); + } + } + + if (pCreateInfo->pDepthStencilAttachment) { + _depthStencilAttachment.attachment = 
pCreateInfo->pDepthStencilAttachment->attachment; + _depthStencilAttachment.layout = pCreateInfo->pDepthStencilAttachment->layout; + } else { + _depthStencilAttachment.attachment = VK_ATTACHMENT_UNUSED; + } + + _preserveAttachments.reserve(pCreateInfo->preserveAttachmentCount); + for (uint32_t i = 0; i < pCreateInfo->preserveAttachmentCount; i++) { + _preserveAttachments.push_back(pCreateInfo->pPreserveAttachments[i]); + } +} + +MVKRenderSubpass::MVKRenderSubpass(MVKRenderPass* renderPass, + const VkSubpassDescription2* pCreateInfo) { + _renderPass = renderPass; + _subpassIndex = (uint32_t)_renderPass->_subpasses.size(); + _viewMask = pCreateInfo->viewMask; // Add attachments _inputAttachments.reserve(pCreateInfo->inputAttachmentCount); @@ -310,7 +507,7 @@ bool MVKRenderPassAttachment::populateMTLRenderPassAttachmentDescriptor(MTLRende // attachment AND we're in the first subpass. if ( loadOverride ) { mtlAttDesc.loadAction = MTLLoadActionLoad; - } else if ( isRenderingEntireAttachment && (subpass->_subpassIndex == _firstUseSubpassIdx) ) { + } else if ( isRenderingEntireAttachment && isFirstUseOfAttachment(subpass) ) { VkAttachmentLoadOp loadOp = isStencil ? 
_info.stencilLoadOp : _info.loadOp; mtlAttDesc.loadAction = mvkMTLLoadActionFromVkAttachmentLoadOp(loadOp); willClear = (loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); @@ -338,13 +535,48 @@ void MVKRenderPassAttachment::encodeStoreAction(MVKCommandEncoder* cmdEncoder, bool storeOverride) { MTLStoreAction storeAction = getMTLStoreAction(subpass, isRenderingEntireAttachment, hasResolveAttachment, isStencil, storeOverride); MVKPixelFormats* pixFmts = _renderPass->getPixelFormats(); - if (pixFmts->isDepthFormat(pixFmts->getMTLPixelFormat(_info.format)) && !isStencil) { - [cmdEncoder->_mtlRenderEncoder setDepthStoreAction: storeAction]; - } else if (pixFmts->isStencilFormat(pixFmts->getMTLPixelFormat(_info.format)) && isStencil) { - [cmdEncoder->_mtlRenderEncoder setStencilStoreAction: storeAction]; - } else { - [cmdEncoder->_mtlRenderEncoder setColorStoreAction: storeAction atIndex: caIdx]; - } + + MTLPixelFormat mtlFmt = pixFmts->getMTLPixelFormat(_info.format); + bool isDepthFormat = pixFmts->isDepthFormat(mtlFmt); + bool isStencilFormat = pixFmts->isStencilFormat(mtlFmt); + bool isColorFormat = !(isDepthFormat || isStencilFormat); + + if (isColorFormat) { + [cmdEncoder->_mtlRenderEncoder setColorStoreAction: storeAction atIndex: caIdx]; + } else if (isDepthFormat && !isStencil) { + [cmdEncoder->_mtlRenderEncoder setDepthStoreAction: storeAction]; + } else if (isStencilFormat && isStencil) { + [cmdEncoder->_mtlRenderEncoder setStencilStoreAction: storeAction]; + } +} + +void MVKRenderPassAttachment::populateMultiviewClearRects(MVKSmallVector& clearRects, MVKCommandEncoder* cmdEncoder) { + MVKRenderSubpass* subpass = cmdEncoder->getSubpass(); + uint32_t clearMask = subpass->getViewMaskGroupForMetalPass(cmdEncoder->getMultiviewPassIndex()) & _firstUseViewMasks[subpass->_subpassIndex]; + + if (!clearMask) { return; } + VkRect2D renderArea = cmdEncoder->clipToRenderArea({{0, 0}, {kMVKUndefinedLargeUInt32, kMVKUndefinedLargeUInt32}}); + uint32_t startView, viewCount; + do { + 
clearMask = getNextViewMaskGroup(clearMask, &startView, &viewCount); + clearRects.push_back({renderArea, startView, viewCount}); + } while (clearMask); +} + +bool MVKRenderPassAttachment::isFirstUseOfAttachment(MVKRenderSubpass* subpass) { + if ( subpass->isMultiview() ) { + return _firstUseViewMasks[subpass->_subpassIndex] == subpass->_viewMask; + } else { + return _firstUseSubpassIdx == subpass->_subpassIndex; + } +} + +bool MVKRenderPassAttachment::isLastUseOfAttachment(MVKRenderSubpass* subpass) { + if ( subpass->isMultiview() ) { + return _lastUseViewMasks[subpass->_subpassIndex] == subpass->_viewMask; + } else { + return _lastUseSubpassIdx == subpass->_subpassIndex; + } } MTLStoreAction MVKRenderPassAttachment::getMTLStoreAction(MVKRenderSubpass* subpass, @@ -361,7 +593,7 @@ MTLStoreAction MVKRenderPassAttachment::getMTLStoreAction(MVKRenderSubpass* subp if ( storeOverride ) { return hasResolveAttachment ? MTLStoreActionStoreAndMultisampleResolve : MTLStoreActionStore; } - if ( isRenderingEntireAttachment && (subpass->_subpassIndex == _lastUseSubpassIdx) ) { + if ( isRenderingEntireAttachment && isLastUseOfAttachment(subpass) ) { VkAttachmentStoreOp storeOp = isStencil ? 
_info.stencilStoreOp : _info.storeOp; return mvkMTLStoreActionFromVkAttachmentStoreOp(storeOp, hasResolveAttachment); } @@ -371,17 +603,16 @@ MTLStoreAction MVKRenderPassAttachment::getMTLStoreAction(MVKRenderSubpass* subp bool MVKRenderPassAttachment::shouldUseClearAttachment(MVKRenderSubpass* subpass) { // If the subpass is not the first subpass to use this attachment, don't clear this attachment - if (subpass->_subpassIndex != _firstUseSubpassIdx) { return false; } + if (subpass->isMultiview()) { + if (_firstUseViewMasks[subpass->_subpassIndex] == 0) { return false; } + } else { + if (subpass->_subpassIndex != _firstUseSubpassIdx) { return false; } + } return (_info.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); } -MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass, - const VkAttachmentDescription* pCreateInfo) { - _info = *pCreateInfo; - _renderPass = renderPass; - _attachmentIndex = uint32_t(_renderPass->_attachments.size()); - +void MVKRenderPassAttachment::validateFormat() { // Validate pixel format is supported MVKPixelFormats* pixFmts = _renderPass->getPixelFormats(); if ( !pixFmts->isSupportedOrSubstitutable(_info.format) ) { @@ -391,6 +622,10 @@ MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass, // Determine the indices of the first and last render subpasses to use this attachment. _firstUseSubpassIdx = kMVKUndefinedLargeUInt32; _lastUseSubpassIdx = 0; + if ( _renderPass->isMultiview() ) { + _firstUseViewMasks.reserve(_renderPass->_subpasses.size()); + _lastUseViewMasks.reserve(_renderPass->_subpasses.size()); + } for (auto& subPass : _renderPass->_subpasses) { // If it uses this attachment, the subpass will identify required format capabilities. 
MVKMTLFmtCaps reqCaps = subPass.getRequiredFormatCapabilitiesForAttachmentAt(_attachmentIndex); @@ -398,6 +633,13 @@ MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass, uint32_t spIdx = subPass._subpassIndex; _firstUseSubpassIdx = min(spIdx, _firstUseSubpassIdx); _lastUseSubpassIdx = max(spIdx, _lastUseSubpassIdx); + if ( subPass.isMultiview() ) { + uint32_t viewMask = subPass._viewMask; + std::for_each(_lastUseViewMasks.begin(), _lastUseViewMasks.end(), [viewMask](uint32_t& mask) { mask &= ~viewMask; }); + _lastUseViewMasks.push_back(viewMask); + std::for_each(_firstUseViewMasks.begin(), _firstUseViewMasks.end(), [&viewMask](uint32_t mask) { viewMask &= ~mask; }); + _firstUseViewMasks.push_back(viewMask); + } // Validate that the attachment pixel format supports the capabilities required by the subpass. // Use MTLPixelFormat to look up capabilities to permit Metal format substitution. @@ -408,6 +650,32 @@ MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass, } } +MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass, + const VkAttachmentDescription* pCreateInfo) { + _info.flags = pCreateInfo->flags; + _info.format = pCreateInfo->format; + _info.samples = pCreateInfo->samples; + _info.loadOp = pCreateInfo->loadOp; + _info.storeOp = pCreateInfo->storeOp; + _info.stencilLoadOp = pCreateInfo->stencilLoadOp; + _info.stencilStoreOp = pCreateInfo->stencilStoreOp; + _info.initialLayout = pCreateInfo->initialLayout; + _info.finalLayout = pCreateInfo->finalLayout; + _renderPass = renderPass; + _attachmentIndex = uint32_t(_renderPass->_attachments.size()); + + validateFormat(); +} + +MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass, + const VkAttachmentDescription2* pCreateInfo) { + _info = *pCreateInfo; + _renderPass = renderPass; + _attachmentIndex = uint32_t(_renderPass->_attachments.size()); + + validateFormat(); +} + #pragma mark - #pragma mark MVKRenderPass @@ -416,9 +684,67 @@ 
VkExtent2D MVKRenderPass::getRenderAreaGranularity() { return { 1, 1 }; } MVKRenderSubpass* MVKRenderPass::getSubpass(uint32_t subpassIndex) { return &_subpasses[subpassIndex]; } +bool MVKRenderPass::isMultiview() const { return _subpasses[0].isMultiview(); } + MVKRenderPass::MVKRenderPass(MVKDevice* device, const VkRenderPassCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) { + const VkRenderPassInputAttachmentAspectCreateInfo* pInputAspectCreateInfo = nullptr; + const VkRenderPassMultiviewCreateInfo* pMultiviewCreateInfo = nullptr; + for (auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO: + pInputAspectCreateInfo = (const VkRenderPassInputAttachmentAspectCreateInfo*)next; + break; + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: + pMultiviewCreateInfo = (const VkRenderPassMultiviewCreateInfo*)next; + break; + default: + break; + } + } + + const uint32_t* viewMasks = nullptr; + const int32_t* viewOffsets = nullptr; + if (pMultiviewCreateInfo && pMultiviewCreateInfo->subpassCount) { + viewMasks = pMultiviewCreateInfo->pViewMasks; + } + if (pMultiviewCreateInfo && pMultiviewCreateInfo->dependencyCount) { + viewOffsets = pMultiviewCreateInfo->pViewOffsets; + } + + // Add subpasses and dependencies first + _subpasses.reserve(pCreateInfo->subpassCount); + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + _subpasses.emplace_back(this, &pCreateInfo->pSubpasses[i], pInputAspectCreateInfo, viewMasks ? 
viewMasks[i] : 0); + } + _subpassDependencies.reserve(pCreateInfo->dependencyCount); + for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { + VkSubpassDependency2 dependency = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, + .pNext = nullptr, + .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass, + .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass, + .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask, + .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask, + .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask, + .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask, + .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags, + .viewOffset = viewOffsets ? viewOffsets[i] : 0, + }; + _subpassDependencies.push_back(dependency); + } + + // Add attachments after subpasses, so each attachment can link to subpasses + _attachments.reserve(pCreateInfo->attachmentCount); + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + _attachments.emplace_back(this, &pCreateInfo->pAttachments[i]); + } +} + +MVKRenderPass::MVKRenderPass(MVKDevice* device, + const VkRenderPassCreateInfo2* pCreateInfo) : MVKVulkanAPIDeviceObject(device) { + // Add subpasses and dependencies first _subpasses.reserve(pCreateInfo->subpassCount); for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index 91e5b8c0..604b2119 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -239,6 +239,7 @@ void MVKShaderLibraryCache::merge(MVKShaderLibraryCache* other) { for (auto& otherPair : other->_shaderLibraries) { if ( !findShaderLibrary(&otherPair.first) ) { _shaderLibraries.emplace_back(otherPair.first, new MVKShaderLibrary(*otherPair.second)); + _shaderLibraries.back().second->_owner = _owner; } } } diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def 
b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index 9826c513..cbb1f16d 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -42,13 +42,18 @@ MVK_EXTENSION(KHR_16bit_storage, KHR_16BIT_STORAGE, DEVICE) MVK_EXTENSION(KHR_8bit_storage, KHR_8BIT_STORAGE, DEVICE) MVK_EXTENSION(KHR_bind_memory2, KHR_BIND_MEMORY_2, DEVICE) +MVK_EXTENSION(KHR_create_renderpass2, KHR_CREATE_RENDERPASS_2, DEVICE) MVK_EXTENSION(KHR_dedicated_allocation, KHR_DEDICATED_ALLOCATION, DEVICE) MVK_EXTENSION(KHR_descriptor_update_template, KHR_DESCRIPTOR_UPDATE_TEMPLATE, DEVICE) MVK_EXTENSION(KHR_device_group, KHR_DEVICE_GROUP, DEVICE) MVK_EXTENSION(KHR_device_group_creation, KHR_DEVICE_GROUP_CREATION, INSTANCE) MVK_EXTENSION(KHR_driver_properties, KHR_DRIVER_PROPERTIES, DEVICE) +MVK_EXTENSION(KHR_external_fence, KHR_EXTERNAL_FENCE, DEVICE) +MVK_EXTENSION(KHR_external_fence_capabilities, KHR_EXTERNAL_FENCE_CAPABILITIES, INSTANCE) MVK_EXTENSION(KHR_external_memory, KHR_EXTERNAL_MEMORY, DEVICE) MVK_EXTENSION(KHR_external_memory_capabilities, KHR_EXTERNAL_MEMORY_CAPABILITIES, INSTANCE) +MVK_EXTENSION(KHR_external_semaphore, KHR_EXTERNAL_SEMAPHORE, DEVICE) +MVK_EXTENSION(KHR_external_semaphore_capabilities, KHR_EXTERNAL_SEMAPHORE_CAPABILITIES, INSTANCE) MVK_EXTENSION(KHR_get_memory_requirements2, KHR_GET_MEMORY_REQUIREMENTS_2, DEVICE) MVK_EXTENSION(KHR_get_physical_device_properties2, KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2, INSTANCE) MVK_EXTENSION(KHR_get_surface_capabilities2, KHR_GET_SURFACE_CAPABILITIES_2, INSTANCE) @@ -56,6 +61,7 @@ MVK_EXTENSION(KHR_image_format_list, KHR_IMAGE_FORMAT_LIST, DEVICE) MVK_EXTENSION(KHR_maintenance1, KHR_MAINTENANCE1, DEVICE) MVK_EXTENSION(KHR_maintenance2, KHR_MAINTENANCE2, DEVICE) MVK_EXTENSION(KHR_maintenance3, KHR_MAINTENANCE3, DEVICE) +MVK_EXTENSION(KHR_multiview, KHR_MULTIVIEW, DEVICE) MVK_EXTENSION(KHR_push_descriptor, KHR_PUSH_DESCRIPTOR, DEVICE) MVK_EXTENSION(KHR_relaxed_block_layout, KHR_RELAXED_BLOCK_LAYOUT, 
DEVICE) MVK_EXTENSION(KHR_sampler_mirror_clamp_to_edge, KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, DEVICE) diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h index 2e1c982d..1b93fadd 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h @@ -35,8 +35,8 @@ #endif /** Macro to determine the Vulkan version supported by MoltenVK. */ -#define MVK_VULKAN_API_VERSION VK_MAKE_VERSION(VK_VERSION_MAJOR(VK_API_VERSION_1_0), \ - VK_VERSION_MINOR(VK_API_VERSION_1_0), \ +#define MVK_VULKAN_API_VERSION VK_MAKE_VERSION(VK_VERSION_MAJOR(VK_API_VERSION_1_1), \ + VK_VERSION_MINOR(VK_API_VERSION_1_1), \ VK_HEADER_VERSION) /** diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index b9b44c1b..9748abcc 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -86,6 +86,7 @@ typedef enum : uint8_t { kMVKCommandUseResetQueryPool, /**< vkCmdResetQueryPool. */ kMVKCommandUseDispatch, /**< vkCmdDispatch. */ kMVKCommandUseTessellationVertexTessCtl,/**< vkCmdDraw* - vertex and tessellation control stages. */ + kMVKCommandUseMultiviewInstanceCountAdjust,/**< vkCmdDrawIndirect* - adjust instance count for multiview. */ kMVKCommandUseCopyQueryPoolResults /**< vkCmdCopyQueryPoolResults. 
*/ } MVKCommandUse; diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index b84a88c1..f0e18246 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -158,9 +158,12 @@ static inline void MVKTraceVulkanCallEndImpl(const char* funcName, uint64_t star MVKAddCmd(baseCmdType ##Multi, vkCmdBuff, ##__VA_ARGS__); \ } +// Define an extension call as an alias of a core call +#define MVK_PUBLIC_CORE_ALIAS(vkf) MVK_PUBLIC_ALIAS(vkf##KHR, vkf) + #pragma mark - -#pragma mark Vulkan calls +#pragma mark Vulkan 1.0 calls MVK_PUBLIC_SYMBOL VkResult vkCreateInstance( const VkInstanceCreateInfo* pCreateInfo, @@ -278,6 +281,8 @@ MVK_PUBLIC_SYMBOL PFN_vkVoidFunction vkGetInstanceProcAddr( func = (PFN_vkVoidFunction)vkEnumerateInstanceExtensionProperties; } else if (strcmp(pName, "vkEnumerateInstanceLayerProperties") == 0) { func = (PFN_vkVoidFunction)vkEnumerateInstanceLayerProperties; + } else if (strcmp(pName, "vkEnumerateInstanceVersion") == 0) { + func = (PFN_vkVoidFunction)vkEnumerateInstanceVersion; } else if (instance) { MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance); func = mvkInst->getProcAddr(pName); @@ -1900,12 +1905,156 @@ MVK_PUBLIC_SYMBOL void vkCmdExecuteCommands( #pragma mark - -#pragma mark VK_KHR_bind_memory2 extension +#pragma mark Vulkan 1.1 calls -MVK_PUBLIC_SYMBOL VkResult vkBindBufferMemory2KHR( +MVK_PUBLIC_SYMBOL VkResult vkEnumerateInstanceVersion( + uint32_t* pApiVersion) { + + MVKTraceVulkanCallStart(); + *pApiVersion = MVK_VULKAN_API_VERSION; + MVKTraceVulkanCallEnd(); + return VK_SUCCESS; +} + +MVK_PUBLIC_SYMBOL VkResult vkEnumeratePhysicalDeviceGroups( + VkInstance instance, + uint32_t* pPhysicalDeviceGroupCount, + VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties) { + MVKTraceVulkanCallStart(); + MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance); + VkResult rslt = mvkInst->getPhysicalDeviceGroups(pPhysicalDeviceGroupCount, 
pPhysicalDeviceGroupProperties); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceFeatures2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2* pFeatures) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getFeatures(pFeatures); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceProperties2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties2* pProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getProperties(pProperties); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceFormatProperties2( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties2* pFormatProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getFormatProperties(format, pFormatProperties); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL VkResult vkGetPhysicalDeviceImageFormatProperties2( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceImageFormatInfo2* pImageFormatInfo, + VkImageFormatProperties2* pImageFormatProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + VkResult rslt = mvkPD->getImageFormatProperties(pImageFormatInfo, pImageFormatProperties); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceQueueFamilyProperties2( + VkPhysicalDevice physicalDevice, + uint32_t* pQueueFamilyPropertyCount, + VkQueueFamilyProperties2* pQueueFamilyProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getQueueFamilyProperties(pQueueFamilyPropertyCount, pQueueFamilyProperties); + MVKTraceVulkanCallEnd(); +} + 
+MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceMemoryProperties2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties2* pMemoryProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getMemoryProperties(pMemoryProperties); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceSparseImageFormatProperties2( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2* pFormatInfo, + uint32_t* pPropertyCount, + VkSparseImageFormatProperties2* pProperties) { + + MVKTraceVulkanCallStart(); + + // Metal does not support sparse images. + // Vulkan spec: "If VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT is not supported for the given arguments, + // pPropertyCount will be set to zero upon return, and no data will be written to pProperties.". + + *pPropertyCount = 0; + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceExternalFenceProperties( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo, + VkExternalFenceProperties* pExternalFenceProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getExternalFenceProperties(pExternalFenceInfo, pExternalFenceProperties); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceExternalBufferProperties( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, + VkExternalBufferProperties* pExternalBufferProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getExternalBufferProperties(pExternalBufferInfo, pExternalBufferProperties); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceExternalSemaphoreProperties( + VkPhysicalDevice physicalDevice, + const 
VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo, + VkExternalSemaphoreProperties* pExternalSemaphoreProperties) { + + MVKTraceVulkanCallStart(); + MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); + mvkPD->getExternalSemaphoreProperties(pExternalSemaphoreInfo, pExternalSemaphoreProperties); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetDeviceQueue2( + VkDevice device, + const VkDeviceQueueInfo2* pQueueInfo, + VkQueue* pQueue) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + *pQueue = mvkDev->getQueue(pQueueInfo)->getVkQueue(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL VkResult vkBindBufferMemory2( VkDevice device, uint32_t bindInfoCount, - const VkBindBufferMemoryInfoKHR* pBindInfos) { + const VkBindBufferMemoryInfo* pBindInfos) { MVKTraceVulkanCallStart(); VkResult rslt = VK_SUCCESS; @@ -1918,10 +2067,10 @@ MVK_PUBLIC_SYMBOL VkResult vkBindBufferMemory2KHR( return rslt; } -MVK_PUBLIC_SYMBOL VkResult vkBindImageMemory2KHR( +MVK_PUBLIC_SYMBOL VkResult vkBindImageMemory2( VkDevice device, uint32_t bindInfoCount, - const VkBindImageMemoryInfoKHR* pBindInfos) { + const VkBindImageMemoryInfo* pBindInfos) { MVKTraceVulkanCallStart(); VkResult rslt = VK_SUCCESS; @@ -1934,29 +2083,76 @@ MVK_PUBLIC_SYMBOL VkResult vkBindImageMemory2KHR( return rslt; } +MVK_PUBLIC_SYMBOL void vkGetBufferMemoryRequirements2( + VkDevice device, + const VkBufferMemoryRequirementsInfo2* pInfo, + VkMemoryRequirements2* pMemoryRequirements) { -#pragma mark - -#pragma mark VK_KHR_descriptor_update_template extension + MVKTraceVulkanCallStart(); + MVKBuffer* mvkBuff = (MVKBuffer*)pInfo->buffer; + mvkBuff->getMemoryRequirements(pInfo, pMemoryRequirements); + MVKTraceVulkanCallEnd(); +} -MVK_PUBLIC_SYMBOL VkResult vkCreateDescriptorUpdateTemplateKHR( +MVK_PUBLIC_SYMBOL void vkGetImageMemoryRequirements2( + VkDevice device, + const VkImageMemoryRequirementsInfo2* pInfo, + 
VkMemoryRequirements2* pMemoryRequirements) { + + MVKTraceVulkanCallStart(); + auto* mvkImg = (MVKImage*)pInfo->image; + mvkImg->getMemoryRequirements(pInfo, pMemoryRequirements); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetImageSparseMemoryRequirements2( + VkDevice device, + const VkImageSparseMemoryRequirementsInfo2* pInfo, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2* pSparseMemoryRequirements) { + + MVKTraceVulkanCallStart(); + + // Metal does not support sparse images. + // Vulkan spec: "If the image was not created with VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT then + // pSparseMemoryRequirementCount will be set to zero and pSparseMemoryRequirements will not be written to.". + + *pSparseMemoryRequirementCount = 0; + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkGetDeviceGroupPeerMemoryFeatures( + VkDevice device, + uint32_t heapIndex, + uint32_t localDeviceIndex, + uint32_t remoteDeviceIndex, + VkPeerMemoryFeatureFlags* pPeerMemoryFeatures) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + mvkDev->getPeerMemoryFeatures(heapIndex, localDeviceIndex, remoteDeviceIndex, pPeerMemoryFeatures); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL VkResult vkCreateDescriptorUpdateTemplate( VkDevice device, - const VkDescriptorUpdateTemplateCreateInfoKHR* pCreateInfo, + const VkDescriptorUpdateTemplateCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, - VkDescriptorUpdateTemplateKHR* pDescriptorUpdateTemplate) { + VkDescriptorUpdateTemplate* pDescriptorUpdateTemplate) { MVKTraceVulkanCallStart(); MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); auto *mvkDUT = mvkDev->createDescriptorUpdateTemplate(pCreateInfo, pAllocator); - *pDescriptorUpdateTemplate = (VkDescriptorUpdateTemplateKHR)mvkDUT; + *pDescriptorUpdateTemplate = (VkDescriptorUpdateTemplate)mvkDUT; VkResult rslt = mvkDUT->getConfigurationResult(); MVKTraceVulkanCallEnd(); return rslt; } 
-MVK_PUBLIC_SYMBOL void vkDestroyDescriptorUpdateTemplateKHR( +MVK_PUBLIC_SYMBOL void vkDestroyDescriptorUpdateTemplate( VkDevice device, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator) { MVKTraceVulkanCallStart(); @@ -1965,10 +2161,10 @@ MVK_PUBLIC_SYMBOL void vkDestroyDescriptorUpdateTemplateKHR( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_SYMBOL void vkUpdateDescriptorSetWithTemplateKHR( +MVK_PUBLIC_SYMBOL void vkUpdateDescriptorSetWithTemplate( VkDevice device, VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void* pData) { MVKTraceVulkanCallStart(); @@ -1976,24 +2172,56 @@ MVK_PUBLIC_SYMBOL void vkUpdateDescriptorSetWithTemplateKHR( MVKTraceVulkanCallEnd(); } - -#pragma mark - -#pragma mark VK_KHR_device_group extension - -MVK_PUBLIC_SYMBOL void vkGetDeviceGroupPeerMemoryFeaturesKHR( +MVK_PUBLIC_SYMBOL void vkGetDescriptorSetLayoutSupport( VkDevice device, - uint32_t heapIndex, - uint32_t localDeviceIndex, - uint32_t remoteDeviceIndex, - VkPeerMemoryFeatureFlagsKHR* pPeerMemoryFeatures) { + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + VkDescriptorSetLayoutSupport* pSupport) { - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - mvkDev->getPeerMemoryFeatures(heapIndex, localDeviceIndex, remoteDeviceIndex, pPeerMemoryFeatures); - MVKTraceVulkanCallEnd(); + MVKTraceVulkanCallStart(); + MVKDevice* mvkDevice = MVKDevice::getMVKDevice(device); + mvkDevice->getDescriptorSetLayoutSupport(pCreateInfo, pSupport); + MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_SYMBOL void vkCmdSetDeviceMaskKHR( +MVK_PUBLIC_SYMBOL VkResult vkCreateSamplerYcbcrConversion( + VkDevice device, + const VkSamplerYcbcrConversionCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSamplerYcbcrConversion* pYcbcrConversion) { + + 
MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + MVKSamplerYcbcrConversion* mvkSampConv = mvkDev->createSamplerYcbcrConversion(pCreateInfo, pAllocator); + *pYcbcrConversion = (VkSamplerYcbcrConversion)mvkSampConv; + VkResult rslt = mvkSampConv->getConfigurationResult(); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_SYMBOL void vkDestroySamplerYcbcrConversion( + VkDevice device, + VkSamplerYcbcrConversion ycbcrConversion, + const VkAllocationCallbacks* pAllocator) { + + MVKTraceVulkanCallStart(); + if ( !ycbcrConversion ) { return; } + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + mvkDev->destroySamplerYcbcrConversion((MVKSamplerYcbcrConversion*)ycbcrConversion, pAllocator); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkTrimCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolTrimFlags flags) { + + MVKTraceVulkanCallStart(); + MVKCommandPool* mvkCmdPool = (MVKCommandPool*)commandPool; + mvkCmdPool->trim(); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkCmdSetDeviceMask( VkCommandBuffer commandBuffer, uint32_t deviceMask) { @@ -2003,7 +2231,7 @@ MVK_PUBLIC_SYMBOL void vkCmdSetDeviceMaskKHR( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_SYMBOL void vkCmdDispatchBaseKHR( +MVK_PUBLIC_SYMBOL void vkCmdDispatchBase( VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, @@ -2018,175 +2246,131 @@ MVK_PUBLIC_SYMBOL void vkCmdDispatchBaseKHR( } +#pragma mark - +#pragma mark VK_KHR_bind_memory2 extension + +MVK_PUBLIC_CORE_ALIAS(vkBindBufferMemory2); +MVK_PUBLIC_CORE_ALIAS(vkBindImageMemory2); + + +#pragma mark - +#pragma mark VK_KHR_create_renderpass2 extension + +MVK_PUBLIC_SYMBOL VkResult vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + MVKRenderPass* mvkRendPass 
= mvkDev->createRenderPass(pCreateInfo, pAllocator); + *pRenderPass = (VkRenderPass)mvkRendPass; + VkResult rslt = mvkRendPass->getConfigurationResult(); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_SYMBOL void vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFrom2Thresholds(BeginRenderPass, pRenderPassBegin->clearValueCount, 1, 2, commandBuffer, pRenderPassBegin, pSubpassBeginInfo); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkCmdNextSubpass2KHR( + VkCommandBuffer commandBuffer, + const VkSubpassBeginInfo* pSubpassBeginInfo, + const VkSubpassEndInfo* pSubpassEndInfo) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(NextSubpass, commandBuffer, pSubpassBeginInfo, pSubpassEndInfo); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_SYMBOL void vkCmdEndRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkSubpassEndInfo* pSubpassEndInfo) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(EndRenderPass, commandBuffer, pSubpassEndInfo); + MVKTraceVulkanCallEnd(); +} + + +#pragma mark - +#pragma mark VK_KHR_descriptor_update_template extension + +MVK_PUBLIC_CORE_ALIAS(vkCreateDescriptorUpdateTemplate); +MVK_PUBLIC_CORE_ALIAS(vkDestroyDescriptorUpdateTemplate); +MVK_PUBLIC_CORE_ALIAS(vkUpdateDescriptorSetWithTemplate); + + +#pragma mark - +#pragma mark VK_KHR_device_group extension + +MVK_PUBLIC_CORE_ALIAS(vkGetDeviceGroupPeerMemoryFeatures); +MVK_PUBLIC_CORE_ALIAS(vkCmdSetDeviceMask); +MVK_PUBLIC_CORE_ALIAS(vkCmdDispatchBase); + + #pragma mark - #pragma mark VK_KHR_device_group_creation extension -MVK_PUBLIC_SYMBOL VkResult vkEnumeratePhysicalDeviceGroupsKHR( - VkInstance instance, - uint32_t* pPhysicalDeviceGroupCount, - VkPhysicalDeviceGroupPropertiesKHR* pPhysicalDeviceGroupProperties) { - MVKTraceVulkanCallStart(); - MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance); - VkResult rslt = 
mvkInst->getPhysicalDeviceGroups(pPhysicalDeviceGroupCount, pPhysicalDeviceGroupProperties); - MVKTraceVulkanCallEnd(); - return rslt; -} +MVK_PUBLIC_CORE_ALIAS(vkEnumeratePhysicalDeviceGroups); + + +#pragma mark - +#pragma mark VK_KHR_external_fence_capabilities extension + +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceExternalFenceProperties); + + +#pragma mark - +#pragma mark VK_KHR_external_memory_capabilities extension + +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceExternalBufferProperties); + + +#pragma mark - +#pragma mark VK_KHR_external_semaphore_capabilities extension + +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceExternalSemaphoreProperties); #pragma mark - #pragma mark VK_KHR_get_memory_requirements2 extension -MVK_PUBLIC_SYMBOL void vkGetBufferMemoryRequirements2KHR( - VkDevice device, - const VkBufferMemoryRequirementsInfo2KHR* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements) { - - MVKTraceVulkanCallStart(); - MVKBuffer* mvkBuff = (MVKBuffer*)pInfo->buffer; - mvkBuff->getMemoryRequirements(pInfo, pMemoryRequirements); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_SYMBOL void vkGetImageMemoryRequirements2KHR( - VkDevice device, - const VkImageMemoryRequirementsInfo2KHR* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements) { - - MVKTraceVulkanCallStart(); - auto* mvkImg = (MVKImage*)pInfo->image; - mvkImg->getMemoryRequirements(pInfo, pMemoryRequirements); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_SYMBOL void vkGetImageSparseMemoryRequirements2KHR( - VkDevice device, - const VkImageSparseMemoryRequirementsInfo2KHR* pInfo, - uint32_t* pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements) { - - MVKTraceVulkanCallStart(); - - // Metal does not support sparse images. - // Vulkan spec: "If the image was not created with VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT then - // pSparseMemoryRequirementCount will be set to zero and pSparseMemoryRequirements will not be written to.". 
- - *pSparseMemoryRequirementCount = 0; - MVKTraceVulkanCallEnd(); -} +MVK_PUBLIC_CORE_ALIAS(vkGetBufferMemoryRequirements2); +MVK_PUBLIC_CORE_ALIAS(vkGetImageMemoryRequirements2); +MVK_PUBLIC_CORE_ALIAS(vkGetImageSparseMemoryRequirements2); #pragma mark - #pragma mark VK_KHR_get_physical_device_properties2 extension -MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceFeatures2KHR( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures2KHR* pFeatures) { - - MVKTraceVulkanCallStart(); - MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); - mvkPD->getFeatures(pFeatures); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceProperties2KHR( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2KHR* pProperties) { - - MVKTraceVulkanCallStart(); - MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); - mvkPD->getProperties(pProperties); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceFormatProperties2KHR( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties2KHR* pFormatProperties) { - - MVKTraceVulkanCallStart(); - MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); - mvkPD->getFormatProperties(format, pFormatProperties); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_SYMBOL VkResult vkGetPhysicalDeviceImageFormatProperties2KHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo, - VkImageFormatProperties2KHR* pImageFormatProperties) { - - MVKTraceVulkanCallStart(); - MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); - VkResult rslt = mvkPD->getImageFormatProperties(pImageFormatInfo, pImageFormatProperties); - MVKTraceVulkanCallEnd(); - return rslt; -} - -MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceQueueFamilyProperties2KHR( - VkPhysicalDevice physicalDevice, - uint32_t* pQueueFamilyPropertyCount, - VkQueueFamilyProperties2KHR* 
pQueueFamilyProperties) { - - MVKTraceVulkanCallStart(); - MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); - mvkPD->getQueueFamilyProperties(pQueueFamilyPropertyCount, pQueueFamilyProperties); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceMemoryProperties2KHR( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties) { - - MVKTraceVulkanCallStart(); - MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); - mvkPD->getMemoryProperties(pMemoryProperties); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceSparseImageFormatProperties2KHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, - uint32_t* pPropertyCount, - VkSparseImageFormatProperties2KHR* pProperties) { - - MVKTraceVulkanCallStart(); - - // Metal does not support sparse images. - // Vulkan spec: "If VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT is not supported for the given arguments, - // pPropertyCount will be set to zero upon return, and no data will be written to pProperties.". 
- - *pPropertyCount = 0; - MVKTraceVulkanCallEnd(); -} +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceFeatures2); +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceProperties2); +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceFormatProperties2); +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceImageFormatProperties2); +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceQueueFamilyProperties2); +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceMemoryProperties2); +MVK_PUBLIC_CORE_ALIAS(vkGetPhysicalDeviceSparseImageFormatProperties2); #pragma mark - #pragma mark VK_KHR_maintenance1 extension -MVK_PUBLIC_SYMBOL void vkTrimCommandPoolKHR( - VkDevice device, - VkCommandPool commandPool, - VkCommandPoolTrimFlagsKHR flags) { - - MVKTraceVulkanCallStart(); - MVKCommandPool* mvkCmdPool = (MVKCommandPool*)commandPool; - mvkCmdPool->trim(); - MVKTraceVulkanCallEnd(); -} +MVK_PUBLIC_CORE_ALIAS(vkTrimCommandPool); #pragma mark - #pragma mark VK_KHR_maintenance3 extension -MVK_PUBLIC_SYMBOL void vkGetDescriptorSetLayoutSupportKHR( - VkDevice device, - const VkDescriptorSetLayoutCreateInfo* pCreateInfo, - VkDescriptorSetLayoutSupportKHR* pSupport) { - - MVKTraceVulkanCallStart(); - MVKDevice* mvkDevice = MVKDevice::getMVKDevice(device); - mvkDevice->getDescriptorSetLayoutSupport(pCreateInfo, pSupport); - MVKTraceVulkanCallEnd(); -} +MVK_PUBLIC_CORE_ALIAS(vkGetDescriptorSetLayoutSupport); #pragma mark - @@ -2221,32 +2405,8 @@ MVK_PUBLIC_SYMBOL void vkCmdPushDescriptorSetWithTemplateKHR( #pragma mark - #pragma mark VK_KHR_sampler_ycbcr_conversion extension -MVK_PUBLIC_SYMBOL VkResult vkCreateSamplerYcbcrConversionKHR( - VkDevice device, - const VkSamplerYcbcrConversionCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSamplerYcbcrConversion* pYcbcrConversion) { - - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - MVKSamplerYcbcrConversion* mvkSampConv = mvkDev->createSamplerYcbcrConversion(pCreateInfo, pAllocator); - *pYcbcrConversion = 
(VkSamplerYcbcrConversion)mvkSampConv; - VkResult rslt = mvkSampConv->getConfigurationResult(); - MVKTraceVulkanCallEnd(); - return rslt; -} - -MVK_PUBLIC_SYMBOL void vkDestroySamplerYcbcrConversionKHR( - VkDevice device, - VkSamplerYcbcrConversion ycbcrConversion, - const VkAllocationCallbacks* pAllocator) { - - MVKTraceVulkanCallStart(); - if ( !ycbcrConversion ) { return; } - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - mvkDev->destroySamplerYcbcrConversion((MVKSamplerYcbcrConversion*)ycbcrConversion, pAllocator); - MVKTraceVulkanCallEnd(); -} +MVK_PUBLIC_CORE_ALIAS(vkCreateSamplerYcbcrConversion); +MVK_PUBLIC_CORE_ALIAS(vkDestroySamplerYcbcrConversion); #pragma mark - @@ -2716,21 +2876,6 @@ MVK_PUBLIC_SYMBOL void vkResetQueryPoolEXT( } -#pragma mark - -#pragma mark VK_KHR_external_memory_capabilities extension - -MVK_PUBLIC_SYMBOL void vkGetPhysicalDeviceExternalBufferPropertiesKHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, - VkExternalBufferProperties* pExternalBufferProperties) { - - MVKTraceVulkanCallStart(); - MVKPhysicalDevice* mvkPD = MVKPhysicalDevice::getMVKPhysicalDevice(physicalDevice); - mvkPD->getExternalBufferProperties(pExternalBufferInfo, pExternalBufferProperties); - MVKTraceVulkanCallEnd(); -} - - #pragma mark - #pragma mark VK_EXT_metal_surface extension diff --git a/MoltenVKPackaging.xcodeproj/project.pbxproj b/MoltenVKPackaging.xcodeproj/project.pbxproj index 13028ecb..541efd7a 100644 --- a/MoltenVKPackaging.xcodeproj/project.pbxproj +++ b/MoltenVKPackaging.xcodeproj/project.pbxproj @@ -321,7 +321,7 @@ A90B2B1D1A9B6170008EE819 /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 1200; + LastUpgradeCheck = 1170; TargetAttributes = { A9FEADBC1F3517480010240E = { DevelopmentTeam = VU3TCKU48B; diff --git a/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MVKShaderConverterTool Package.xcscheme 
b/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MVKShaderConverterTool Package.xcscheme index e47c8065..b3008226 100644 --- a/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MVKShaderConverterTool Package.xcscheme +++ b/MoltenVKPackaging.xcodeproj/xcshareddata/xcschemes/MVKShaderConverterTool Package.xcscheme @@ -1,6 +1,6 @@ needs_buffer_size_buffer(); _shaderConversionResults.needsInputThreadgroupMem = pMSLCompiler && pMSLCompiler->needs_input_threadgroup_mem(); _shaderConversionResults.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer(); + _shaderConversionResults.needsViewRangeBuffer = pMSLCompiler && pMSLCompiler->needs_view_mask_buffer(); for (auto& ctxSI : context.shaderInputs) { ctxSI.isUsedByShader = pMSLCompiler->is_msl_shader_input_used(ctxSI.shaderInput.location); diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h index 22d405b1..f642644a 100644 --- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h +++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h @@ -209,6 +209,7 @@ namespace mvk { bool needsBufferSizeBuffer = false; bool needsInputThreadgroupMem = false; bool needsDispatchBaseBuffer = false; + bool needsViewRangeBuffer = false; void reset() { *this = SPIRVToMSLConversionResults(); } diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj index d6f3bf6e..f71d67ef 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj @@ -512,7 +512,7 @@ A9F55D25198BE6A7004EC31B /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 1200; + LastUpgradeCheck = 1170; ORGANIZATIONNAME = "The Brenwill Workshop Ltd."; 
TargetAttributes = { A9092A8C1A81717B00051823 = { diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKGLSLToSPIRVConverter-iOS.xcscheme b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKGLSLToSPIRVConverter-iOS.xcscheme index 5be07e38..0ec7c34d 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKGLSLToSPIRVConverter-iOS.xcscheme +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKGLSLToSPIRVConverter-iOS.xcscheme @@ -1,6 +1,6 @@