Support the VK_KHR_multiview extension.
Metal originally did not support multiview directly, and still largely doesn't on GPUs other than Apple family 6. This implementation therefore uses vertex instancing to draw the needed views. To meet the Vulkan requirement that only the layers for the enabled views are loaded and stored in a multiview render pass, this implementation uses multiple Metal render passes for multiple "clumps" of enabled views.

For indirect draws, as with tessellation, we must adjust the draw parameters at execution time to account for the extra views, so we need to use deferred store actions here; without them, tracking the state becomes too involved. If the implementation doesn't support either layered rendering or deferred store actions, multiview render passes are instead unrolled and rendered one view at a time. This lets us support the extension even on older devices and OSes, but at the cost of additional command buffer memory and possibly worse performance.

Eventually, we should consider using vertex amplification to accelerate this, particularly since indirect multiview draws currently require an extra compute pass to adjust the instance count, and instanced drawing itself performs poorly. But since vertex amplification on family 6 supports only two views, while `VK_KHR_multiview` mandates a minimum of six, we'll still need instancing to support more than two views.

I have tested this extensively against the CTS and am very confident in its correctness. The only failing tests are `dEQP-VK.multiview.queries.*`, due to our inadequate implementation of timestamp queries; and `dEQP-VK.multiview.depth.*`, due to what I assume is a bug in the way Metal handles arrayed packed depth/stencil textures, which may only be a problem on Mojave. I still need to test this on Catalina and Big Sur.

Update SPIRV-Cross to pull in some fixes necessary for this to work.

Fixes #347.
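A minimal sketch of the instancing trick, in Metal Shading Language (hypothetical names; the real vertex-function rewriting is generated by SPIRV-Cross, and the real view range comes from an implicit buffer MoltenVK binds — see `bindViewRangeBuffer` below): each Vulkan instance becomes one Metal instance per view in the current Metal pass, and the shader unpacks the pair again, routing each copy to its view's layer.

```metal
#include <metal_stdlib>
using namespace metal;

struct VSOut {
    float4 position [[position]];
    uint   slice    [[render_target_array_index]];  // routes this copy to its view's layer
};

// viewRange = {first view index, view count} for the current Metal pass.
vertex VSOut multiview_vtx(uint mtlInstance          [[instance_id]],
                           constant uint2& viewRange [[buffer(0)]]) {
    uint viewIndex   = viewRange.x + mtlInstance % viewRange.y;  // reconstructed SPIR-V ViewIndex
    uint appInstance = mtlInstance / viewRange.y;                // instance index the app actually drew
    VSOut out;
    out.slice    = viewIndex - viewRange.x;     // base slice of the clump is set in the MTLRenderPassDescriptor
    out.position = float4(0.0, 0.0, 0.0, 1.0);  // real shading would use viewIndex and appInstance
    return out;
}
```

This is why every draw in the hunks below multiplies `instanceCount` by the view count of the current Metal pass.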
parent 9ef4130edd
commit 34930eaf5b
Docs/MoltenVK_Runtime_UserGuide.md:

@@ -271,6 +271,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll
 - `VK_KHR_maintenance1`
 - `VK_KHR_maintenance2`
 - `VK_KHR_maintenance3`
+- `VK_KHR_multiview`
 - `VK_KHR_push_descriptor`
 - `VK_KHR_relaxed_block_layout`
 - `VK_KHR_sampler_mirror_clamp_to_edge` *(macOS)*
@@ -297,7 +298,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll
 - `VK_EXT_scalar_block_layout`
 - `VK_EXT_shader_stencil_export` *(requires Mac GPU family 2 or iOS GPU family 5)*
 - `VK_EXT_shader_viewport_index_layer`
-- `VK_EXT_swapchain_colorspace` *(macOS)*
+- `VK_EXT_swapchain_colorspace`
 - `VK_EXT_vertex_attribute_divisor`
 - `VK_EXT_texel_buffer_alignment` *(requires Metal 2.0)*
 - `VK_EXTX_portability_subset`
Docs/Whats_New.md:

@@ -18,6 +18,8 @@ MoltenVK 1.0.45
 
 Released TBD
 
+- Add support for extensions:
+	- `VK_KHR_multiview`
 - Improve performance of tessellation control pipeline stage by processing multiple
   patches per workgroup.
 - `vkCmdBindDescriptorSets` order `pDynamicOffsets` by descriptor binding number
ExternalRevisions/SPIRV-Cross_repo_revision:

@@ -1 +1 @@
-0376576d2dc0721edfb2c5a0257fdc275f6f39dc
+bad9dab8df6f2e6b80da9693db247b9357aebd2f
MVKCmdDraw.mm:

@@ -243,17 +243,20 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
 			cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
 			cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
 		} else {
+			MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
+			uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
+			uint32_t instanceCount = _instanceCount * viewCount;
 			if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
 				[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
 				                                  vertexStart: _firstVertex
 				                                  vertexCount: _vertexCount
-				                                instanceCount: _instanceCount
+				                                instanceCount: instanceCount
 				                                 baseInstance: _firstInstance];
 			} else {
 				[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
 				                                  vertexStart: _firstVertex
 				                                  vertexCount: _vertexCount
-				                                instanceCount: _instanceCount];
+				                                instanceCount: instanceCount];
 			}
 		}
 		break;
@@ -440,13 +443,16 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
 			cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
 			cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
 		} else {
+			MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
+			uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
+			uint32_t instanceCount = _instanceCount * viewCount;
 			if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
 				[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
 				                                          indexCount: _indexCount
 				                                           indexType: (MTLIndexType)ibb.mtlIndexType
 				                                         indexBuffer: ibb.mtlBuffer
 				                                   indexBufferOffset: idxBuffOffset
-				                                       instanceCount: _instanceCount
+				                                       instanceCount: instanceCount
 				                                          baseVertex: _vertexOffset
 				                                        baseInstance: _firstInstance];
 			} else {
@@ -455,7 +461,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
 				                                           indexType: (MTLIndexType)ibb.mtlIndexType
 				                                         indexBuffer: ibb.mtlBuffer
 				                                   indexBufferOffset: idxBuffOffset
-				                                       instanceCount: _instanceCount];
+				                                       instanceCount: instanceCount];
 			}
 		}
 		break;
@@ -499,11 +505,13 @@ static const uint32_t kMVKDrawIndirectVertexCountUpperBound = 131072;
 void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 
 	auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
+	bool needsInstanceAdjustment = cmdEncoder->getSubpass()->isMultiview() &&
+	                               cmdEncoder->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview();
 	// The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats.
 	// We have to convert from the drawPrimitives:... format to them.
 	// While we're at it, we can create the temporary output buffers once and reuse them
 	// for each draw.
-	const MVKMTLBufferAllocation* tcIndirectBuff = nullptr;
+	const MVKMTLBufferAllocation* tempIndirectBuff = nullptr;
 	const MVKMTLBufferAllocation* tcParamsBuff = nullptr;
 	const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
 	const MVKMTLBufferAllocation* tcOutBuff = nullptr;
@@ -513,7 +521,8 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 	uint32_t inControlPointCount = 0, outControlPointCount = 0;
 	VkDeviceSize paramsIncr = 0;
 
-	VkDeviceSize mtlTCIndBuffOfst = 0;
+	id<MTLBuffer> mtlIndBuff = _mtlIndirectBuffer;
+	VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
 	VkDeviceSize mtlParmBuffOfst = 0;
 	NSUInteger vtxThreadExecWidth = 0;
 	NSUInteger tcWorkgroupSize = 0;
@@ -533,8 +542,9 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 		}
 		paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
 		VkDeviceSize paramsSize = paramsIncr * _drawCount;
-		tcIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
-		mtlTCIndBuffOfst = tcIndirectBuff->_offset;
+		tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+		mtlIndBuff = tempIndirectBuff->_mtlBuffer;
+		mtlIndBuffOfst = tempIndirectBuff->_offset;
 		tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
 		mtlParmBuffOfst = tcParamsBuff->_offset;
 		if (pipeline->needsVertexOutputBuffer()) {
@@ -555,31 +565,35 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 			sgSize >>= 1;
 			tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
 		}
+	} else if (needsInstanceAdjustment) {
+		// In this case, we need to adjust the instance count for the views being drawn.
+		VkDeviceSize indirectSize = sizeof(MTLDrawPrimitivesIndirectArguments) * _drawCount;
+		tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+		mtlIndBuff = tempIndirectBuff->_mtlBuffer;
+		mtlIndBuffOfst = tempIndirectBuff->_offset;
 	}
 
 	MVKPiplineStages stages;
 	pipeline->getStages(stages);
 
-	VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
-
 	for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) {
 		for (uint32_t s : stages) {
 			auto stage = MVKGraphicsStage(s);
 			id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
-			if (drawIdx == 0 && stage == kMVKGraphicsStageVertex) {
+			if (drawIdx == 0 && stage == kMVKGraphicsStageVertex && pipeline->isTessellationPipeline()) {
 				// We need the indirect buffers now. This must be done before finalizing
 				// draw state, or the pipeline will get overridden. This is a good time
 				// to do it, since it will require switching to compute anyway. Do it all
 				// at once to get it over with.
 				cmdEncoder->encodeStoreActions(true);
 				mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
-				id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectConvertBuffersMTLComputePipelineState(false);
+				id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(false);
 				[mtlTessCtlEncoder setComputePipelineState: mtlConvertState];
 				[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
 				                      offset: _mtlIndirectBufferOffset
 				                     atIndex: 0];
-				[mtlTessCtlEncoder setBuffer: tcIndirectBuff->_mtlBuffer
-				                      offset: tcIndirectBuff->_offset
+				[mtlTessCtlEncoder setBuffer: tempIndirectBuff->_mtlBuffer
+				                      offset: tempIndirectBuff->_offset
 				                     atIndex: 1];
 				[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
 				                      offset: tcParamsBuff->_offset
@@ -617,6 +631,45 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 					[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
 					                  threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
 				}
+			} else if (drawIdx == 0 && needsInstanceAdjustment) {
+				// Similarly, for multiview, we need to adjust the instance count now.
+				// Unfortunately, this requires switching to compute.
+				// TODO: Consider using tile shaders to avoid this cost.
+				cmdEncoder->encodeStoreActions(true);
+				id<MTLComputeCommandEncoder> mtlConvertEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseMultiviewInstanceCountAdjust);
+				id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(false);
+				uint32_t viewCount;
+				[mtlConvertEncoder setComputePipelineState: mtlConvertState];
+				[mtlConvertEncoder setBuffer: _mtlIndirectBuffer
+				                      offset: _mtlIndirectBufferOffset
+				                     atIndex: 0];
+				[mtlConvertEncoder setBuffer: tempIndirectBuff->_mtlBuffer
+				                      offset: tempIndirectBuff->_offset
+				                     atIndex: 1];
+				cmdEncoder->setComputeBytes(mtlConvertEncoder,
+				                            &_mtlIndirectBufferStride,
+				                            sizeof(_mtlIndirectBufferStride),
+				                            2);
+				cmdEncoder->setComputeBytes(mtlConvertEncoder,
+				                            &_drawCount,
+				                            sizeof(_drawCount),
+				                            3);
+				viewCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
+				cmdEncoder->setComputeBytes(mtlConvertEncoder,
+				                            &viewCount,
+				                            sizeof(viewCount),
+				                            4);
+				if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+#if MVK_MACOS_OR_IOS
+					[mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
+					             threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
+#endif
+				} else {
+					[mtlConvertEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
+					                  threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
+				}
+				// Switch back to rendering now, since we don't have compute stages to run anyway.
+				cmdEncoder->beginMetalRenderPass(true);
 			}
 
 			cmdEncoder->finalizeDrawState(stage);	// Ensure all updated state has been submitted to Metal
@@ -635,14 +688,14 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 					// We must assume we can read up to the maximum number of vertices.
 					[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)];
 					if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
-						[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
-						                                 indirectBufferOffset: mtlTCIndBuffOfst];
-						mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
+						[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: mtlIndBuff
+						                                 indirectBufferOffset: mtlIndBuffOfst];
+						mtlIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
 					}
-					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
-					                                     indirectBufferOffset: mtlTCIndBuffOfst
+					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
+					                                     indirectBufferOffset: mtlIndBuffOfst
 					                                    threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
-					mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
+					mtlIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
 					// Mark pipeline, resources, and tess control push constants as dirty
 					// so I apply them during the next stage.
 					cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
@@ -674,10 +727,10 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 						                      offset: vtxOutBuff->_offset
 						                     atIndex: kMVKTessCtlInputBufferIndex];
 					}
-					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
-					                                     indirectBufferOffset: mtlTCIndBuffOfst
+					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
+					                                     indirectBufferOffset: mtlIndBuffOfst
 					                                    threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)];
-					mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
+					mtlIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
 					// Running this stage prematurely ended the render pass, so we have to start it up again.
 					// TODO: On iOS, maybe we could use a tile shader to avoid this.
 					cmdEncoder->beginMetalRenderPass(true);
@@ -705,22 +758,22 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 						[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
 						                          patchIndexBuffer: nil
 						                    patchIndexBufferOffset: 0
-						                            indirectBuffer: tcIndirectBuff->_mtlBuffer
-						                      indirectBufferOffset: mtlTCIndBuffOfst];
+						                            indirectBuffer: mtlIndBuff
+						                      indirectBufferOffset: mtlIndBuffOfst];
 #endif
 					}
 
-					mtlTCIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments);
-					// Mark pipeline, resources, and tess control push constants as dirty
+					mtlIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments);
+					// Mark pipeline, resources, and vertex push constants as dirty
 					// so I apply them during the next stage.
 					cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
 					cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
 					cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
 				} else {
 					[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
-					                               indirectBuffer: _mtlIndirectBuffer
+					                               indirectBuffer: mtlIndBuff
 					                         indirectBufferOffset: mtlIndBuffOfst];
-					mtlIndBuffOfst += _mtlIndirectBufferStride;
+					mtlIndBuffOfst += needsInstanceAdjustment ? sizeof(MTLDrawPrimitivesIndirectArguments) : _mtlIndirectBufferStride;
 				}
 				break;
 		}
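Before the indexed variant below, a hedged sketch of what the multiview "convert buffers" kernel dispatched above has to do (the kernel body is a reconstruction, not MoltenVK's actual shader; only the buffer indices 0–4 are taken from the `setBuffer:`/`setComputeBytes:` calls in the hunk above): copy each `MTLDrawPrimitivesIndirectArguments` record from the app's indirect buffer into the tightly packed temporary buffer, scaling `instanceCount` by the view count of the current Metal pass.

```metal
#include <metal_stdlib>
using namespace metal;

struct DrawArgs {                 // mirrors MTLDrawPrimitivesIndirectArguments
    uint vertexCount;
    uint instanceCount;
    uint vertexStart;
    uint baseInstance;
};

kernel void adjust_multiview_draws(device const uint* src   [[buffer(0)]],  // app's indirect buffer
                                   device DrawArgs*   dst   [[buffer(1)]],  // packed temp buffer
                                   constant uint& srcStride [[buffer(2)]],  // bytes; 4-byte aligned per Vulkan
                                   constant uint& drawCount [[buffer(3)]],
                                   constant uint& viewCount [[buffer(4)]],
                                   uint idx [[thread_position_in_grid]]) {
    if (idx >= drawCount) { return; }  // threadgroup dispatch can overshoot drawCount
    uint w = idx * (srcStride / 4);    // word offset of this draw's arguments
    dst[idx].vertexCount   = src[w + 0];
    dst[idx].instanceCount = src[w + 1] * viewCount;  // one Metal instance per (app instance, view) pair
    dst[idx].vertexStart   = src[w + 2];
    dst[idx].baseInstance  = src[w + 3];
}
```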
@@ -759,11 +812,13 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 
 	MVKIndexMTLBufferBinding& ibb = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding;
 	auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
+	bool needsInstanceAdjustment = cmdEncoder->getSubpass()->isMultiview() &&
+	                               cmdEncoder->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview();
 	// The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats.
 	// We have to convert from the drawIndexedPrimitives:... format to them.
 	// While we're at it, we can create the temporary output buffers once and reuse them
 	// for each draw.
-	const MVKMTLBufferAllocation* tcIndirectBuff = nullptr;
+	const MVKMTLBufferAllocation* tempIndirectBuff = nullptr;
 	const MVKMTLBufferAllocation* tcParamsBuff = nullptr;
 	const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
 	const MVKMTLBufferAllocation* tcOutBuff = nullptr;
@@ -774,7 +829,9 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 	uint32_t inControlPointCount = 0, outControlPointCount = 0;
 	VkDeviceSize paramsIncr = 0;
 
-	VkDeviceSize mtlTCIndBuffOfst = 0;
+	id<MTLBuffer> mtlIndBuff = _mtlIndirectBuffer;
+	VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
+	VkDeviceSize mtlTempIndBuffOfst = _mtlIndirectBufferOffset;
 	VkDeviceSize mtlParmBuffOfst = 0;
 	NSUInteger vtxThreadExecWidth = 0;
 	NSUInteger tcWorkgroupSize = 0;
@@ -794,9 +851,10 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 		}
 		paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
 		VkDeviceSize paramsSize = paramsIncr * _drawCount;
-		tcIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
-		mtlTCIndBuffOfst = tcIndirectBuff->_offset;
-		tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
+		tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+		mtlIndBuff = tempIndirectBuff->_mtlBuffer;
+		mtlTempIndBuffOfst = tempIndirectBuff->_offset;
+		tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
 		mtlParmBuffOfst = tcParamsBuff->_offset;
 		if (pipeline->needsVertexOutputBuffer()) {
 			vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
@@ -820,18 +878,22 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 			sgSize >>= 1;
 			tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
 		}
+	} else if (needsInstanceAdjustment) {
+		// In this case, we need to adjust the instance count for the views being drawn.
+		VkDeviceSize indirectSize = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * _drawCount;
+		tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
+		mtlIndBuff = tempIndirectBuff->_mtlBuffer;
+		mtlTempIndBuffOfst = tempIndirectBuff->_offset;
 	}
 
 	MVKPiplineStages stages;
 	pipeline->getStages(stages);
 
-	VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
-
 	for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) {
 		for (uint32_t s : stages) {
 			auto stage = MVKGraphicsStage(s);
 			id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
-			if (stage == kMVKGraphicsStageVertex) {
+			if (stage == kMVKGraphicsStageVertex && pipeline->isTessellationPipeline()) {
 				cmdEncoder->encodeStoreActions(true);
 				mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
 				// We need the indirect buffers now. This must be done before finalizing
@@ -839,13 +901,13 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 				// to do it, since it will require switching to compute anyway. Do it all
 				// at once to get it over with.
 				if (drawIdx == 0) {
-					id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectConvertBuffersMTLComputePipelineState(true);
+					id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(true);
 					[mtlTessCtlEncoder setComputePipelineState: mtlConvertState];
 					[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
 					                      offset: _mtlIndirectBufferOffset
 					                     atIndex: 0];
-					[mtlTessCtlEncoder setBuffer: tcIndirectBuff->_mtlBuffer
-					                      offset: tcIndirectBuff->_offset
+					[mtlTessCtlEncoder setBuffer: tempIndirectBuff->_mtlBuffer
+					                      offset: tempIndirectBuff->_offset
 					                     atIndex: 1];
 					[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
 					                      offset: tcParamsBuff->_offset
@@ -891,10 +953,50 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 				[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
 				                      offset: mtlIndBuffOfst
 				                     atIndex: 2];
-				[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
-				                                     indirectBufferOffset: mtlTCIndBuffOfst
+				[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
+				                                     indirectBufferOffset: mtlTempIndBuffOfst
 				                                    threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
 				mtlIndBuffOfst += sizeof(MTLDrawIndexedPrimitivesIndirectArguments);
+			} else if (drawIdx == 0 && needsInstanceAdjustment) {
+				// Similarly, for multiview, we need to adjust the instance count now.
+				// Unfortunately, this requires switching to compute. Luckily, we don't also
+				// have to copy the index buffer.
+				// TODO: Consider using tile shaders to avoid this cost.
+				cmdEncoder->encodeStoreActions(true);
+				id<MTLComputeCommandEncoder> mtlConvertEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseMultiviewInstanceCountAdjust);
+				id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(true);
+				uint32_t viewCount;
+				[mtlConvertEncoder setComputePipelineState: mtlConvertState];
+				[mtlConvertEncoder setBuffer: _mtlIndirectBuffer
+				                      offset: _mtlIndirectBufferOffset
+				                     atIndex: 0];
+				[mtlConvertEncoder setBuffer: tempIndirectBuff->_mtlBuffer
+				                      offset: tempIndirectBuff->_offset
+				                     atIndex: 1];
+				cmdEncoder->setComputeBytes(mtlConvertEncoder,
+				                            &_mtlIndirectBufferStride,
+				                            sizeof(_mtlIndirectBufferStride),
+				                            2);
+				cmdEncoder->setComputeBytes(mtlConvertEncoder,
+				                            &_drawCount,
+				                            sizeof(_drawCount),
+				                            3);
+				viewCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
+				cmdEncoder->setComputeBytes(mtlConvertEncoder,
+				                            &viewCount,
+				                            sizeof(viewCount),
+				                            4);
+				if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+#if MVK_MACOS_OR_IOS
+					[mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
+					             threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
+#endif
+				} else {
+					[mtlConvertEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
+					                  threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
+				}
+				// Switch back to rendering now, since we don't have compute stages to run anyway.
+				cmdEncoder->beginMetalRenderPass(true);
 			}
 
 			cmdEncoder->finalizeDrawState(stage);	// Ensure all updated state has been submitted to Metal
@@ -915,14 +1017,14 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 					                     atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
 					[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)];
 					if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
-						[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
-						                                 indirectBufferOffset: mtlTCIndBuffOfst];
-						mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
+						[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: mtlIndBuff
+						                                 indirectBufferOffset: mtlTempIndBuffOfst];
+						mtlTempIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
 					}
-					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
-					                                     indirectBufferOffset: mtlTCIndBuffOfst
+					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
+					                                     indirectBufferOffset: mtlTempIndBuffOfst
 					                                    threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
-					mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
+					mtlTempIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
 					// Mark pipeline, resources, and tess control push constants as dirty
 					// so I apply them during the next stage.
 					cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
@@ -954,10 +1056,10 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 						                      offset: vtxOutBuff->_offset
 						                     atIndex: kMVKTessCtlInputBufferIndex];
 					}
-					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
-					                                     indirectBufferOffset: mtlTCIndBuffOfst
+					[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
+					                                     indirectBufferOffset: mtlTempIndBuffOfst
 					                                    threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)];
-					mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
+					mtlTempIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
 					// Running this stage prematurely ended the render pass, so we have to start it up again.
 					// TODO: On iOS, maybe we could use a tile shader to avoid this.
 					cmdEncoder->beginMetalRenderPass(true);
@@ -985,12 +1087,12 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 						[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
 						                          patchIndexBuffer: nil
 						                    patchIndexBufferOffset: 0
-						                            indirectBuffer: tcIndirectBuff->_mtlBuffer
-						                      indirectBufferOffset: mtlTCIndBuffOfst];
+						                            indirectBuffer: mtlIndBuff
+						                      indirectBufferOffset: mtlTempIndBuffOfst];
 #endif
 					}
 
-					mtlTCIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments);
+					mtlTempIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments);
 					// Mark pipeline, resources, and tess control push constants as dirty
 					// so I apply them during the next stage.
 					cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
@@ -1001,9 +1103,9 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 					                                           indexType: (MTLIndexType)ibb.mtlIndexType
 					                                         indexBuffer: ibb.mtlBuffer
 					                                   indexBufferOffset: ibb.offset
-					                                      indirectBuffer: _mtlIndirectBuffer
-					                                indirectBufferOffset: mtlIndBuffOfst];
-					mtlIndBuffOfst += _mtlIndirectBufferStride;
+					                                      indirectBuffer: mtlIndBuff
+					                                indirectBufferOffset: mtlTempIndBuffOfst];
+					mtlTempIndBuffOfst += needsInstanceAdjustment ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) : _mtlIndirectBufferStride;
 				}
 				break;
 		}
MVKCmdQueries.mm:

@@ -52,7 +52,13 @@ VkResult MVKCmdBeginQuery::setContent(MVKCommandBuffer* cmdBuff,
 }
 
 void MVKCmdBeginQuery::encode(MVKCommandEncoder* cmdEncoder) {
-	_queryPool->beginQuery(_query, _flags, cmdEncoder);
+	// In a multiview render pass, multiple queries are produced, one for each view.
+	// Therefore, when encoding, we must offset the query by the number of views already
+	// drawn in all previous Metal passes.
+	uint32_t query = _query;
+	if (cmdEncoder->getMultiviewPassIndex() > 0)
+		query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1);
+	_queryPool->beginQuery(query, _flags, cmdEncoder);
 }
 
 
@@ -60,7 +66,10 @@ void MVKCmdBeginQuery::encode(MVKCommandEncoder* cmdEncoder) {
 #pragma mark MVKCmdEndQuery
 
 void MVKCmdEndQuery::encode(MVKCommandEncoder* cmdEncoder) {
-	_queryPool->endQuery(_query, cmdEncoder);
+	uint32_t query = _query;
+	if (cmdEncoder->getMultiviewPassIndex() > 0)
+		query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1);
+	_queryPool->endQuery(query, cmdEncoder);
 }
 
 
@@ -80,7 +89,10 @@ VkResult MVKCmdWriteTimestamp::setContent(MVKCommandBuffer* cmdBuff,
 }
 
 void MVKCmdWriteTimestamp::encode(MVKCommandEncoder* cmdEncoder) {
-	cmdEncoder->markTimestamp(_queryPool, _query);
+	uint32_t query = _query;
+	if (cmdEncoder->getMultiviewPassIndex() > 0)
+		query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1);
+	cmdEncoder->markTimestamp(_queryPool, query);
 }
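A worked illustration of the query offsets above, assuming a "clump" is a maximal run of contiguous enabled views (the real splitting logic may consider more than that; these helpers are hypothetical, not MoltenVK code). With `viewMask = 0b00110101`, views 0, 2, and 4–5 render in three Metal passes of 1, 1, and 2 views, so a query Q recorded once by the app is encoded as Q, Q+1, and Q+2 across those passes — Q plus the number of views drawn in all previous passes.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Split a view mask into (firstView, viewCount) clumps of contiguous enabled views.
static std::vector<std::pair<uint32_t, uint32_t>> clumpViews(uint32_t viewMask) {
    std::vector<std::pair<uint32_t, uint32_t>> clumps;
    for (uint32_t v = 0; v < 32; ++v) {
        if (!(viewMask & (1u << v))) { continue; }
        if (!clumps.empty() && clumps.back().first + clumps.back().second == v) {
            clumps.back().second++;      // extend the current contiguous run
        } else {
            clumps.emplace_back(v, 1u);  // start a new clump
        }
    }
    return clumps;
}

// Views rendered by Metal passes 0..passIdx inclusive; the offset applied to a
// query in pass P is then viewCountUpToMetalPass(viewMask, P - 1), as above.
static uint32_t viewCountUpToMetalPass(uint32_t viewMask, uint32_t passIdx) {
    auto clumps = clumpViews(viewMask);
    uint32_t views = 0;
    for (uint32_t i = 0; i <= passIdx && i < clumps.size(); ++i) { views += clumps[i].second; }
    return views;
}
```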
MVKCmdRenderPass.h:

@@ -28,6 +28,31 @@ class MVKRenderPass;
 class MVKFramebuffer;
 
 
+#pragma mark -
+#pragma mark MVKCmdBeginRenderPassBase
+
+/**
+ * Abstract base class of MVKCmdBeginRenderPass.
+ * Contains all pieces that are independent of the templated portions.
+ */
+class MVKCmdBeginRenderPassBase : public MVKCommand {
+
+public:
+	VkResult setContent(MVKCommandBuffer* cmdBuff,
+						const VkRenderPassBeginInfo* pRenderPassBegin,
+						VkSubpassContents contents);
+
+	inline MVKRenderPass* getRenderPass() { return _renderPass; }
+
+protected:
+
+	MVKRenderPass* _renderPass;
+	MVKFramebuffer* _framebuffer;
+	VkRect2D _renderArea;
+	VkSubpassContents _contents;
+};
+
+
 #pragma mark -
 #pragma mark MVKCmdBeginRenderPass
 
@@ -36,7 +61,7 @@ class MVKFramebuffer;
  * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
  */
 template <size_t N>
-class MVKCmdBeginRenderPass : public MVKCommand {
+class MVKCmdBeginRenderPass : public MVKCmdBeginRenderPassBase {
 
 public:
 	VkResult setContent(MVKCommandBuffer* cmdBuff,
@@ -49,10 +74,6 @@ protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
 	MVKSmallVector<VkClearValue, N> _clearValues;
-	MVKRenderPass* _renderPass;
-	MVKFramebuffer* _framebuffer;
-	VkRect2D _renderArea;
-	VkSubpassContents _contents;
 };
 
 // Concrete template class implementations.
MVKCmdRenderPass.mm:

@@ -25,6 +25,21 @@
 #include "mvk_datatypes.hpp"
 
 
+#pragma mark -
+#pragma mark MVKCmdBeginRenderPassBase
+
+VkResult MVKCmdBeginRenderPassBase::setContent(MVKCommandBuffer* cmdBuff,
+											   const VkRenderPassBeginInfo* pRenderPassBegin,
+											   VkSubpassContents contents) {
+	_contents = contents;
+	_renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass;
+	_framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer;
+	_renderArea = pRenderPassBegin->renderArea;
+
+	return VK_SUCCESS;
+}
+
+
 #pragma mark -
 #pragma mark MVKCmdBeginRenderPass
 
@@ -32,10 +47,7 @@ template <size_t N>
 VkResult MVKCmdBeginRenderPass<N>::setContent(MVKCommandBuffer* cmdBuff,
 											  const VkRenderPassBeginInfo* pRenderPassBegin,
 											  VkSubpassContents contents) {
-	_contents = contents;
-	_renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass;
-	_framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer;
-	_renderArea = pRenderPassBegin->renderArea;
+	MVKCmdBeginRenderPassBase::setContent(cmdBuff, pRenderPassBegin, contents);
 
 	// Add clear values
 	uint32_t cvCnt = pRenderPassBegin->clearValueCount;
@@ -51,7 +63,7 @@ VkResult MVKCmdBeginRenderPass<N>::setContent(MVKCommandBuffer* cmdBuff,
 template <size_t N>
 void MVKCmdBeginRenderPass<N>::encode(MVKCommandEncoder* cmdEncoder) {
 //	MVKLogDebug("Encoding vkCmdBeginRenderPass(). Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds());
-	cmdEncoder->beginRenderpass(_contents, _renderPass, _framebuffer, _renderArea, _clearValues.contents());
+	cmdEncoder->beginRenderpass(this, _contents, _renderPass, _framebuffer, _renderArea, _clearValues.contents());
 }
 
 template class MVKCmdBeginRenderPass<1>;
@@ -70,7 +82,10 @@ VkResult MVKCmdNextSubpass::setContent(MVKCommandBuffer* cmdBuff,
 }
 
 void MVKCmdNextSubpass::encode(MVKCommandEncoder* cmdEncoder) {
-	cmdEncoder->beginNextSubpass(_contents);
+	if (cmdEncoder->getMultiviewPassIndex() + 1 < cmdEncoder->getSubpass()->getMultiviewMetalPassCount())
+		cmdEncoder->beginNextMultiviewPass();
+	else
+		cmdEncoder->beginNextSubpass(this, _contents);
 }
 
 
@@ -83,7 +98,10 @@ VkResult MVKCmdEndRenderPass::setContent(MVKCommandBuffer* cmdBuff) {
 
 void MVKCmdEndRenderPass::encode(MVKCommandEncoder* cmdEncoder) {
 //	MVKLogDebug("Encoding vkCmdEndRenderPass(). Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds());
-	cmdEncoder->endRenderpass();
+	if (cmdEncoder->getMultiviewPassIndex() + 1 < cmdEncoder->getSubpass()->getMultiviewMetalPassCount())
+		cmdEncoder->beginNextMultiviewPass();
+	else
+		cmdEncoder->endRenderpass();
 }
MVKCmdTransfer.h:

@@ -254,10 +254,12 @@ public:
 	void encode(MVKCommandEncoder* cmdEncoder) override;
 
 protected:
-	uint32_t getVertexCount();
-	void populateVertices(simd::float4* vertices, float attWidth, float attHeight);
-	uint32_t populateVertices(simd::float4* vertices, uint32_t startVertex,
-							  VkClearRect& clearRect, float attWidth, float attHeight);
+	uint32_t getVertexCount(MVKCommandEncoder* cmdEncoder);
+	void populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices,
+						  float attWidth, float attHeight);
+	uint32_t populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices,
+							  uint32_t startVertex, VkClearRect& clearRect,
+							  float attWidth, float attHeight);
 	virtual VkClearValue& getClearValue(uint32_t attIdx) = 0;
 	virtual void setClearValue(uint32_t attIdx, const VkClearValue& clearValue) = 0;
 
MVKCmdTransfer.mm:

@@ -948,27 +948,34 @@ VkResult MVKCmdClearAttachments<N>::setContent(MVKCommandBuffer* cmdBuff,
 
 // Returns the total number of vertices needed to clear all layers of all rectangles.
 template <size_t N>
-uint32_t MVKCmdClearAttachments<N>::getVertexCount() {
+uint32_t MVKCmdClearAttachments<N>::getVertexCount(MVKCommandEncoder* cmdEncoder) {
 	uint32_t vtxCnt = 0;
-	for (auto& rect : _clearRects) {
-		vtxCnt += 6 * rect.layerCount;
+	if (cmdEncoder->getSubpass()->isMultiview()) {
+		// In this case, all the layer counts will be one. We want to use the number of views in the current multiview pass.
+		vtxCnt = (uint32_t)_clearRects.size() * cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) * 6;
+	} else {
+		for (auto& rect : _clearRects) {
+			vtxCnt += 6 * rect.layerCount;
+		}
 	}
 	return vtxCnt;
 }
 
 // Populates the vertices for all clear rectangles within an attachment of the specified size.
 template <size_t N>
-void MVKCmdClearAttachments<N>::populateVertices(simd::float4* vertices, float attWidth, float attHeight) {
+void MVKCmdClearAttachments<N>::populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices,
+												 float attWidth, float attHeight) {
 	uint32_t vtxIdx = 0;
 	for (auto& rect : _clearRects) {
-		vtxIdx = populateVertices(vertices, vtxIdx, rect, attWidth, attHeight);
+		vtxIdx = populateVertices(cmdEncoder, vertices, vtxIdx, rect, attWidth, attHeight);
 	}
 }
 
 // Populates the vertices, starting at the vertex, from the specified rectangle within
 // an attachment of the specified size. Returns the next vertex that needs to be populated.
 template <size_t N>
-uint32_t MVKCmdClearAttachments<N>::populateVertices(simd::float4* vertices,
+uint32_t MVKCmdClearAttachments<N>::populateVertices(MVKCommandEncoder* cmdEncoder,
+                                                     simd::float4* vertices,
                                                      uint32_t startVertex,
                                                      VkClearRect& clearRect,
                                                      float attWidth,
@@ -990,8 +997,17 @@ uint32_t MVKCmdClearAttachments<N>::populateVertices(simd::float4* vertices,
 	simd::float4 vtx;
 
 	uint32_t vtxIdx = startVertex;
-	uint32_t startLayer = clearRect.baseArrayLayer;
-	uint32_t endLayer = startLayer + clearRect.layerCount;
+	uint32_t startLayer, endLayer;
+	if (cmdEncoder->getSubpass()->isMultiview()) {
+		// In a multiview pass, the baseArrayLayer will be 0 and the layerCount will be 1.
+		// Use the view count instead. We already set the base slice properly in the
+		// MTLRenderPassDescriptor, so we don't need to offset the starting layer.
+		startLayer = 0;
+		endLayer = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
+	} else {
+		startLayer = clearRect.baseArrayLayer;
+		endLayer = startLayer + clearRect.layerCount;
+	}
 	for (uint32_t layer = startLayer; layer < endLayer; layer++) {
 
 		vtx.z = 0.0;
@@ -1032,12 +1048,12 @@ uint32_t MVKCmdClearAttachments<N>::populateVertices(simd::float4* vertices,
 template <size_t N>
 void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
 
-	uint32_t vtxCnt = getVertexCount();
+	uint32_t vtxCnt = getVertexCount(cmdEncoder);
 	simd::float4 vertices[vtxCnt];
 	simd::float4 clearColors[kMVKClearAttachmentCount];
 
 	VkExtent2D fbExtent = cmdEncoder->_framebuffer->getExtent2D();
-	populateVertices(vertices, fbExtent.width, fbExtent.height);
+	populateVertices(cmdEncoder, vertices, fbExtent.width, fbExtent.height);
 
 	MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
 	MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
@@ -1045,7 +1061,10 @@ void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
 
 	// Populate the render pipeline state attachment key with info from the subpass and framebuffer.
 	_rpsKey.mtlSampleCount = mvkSampleCountFromVkSampleCountFlagBits(subpass->getSampleCount());
-	if (cmdEncoder->_canUseLayeredRendering && cmdEncoder->_framebuffer->getLayerCount() > 1) { _rpsKey.enableLayeredRendering(); }
+	if (cmdEncoder->_canUseLayeredRendering &&
+		(cmdEncoder->_framebuffer->getLayerCount() > 1 || cmdEncoder->getSubpass()->isMultiview())) {
+		_rpsKey.enableLayeredRendering();
+	}
 
 	uint32_t caCnt = subpass->getColorAttachmentCount();
 	for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
MVKCommandBuffer.h:

@@ -33,6 +33,8 @@ class MVKQueue;
 class MVKQueueCommandBufferSubmission;
 class MVKCommandEncoder;
 class MVKCommandEncodingPool;
+class MVKCmdBeginRenderPassBase;
+class MVKCmdNextSubpass;
 class MVKRenderPass;
 class MVKFramebuffer;
 class MVKRenderSubpass;
@@ -105,6 +107,24 @@ public:
 	MVKCmdBindPipeline* _lastTessellationPipeline;
 
 
+#pragma mark Multiview render pass command management
+
+	/** Update the last recorded multiview render pass */
+	void recordBeginRenderPass(MVKCmdBeginRenderPassBase* mvkBeginRenderPass);
+
+	/** Update the last recorded multiview subpass */
+	void recordNextSubpass();
+
+	/** Forget the last recorded multiview render pass */
+	void recordEndRenderPass();
+
+	/** The most recent recorded multiview render subpass */
+	MVKRenderSubpass* _lastMultiviewSubpass;
+
+	/** Returns the currently active multiview render subpass, even for secondary command buffers */
+	MVKRenderSubpass* getLastMultiviewSubpass();
+
+
 #pragma mark Construction
 
 	MVKCommandBuffer(MVKDevice* device) : MVKDeviceTrackingMixin(device) {}
@@ -249,14 +269,18 @@ public:
 	void encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer);
 
 	/** Begins a render pass and establishes initial draw state. */
-	void beginRenderpass(VkSubpassContents subpassContents,
+	void beginRenderpass(MVKCommand* passCmd,
+						 VkSubpassContents subpassContents,
 						 MVKRenderPass* renderPass,
 						 MVKFramebuffer* framebuffer,
 						 VkRect2D& renderArea,
						 MVKArrayRef<VkClearValue> clearValues);
 
 	/** Begins the next render subpass. */
-	void beginNextSubpass(VkSubpassContents renderpassContents);
+	void beginNextSubpass(MVKCommand* subpassCmd, VkSubpassContents renderpassContents);
+
+	/** Begins the next multiview Metal render pass. */
+	void beginNextMultiviewPass();
 
 	/** Begins a Metal render pass for the current render subpass. */
 	void beginMetalRenderPass(bool loadOverride = false);
@@ -267,6 +291,9 @@ public:
 	/** Returns the render subpass that is currently active. */
 	MVKRenderSubpass* getSubpass();
 
+	/** Returns the index of the currently active multiview subpass, or zero if the current render pass is not multiview. */
+	uint32_t getMultiviewPassIndex();
+
 	/** Binds a pipeline to a bind point. */
 	void bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline);
 
@@ -428,14 +455,16 @@ public:
 protected:
 	void addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query);
 	void finishQueries();
-	void setSubpass(VkSubpassContents subpassContents, uint32_t subpassIndex);
+	void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
 	void clearRenderArea();
 	const MVKMTLBufferAllocation* copyToTempMTLBufferAllocation(const void* bytes, NSUInteger length);
 	NSString* getMTLRenderCommandEncoderName();
 
 	VkSubpassContents _subpassContents;
 	MVKRenderPass* _renderPass;
+	MVKCommand* _lastMultiviewPassCmd;
 	uint32_t _renderSubpassIndex;
+	uint32_t _multiviewPassIndex;
 	VkRect2D _renderArea;
 	MVKActivatedQueries* _pActivatedQueries;
 	MVKSmallVector<VkClearValue, kMVKDefaultAttachmentCount> _clearValues;
MVKCommandBuffer.mm:

@@ -26,6 +26,7 @@
 #include "MVKLogging.h"
 #include "MTLRenderPassDescriptor+MoltenVK.h"
 #include "MVKCmdDraw.h"
+#include "MVKCmdRenderPass.h"
 
 using namespace std;
 
@@ -76,6 +77,7 @@ VkResult MVKCommandBuffer::reset(VkCommandBufferResetFlags flags) {
 	_commandCount = 0;
 	_initialVisibilityResultMTLBuffer = nil;   // not retained
 	_lastTessellationPipeline = nullptr;
+	_lastMultiviewSubpass = nullptr;
 	setConfigurationResult(VK_NOT_READY);
 
 	if (mvkAreAllFlagsEnabled(flags, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT)) {
@@ -202,12 +204,40 @@ void MVKCommandBuffer::recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline) {
 }
 
 
+#pragma mark -
+#pragma mark Multiview render pass command management
+
+void MVKCommandBuffer::recordBeginRenderPass(MVKCmdBeginRenderPassBase* mvkBeginRenderPass) {
+	MVKRenderPass* mvkRendPass = mvkBeginRenderPass->getRenderPass();
+	_lastMultiviewSubpass = mvkRendPass->isMultiview() ? mvkRendPass->getSubpass(0) : nullptr;
+}
+
+void MVKCommandBuffer::recordNextSubpass() {
+	if (_lastMultiviewSubpass) {
+		_lastMultiviewSubpass = _lastMultiviewSubpass->getRenderPass()->getSubpass(_lastMultiviewSubpass->getSubpassIndex() + 1);
+	}
+}
+
+void MVKCommandBuffer::recordEndRenderPass() {
+	_lastMultiviewSubpass = nullptr;
+}
+
+MVKRenderSubpass* MVKCommandBuffer::getLastMultiviewSubpass() {
+	if (_doesContinueRenderPass) {
+		MVKRenderSubpass* subpass = ((MVKRenderPass*)_secondaryInheritanceInfo.renderPass)->getSubpass(_secondaryInheritanceInfo.subpass);
+		if (subpass->isMultiview()) { return subpass; }
+	}
+	return _lastMultiviewSubpass;
+}
+
+
 #pragma mark -
 #pragma mark MVKCommandEncoder
 
 void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff) {
 	_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
 	_renderSubpassIndex = 0;
+	_multiviewPassIndex = 0;
 	_canUseLayeredRendering = false;
 
 	_mtlCmdBuffer = mtlCmdBuff;		// not retained
@@ -216,8 +246,15 @@ void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff) {
 
 	MVKCommand* cmd = _cmdBuffer->_head;
 	while (cmd) {
+		uint32_t prevMVPassIdx = _multiviewPassIndex;
 		cmd->encode(this);
-		cmd = cmd->_next;
+		if (_multiviewPassIndex > prevMVPassIdx) {
+			// This means we're in a multiview render pass, and we moved on to the
+			// next view group. Re-encode all commands in the subpass again for this group.
+			cmd = _lastMultiviewPassCmd->_next;
+		} else {
+			cmd = cmd->_next;
+		}
 	}
 
 	endCurrentMetalEncoding();
@@ -232,7 +269,8 @@ void MVKCommandEncoder::encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer) {
 	}
 }
 
-void MVKCommandEncoder::beginRenderpass(VkSubpassContents subpassContents,
+void MVKCommandEncoder::beginRenderpass(MVKCommand* passCmd,
+										VkSubpassContents subpassContents,
 										MVKRenderPass* renderPass,
 										MVKFramebuffer* framebuffer,
 										VkRect2D& renderArea,
@@ -243,19 +281,23 @@ void MVKCommandEncoder::beginRenderpass(VkSubpassContents subpassContents,
 	_isRenderingEntireAttachment = (mvkVkOffset2DsAreEqual(_renderArea.offset, {0,0}) &&
 									mvkVkExtent2DsAreEqual(_renderArea.extent, _framebuffer->getExtent2D()));
 	_clearValues.assign(clearValues.begin(), clearValues.end());
-	setSubpass(subpassContents, 0);
+	setSubpass(passCmd, subpassContents, 0);
 }
 
-void MVKCommandEncoder::beginNextSubpass(VkSubpassContents contents) {
-	setSubpass(contents, _renderSubpassIndex + 1);
+void MVKCommandEncoder::beginNextSubpass(MVKCommand* subpassCmd, VkSubpassContents contents) {
+	setSubpass(subpassCmd, contents, _renderSubpassIndex + 1);
 }
 
 // Sets the current render subpass to the subpass with the specified index.
-void MVKCommandEncoder::setSubpass(VkSubpassContents subpassContents, uint32_t subpassIndex) {
+void MVKCommandEncoder::setSubpass(MVKCommand* subpassCmd,
+								   VkSubpassContents subpassContents,
								   uint32_t subpassIndex) {
 	encodeStoreActions();
 
+	_lastMultiviewPassCmd = subpassCmd;
 	_subpassContents = subpassContents;
 	_renderSubpassIndex = subpassIndex;
+	_multiviewPassIndex = 0;
 
 	_canUseLayeredRendering = (_device->_pMetalFeatures->layeredRendering &&
 							   (_device->_pMetalFeatures->multisampleLayeredRendering ||
@@ -264,20 +306,34 @@ void MVKCommandEncoder::setSubpass(VkSubpassContents subpassContents, uint32_t s
 	beginMetalRenderPass();
 }
 
+void MVKCommandEncoder::beginNextMultiviewPass() {
+	encodeStoreActions();
+	_multiviewPassIndex++;
+	beginMetalRenderPass();
+}
+
+uint32_t MVKCommandEncoder::getMultiviewPassIndex() { return _multiviewPassIndex; }
+
 // Creates _mtlRenderEncoder and marks cached render state as dirty so it will be set into the _mtlRenderEncoder.
 void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {
 
 	endCurrentMetalEncoding();
 
 	MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor];
-	getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
+	getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
 	mtlRPDesc.visibilityResultBuffer = _occlusionQueryState.getVisibilityResultMTLBuffer();
 
 	VkExtent2D fbExtent = _framebuffer->getExtent2D();
 	mtlRPDesc.renderTargetWidthMVK = min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width);
 	mtlRPDesc.renderTargetHeightMVK = min(_renderArea.offset.y + _renderArea.extent.height, fbExtent.height);
 	if (_canUseLayeredRendering) {
-		mtlRPDesc.renderTargetArrayLengthMVK = _framebuffer->getLayerCount();
+		if (getSubpass()->isMultiview()) {
+			// In the case of a multiview pass, the framebuffer layer count will be one.
+			// We need to use the view count for this multiview pass.
+			mtlRPDesc.renderTargetArrayLengthMVK = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
+		} else {
+			mtlRPDesc.renderTargetArrayLengthMVK = _framebuffer->getLayerCount();
+		}
 	}
 
 	_mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];	// not retained
@@ -386,16 +442,36 @@ void MVKCommandEncoder::clearRenderArea() {
 
 	if (clearAttCnt == 0) { return; }
 
-	VkClearRect clearRect;
-	clearRect.rect = _renderArea;
-	clearRect.baseArrayLayer = 0;
-	clearRect.layerCount = _framebuffer->getLayerCount();
+	if (!getSubpass()->isMultiview()) {
+		VkClearRect clearRect;
+		clearRect.rect = _renderArea;
+		clearRect.baseArrayLayer = 0;
+		clearRect.layerCount = _framebuffer->getLayerCount();
 
-	// Create and execute a temporary clear attachments command.
-	// To be threadsafe...do NOT acquire and return the command from the pool.
-	MVKCmdClearMultiAttachments<1> cmd;
-	cmd.setContent(_cmdBuffer, clearAttCnt, clearAtts.data(), 1, &clearRect);
-	cmd.encode(this);
+		// Create and execute a temporary clear attachments command.
+		// To be threadsafe...do NOT acquire and return the command from the pool.
+		MVKCmdClearMultiAttachments<1> cmd;
+		cmd.setContent(_cmdBuffer, clearAttCnt, clearAtts.data(), 1, &clearRect);
+		cmd.encode(this);
+	} else {
+		// For multiview, it is possible that some attachments need different layers cleared.
+		// In that case, we'll have to clear them individually. :/
+		for (auto& clearAtt : clearAtts) {
+			MVKSmallVector<VkClearRect, 1> clearRects;
+			getSubpass()->populateMultiviewClearRects(clearRects, this, clearAtt.colorAttachment, clearAtt.aspectMask);
+			// Create and execute a temporary clear attachments command.
+			// To be threadsafe...do NOT acquire and return the command from the pool.
+			if (clearRects.size() == 1) {
+				MVKCmdClearSingleAttachment<1> cmd;
+				cmd.setContent(_cmdBuffer, 1, &clearAtt, (uint32_t)clearRects.size(), clearRects.data());
+				cmd.encode(this);
+			} else {
+				MVKCmdClearSingleAttachment<4> cmd;
+				cmd.setContent(_cmdBuffer, 1, &clearAtt, (uint32_t)clearRects.size(), clearRects.data());
+				cmd.encode(this);
+			}
+		}
+	}
 }
 
 void MVKCommandEncoder::finalizeDispatchState() {
@@ -559,7 +635,10 @@ void MVKCommandEncoder::markTimestamp(MVKQueryPool* pQueryPool, uint32_t query)
 // Marks the specified query as activated
 void MVKCommandEncoder::addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query) {
 	if ( !_pActivatedQueries ) { _pActivatedQueries = new MVKActivatedQueries(); }
-	(*_pActivatedQueries)[pQueryPool].push_back(query);
+	uint32_t endQuery = query + (getSubpass()->isMultiview() ? getSubpass()->getViewCountInMetalPass(_multiviewPassIndex) : 1);
+	while (query < endQuery) {
+		(*_pActivatedQueries)[pQueryPool].push_back(query++);
+	}
 }
 
 // Register a command buffer completion handler that finishes each activated query.
@@ -653,6 +732,7 @@ NSString* mvkMTLComputeCommandEncoderLabel(MVKCommandUse cmdUse) {
 		case kMVKCommandUseCopyImageToBuffer:   return @"vkCmdCopyImageToBuffer ComputeEncoder";
 		case kMVKCommandUseFillBuffer:          return @"vkCmdFillBuffer ComputeEncoder";
 		case kMVKCommandUseTessellationVertexTessCtl: return @"vkCmdDraw (vertex and tess control stages) ComputeEncoder";
+		case kMVKCommandUseMultiviewInstanceCountAdjust: return @"vkCmdDraw (multiview instance count adjustment) ComputeEncoder";
 		case kMVKCommandUseCopyQueryPoolResults:return @"vkCmdCopyQueryPoolResults ComputeEncoder";
 		default:                                return @"Unknown Use ComputeEncoder";
 	}
@ -427,6 +427,7 @@ protected:
|
||||
|
||||
MVKMTLBufferBinding swizzleBufferBinding;
|
||||
MVKMTLBufferBinding bufferSizeBufferBinding;
|
||||
MVKMTLBufferBinding viewRangeBufferBinding;
|
||||
|
||||
bool areBufferBindingsDirty = false;
|
||||
bool areTextureBindingsDirty = false;
|
||||
@ -446,6 +447,7 @@ protected:
|
||||
areSamplerStateBindingsDirty = false;
|
||||
swizzleBufferBinding.isDirty = false;
|
||||
bufferSizeBufferBinding.isDirty = false;
|
||||
viewRangeBufferBinding.isDirty = false;
|
||||
|
||||
needsSwizzle = false;
|
||||
}
|
||||
@ -493,6 +495,11 @@ public:
|
||||
bool needTessEvalSizeBuffer,
|
||||
bool needFragmentSizeBuffer);
|
||||
|
||||
/** Sets the current view range buffer state. */
|
||||
void bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding,
|
||||
bool needVertexViewBuffer,
|
||||
bool needFragmentViewBuffer);
|
||||
|
||||
void encodeBindings(MVKShaderStage stage,
|
||||
const char* pStageName,
|
||||
bool fullImageViewSwizzle,
|
||||
|
@ -557,6 +557,18 @@ void MVKGraphicsResourcesCommandEncoderState::bindBufferSizeBuffer(const MVKShad
    _shaderStageResourceBindings[kMVKShaderStageFragment].bufferSizeBufferBinding.isDirty = needFragmentSizeBuffer;
}

void MVKGraphicsResourcesCommandEncoderState::bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding,
                                                                  bool needVertexViewBuffer,
                                                                  bool needFragmentViewBuffer) {
    for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
        _shaderStageResourceBindings[i].viewRangeBufferBinding.index = binding.stages[i];
    }
    _shaderStageResourceBindings[kMVKShaderStageVertex].viewRangeBufferBinding.isDirty = needVertexViewBuffer;
    _shaderStageResourceBindings[kMVKShaderStageTessCtl].viewRangeBufferBinding.isDirty = false;
    _shaderStageResourceBindings[kMVKShaderStageTessEval].viewRangeBufferBinding.isDirty = false;
    _shaderStageResourceBindings[kMVKShaderStageFragment].viewRangeBufferBinding.isDirty = needFragmentViewBuffer;
}

void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stage,
                                                             const char* pStageName,
                                                             bool fullImageViewSwizzle,

@ -587,6 +599,13 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag
        bindImplicitBuffer(_cmdEncoder, shaderStage.bufferSizeBufferBinding, shaderStage.bufferSizes.contents());
    }

    if (shaderStage.viewRangeBufferBinding.isDirty) {
        MVKSmallVector<uint32_t, 2> viewRange;
        viewRange.push_back(_cmdEncoder->getSubpass()->getFirstViewIndexInMetalPass(_cmdEncoder->getMultiviewPassIndex()));
        viewRange.push_back(_cmdEncoder->getSubpass()->getViewCountInMetalPass(_cmdEncoder->getMultiviewPassIndex()));
        bindImplicitBuffer(_cmdEncoder, shaderStage.viewRangeBufferBinding, viewRange.contents());
    }

    encodeBinding<MVKMTLTextureBinding>(shaderStage.textureBindings, shaderStage.areTextureBindingsDirty, bindTexture);
    encodeBinding<MVKMTLSamplerStateBinding>(shaderStage.samplerStateBindings, shaderStage.areSamplerStateBindingsDirty, bindSampler);
}
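The view range buffer bound above is the implicit buffer that carries multiview state into the shaders: element 0 is the first view rendered by the current Metal pass, element 1 is the number of views that pass covers. A minimal sketch of how the generated MSL can recover the Vulkan view index from the Metal instance ID under the instancing strategy (variable names are illustrative, not the exact SPIRV-Cross output):

    // view_range is the implicit buffer bound by bindViewRangeBuffer().
    // Each Vulkan instance is expanded into view_range[1] Metal instances.
    uint view_count   = view_range[1];
    uint view_index   = view_range[0] + (instance_id % view_count);  // SPIR-V ViewIndex
    uint app_instance = instance_id / view_count;                    // the app's instance index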
@ -112,8 +112,11 @@ public:
    /** Returns a MTLComputePipelineState for decompressing a buffer into a 3D image. */
    id<MTLComputePipelineState> getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff);

    /** Returns a MTLComputePipelineState for converting an indirect buffer for use in a multiview draw. */
    id<MTLComputePipelineState> getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed);

    /** Returns a MTLComputePipelineState for converting an indirect buffer for use in a tessellated draw. */
    id<MTLComputePipelineState> getCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed);
    id<MTLComputePipelineState> getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed);

    /** Returns a MTLComputePipelineState for copying an index buffer for use in an indirect tessellated draw. */
    id<MTLComputePipelineState> getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type);

@ -149,7 +152,8 @@ protected:
    id<MTLComputePipelineState> _mtlCopyBufferBytesComputePipelineState = nil;
    id<MTLComputePipelineState> _mtlFillBufferComputePipelineState = nil;
    id<MTLComputePipelineState> _mtlCopyBufferToImage3DDecompressComputePipelineState[2] = {nil, nil};
    id<MTLComputePipelineState> _mtlDrawIndirectConvertBuffersComputePipelineState[2] = {nil, nil};
    id<MTLComputePipelineState> _mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[2] = {nil, nil};
    id<MTLComputePipelineState> _mtlDrawIndirectTessConvertBuffersComputePipelineState[2] = {nil, nil};
    id<MTLComputePipelineState> _mtlDrawIndexedCopyIndexBufferComputePipelineState[2] = {nil, nil};
    id<MTLComputePipelineState> _mtlCopyQueryPoolResultsComputePipelineState = nil;
};
@ -106,8 +106,12 @@ id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdCopyBufferToImage3DDec
    MVK_ENC_REZ_ACCESS(_mtlCopyBufferToImage3DDecompressComputePipelineState[needsTempBuff ? 1 : 0], newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff, _commandPool));
}

id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed) {
    MVK_ENC_REZ_ACCESS(_mtlDrawIndirectConvertBuffersComputePipelineState[indexed ? 1 : 0], newCmdDrawIndirectConvertBuffersMTLComputePipelineState(indexed, _commandPool));
id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed) {
    MVK_ENC_REZ_ACCESS(_mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[indexed ? 1 : 0], newCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(indexed, _commandPool));
}

id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed) {
    MVK_ENC_REZ_ACCESS(_mtlDrawIndirectTessConvertBuffersComputePipelineState[indexed ? 1 : 0], newCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(indexed, _commandPool));
}

id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type) {

@ -179,10 +183,15 @@ void MVKCommandEncodingPool::destroyMetalResources() {
    _mtlCopyBufferToImage3DDecompressComputePipelineState[0] = nil;
    _mtlCopyBufferToImage3DDecompressComputePipelineState[1] = nil;

    [_mtlDrawIndirectConvertBuffersComputePipelineState[0] release];
    [_mtlDrawIndirectConvertBuffersComputePipelineState[1] release];
    _mtlDrawIndirectConvertBuffersComputePipelineState[0] = nil;
    _mtlDrawIndirectConvertBuffersComputePipelineState[1] = nil;
    [_mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[0] release];
    [_mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[1] release];
    _mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[0] = nil;
    _mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[1] = nil;

    [_mtlDrawIndirectTessConvertBuffersComputePipelineState[0] release];
    [_mtlDrawIndirectTessConvertBuffersComputePipelineState[1] release];
    _mtlDrawIndirectTessConvertBuffersComputePipelineState[0] = nil;
    _mtlDrawIndirectTessConvertBuffersComputePipelineState[1] = nil;

    [_mtlDrawIndexedCopyIndexBufferComputePipelineState[0] release];
    [_mtlDrawIndexedCopyIndexBufferComputePipelineState[1] release];
@ -170,17 +170,41 @@ struct MTLStageInRegionIndirectArguments {
}; \n\
#endif \n\
\n\
kernel void cmdDrawIndirectMultiviewConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\
                                                   device MTLDrawPrimitivesIndirectArguments* destBuff [[buffer(1)]], \n\
                                                   constant uint32_t& srcStride [[buffer(2)]], \n\
                                                   constant uint32_t& drawCount [[buffer(3)]], \n\
                                                   constant uint32_t& viewCount [[buffer(4)]], \n\
                                                   uint idx [[thread_position_in_grid]]) { \n\
    if (idx >= drawCount) { return; } \n\
    const device auto& src = *reinterpret_cast<const device MTLDrawPrimitivesIndirectArguments*>(srcBuff + idx * srcStride); \n\
    destBuff[idx] = src; \n\
    destBuff[idx].instanceCount *= viewCount; \n\
} \n\
\n\
kernel void cmdDrawIndexedIndirectMultiviewConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\
                                                          device MTLDrawIndexedPrimitivesIndirectArguments* destBuff [[buffer(1)]], \n\
                                                          constant uint32_t& srcStride [[buffer(2)]], \n\
                                                          constant uint32_t& drawCount [[buffer(3)]], \n\
                                                          constant uint32_t& viewCount [[buffer(4)]], \n\
                                                          uint idx [[thread_position_in_grid]]) { \n\
    if (idx >= drawCount) { return; } \n\
    const device auto& src = *reinterpret_cast<const device MTLDrawIndexedPrimitivesIndirectArguments*>(srcBuff + idx * srcStride); \n\
    destBuff[idx] = src; \n\
    destBuff[idx].instanceCount *= viewCount; \n\
} \n\
\n\
#if __METAL_VERSION__ >= 120 \n\
kernel void cmdDrawIndirectConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\
                                          device char* destBuff [[buffer(1)]], \n\
                                          device char* paramsBuff [[buffer(2)]], \n\
                                          constant uint32_t& srcStride [[buffer(3)]], \n\
                                          constant uint32_t& inControlPointCount [[buffer(4)]], \n\
                                          constant uint32_t& outControlPointCount [[buffer(5)]], \n\
                                          constant uint32_t& drawCount [[buffer(6)]], \n\
                                          constant uint32_t& vtxThreadExecWidth [[buffer(7)]], \n\
                                          constant uint32_t& tcWorkgroupSize [[buffer(8)]], \n\
                                          uint idx [[thread_position_in_grid]]) { \n\
kernel void cmdDrawIndirectTessConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\
                                              device char* destBuff [[buffer(1)]], \n\
                                              device char* paramsBuff [[buffer(2)]], \n\
                                              constant uint32_t& srcStride [[buffer(3)]], \n\
                                              constant uint32_t& inControlPointCount [[buffer(4)]], \n\
                                              constant uint32_t& outControlPointCount [[buffer(5)]], \n\
                                              constant uint32_t& drawCount [[buffer(6)]], \n\
                                              constant uint32_t& vtxThreadExecWidth [[buffer(7)]], \n\
                                              constant uint32_t& tcWorkgroupSize [[buffer(8)]], \n\
                                              uint idx [[thread_position_in_grid]]) { \n\
    if (idx >= drawCount) { return; } \n\
    const device auto& src = *reinterpret_cast<const device MTLDrawPrimitivesIndirectArguments*>(srcBuff + idx * srcStride); \n\
    device char* dest; \n\
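For indirect multiview draws, these kernels rewrite the app's indirect argument buffers before the render pass consumes them, multiplying each draw's instanceCount by the number of views in the pass. A CPU-side mirror of the non-indexed kernel, shown only to make the transformation concrete (struct and function names are illustrative):

    #include <cstdint>
    #include <cstring>

    // Mirrors Metal's MTLDrawPrimitivesIndirectArguments layout.
    struct DrawArgs { uint32_t vertexCount, instanceCount, vertexStart, baseInstance; };

    void convertForMultiview(const char* srcBuff, DrawArgs* destBuff,
                             uint32_t srcStride, uint32_t drawCount, uint32_t viewCount) {
        for (uint32_t idx = 0; idx < drawCount; ++idx) {      // one GPU thread per draw
            std::memcpy(&destBuff[idx], srcBuff + idx * srcStride, sizeof(DrawArgs));
            destBuff[idx].instanceCount *= viewCount;         // instancing renders each view
        }
    }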
@ -421,9 +421,13 @@ public:
    id<MTLComputePipelineState> newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf,
                                                                                           MVKVulkanAPIDeviceObject* owner);

    /** Returns a new MTLComputePipelineState for converting an indirect buffer for use in a multiview draw. */
    id<MTLComputePipelineState> newCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed,
                                                                                                 MVKVulkanAPIDeviceObject* owner);

    /** Returns a new MTLComputePipelineState for converting an indirect buffer for use in a tessellated draw. */
    id<MTLComputePipelineState> newCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed,
                                                                                        MVKVulkanAPIDeviceObject* owner);
    id<MTLComputePipelineState> newCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed,
                                                                                            MVKVulkanAPIDeviceObject* owner);

    /** Returns a new MTLComputePipelineState for copying an index buffer for use in a tessellated draw. */
    id<MTLComputePipelineState> newCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type,
@ -417,11 +417,18 @@ id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdCopyBufferToImage3D
                                      : "cmdCopyBufferToImage3DDecompressDXTn", owner);
}

id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdDrawIndirectConvertBuffersMTLComputePipelineState(bool indexed,
                                                                                                               MVKVulkanAPIDeviceObject* owner) {
id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(bool indexed,
                                                                                                                        MVKVulkanAPIDeviceObject* owner) {
    return newMTLComputePipelineState(indexed
                                      ? "cmdDrawIndexedIndirectConvertBuffers"
                                      : "cmdDrawIndirectConvertBuffers", owner);
                                      ? "cmdDrawIndexedIndirectMultiviewConvertBuffers"
                                      : "cmdDrawIndirectMultiviewConvertBuffers", owner);
}

id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(bool indexed,
                                                                                                                   MVKVulkanAPIDeviceObject* owner) {
    return newMTLComputePipelineState(indexed
                                      ? "cmdDrawIndexedIndirectTessConvertBuffers"
                                      : "cmdDrawIndirectTessConvertBuffers", owner);
}

id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(MTLIndexType type,
@ -297,6 +297,9 @@ public:
    /** Populates the specified structure with the Metal-specific features of this device. */
    inline const MVKPhysicalDeviceMetalFeatures* getMetalFeatures() { return &_metalFeatures; }

    /** Returns whether or not vertex instancing can be used to implement multiview. */
    inline bool canUseInstancingForMultiview() { return _metalFeatures.layeredRendering && _metalFeatures.deferredStoreActions; }

    /** Returns the underlying Metal device. */
    inline id<MTLDevice> getMTLDevice() { return _mtlDevice; }
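This predicate selects between the two strategies used throughout this change: when it returns true, each Metal pass renders a clump of consecutive enabled views by vertex instancing; when it returns false, the render pass is unrolled into one Metal pass per view (see MVKRenderSubpass::getMultiviewMetalPassCount() below). As a worked example, a view mask of 0b0111 renders as one instanced Metal pass covering three views on a device with both layered rendering and deferred store actions, but as three single-view Metal passes otherwise.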
@ -91,6 +91,13 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
            f16Features->shaderInt8 = true;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
            auto* multiviewFeatures = (VkPhysicalDeviceMultiviewFeatures*)next;
            multiviewFeatures->multiview = true;
            multiviewFeatures->multiviewGeometryShader = false;
            multiviewFeatures->multiviewTessellationShader = false;    // FIXME
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
            auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*)next;
            uboLayoutFeatures->uniformBufferStandardLayout = true;
@ -193,6 +200,16 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
            maint3Props->maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
            auto* multiviewProps = (VkPhysicalDeviceMultiviewProperties*)next;
            multiviewProps->maxMultiviewViewCount = 32;
            if (canUseInstancingForMultiview()) {
                multiviewProps->maxMultiviewInstanceIndex = std::numeric_limits<uint32_t>::max() / 32;
            } else {
                multiviewProps->maxMultiviewInstanceIndex = std::numeric_limits<uint32_t>::max();
            }
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
            auto* pushDescProps = (VkPhysicalDevicePushDescriptorPropertiesKHR*)next;
            pushDescProps->maxPushDescriptors = _properties.limits.maxPerStageResources;
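The asymmetry in maxMultiviewInstanceIndex above follows directly from the instancing strategy: with up to maxMultiviewViewCount = 32 views, each app-level instance may expand into 32 Metal instances, so the largest app instance index must satisfy idx * 32 <= 2^32 - 1, i.e. idx <= 0xFFFFFFFF / 32 = 134,217,727. When passes are instead unrolled one view at a time, no expansion occurs and the full 32-bit range remains available.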
@ -25,6 +25,7 @@
#include "MVKSmallVector.h"
#include <MoltenVKSPIRVToMSLConverter/SPIRVReflection.h>
#include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
#include <unordered_map>
#include <unordered_set>
#include <ostream>

@ -78,6 +79,9 @@ public:
    /** Returns the current buffer size buffer bindings. */
    const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; }

    /** Returns the current view range buffer binding for multiview draws. */
    const MVKShaderImplicitRezBinding& getViewRangeBufferIndex() { return _viewRangeBufferIndex; }

    /** Returns the current indirect parameter buffer bindings. */
    const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; }
@ -113,6 +117,7 @@ protected:
    MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
    MVKShaderImplicitRezBinding _swizzleBufferIndex;
    MVKShaderImplicitRezBinding _bufferSizeBufferIndex;
    MVKShaderImplicitRezBinding _viewRangeBufferIndex;
    MVKShaderImplicitRezBinding _indirectParamsIndex;
    MVKShaderImplicitRezBinding _outputBufferIndex;
    uint32_t _tessCtlPatchOutputBufferIndex = 0;

@ -282,6 +287,7 @@ protected:
    bool addFragmentShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput);
    template<class T>
    bool addVertexInputToPipeline(T* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, const SPIRVToMSLConversionConfiguration& shaderContext);
    void adjustVertexInputForMultiview(MTLVertexDescriptor* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, uint32_t viewCount, uint32_t oldViewCount = 1);
    void addTessellationToPipeline(MTLRenderPipelineDescriptor* plDesc, const SPIRVTessReflectionData& reflectData, const VkPipelineTessellationStateCreateInfo* pTS);
    void addFragmentOutputToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo);
    bool isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo);

@ -309,6 +315,7 @@ protected:
    id<MTLComputePipelineState> _mtlTessVertexStageIndex32State = nil;
    id<MTLComputePipelineState> _mtlTessControlStageState = nil;
    id<MTLRenderPipelineState> _mtlPipelineState = nil;
    std::unordered_map<uint32_t, id<MTLRenderPipelineState>> _multiviewMTLPipelineStates;
    MTLCullMode _mtlCullMode;
    MTLWinding _mtlFrontWinding;
    MTLTriangleFillMode _mtlFillMode;

@ -317,6 +324,7 @@ protected:

    float _blendConstants[4] = { 0.0, 0.0, 0.0, 1.0 };
    uint32_t _outputControlPointCount;
    MVKShaderImplicitRezBinding _viewRangeBufferIndex;
    MVKShaderImplicitRezBinding _outputBufferIndex;
    uint32_t _tessCtlPatchOutputBufferIndex = 0;
    uint32_t _tessCtlLevelBufferIndex = 0;

@ -325,6 +333,7 @@ protected:
    bool _hasDepthStencilInfo;
    bool _needsVertexSwizzleBuffer = false;
    bool _needsVertexBufferSizeBuffer = false;
    bool _needsVertexViewRangeBuffer = false;
    bool _needsVertexOutputBuffer = false;
    bool _needsTessCtlSwizzleBuffer = false;
    bool _needsTessCtlBufferSizeBuffer = false;

@ -335,6 +344,7 @@ protected:
    bool _needsTessEvalBufferSizeBuffer = false;
    bool _needsFragmentSwizzleBuffer = false;
    bool _needsFragmentBufferSizeBuffer = false;
    bool _needsFragmentViewRangeBuffer = false;
};
@ -149,6 +149,10 @@ MVKPipelineLayout::MVKPipelineLayout(MVKDevice* device,
            _tessCtlLevelBufferIndex = _tessCtlPatchOutputBufferIndex + 1;
        }
    }
    // Since we currently can't use multiview with tessellation or geometry shaders,
    // to conserve the number of buffer bindings, use the same bindings for the
    // view range buffer as for the indirect parameters buffer.
    _viewRangeBufferIndex = _indirectParamsIndex;
}

MVKPipelineLayout::~MVKPipelineLayout() {
@ -232,7 +236,11 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage)

    if ( !_mtlPipelineState ) { return; }    // Abort if pipeline could not be created.
    // Render pipeline state
    [mtlCmdEnc setRenderPipelineState: _mtlPipelineState];
    if (cmdEncoder->getSubpass()->isMultiview() && !isTessellationPipeline() && !_multiviewMTLPipelineStates.empty()) {
        [mtlCmdEnc setRenderPipelineState: _multiviewMTLPipelineStates[cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex())]];
    } else {
        [mtlCmdEnc setRenderPipelineState: _mtlPipelineState];
    }

    // Depth stencil state
    if (_hasDepthStencilInfo) {

@ -263,6 +271,7 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage)
    }
    cmdEncoder->_graphicsResourcesState.bindSwizzleBuffer(_swizzleBufferIndex, _needsVertexSwizzleBuffer, _needsTessCtlSwizzleBuffer, _needsTessEvalSwizzleBuffer, _needsFragmentSwizzleBuffer);
    cmdEncoder->_graphicsResourcesState.bindBufferSizeBuffer(_bufferSizeBufferIndex, _needsVertexBufferSizeBuffer, _needsTessCtlBufferSizeBuffer, _needsTessEvalBufferSizeBuffer, _needsFragmentBufferSizeBuffer);
    cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer);
}

bool MVKGraphicsPipeline::supportsDynamicState(VkDynamicState state) {
@ -468,7 +477,35 @@ void MVKGraphicsPipeline::initMTLRenderPipelineState(const VkGraphicsPipelineCre
    if (!isTessellationPipeline()) {
        MTLRenderPipelineDescriptor* plDesc = newMTLRenderPipelineDescriptor(pCreateInfo, reflectData);    // temp retain
        if (plDesc) {
            getOrCompilePipeline(plDesc, _mtlPipelineState);
            MVKRenderPass* mvkRendPass = (MVKRenderPass*)pCreateInfo->renderPass;
            MVKRenderSubpass* mvkSubpass = mvkRendPass->getSubpass(pCreateInfo->subpass);
            if (mvkSubpass->isMultiview()) {
                // We need to adjust the step rate for per-instance attributes to account for the
                // extra instances needed to render all views. But, there's a problem: vertex input
                // descriptions are static pipeline state. If we need multiple passes, and some have
                // different numbers of views to render than others, then the step rate must be different
                // for these passes. We'll need to make a pipeline for every pass view count we can see
                // in the render pass. This really sucks.
                std::unordered_set<uint32_t> viewCounts;
                for (uint32_t passIdx = 0; passIdx < mvkSubpass->getMultiviewMetalPassCount(); ++passIdx) {
                    viewCounts.insert(mvkSubpass->getViewCountInMetalPass(passIdx));
                }
                auto count = viewCounts.cbegin();
                adjustVertexInputForMultiview(plDesc.vertexDescriptor, pCreateInfo->pVertexInputState, *count);
                getOrCompilePipeline(plDesc, _mtlPipelineState);
                if (viewCounts.size() > 1) {
                    _multiviewMTLPipelineStates[*count] = _mtlPipelineState;
                    uint32_t oldCount = *count++;
                    for (auto last = viewCounts.cend(); count != last; ++count) {
                        if (_multiviewMTLPipelineStates.count(*count)) { continue; }
                        adjustVertexInputForMultiview(plDesc.vertexDescriptor, pCreateInfo->pVertexInputState, *count, oldCount);
                        getOrCompilePipeline(plDesc, _multiviewMTLPipelineStates[*count]);
                        oldCount = *count;
                    }
                }
            } else {
                getOrCompilePipeline(plDesc, _mtlPipelineState);
            }
        }
        [plDesc release];    // temp release
    } else {
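A worked example of the pipeline-variant logic above (values illustrative): a subpass with viewMask = 0b00110111 splits into the clumps {0,1,2} and {4,5}, so getMultiviewMetalPassCount() is 2 and the distinct view counts are {3, 2}. Two MTLRenderPipelineState variants are compiled, one with per-instance step rates scaled by 3 and one scaled by 2, and stored in _multiviewMTLPipelineStates keyed by view count; at encode time, MVKGraphicsPipeline::encode() looks up the variant with the current Metal pass's view count.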
@ -816,8 +853,9 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor*
    shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageVertex];
    shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageVertex];
    shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex];
    shaderContext.options.mslOptions.capture_output_to_buffer = isTessellationPipeline();
    shaderContext.options.mslOptions.disable_rasterization = isTessellationPipeline() || (pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->rasterizerDiscardEnable));
    shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageVertex];
    shaderContext.options.mslOptions.capture_output_to_buffer = false;
    shaderContext.options.mslOptions.disable_rasterization = pCreateInfo->pRasterizationState && pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
    addVertexInputToShaderConverterContext(shaderContext, pCreateInfo);

    MVKMTLFunction func = ((MVKShaderModule*)_pVertexSS->module)->getMTLFunction(&shaderContext, _pVertexSS->pSpecializationInfo, _pipelineCache);
@ -832,6 +870,7 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor*
    plDesc.rasterizationEnabled = !funcRslts.isRasterizationDisabled;
    _needsVertexSwizzleBuffer = funcRslts.needsSwizzleBuffer;
    _needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
    _needsVertexViewRangeBuffer = funcRslts.needsViewRangeBuffer;
    _needsVertexOutputBuffer = funcRslts.needsOutputBuffer;

    // If we need the swizzle buffer and there's no place to put it, we're in serious trouble.

@ -849,6 +888,9 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor*
    if (!verifyImplicitBuffer(_needsVertexOutputBuffer, _indirectParamsIndex, kMVKShaderStageVertex, "indirect parameters", vbCnt)) {
        return false;
    }
    if (!verifyImplicitBuffer(_needsVertexViewRangeBuffer, _viewRangeBufferIndex, kMVKShaderStageVertex, "view range", vbCnt)) {
        return false;
    }
    return true;
}
@ -1006,6 +1048,7 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto
        shaderContext.options.entryPointStage = spv::ExecutionModelFragment;
        shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageFragment];
        shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageFragment];
        shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageFragment];
        shaderContext.options.entryPointName = _pFragmentSS->pName;
        shaderContext.options.mslOptions.capture_output_to_buffer = false;
        if (pCreateInfo->pMultisampleState && pCreateInfo->pMultisampleState->pSampleMask && pCreateInfo->pMultisampleState->pSampleMask[0] != 0xffffffff) {

@ -1024,12 +1067,16 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto
        auto& funcRslts = func.shaderConversionResults;
        _needsFragmentSwizzleBuffer = funcRslts.needsSwizzleBuffer;
        _needsFragmentBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
        _needsFragmentViewRangeBuffer = funcRslts.needsViewRangeBuffer;
        if (!verifyImplicitBuffer(_needsFragmentSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageFragment, "swizzle", 0)) {
            return false;
        }
        if (!verifyImplicitBuffer(_needsFragmentBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageFragment, "buffer size", 0)) {
            return false;
        }
        if (!verifyImplicitBuffer(_needsFragmentViewRangeBuffer, _viewRangeBufferIndex, kMVKShaderStageFragment, "view range", 0)) {
            return false;
        }
    }
    return true;
}
@ -1182,6 +1229,24 @@ template bool MVKGraphicsPipeline::addVertexInputToPipeline<MTLStageInputOutputD
                                                            const VkPipelineVertexInputStateCreateInfo* pVI,
                                                            const SPIRVToMSLConversionConfiguration& shaderContext);

// Adjusts step rates for per-instance vertex buffers based on the number of views to be drawn.
void MVKGraphicsPipeline::adjustVertexInputForMultiview(MTLVertexDescriptor* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, uint32_t viewCount, uint32_t oldViewCount) {
    uint32_t vbCnt = pVI->vertexBindingDescriptionCount;
    const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions;
    for (uint32_t i = 0; i < vbCnt; ++i, ++pVKVB) {
        uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
        if (inputDesc.layouts[vbIdx].stepFunction == MTLVertexStepFunctionPerInstance) {
            inputDesc.layouts[vbIdx].stepRate = inputDesc.layouts[vbIdx].stepRate / oldViewCount * viewCount;
            for (auto& xltdBind : _translatedVertexBindings) {
                if (xltdBind.binding == pVKVB->binding) {
                    uint32_t vbXltdIdx = getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding);
                    inputDesc.layouts[vbXltdIdx].stepRate = inputDesc.layouts[vbXltdIdx].stepRate / oldViewCount * viewCount;
                }
            }
        }
    }
}

// Returns a translated binding for the existing binding and translation offset, creating it if needed.
uint32_t MVKGraphicsPipeline::getTranslatedVertexBinding(uint32_t binding, uint32_t translationOffset, uint32_t maxBinding) {
    // See if a translated binding already exists (for example if more than one VA needs the same translation).
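The divide-then-multiply in adjustVertexInputForMultiview() lets the same MTLVertexDescriptor be rewritten in place between pipeline variants. Worked example: an attribute with an app stepRate of 3 drawn with 2 views becomes 3 * 2 = 6, since each app instance is now 2 Metal instances; re-targeting that descriptor to a 4-view pass then computes 6 / 2 * 4 = 12, recovering exactly what 3 * 4 would have produced from scratch.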
@ -1323,6 +1388,7 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf
    _outputBufferIndex = layout->getOutputBufferIndex();
    _tessCtlPatchOutputBufferIndex = layout->getTessCtlPatchOutputBufferIndex();
    _tessCtlLevelBufferIndex = layout->getTessCtlLevelBufferIndex();
    _viewRangeBufferIndex = layout->getViewRangeBufferIndex();

    MVKRenderPass* mvkRendPass = (MVKRenderPass*)pCreateInfo->renderPass;
    MVKRenderSubpass* mvkRenderSubpass = mvkRendPass->getSubpass(pCreateInfo->subpass);

@ -1345,6 +1411,9 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf
    shaderContext.options.shouldFlipVertexY = _device->_pMVKConfig->shaderConversionFlipVertexY;
    shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle;
    shaderContext.options.mslOptions.tess_domain_origin_lower_left = pTessDomainOriginState && pTessDomainOriginState->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
    shaderContext.options.mslOptions.multiview = mvkRendPass->isMultiview();
    shaderContext.options.mslOptions.multiview_layered_rendering = getDevice()->getPhysicalDevice()->canUseInstancingForMultiview();
    shaderContext.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT);

    shaderContext.options.tessPatchKind = reflectData.patchKind;
    shaderContext.options.numTessControlPoints = reflectData.numControlPoints;
@ -1481,7 +1550,7 @@ MVKComputePipeline::MVKComputePipeline(MVKDevice* device,
                                       const VkComputePipelineCreateInfo* pCreateInfo) :
    MVKPipeline(device, pipelineCache, (MVKPipelineLayout*)pCreateInfo->layout, parent) {

    _allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE);    // sic; drafters forgot the 'BIT' suffix
    _allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT);

    MVKMTLFunction func = getMTLFunction(pCreateInfo);
    _mtlThreadgroupSize = func.threadGroupSize;

@ -1815,6 +1884,7 @@ namespace SPIRV_CROSS_NAMESPACE {
                opt.swizzle_texture_samples,
                opt.tess_domain_origin_lower_left,
                opt.multiview,
                opt.multiview_layered_rendering,
                opt.view_index_from_device_index,
                opt.dispatch_base,
                opt.texture_1D_as_2D,

@ -1942,7 +2012,8 @@ namespace mvk {
                scr.needsPatchOutputBuffer,
                scr.needsBufferSizeBuffer,
                scr.needsInputThreadgroupMem,
                scr.needsDispatchBaseBuffer);
                scr.needsDispatchBaseBuffer,
                scr.needsViewRangeBuffer);
        }

}
@ -18,6 +18,7 @@

#include "MVKQueryPool.h"
#include "MVKBuffer.h"
#include "MVKRenderPass.h"
#include "MVKCommandBuffer.h"
#include "MVKCommandEncodingPool.h"
#include "MVKOSExtensions.h"
@ -30,8 +31,11 @@ using namespace std;
#pragma mark MVKQueryPool

void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
    uint32_t queryCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
    lock_guard<mutex> lock(_availabilityLock);
    _availability[query] = DeviceAvailable;
    for (uint32_t i = query; i < query + queryCount; ++i) {
        _availability[i] = DeviceAvailable;
    }
    lock_guard<mutex> copyLock(_deferredCopiesLock);
    if (!_deferredCopies.empty()) {
        // Partition by readiness.
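Each view of a multiview pass consumes its own query slot, which is why the loop above finishes a range of slots rather than a single one: ending query q in a Metal pass that covers n views marks slots q through q+n-1 as available. For example, vkCmdEndQuery(q) in a pass rendering three views marks q, q+1, and q+2.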
@ -287,7 +291,12 @@ void MVKOcclusionQueryPool::encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder,

void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer* cmdBuffer) {
    NSUInteger offset = getVisibilityResultOffset(query);
    NSUInteger maxOffset = getDevice()->_pMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes;
    NSUInteger queryCount = 1;
    if (cmdBuffer->getLastMultiviewSubpass()) {
        // In multiview passes, one query is used for each view.
        queryCount = cmdBuffer->getLastMultiviewSubpass()->getViewCount();
    }
    NSUInteger maxOffset = getDevice()->_pMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes * queryCount;
    if (offset > maxOffset) {
        cmdBuffer->setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The query offset value %lu is larger than the maximum offset value %lu available on this device.", offset, maxOffset));
    }
@ -46,6 +46,12 @@ public:
    /** Returns the Vulkan API opaque object controlling this object. */
    MVKVulkanAPIObject* getVulkanAPIObject() override;

    /** Returns the parent render pass of this subpass. */
    inline MVKRenderPass* getRenderPass() { return _renderPass; }

    /** Returns the index of this subpass in its parent render pass. */
    inline uint32_t getSubpassIndex() { return _subpassIndex; }

    /** Returns the number of color attachments, which may be zero for depth-only rendering. */
    inline uint32_t getColorAttachmentCount() { return uint32_t(_colorAttachments.size()); }
@ -61,11 +67,31 @@ public:
    /** Returns the Vulkan sample count of the attachments used in this subpass. */
    VkSampleCountFlagBits getSampleCount();

    /** Returns whether or not this is a multiview subpass. */
    bool isMultiview() const { return _viewMask != 0; }

    /** Returns the total number of views to be rendered. */
    inline uint32_t getViewCount() const { return __builtin_popcount(_viewMask); }

    /** Returns the number of Metal render passes needed to render all views. */
    uint32_t getMultiviewMetalPassCount() const;

    /** Returns the first view to be rendered in the given multiview pass. */
    uint32_t getFirstViewIndexInMetalPass(uint32_t passIdx) const;

    /** Returns the number of views to be rendered in the given multiview pass. */
    uint32_t getViewCountInMetalPass(uint32_t passIdx) const;

    /** Returns the number of views to be rendered in all multiview passes up to the given one. */
    uint32_t getViewCountUpToMetalPass(uint32_t passIdx) const;

    /**
     * Populates the specified Metal MTLRenderPassDescriptor with content from this
     * instance, the specified framebuffer, and the specified array of clear values.
     * instance, the specified framebuffer, and the specified array of clear values
     * for the specified multiview pass.
     */
    void populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc,
                                         uint32_t passIdx,
                                         MVKFramebuffer* framebuffer,
                                         const MVKArrayRef<VkClearValue>& clearValues,
                                         bool isRenderingEntireAttachment,
@ -78,21 +104,32 @@ public:
    void populateClearAttachments(MVKClearAttachments& clearAtts,
                                  const MVKArrayRef<VkClearValue>& clearValues);

    /**
     * Populates the specified vector with VkClearRects for clearing views of a specified multiview
     * attachment on first use, when the render area is smaller than the full framebuffer size
     * and/or not all views used in this subpass need to be cleared.
     */
    void populateMultiviewClearRects(MVKSmallVector<VkClearRect, 1>& clearRects,
                                     MVKCommandEncoder* cmdEncoder,
                                     uint32_t caIdx, VkImageAspectFlags aspectMask);

    /** If a render encoder is active, sets the store actions for all attachments to it. */
    void encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, bool storeOverride = false);

    /** Constructs an instance for the specified parent renderpass. */
    MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo);
    MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo, uint32_t viewMask);

private:

    friend class MVKRenderPass;
    friend class MVKRenderPassAttachment;

    uint32_t getViewMaskGroupForMetalPass(uint32_t passIdx);
    MVKMTLFmtCaps getRequiredFormatCapabilitiesForAttachmentAt(uint32_t rpAttIdx);

    MVKRenderPass* _renderPass;
    uint32_t _subpassIndex;
    uint32_t _viewMask;
    MVKSmallVector<VkAttachmentReference, kMVKDefaultAttachmentCount> _inputAttachments;
    MVKSmallVector<VkAttachmentReference, kMVKDefaultAttachmentCount> _colorAttachments;
    MVKSmallVector<VkAttachmentReference, kMVKDefaultAttachmentCount> _resolveAttachments;
@ -139,6 +176,9 @@ public:
                            bool isStencil,
                            bool storeOverride = false);

    /** Populates the specified vector with VkClearRects for clearing views of a multiview attachment on first use. */
    void populateMultiviewClearRects(MVKSmallVector<VkClearRect, 1>& clearRects, MVKCommandEncoder* cmdEncoder);

    /** Returns whether this attachment should be cleared in the subpass. */
    bool shouldUseClearAttachment(MVKRenderSubpass* subpass);
@ -147,6 +187,8 @@ public:
                            const VkAttachmentDescription* pCreateInfo);

protected:
    bool isFirstUseOfAttachment(MVKRenderSubpass* subpass);
    bool isLastUseOfAttachment(MVKRenderSubpass* subpass);
    MTLStoreAction getMTLStoreAction(MVKRenderSubpass* subpass,
                                     bool isRenderingEntireAttachment,
                                     bool hasResolveAttachment,
@ -158,6 +200,8 @@ protected:
    uint32_t _attachmentIndex;
    uint32_t _firstUseSubpassIdx;
    uint32_t _lastUseSubpassIdx;
    MVKSmallVector<uint32_t> _firstUseViewMasks;
    MVKSmallVector<uint32_t> _lastUseViewMasks;
};
@ -181,6 +225,9 @@ public:
    /** Returns the format of the color attachment at the specified index. */
    MVKRenderSubpass* getSubpass(uint32_t subpassIndex);

    /** Returns whether or not this render pass is a multiview render pass. */
    bool isMultiview() const;

    /** Constructs an instance for the specified device. */
    MVKRenderPass(MVKDevice* device, const VkRenderPassCreateInfo* pCreateInfo);
@ -21,6 +21,7 @@
#include "MVKCommandBuffer.h"
#include "MVKFoundation.h"
#include "mvk_datatypes.hpp"
#include <cassert>

using namespace std;
@ -67,7 +68,109 @@ VkSampleCountFlagBits MVKRenderSubpass::getSampleCount() {
    return VK_SAMPLE_COUNT_1_BIT;
}

// Extract the first view, number of views, and the portion of the mask to be rendered from
// the lowest clump of set bits in a view mask.
static uint32_t getNextViewMaskGroup(uint32_t viewMask, uint32_t* startView, uint32_t* viewCount, uint32_t *groupMask = nullptr) {
    // First, find the first set bit. This is the start of the next clump of views to be rendered.
    // n.b. ffs(3) returns a 1-based index. This actually bit me during development of this feature.
    int pos = ffs(viewMask) - 1;
    int end = pos;
    if (groupMask) { *groupMask = 0; }
    // Now we'll step through the bits one at a time until we find a bit that isn't set.
    // This is one past the end of the next clump. Clear the bits as we go, so we can use
    // ffs(3) again on the next clump.
    // TODO: Find a way to make this faster.
    while (viewMask & (1 << end)) {
        if (groupMask) { *groupMask |= viewMask & (1 << end); }
        viewMask &= ~(1 << (end++));
    }
    if (startView) { *startView = pos; }
    if (viewCount) { *viewCount = end - pos; }
    return viewMask;
}
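A quick worked example of the clump extraction, as a standalone C++ sketch (the helper below reproduces the same algorithm; the real callers are the MVKRenderSubpass methods that follow):

    #include <cstdint>
    #include <cstdio>
    #include <strings.h>   // ffs(3)

    // Same algorithm as getNextViewMaskGroup() above, reproduced standalone.
    static uint32_t nextGroup(uint32_t viewMask, uint32_t* startView, uint32_t* viewCount) {
        int pos = ffs(viewMask) - 1;   // ffs() is 1-based; -1 yields the bit index
        int end = pos;
        while (viewMask & (1 << end)) { viewMask &= ~(1 << (end++)); }
        if (startView) { *startView = pos; }
        if (viewCount) { *viewCount = end - pos; }
        return viewMask;
    }

    int main() {
        uint32_t mask = 0b01100110, start, count;
        while (mask) {
            mask = nextGroup(mask, &start, &count);
            std::printf("views %u..%u\n", start, start + count - 1);
        }
        // Prints "views 1..2" then "views 5..6": two clumps, hence two Metal passes.
    }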
// Get the portion of the view mask that will be rendered in the specified Metal render pass.
uint32_t MVKRenderSubpass::getViewMaskGroupForMetalPass(uint32_t passIdx) {
    if (!_viewMask) { return 0; }
    assert(passIdx < getMultiviewMetalPassCount());
    if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) {
        return 1 << getFirstViewIndexInMetalPass(passIdx);
    }
    uint32_t mask = _viewMask, groupMask = 0;
    for (uint32_t i = 0; i <= passIdx; ++i) {
        mask = getNextViewMaskGroup(mask, nullptr, nullptr, &groupMask);
    }
    return groupMask;
}
uint32_t MVKRenderSubpass::getMultiviewMetalPassCount() const {
    if (!_viewMask) { return 0; }
    if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) {
        // If we can't use instanced drawing for this, we'll have to unroll the render pass.
        return __builtin_popcount(_viewMask);
    }
    uint32_t mask = _viewMask;
    uint32_t count;
    // Step through each clump until there are no more clumps. I'll know this has
    // happened when the mask becomes 0, since getNextViewMaskGroup() clears each group of bits
    // as it finds them, and returns the remainder of the mask.
    for (count = 0; mask != 0; ++count) {
        mask = getNextViewMaskGroup(mask, nullptr, nullptr);
    }
    return count;
}

uint32_t MVKRenderSubpass::getFirstViewIndexInMetalPass(uint32_t passIdx) const {
    if (!_viewMask) { return 0; }
    assert(passIdx < getMultiviewMetalPassCount());
    uint32_t mask = _viewMask;
    uint32_t startView = 0, viewCount = 0;
    if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) {
        for (uint32_t i = 0; mask != 0; ++i) {
            mask = getNextViewMaskGroup(mask, &startView, &viewCount);
            while (passIdx-- > 0 && viewCount-- > 0) {
                startView++;
            }
        }
    } else {
        for (uint32_t i = 0; i <= passIdx; ++i) {
            mask = getNextViewMaskGroup(mask, &startView, nullptr);
        }
    }
    return startView;
}

uint32_t MVKRenderSubpass::getViewCountInMetalPass(uint32_t passIdx) const {
    if (!_viewMask) { return 0; }
    assert(passIdx < getMultiviewMetalPassCount());
    if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) {
        return 1;
    }
    uint32_t mask = _viewMask;
    uint32_t viewCount = 0;
    for (uint32_t i = 0; i <= passIdx; ++i) {
        mask = getNextViewMaskGroup(mask, nullptr, &viewCount);
    }
    return viewCount;
}

uint32_t MVKRenderSubpass::getViewCountUpToMetalPass(uint32_t passIdx) const {
    if (!_viewMask) { return 0; }
    if (!_renderPass->getDevice()->getPhysicalDevice()->canUseInstancingForMultiview()) {
        return passIdx + 1;
    }
    uint32_t mask = _viewMask;
    uint32_t totalViewCount = 0;
    for (uint32_t i = 0; i <= passIdx; ++i) {
        uint32_t viewCount;
        mask = getNextViewMaskGroup(mask, nullptr, &viewCount);
        totalViewCount += viewCount;
    }
    return totalViewCount;
}
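Taken together, for viewMask = 0b01100110 on a device where canUseInstancingForMultiview() is true: getMultiviewMetalPassCount() = 2; getFirstViewIndexInMetalPass(0) = 1 and getFirstViewIndexInMetalPass(1) = 5; getViewCountInMetalPass() returns 2 for both passes; and getViewCountUpToMetalPass(1) = 4. Without instancing, the same mask instead unrolls into popcount = 4 Metal passes, each rendering a single view.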
void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc,
                                                       uint32_t passIdx,
                                                       MVKFramebuffer* framebuffer,
                                                       const MVKArrayRef<VkClearValue>& clearValues,
                                                       bool isRenderingEntireAttachment,

@ -89,6 +192,15 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor*
            bool hasResolveAttachment = (rslvRPAttIdx != VK_ATTACHMENT_UNUSED);
            if (hasResolveAttachment) {
                framebuffer->getAttachment(rslvRPAttIdx)->populateMTLRenderPassAttachmentDescriptorResolve(mtlColorAttDesc);
                // In a multiview render pass, we need to override the starting layer to ensure
                // only the enabled views are loaded.
                if (isMultiview()) {
                    uint32_t startView = getFirstViewIndexInMetalPass(passIdx);
                    if (mtlColorAttDesc.resolveTexture.textureType == MTLTextureType3D)
                        mtlColorAttDesc.resolveDepthPlane += startView;
                    else
                        mtlColorAttDesc.resolveSlice += startView;
                }
            }
            // Configure the color attachment

@ -100,6 +212,13 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor*
                               loadOverride)) {
                mtlColorAttDesc.clearColor = pixFmts->getMTLClearColor(clearValues[clrRPAttIdx], clrMVKRPAtt->getFormat());
            }
            if (isMultiview()) {
                uint32_t startView = getFirstViewIndexInMetalPass(passIdx);
                if (mtlColorAttDesc.texture.textureType == MTLTextureType3D)
                    mtlColorAttDesc.depthPlane += startView;
                else
                    mtlColorAttDesc.slice += startView;
            }
        }
    }

@ -119,6 +238,9 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor*
                               loadOverride)) {
                mtlDepthAttDesc.clearDepth = pixFmts->getMTLClearDepthValue(clearValues[dsRPAttIdx]);
            }
            if (isMultiview()) {
                mtlDepthAttDesc.slice += getFirstViewIndexInMetalPass(passIdx);
            }
        }
        if (pixFmts->isStencilFormat(mtlDSFormat)) {
            MTLRenderPassStencilAttachmentDescriptor* mtlStencilAttDesc = mtlRPDesc.stencilAttachment;

@ -129,6 +251,9 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor*
                               loadOverride)) {
                mtlStencilAttDesc.clearStencil = pixFmts->getMTLClearStencilValue(clearValues[dsRPAttIdx]);
            }
            if (isMultiview()) {
                mtlStencilAttDesc.slice += getFirstViewIndexInMetalPass(passIdx);
            }
        }
    }

@ -145,7 +270,10 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor*
        // Add a dummy attachment so this passes validation.
        VkExtent2D fbExtent = framebuffer->getExtent2D();
        MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: MTLPixelFormatR8Unorm width: fbExtent.width height: fbExtent.height mipmapped: NO];
        if (framebuffer->getLayerCount() > 1) {
        if (isMultiview()) {
            mtlTexDesc.textureType = MTLTextureType2DArray;
            mtlTexDesc.arrayLength = getViewCountInMetalPass(passIdx);
        } else if (framebuffer->getLayerCount() > 1) {
            mtlTexDesc.textureType = MTLTextureType2DArray;
            mtlTexDesc.arrayLength = framebuffer->getLayerCount();
        }

@ -222,6 +350,24 @@ void MVKRenderSubpass::populateClearAttachments(MVKClearAttachments& clearAtts,
    }
}
void MVKRenderSubpass::populateMultiviewClearRects(MVKSmallVector<VkClearRect, 1>& clearRects,
                                                   MVKCommandEncoder* cmdEncoder,
                                                   uint32_t caIdx, VkImageAspectFlags aspectMask) {
    uint32_t attIdx;
    assert(this == cmdEncoder->getSubpass());
    if (mvkIsAnyFlagEnabled(aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
        attIdx = _depthStencilAttachment.attachment;
        if (attIdx != VK_ATTACHMENT_UNUSED) {
            _renderPass->_attachments[attIdx].populateMultiviewClearRects(clearRects, cmdEncoder);
        }
        return;
    }
    attIdx = _colorAttachments[caIdx].attachment;
    if (attIdx != VK_ATTACHMENT_UNUSED) {
        _renderPass->_attachments[attIdx].populateMultiviewClearRects(clearRects, cmdEncoder);
    }
}
// Returns the format capabilities required by this render subpass.
// It is possible for a subpass to use a single framebuffer attachment for multiple purposes.
// For example, a subpass may use a color or depth attachment as an input attachment as well.

@ -253,9 +399,11 @@ MVKMTLFmtCaps MVKRenderSubpass::getRequiredFormatCapabilitiesForAttachmentAt(uin
}

MVKRenderSubpass::MVKRenderSubpass(MVKRenderPass* renderPass,
                                   const VkSubpassDescription* pCreateInfo) {
                                   const VkSubpassDescription* pCreateInfo,
                                   uint32_t viewMask) {
    _renderPass = renderPass;
    _subpassIndex = (uint32_t)_renderPass->_subpasses.size();
    _viewMask = viewMask;

    // Add attachments
    _inputAttachments.reserve(pCreateInfo->inputAttachmentCount);
@ -310,7 +458,7 @@ bool MVKRenderPassAttachment::populateMTLRenderPassAttachmentDescriptor(MTLRende
    // attachment AND we're in the first subpass.
    if ( loadOverride ) {
        mtlAttDesc.loadAction = MTLLoadActionLoad;
    } else if ( isRenderingEntireAttachment && (subpass->_subpassIndex == _firstUseSubpassIdx) ) {
    } else if ( isRenderingEntireAttachment && isFirstUseOfAttachment(subpass) ) {
        VkAttachmentLoadOp loadOp = isStencil ? _info.stencilLoadOp : _info.loadOp;
        mtlAttDesc.loadAction = mvkMTLLoadActionFromVkAttachmentLoadOp(loadOp);
        willClear = (loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR);

@ -347,6 +495,35 @@ void MVKRenderPassAttachment::encodeStoreAction(MVKCommandEncoder* cmdEncoder,
    }
}
void MVKRenderPassAttachment::populateMultiviewClearRects(MVKSmallVector<VkClearRect, 1>& clearRects, MVKCommandEncoder* cmdEncoder) {
    MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
    uint32_t clearMask = subpass->getViewMaskGroupForMetalPass(cmdEncoder->getMultiviewPassIndex()) & _firstUseViewMasks[subpass->_subpassIndex];

    if (!clearMask) { return; }
    VkRect2D renderArea = cmdEncoder->clipToRenderArea({{0, 0}, {kMVKUndefinedLargeUInt32, kMVKUndefinedLargeUInt32}});
    uint32_t startView, viewCount;
    do {
        clearMask = getNextViewMaskGroup(clearMask, &startView, &viewCount);
        clearRects.push_back({renderArea, startView, viewCount});
    } while (clearMask);
}
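A worked example of the clear-rect population above (values illustrative): if the current Metal pass renders the view group 0b0110 and both of those views are first used here, a single VkClearRect of { renderArea, baseArrayLayer = 1, layerCount = 2 } is produced, since the brace-initializer maps startView to baseArrayLayer and viewCount to layerCount. A mask with disjoint clumps would yield one rect per clump.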
bool MVKRenderPassAttachment::isFirstUseOfAttachment(MVKRenderSubpass* subpass) {
    if ( subpass->isMultiview() ) {
        return _firstUseViewMasks[subpass->_subpassIndex] == subpass->_viewMask;
    } else {
        return _firstUseSubpassIdx == subpass->_subpassIndex;
    }
}

bool MVKRenderPassAttachment::isLastUseOfAttachment(MVKRenderSubpass* subpass) {
    if ( subpass->isMultiview() ) {
        return _lastUseViewMasks[subpass->_subpassIndex] == subpass->_viewMask;
    } else {
        return _lastUseSubpassIdx == subpass->_subpassIndex;
    }
}
MTLStoreAction MVKRenderPassAttachment::getMTLStoreAction(MVKRenderSubpass* subpass,
                                                          bool isRenderingEntireAttachment,
                                                          bool hasResolveAttachment,

@ -361,7 +538,7 @@ MTLStoreAction MVKRenderPassAttachment::getMTLStoreAction(MVKRenderSubp
    if ( storeOverride ) {
        return hasResolveAttachment ? MTLStoreActionStoreAndMultisampleResolve : MTLStoreActionStore;
    }
    if ( isRenderingEntireAttachment && (subpass->_subpassIndex == _lastUseSubpassIdx) ) {
    if ( isRenderingEntireAttachment && isLastUseOfAttachment(subpass) ) {
        VkAttachmentStoreOp storeOp = isStencil ? _info.stencilStoreOp : _info.storeOp;
        return mvkMTLStoreActionFromVkAttachmentStoreOp(storeOp, hasResolveAttachment);
    }

@ -371,7 +548,11 @@ bool MVKRenderPassAttachment::shouldUseClearAttachment(MVKRenderSubpass* subpass) {

bool MVKRenderPassAttachment::shouldUseClearAttachment(MVKRenderSubpass* subpass) {

    // If the subpass is not the first subpass to use this attachment, don't clear this attachment
    if (subpass->_subpassIndex != _firstUseSubpassIdx) { return false; }
    if (subpass->isMultiview()) {
        if (_firstUseViewMasks[subpass->_subpassIndex] == 0) { return false; }
    } else {
        if (subpass->_subpassIndex != _firstUseSubpassIdx) { return false; }
    }

    return (_info.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR);
}
@ -391,6 +572,10 @@ MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass,
    // Determine the indices of the first and last render subpasses to use this attachment.
    _firstUseSubpassIdx = kMVKUndefinedLargeUInt32;
    _lastUseSubpassIdx = 0;
    if ( _renderPass->isMultiview() ) {
        _firstUseViewMasks.reserve(_renderPass->_subpasses.size());
        _lastUseViewMasks.reserve(_renderPass->_subpasses.size());
    }
    for (auto& subPass : _renderPass->_subpasses) {
        // If it uses this attachment, the subpass will identify required format capabilities.
        MVKMTLFmtCaps reqCaps = subPass.getRequiredFormatCapabilitiesForAttachmentAt(_attachmentIndex);

@ -398,6 +583,13 @@ MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass,
            uint32_t spIdx = subPass._subpassIndex;
            _firstUseSubpassIdx = min(spIdx, _firstUseSubpassIdx);
            _lastUseSubpassIdx = max(spIdx, _lastUseSubpassIdx);
            if ( subPass.isMultiview() ) {
                uint32_t viewMask = subPass._viewMask;
                std::for_each(_lastUseViewMasks.begin(), _lastUseViewMasks.end(), [viewMask](uint32_t& mask) { mask &= ~viewMask; });
                _lastUseViewMasks.push_back(viewMask);
                std::for_each(_firstUseViewMasks.begin(), _firstUseViewMasks.end(), [&viewMask](uint32_t mask) { viewMask &= ~mask; });
                _firstUseViewMasks.push_back(viewMask);
            }

        // Validate that the attachment pixel format supports the capabilities required by the subpass.
        // Use MTLPixelFormat to look up capabilities to permit Metal format substitution.
@ -416,13 +608,31 @@ VkExtent2D MVKRenderPass::getRenderAreaGranularity() { return { 1, 1 }; }

MVKRenderSubpass* MVKRenderPass::getSubpass(uint32_t subpassIndex) { return &_subpasses[subpassIndex]; }

bool MVKRenderPass::isMultiview() const { return _subpasses[0].isMultiview(); }

MVKRenderPass::MVKRenderPass(MVKDevice* device,
                             const VkRenderPassCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {

    const VkRenderPassMultiviewCreateInfo* pMultiviewCreateInfo = nullptr;
    for (auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
        switch (next->sType) {
            case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO:
                pMultiviewCreateInfo = (const VkRenderPassMultiviewCreateInfo*)next;
                break;
            default:
                break;
        }
    }

    const uint32_t* viewMasks = nullptr;
    if (pMultiviewCreateInfo && pMultiviewCreateInfo->subpassCount) {
        viewMasks = pMultiviewCreateInfo->pViewMasks;
    }

    // Add subpasses and dependencies first
    _subpasses.reserve(pCreateInfo->subpassCount);
    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
        _subpasses.emplace_back(this, &pCreateInfo->pSubpasses[i]);
        _subpasses.emplace_back(this, &pCreateInfo->pSubpasses[i], viewMasks ? viewMasks[i] : 0);
    }
    _subpassDependencies.reserve(pCreateInfo->dependencyCount);
    for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
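For reference, the app-side structure that feeds the viewMasks above is the standard VK_KHR_multiview pNext chain; a minimal sketch (app code, not part of MoltenVK):

    #include <vulkan/vulkan.h>

    // One subpass rendering views 0 and 1; correlation masks are optimization hints.
    uint32_t viewMask = 0b11;
    uint32_t correlationMask = 0b11;
    VkRenderPassMultiviewCreateInfo multiviewInfo = {};
    multiviewInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO;
    multiviewInfo.subpassCount = 1;
    multiviewInfo.pViewMasks = &viewMask;
    multiviewInfo.correlationMaskCount = 1;
    multiviewInfo.pCorrelationMasks = &correlationMask;

    VkRenderPassCreateInfo rpInfo = {};
    rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
    rpInfo.pNext = &multiviewInfo;   // picked up by the pNext loop above
    // ... attachments, subpasses, and dependencies as usual ...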
@ -56,6 +56,7 @@ MVK_EXTENSION(KHR_image_format_list, KHR_IMAGE_FORMAT_LIST, DEVICE)
MVK_EXTENSION(KHR_maintenance1, KHR_MAINTENANCE1, DEVICE)
MVK_EXTENSION(KHR_maintenance2, KHR_MAINTENANCE2, DEVICE)
MVK_EXTENSION(KHR_maintenance3, KHR_MAINTENANCE3, DEVICE)
MVK_EXTENSION(KHR_multiview, KHR_MULTIVIEW, DEVICE)
MVK_EXTENSION(KHR_push_descriptor, KHR_PUSH_DESCRIPTOR, DEVICE)
MVK_EXTENSION(KHR_relaxed_block_layout, KHR_RELAXED_BLOCK_LAYOUT, DEVICE)
MVK_EXTENSION(KHR_sampler_mirror_clamp_to_edge, KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, DEVICE)
@ -85,6 +85,7 @@ typedef enum : uint8_t {
    kMVKCommandUseResetQueryPool,              /**< vkCmdResetQueryPool. */
    kMVKCommandUseDispatch,                    /**< vkCmdDispatch. */
    kMVKCommandUseTessellationVertexTessCtl,   /**< vkCmdDraw* - vertex and tessellation control stages. */
    kMVKCommandUseMultiviewInstanceCountAdjust,/**< vkCmdDrawIndirect* - adjust instance count for multiview. */
    kMVKCommandUseCopyQueryPoolResults         /**< vkCmdCopyQueryPoolResults. */
} MVKCommandUse;
@ -302,6 +302,7 @@ MVK_PUBLIC_SYMBOL bool SPIRVToMSLConverter::convert(SPIRVToMSLConversionConfigur
    _shaderConversionResults.needsBufferSizeBuffer = pMSLCompiler && pMSLCompiler->needs_buffer_size_buffer();
    _shaderConversionResults.needsInputThreadgroupMem = pMSLCompiler && pMSLCompiler->needs_input_threadgroup_mem();
    _shaderConversionResults.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer();
    _shaderConversionResults.needsViewRangeBuffer = pMSLCompiler && pMSLCompiler->needs_view_mask_buffer();

    for (auto& ctxSI : context.shaderInputs) {
        ctxSI.isUsedByShader = pMSLCompiler->is_msl_shader_input_used(ctxSI.shaderInput.location);
@ -209,6 +209,7 @@ namespace mvk {
        bool needsBufferSizeBuffer = false;
        bool needsInputThreadgroupMem = false;
        bool needsDispatchBaseBuffer = false;
        bool needsViewRangeBuffer = false;

        void reset() { *this = SPIRVToMSLConversionResults(); }