Process multiple patches per workgroup in a tessellation control shader.

This should reduce underutilization of the GPU, especially on
GPUs where the thread execution width is greater than the number of
control points.

This also eliminates the extra invocations previously needed to read the
varyings from the vertex shader into the tessellation shader. The number
of threads per workgroup is now lcm(SIMD-size, output control points).
This should ensure we always process a whole number of patches per
workgroup, and further reduce underutilization of the GPU's SIMD units.
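
As a concrete illustration of the sizing rule, here is a minimal C++ sketch
(using std::lcm from <numeric> in place of MoltenVK's mvkLeastCommonMultiple;
the maxThreadgroupSize parameter stands in for the device limit):

    #include <cstdint>
    #include <numeric>  // std::lcm (C++17)

    // Smallest threadgroup size that holds a whole number of patches:
    // lcm(SIMD width, output control points). If it exceeds the device
    // limit, halve the SIMD width until it fits, as the code below does.
    static uint32_t tessCtlWorkgroupSize(uint32_t simdWidth,
                                         uint32_t outControlPoints,
                                         uint32_t maxThreadgroupSize) {
        uint32_t sgSize = simdWidth;
        uint32_t wgSize = std::lcm(sgSize, outControlPoints);
        while (wgSize > maxThreadgroupSize) {
            sgSize >>= 1;
            wgSize = std::lcm(sgSize, outControlPoints);
        }
        return wgSize;  // always a multiple of outControlPoints
    }

For example, with a SIMD width of 32 and 3 output control points, each
workgroup runs lcm(32, 3) = 96 threads and processes exactly 32 patches.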

To avoid the complexity of handling indices in the tessellation control
shader, I've also changed the way vertex shaders for tessellation are handled.
They are now compute kernels using Metal's support for vertex-style
stage input. This lets us always emit vertices into the buffer in order
of vertex shader execution. Now we no longer have to deal with indexing
in the tessellation control shader, nor do we always have to duplicate
the index buffer to insert gaps. This also fixes a long-standing issue
where if an index were greater than the number of vertices to draw, the
vertex shader would wind up writing outside the buffer, and the vertex
would be lost.
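
For reference, a compute kernel using vertex-style stage input has roughly
this shape (a hedged MSL sketch with hypothetical names; the real kernels are
generated by SPIRV-Cross from the vertex SPIR-V):

    struct VertexIn {
        float4 position [[attribute(0)]];  // fetched via MTLStageInputOutputDescriptor
    };
    struct VertexOut {
        float4 position;
    };
    // The grid is (vertexCount, instanceCount), matching the
    // setStageInRegion:/dispatchThreads: calls in the encoders below.
    // Writing at the grid position keeps the output buffer in vertex
    // shader execution order.
    kernel void tessVertexStage(VertexIn in           [[stage_in]],
                                device VertexOut* out [[buffer(0)]],
                                uint2 gridPos  [[thread_position_in_grid]],
                                uint2 gridSize [[threads_per_grid]]) {
        VertexOut v;
        v.position = in.position;
        out[gridPos.y * gridSize.x + gridPos.x] = v;
    }
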
Chip Davis 2020-07-26 19:22:42 -05:00
parent 4609416ef2
commit 3db2cbff6b
12 changed files with 625 additions and 447 deletions


@@ -344,6 +344,9 @@ MTLVertexFormat mvkMTLVertexFormatFromVkFormat(VkFormat vkFormat);
/** Returns the Metal MTLVertexStepFunction corresponding to the specified Vulkan VkVertexInputRate. */
MTLVertexStepFunction mvkMTLVertexStepFunctionFromVkVertexInputRate(VkVertexInputRate vkVtxStep);
/** Returns the Metal MTLStepFunction corresponding to the specified Vulkan VkVertexInputRate. */
MTLStepFunction mvkMTLStepFunctionFromVkVertexInputRate(VkVertexInputRate vkVtxStep, bool forTess = false);
/** Returns the Metal MTLPrimitiveType corresponding to the specified Vulkan VkPrimitiveTopology. */
MTLPrimitiveType mvkMTLPrimitiveTypeFromVkPrimitiveTopology(VkPrimitiveTopology vkTopology);


@@ -55,7 +55,7 @@ typedef unsigned long MTLLanguageVersion;
#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch))
#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
#define VK_MVK_MOLTENVK_SPEC_VERSION 27
#define VK_MVK_MOLTENVK_SPEC_VERSION 28
#define VK_MVK_MOLTENVK_EXTENSION_NAME "VK_MVK_moltenvk"
/**
@@ -614,6 +614,7 @@ typedef struct {
uint32_t subgroupSize; /**< The number of threads in a SIMD-group. */
VkDeviceSize vertexStrideAlignment; /**< The alignment used for the stride of vertex attribute bindings. */
VkBool32 indirectTessellationDrawing; /**< If true, tessellation draw calls support parameters held in a GPU buffer. */
VkBool32 nonUniformThreadgroups; /**< If true, the device supports arbitrary-sized grids in compute workloads. */
} MVKPhysicalDeviceMetalFeatures;
/** MoltenVK performance of a particular type of activity. */
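
The new nonUniformThreadgroups flag drives a dispatch pattern that recurs
throughout the draw-call changes below: dispatch an exact-size grid where the
device supports it, otherwise round up to whole threadgroups. Schematically
(condensed from the code in this commit):

    if (mtlFeatures.nonUniformThreadgroups) {
        // Exact grid; the device handles the partial trailing threadgroup.
        [encoder dispatchThreads: MTLSizeMake(vertexCount, instanceCount, 1)
           threadsPerThreadgroup: MTLSizeMake(threadExecutionWidth, 1, 1)];
    } else {
        // Whole threadgroups only; round up and let the shader bounds-check.
        [encoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(vertexCount, threadExecutionWidth), instanceCount, 1)
                threadsPerThreadgroup: MTLSizeMake(threadExecutionWidth, 1, 1)];
    }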


@@ -112,21 +112,21 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
pipeline->getStages(stages);
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* vtxParamsBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
uint32_t patchCount = 0;
uint32_t inControlPointCount = 0, outControlPointCount = 0;
struct {
uint32_t inControlPointCount = 0;
uint32_t patchCount = 0;
} tessParams;
uint32_t outControlPointCount = 0;
if (pipeline->isTessellationPipeline()) {
inControlPointCount = pipeline->getInputControlPointCount();
tessParams.inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
patchCount = mvkCeilingDivide(_vertexCount, inControlPointCount);
tessParams.patchCount = mvkCeilingDivide(_vertexCount, tessParams.inControlPointCount) * _instanceCount;
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
if (stage == kMVKGraphicsStageVertex)
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
@@ -134,33 +134,25 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
switch (stage) {
case kMVKGraphicsStageVertex:
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
// The shader only needs the number of vertices, so that's all we'll give it.
// It'd be nice to be able to use setVertexBytes(), but since we can't guarantee
// more than 4 bytes alignment because of indirect draws, we're stuck doing this.
vtxParamsBuff = cmdEncoder->getTempMTLBuffer(4);
*(uint32_t*)vtxParamsBuff->getContents() = _vertexCount;
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: vtxParamsBuff->_mtlBuffer
offset: vtxParamsBuff->_offset
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
}
if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: MTLPrimitiveTypePoint
vertexStart: _firstVertex
vertexCount: _vertexCount
instanceCount: _instanceCount
baseInstance: _firstInstance];
} else {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: MTLPrimitiveTypePoint
vertexStart: _firstVertex
vertexCount: _vertexCount
instanceCount: _instanceCount];
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(_firstVertex, _firstInstance, _vertexCount, _instanceCount)];
id<MTLComputePipelineState> vtxState = pipeline->getTessVertexStageState();
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_vertexCount, _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_vertexCount, vtxState.threadExecutionWidth), _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
}
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
@@ -168,56 +160,55 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
case kMVKGraphicsStageTessControl:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
}
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(_instanceCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(_instanceCount * patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
&tessParams,
sizeof(tessParams),
pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]);
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: kMVKTessCtlInputBufferIndex];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake1D(0, _instanceCount * std::max(_vertexCount, outControlPointCount * patchCount))];
}
if (outControlPointCount > inControlPointCount) {
// In this case, we use an index buffer to avoid stepping over some of the input points.
const MVKMTLBufferAllocation* tcIndexBuff = cmdEncoder->getTempMTLBuffer(_instanceCount * patchCount * outControlPointCount * 4);
auto* indices = (uint32_t*)tcIndexBuff->getContents();
uint32_t index = 0;
for (uint32_t i = 0; i < outControlPointCount * patchCount; i++) {
if ((i % outControlPointCount) < inControlPointCount) {
indices[i] = index++;
} else {
indices[i] = 0;
}
}
[mtlTessCtlEncoder setBuffer: tcIndexBuff->_mtlBuffer
offset: tcIndexBuff->_offset
atIndex: kMVKTessCtlIndexBufferIndex];
}
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(_instanceCount * patchCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(std::max(inControlPointCount, outControlPointCount), 1, 1)];
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(tessParams.patchCount * outControlPointCount, wgSize), 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
}
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.
cmdEncoder->beginMetalRenderPass(_loadOverride, _storeOverride);
break;
}
case kMVKGraphicsStageRasterization:
if (pipeline->isTessellationPipeline()) {
if (pipeline->needsTessCtlOutputBuffer()) {
@@ -238,7 +229,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
instanceStride: 0];
[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
patchStart: 0
patchCount: _instanceCount * patchCount
patchCount: tessParams.patchCount
patchIndexBuffer: nil
patchIndexBufferOffset: 0
instanceCount: 1
@@ -310,92 +301,49 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
VkDeviceSize idxBuffOffset = ibb.offset + (_firstIndex * idxSize);
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* vtxParamsBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
const MVKMTLBufferAllocation* tcIndexBuff = nullptr;
uint32_t patchCount = 0;
uint32_t inControlPointCount = 0, outControlPointCount = 0;
struct {
uint32_t inControlPointCount = 0;
uint32_t patchCount = 0;
} tessParams;
uint32_t outControlPointCount = 0;
if (pipeline->isTessellationPipeline()) {
inControlPointCount = pipeline->getInputControlPointCount();
tessParams.inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
patchCount = mvkCeilingDivide(_indexCount, inControlPointCount);
tessParams.patchCount = mvkCeilingDivide(_indexCount, tessParams.inControlPointCount) * _instanceCount;
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
if (stage == kMVKGraphicsStageTessControl && (outControlPointCount > inControlPointCount || _instanceCount > 1)) {
// We need make a copy of the old index buffer so we can insert gaps where
// there are more output points than input points, and also to add more indices
// to handle instancing. Do it now, before finalizing draw state, or the
// pipeline will get overridden.
// Yeah, this sucks. But there aren't many good ways for dealing with this issue.
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
tcIndexBuff = cmdEncoder->getTempMTLBuffer(_instanceCount * patchCount * outControlPointCount * idxSize);
id<MTLComputePipelineState> mtlCopyIndexState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState((MTLIndexType)ibb.mtlIndexType);
[mtlTessCtlEncoder setComputePipelineState: mtlCopyIndexState];
[mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
offset: ibb.offset
atIndex: 0];
[mtlTessCtlEncoder setBuffer: tcIndexBuff->_mtlBuffer
offset: tcIndexBuff->_offset
atIndex: 1];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
2);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&outControlPointCount,
sizeof(outControlPointCount),
3);
const MVKMTLBufferAllocation* indexParamsBuff = cmdEncoder->getTempMTLBuffer(sizeof(MTLDrawIndexedPrimitivesIndirectArguments));
auto* params = (MTLDrawIndexedPrimitivesIndirectArguments*)indexParamsBuff->getContents();
params->indexCount = _indexCount;
params->instanceCount = _instanceCount;
params->indexStart = _firstIndex;
[mtlTessCtlEncoder setBuffer: indexParamsBuff->_mtlBuffer
offset: indexParamsBuff->_offset
atIndex: 4];
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1) threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
}
if (stage == kMVKGraphicsStageVertex)
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
switch (stage) {
case kMVKGraphicsStageVertex:
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
// The shader only needs the number of vertices, so that's all we'll give it.
vtxParamsBuff = cmdEncoder->getTempMTLBuffer(4);
*(uint32_t*)vtxParamsBuff->getContents() = _indexCount;
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: vtxParamsBuff->_mtlBuffer
offset: vtxParamsBuff->_offset
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
}
if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: MTLPrimitiveTypePoint
indexCount: _indexCount
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
indexBufferOffset: idxBuffOffset
instanceCount: _instanceCount
baseVertex: _vertexOffset
baseInstance: _firstInstance];
} else {
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: MTLPrimitiveTypePoint
indexCount: _indexCount
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
indexBufferOffset: idxBuffOffset
instanceCount: _instanceCount];
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
[mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
offset: idxBuffOffset
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(_vertexOffset, _firstInstance, _indexCount, _instanceCount)];
id<MTLComputePipelineState> vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_indexCount, _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_indexCount, vtxState.threadExecutionWidth), _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
}
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
@@ -403,49 +351,56 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
case kMVKGraphicsStageTessControl:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
}
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(_instanceCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(_instanceCount * patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf));
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
&tessParams,
sizeof(tessParams),
pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]);
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: kMVKTessCtlInputBufferIndex];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake1D(0, _instanceCount * std::max(_indexCount, outControlPointCount * patchCount))];
}
if (outControlPointCount > inControlPointCount || _instanceCount > 1) {
[mtlTessCtlEncoder setBuffer: tcIndexBuff->_mtlBuffer
offset: tcIndexBuff->_offset
atIndex: kMVKTessCtlIndexBufferIndex];
} else {
[mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
offset: idxBuffOffset
atIndex: kMVKTessCtlIndexBufferIndex];
}
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(_instanceCount * patchCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(std::max(inControlPointCount, outControlPointCount), 1, 1)];
// The vertex shader produced output in the correct order, so there's no need to use
// an index buffer here.
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(tessParams.patchCount * outControlPointCount, wgSize), 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
}
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.
cmdEncoder->beginMetalRenderPass(_loadOverride, _storeOverride);
break;
}
case kMVKGraphicsStageRasterization:
if (pipeline->isTessellationPipeline()) {
if (pipeline->needsTessCtlOutputBuffer()) {
@@ -468,7 +423,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
// an index buffer here.
[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
patchStart: 0
patchCount: _instanceCount * patchCount
patchCount: tessParams.patchCount
patchIndexBuffer: nil
patchIndexBufferOffset: 0
instanceCount: 1
@@ -546,13 +501,15 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
// While we're at it, we can create the temporary output buffers once and reuse them
// for each draw.
const MVKMTLBufferAllocation* tcIndirectBuff = nullptr;
const MVKMTLBufferAllocation* tcParamsBuff = nullptr;
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
const MVKMTLBufferAllocation* tcIndexBuff = nullptr;
uint32_t patchCount = 0, vertexCount = 0;
uint32_t inControlPointCount = 0, outControlPointCount = 0;
VkDeviceSize paramsIncr = 0;
if (pipeline->isTessellationPipeline()) {
// We can't read the indirect buffer CPU-side, since it may change between
// encoding and execution. So we don't know how big to make the buffers.
@@ -562,11 +519,14 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
VkDeviceSize indirectSize = (2 * sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tcIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
}
@@ -577,19 +537,6 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
if (outControlPointCount > inControlPointCount) {
// In this case, we use an index buffer to avoid stepping over some of the input points.
tcIndexBuff = cmdEncoder->getTempMTLBuffer(patchCount * outControlPointCount * 4);
auto* indices = (uint32_t*)tcIndexBuff->getContents();
uint32_t index = 0;
for (uint32_t i = 0; i < tcIndexBuff->_length / 4; i++) {
if ((i % outControlPointCount) < inControlPointCount) {
indices[i] = index++;
} else {
indices[i] = 0;
}
}
}
}
MVKPiplineStages stages;
@@ -597,16 +544,24 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
VkDeviceSize mtlTCIndBuffOfst = tcIndirectBuff ? tcIndirectBuff->_offset : 0;
VkDeviceSize mtlParmBuffOfst = tcParamsBuff ? tcParamsBuff->_offset : 0;
NSUInteger vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth;
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
NSUInteger tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) {
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
if (drawIdx == 0 && stage == kMVKGraphicsStageTessControl) {
if (drawIdx == 0 && stage == kMVKGraphicsStageVertex) {
// We need the indirect buffers now. This must be done before finalizing
// draw state, or the pipeline will get overridden. This is a good time
// to do it, since it will require switching to compute anyway. Do it all
// at once to get it over with.
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectConvertBuffersMTLComputePipelineState(false);
[mtlTessCtlEncoder setComputePipelineState: mtlConvertState];
[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
@@ -615,46 +570,67 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
[mtlTessCtlEncoder setBuffer: tcIndirectBuff->_mtlBuffer
offset: tcIndirectBuff->_offset
atIndex: 1];
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: tcParamsBuff->_offset
atIndex: 2];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_mtlIndirectBufferStride,
sizeof(_mtlIndirectBufferStride),
2);
3);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
3);
4);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&outControlPointCount,
sizeof(inControlPointCount),
4);
sizeof(outControlPointCount),
5);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_drawCount,
sizeof(_drawCount),
5);
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
6);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&vtxThreadExecWidth,
sizeof(vtxThreadExecWidth),
7);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&tcWorkgroupSize,
sizeof(tcWorkgroupSize),
8);
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
}
if (stage == kMVKGraphicsStageVertex)
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
switch (stage) {
case kMVKGraphicsStageVertex:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: _mtlIndirectBuffer
offset: mtlIndBuffOfst
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
[cmdEncoder->_mtlRenderEncoder drawPrimitives: MTLPrimitiveTypePoint
indirectBuffer: _mtlIndirectBuffer
indirectBufferOffset: mtlIndBuffOfst];
mtlIndBuffOfst += _mtlIndirectBufferStride;
// We must assume we can read up to the maximum number of vertices.
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)];
if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
indirectBufferOffset: mtlTCIndBuffOfst];
mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
}
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
indirectBufferOffset: mtlTCIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
@@ -663,7 +639,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
case kMVKGraphicsStageTessControl:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
@@ -677,33 +653,18 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]);
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: mtlParmBuffOfst
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]];
mtlParmBuffOfst += paramsIncr;
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: kMVKTessCtlInputBufferIndex];
if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
// setStageInRegionWithIndirectBuffer appears to be broken. We have a 1D linear region anyway, so size is irrelevant
//[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
// indirectBufferOffset: mtlTCIndBuffOfst];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake1D(0, std::max(inControlPointCount, outControlPointCount) * patchCount)];
mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
} else {
// We must assume we can read up to the maximum number of vertices.
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake1D(0, std::max(inControlPointCount, outControlPointCount) * patchCount)];
}
}
if (outControlPointCount > inControlPointCount) {
[mtlTessCtlEncoder setBuffer: tcIndexBuff->_mtlBuffer
offset: tcIndexBuff->_offset
atIndex: kMVKTessCtlIndexBufferIndex];
}
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
indirectBufferOffset: mtlTCIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(std::max(inControlPointCount, outControlPointCount), 1, 1)];
threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)];
mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.
@@ -742,7 +703,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
} else {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
indirectBuffer: _mtlIndirectBuffer
@@ -788,20 +749,22 @@ VkResult MVKCmdDrawIndexedIndirect::setContent(MVKCommandBuffer* cmdBuff,
void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
MVKIndexMTLBufferBinding& ibb = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding;
size_t idxSize = mvkMTLIndexTypeSizeInBytes((MTLIndexType)ibb.mtlIndexType);
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
// The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats.
// We have to convert from the drawIndexedPrimitives:... format to them.
// While we're at it, we can create the temporary output buffers once and reuse them
// for each draw.
const MVKMTLBufferAllocation* tcIndirectBuff = nullptr;
const MVKMTLBufferAllocation* tcParamsBuff = nullptr;
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
const MVKMTLBufferAllocation* tcIndexBuff = nullptr;
const MVKMTLBufferAllocation* vtxIndexBuff = nullptr;
uint32_t patchCount = 0, vertexCount = 0;
uint32_t inControlPointCount = 0, outControlPointCount = 0;
VkDeviceSize paramsIncr = 0;
if (pipeline->isTessellationPipeline()) {
// We can't read the indirect buffer CPU-side, since it may change between
// encoding and execution. So we don't know how big to make the buffers.
@@ -815,7 +778,10 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tcIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize);
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents);
}
@@ -826,7 +792,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf));
tcIndexBuff = cmdEncoder->getTempMTLBuffer(patchCount * outControlPointCount * idxSize);
vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length);
}
MVKPiplineStages stages;
@@ -834,12 +800,21 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
VkDeviceSize mtlTCIndBuffOfst = tcIndirectBuff ? tcIndirectBuff->_offset : 0;
VkDeviceSize mtlParmBuffOfst = tcParamsBuff ? tcParamsBuff->_offset : 0;
id<MTLComputePipelineState> vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
NSUInteger vtxThreadExecWidth = vtxState.threadExecutionWidth;
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
NSUInteger tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) {
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
if (stage == kMVKGraphicsStageTessControl) {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
if (stage == kMVKGraphicsStageVertex) {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
// We need the indirect buffers now. This must be done before finalizing
// draw state, or the pipeline will get overridden. This is a good time
// to do it, since it will require switching to compute anyway. Do it all
@@ -853,27 +828,37 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
[mtlTessCtlEncoder setBuffer: tcIndirectBuff->_mtlBuffer
offset: tcIndirectBuff->_offset
atIndex: 1];
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: tcParamsBuff->_offset
atIndex: 2];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_mtlIndirectBufferStride,
sizeof(_mtlIndirectBufferStride),
2);
3);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
3);
4);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&outControlPointCount,
sizeof(inControlPointCount),
4);
5);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_drawCount,
sizeof(_drawCount),
5);
6);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&vtxThreadExecWidth,
sizeof(vtxThreadExecWidth),
7);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&tcWorkgroupSize,
sizeof(tcWorkgroupSize),
8);
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
// We actually need to make a copy of the index buffer, regardless of whether
// or not there are gaps in it, because there's no way to tell Metal to
// We actually need to make a copy of the index buffer, because there's no way to tell Metal to
// offset an index buffer from a value in an indirect buffer. This also
// means that, to make a copy, we have to use a compute shader.
id<MTLComputePipelineState> mtlCopyIndexState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState((MTLIndexType)ibb.mtlIndexType);
@@ -881,46 +866,43 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
[mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
offset: ibb.offset
atIndex: 0];
[mtlTessCtlEncoder setBuffer: tcIndexBuff->_mtlBuffer
offset: tcIndexBuff->_offset
[mtlTessCtlEncoder setBuffer: vtxIndexBuff->_mtlBuffer
offset: vtxIndexBuff->_offset
atIndex: 1];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
2);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&outControlPointCount,
sizeof(outControlPointCount),
3);
[mtlTessCtlEncoder setBuffer: tcIndirectBuff->_mtlBuffer
offset: mtlTCIndBuffOfst
atIndex: 4];
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1) threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
offset: mtlIndBuffOfst
atIndex: 2];
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
indirectBufferOffset: mtlTCIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
mtlIndBuffOfst += sizeof(MTLDrawIndexedPrimitivesIndirectArguments);
}
if (stage == kMVKGraphicsStageVertex)
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
switch (stage) {
case kMVKGraphicsStageVertex:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: _mtlIndirectBuffer
offset: mtlIndBuffOfst
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: MTLPrimitiveTypePoint
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
indexBufferOffset: ibb.offset
indirectBuffer: _mtlIndirectBuffer
indirectBufferOffset: mtlIndBuffOfst];
mtlIndBuffOfst += _mtlIndirectBufferStride;
[mtlTessCtlEncoder setBuffer: vtxIndexBuff->_mtlBuffer
offset: vtxIndexBuff->_offset
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)];
if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
indirectBufferOffset: mtlTCIndBuffOfst];
mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
}
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
indirectBufferOffset: mtlTCIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
@@ -929,7 +911,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
case kMVKGraphicsStageTessControl:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
@@ -943,31 +925,18 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]);
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: mtlParmBuffOfst
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]];
mtlParmBuffOfst += paramsIncr;
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: kMVKTessCtlInputBufferIndex];
if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
// setStageInRegionWithIndirectBuffer appears to be broken. We have a 1D linear region anyway, so size is irrelevant
//[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
// indirectBufferOffset: mtlTCIndBuffOfst];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake1D(0, std::max(inControlPointCount, outControlPointCount) * patchCount)];
mtlTCIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
} else {
// We must assume we can read up to the maximum number of vertices.
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake1D(0, std::max(inControlPointCount, outControlPointCount) * patchCount)];
}
}
[mtlTessCtlEncoder setBuffer: tcIndexBuff->_mtlBuffer
offset: tcIndexBuff->_offset
atIndex: kMVKTessCtlIndexBufferIndex];
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: tcIndirectBuff->_mtlBuffer
indirectBufferOffset: mtlTCIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(std::max(inControlPointCount, outControlPointCount), 1, 1)];
threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)];
mtlTCIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.

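To summarize the indirect-buffer layout used above: each draw's converted
record now holds two dispatch arguments (vertex and tess control) plus the
drawPatches: arguments, preceded by stage-in region arguments when MSL 2.1 is
available. Sketched as a struct (hypothetical name; the code computes these
offsets with sizeof arithmetic):

    struct TCIndirectRecord {
        MTLStageInRegionIndirectArguments stageIn;     // only when MSL >= 2.1
        MTLDispatchThreadgroupsIndirectArguments vtx;  // vertex-stage dispatch
        MTLDispatchThreadgroupsIndirectArguments tc;   // tess-control dispatch
        MTLDrawPatchIndirectArguments draw;            // rasterization drawPatches:
    };
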

@@ -678,7 +678,7 @@ NSString* mvkMTLComputeCommandEncoderLabel(MVKCommandUse cmdUse) {
case kMVKCommandUseCopyBufferToImage: return @"vkCmdCopyBufferToImage ComputeEncoder";
case kMVKCommandUseCopyImageToBuffer: return @"vkCmdCopyImageToBuffer ComputeEncoder";
case kMVKCommandUseFillBuffer: return @"vkCmdFillBuffer ComputeEncoder";
case kMVKCommandUseTessellationControl: return @"vkCmdDraw (tess control stage) ComputeEncoder";
case kMVKCommandUseTessellationVertexTessCtl: return @"vkCmdDraw (vertex and tess control stages) ComputeEncoder";
case kMVKCommandUseCopyQueryPoolResults:return @"vkCmdCopyQueryPoolResults ComputeEncoder";
default: return @"Unknown Use ComputeEncoder";
}


@@ -197,7 +197,13 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
switch (_shaderStage) {
case VK_SHADER_STAGE_VERTEX_BIT:
if (stage == (isTessellating() ? kMVKGraphicsStageVertex : kMVKGraphicsStageRasterization)) {
if (stage == kMVKGraphicsStageVertex) {
_cmdEncoder->setComputeBytes(_cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex);
_isDirty = false; // Okay, I changed the encoder
} else if (!isTessellating() && stage == kMVKGraphicsStageRasterization) {
_cmdEncoder->setVertexBytes(_cmdEncoder->_mtlRenderEncoder,
_pushConstants.data(),
_pushConstants.size(),
@@ -207,7 +213,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
break;
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
if (stage == kMVKGraphicsStageTessControl) {
_cmdEncoder->setComputeBytes(_cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl),
_cmdEncoder->setComputeBytes(_cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex);
@@ -353,11 +359,6 @@ void MVKDepthStencilCommandEncoderState::encodeImpl(uint32_t stage) {
[_cmdEncoder->_mtlRenderEncoder setDepthStencilState: cmdEncPool->getMTLDepthStencilState(adjustedDSData)];
break;
}
case kMVKGraphicsStageVertex: {
// Vertex stage of tessellation pipeline requires depth/stencil testing be disabled
[_cmdEncoder->_mtlRenderEncoder setDepthStencilState: cmdEncPool->getMTLDepthStencilState(false, false)];
break;
}
default: // Do nothing on other stages
break;
}
@@ -606,7 +607,35 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle;
bool forTessellation = pipeline->isTessellationPipeline();
if (stage == (forTessellation ? kMVKGraphicsStageVertex : kMVKGraphicsStageRasterization)) {
if (stage == kMVKGraphicsStageVertex) {
encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle,
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
if (b.isInline)
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
b.mtlBytes,
b.size,
b.index);
else
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl) setBuffer: b.mtlBuffer
offset: b.offset
atIndex: b.index];
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t>& s)->void {
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
s.data,
s.size * sizeof(uint32_t),
b.index);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl) setTexture: b.mtlTexture
atIndex: b.index];
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLSamplerStateBinding& b)->void {
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl) setSamplerState: b.mtlSamplerState
atIndex: b.index];
});
} else if (!forTessellation && stage == kMVKGraphicsStageRasterization) {
encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle,
[pipeline](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
if (b.isInline) {
@@ -651,28 +680,28 @@ void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
encodeBindings(kMVKShaderStageTessCtl, "tessellation control", fullImageViewSwizzle,
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
if (b.isInline)
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl),
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
b.mtlBytes,
b.size,
b.index);
else
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl) setBuffer: b.mtlBuffer
offset: b.offset
atIndex: b.index];
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl) setBuffer: b.mtlBuffer
offset: b.offset
atIndex: b.index];
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t>& s)->void {
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl),
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl),
s.data,
s.size * sizeof(uint32_t),
b.index);
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl) setTexture: b.mtlTexture
atIndex: b.index];
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl) setTexture: b.mtlTexture
atIndex: b.index];
},
[](MVKCommandEncoder* cmdEncoder, MVKMTLSamplerStateBinding& b)->void {
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl) setSamplerState: b.mtlSamplerState
atIndex: b.index];
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl) setSamplerState: b.mtlSamplerState
atIndex: b.index];
});
}


@@ -173,96 +173,106 @@ struct MTLStageInRegionIndirectArguments {
#if __METAL_VERSION__ >= 120 \n\
kernel void cmdDrawIndirectConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\
device char* destBuff [[buffer(1)]], \n\
constant uint32_t& srcStride [[buffer(2)]], \n\
constant uint32_t& inControlPointCount [[buffer(3)]], \n\
constant uint32_t& outControlPointCount [[buffer(4)]], \n\
constant uint32_t& drawCount [[buffer(5)]], \n\
device char* paramsBuff [[buffer(2)]], \n\
constant uint32_t& srcStride [[buffer(3)]], \n\
constant uint32_t& inControlPointCount [[buffer(4)]], \n\
constant uint32_t& outControlPointCount [[buffer(5)]], \n\
constant uint32_t& drawCount [[buffer(6)]], \n\
constant uint32_t& vtxThreadExecWidth [[buffer(7)]], \n\
constant uint32_t& tcWorkgroupSize [[buffer(8)]], \n\
uint idx [[thread_position_in_grid]]) { \n\
if (idx >= drawCount) { return; } \n\
const device auto& src = *reinterpret_cast<const device MTLDrawPrimitivesIndirectArguments*>(srcBuff + idx * srcStride);\n\
device char* dest = destBuff + idx * (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments));\n\
device char* dest; \n\
device auto* params = reinterpret_cast<device uint32_t*>(paramsBuff + idx * 256); \n\
#if __METAL_VERSION__ >= 210 \n\
dest = destBuff + idx * (sizeof(MTLStageInRegionIndirectArguments) + sizeof(MTLDispatchThreadgroupsIndirectArguments) * 2 + sizeof(MTLDrawPatchIndirectArguments));\n\
device auto& destSI = *(device MTLStageInRegionIndirectArguments*)dest; \n\
dest += sizeof(MTLStageInRegionIndirectArguments); \n\
#else \n\
dest = destBuff + idx * (sizeof(MTLDispatchThreadgroupsIndirectArguments) * 2 + sizeof(MTLDrawPatchIndirectArguments));\n\
#endif \n\
device auto& destTC = *(device MTLDispatchThreadgroupsIndirectArguments*)dest; \n\
device auto& destTE = *(device MTLDrawPatchIndirectArguments*)(dest + sizeof(MTLDispatchThreadgroupsIndirectArguments));\n\
destTC.threadgroupsPerGrid[0] = (src.vertexCount * src.instanceCount + inControlPointCount - 1) / inControlPointCount;\n\
device auto& destVtx = *(device MTLDispatchThreadgroupsIndirectArguments*)dest; \n\
device auto& destTC = *(device MTLDispatchThreadgroupsIndirectArguments*)(dest + sizeof(MTLDispatchThreadgroupsIndirectArguments));\n\
device auto& destTE = *(device MTLDrawPatchIndirectArguments*)(dest + sizeof(MTLDispatchThreadgroupsIndirectArguments) * 2);\n\
uint32_t patchCount = (src.vertexCount * src.instanceCount + inControlPointCount - 1) / inControlPointCount;\n\
params[0] = inControlPointCount; \n\
params[1] = patchCount; \n\
destVtx.threadgroupsPerGrid[0] = (src.vertexCount + vtxThreadExecWidth - 1) / vtxThreadExecWidth; \n\
destVtx.threadgroupsPerGrid[1] = src.instanceCount; \n\
destVtx.threadgroupsPerGrid[2] = 1; \n\
destTC.threadgroupsPerGrid[0] = (patchCount * outControlPointCount + tcWorkgroupSize - 1) / tcWorkgroupSize;\n\
destTC.threadgroupsPerGrid[1] = destTC.threadgroupsPerGrid[2] = 1; \n\
destTE.patchCount = destTC.threadgroupsPerGrid[0]; \n\
destTE.patchCount = patchCount; \n\
destTE.instanceCount = 1; \n\
destTE.patchStart = destTE.baseInstance = 0; \n\
#if __METAL_VERSION__ >= 210 \n\
destSI.stageInOrigin[0] = destSI.stageInOrigin[1] = destSI.stageInOrigin[2] = 0; \n\
destSI.stageInSize[0] = src.instanceCount * max(src.vertexCount, outControlPointCount * destTE.patchCount); \n\
destSI.stageInSize[1] = destSI.stageInSize[2] = 1; \n\
destSI.stageInOrigin[0] = src.vertexStart; \n\
destSI.stageInOrigin[1] = src.baseInstance; \n\
destSI.stageInOrigin[2] = 0; \n\
destSI.stageInSize[0] = src.vertexCount; \n\
destSI.stageInSize[1] = src.instanceCount; \n\
destSI.stageInSize[2] = 1; \n\
#endif \n\
} \n\
\n\
kernel void cmdDrawIndexedIndirectConvertBuffers(const device char* srcBuff [[buffer(0)]], \n\
device char* destBuff [[buffer(1)]], \n\
constant uint32_t& srcStride [[buffer(2)]], \n\
constant uint32_t& inControlPointCount [[buffer(3)]], \n\
constant uint32_t& outControlPointCount [[buffer(4)]], \n\
constant uint32_t& drawCount [[buffer(5)]], \n\
device char* paramsBuff [[buffer(2)]], \n\
constant uint32_t& srcStride [[buffer(3)]], \n\
constant uint32_t& inControlPointCount [[buffer(4)]], \n\
constant uint32_t& outControlPointCount [[buffer(5)]], \n\
constant uint32_t& drawCount [[buffer(6)]], \n\
constant uint32_t& vtxThreadExecWidth [[buffer(7)]], \n\
constant uint32_t& tcWorkgroupSize [[buffer(8)]], \n\
uint idx [[thread_position_in_grid]]) { \n\
if (idx >= drawCount) { return; } \n\
const device auto& src = *reinterpret_cast<const device MTLDrawIndexedPrimitivesIndirectArguments*>(srcBuff + idx * srcStride);\n\
device char* dest = destBuff + idx * (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments));\n\
device char* dest; \n\
device auto* params = reinterpret_cast<device uint32_t*>(paramsBuff + idx * 256); \n\
#if __METAL_VERSION__ >= 210 \n\
dest = destBuff + idx * (sizeof(MTLStageInRegionIndirectArguments) + sizeof(MTLDispatchThreadgroupsIndirectArguments) * 2 + sizeof(MTLDrawPatchIndirectArguments));\n\
device auto& destSI = *(device MTLStageInRegionIndirectArguments*)dest; \n\
dest += sizeof(MTLStageInRegionIndirectArguments); \n\
#else \n\
dest = destBuff + idx * (sizeof(MTLDispatchThreadgroupsIndirectArguments) * 2 + sizeof(MTLDrawPatchIndirectArguments));\n\
#endif \n\
device auto& destTC = *(device MTLDispatchThreadgroupsIndirectArguments*)dest; \n\
device auto& destTE = *(device MTLDrawPatchIndirectArguments*)(dest + sizeof(MTLDispatchThreadgroupsIndirectArguments));\n\
destTC.threadgroupsPerGrid[0] = (src.indexCount * src.instanceCount + inControlPointCount - 1) / inControlPointCount;\n\
device auto& destVtx = *(device MTLDispatchThreadgroupsIndirectArguments*)dest; \n\
device auto& destTC = *(device MTLDispatchThreadgroupsIndirectArguments*)(dest + sizeof(MTLDispatchThreadgroupsIndirectArguments));\n\
device auto& destTE = *(device MTLDrawPatchIndirectArguments*)(dest + sizeof(MTLDispatchThreadgroupsIndirectArguments) * 2);\n\
uint32_t patchCount = (src.indexCount * src.instanceCount + inControlPointCount - 1) / inControlPointCount;\n\
params[0] = inControlPointCount; \n\
params[1] = patchCount; \n\
destVtx.threadgroupsPerGrid[0] = (src.indexCount + vtxThreadExecWidth - 1) / vtxThreadExecWidth; \n\
destVtx.threadgroupsPerGrid[1] = src.instanceCount; \n\
destVtx.threadgroupsPerGrid[2] = 1; \n\
destTC.threadgroupsPerGrid[0] = (patchCount * outControlPointCount + tcWorkgroupSize - 1) / tcWorkgroupSize;\n\
destTC.threadgroupsPerGrid[1] = destTC.threadgroupsPerGrid[2] = 1; \n\
destTE.patchCount = destTC.threadgroupsPerGrid[0]; \n\
destTE.patchCount = patchCount; \n\
destTE.instanceCount = 1; \n\
destTE.patchStart = destTE.baseInstance = 0; \n\
#if __METAL_VERSION__ >= 210 \n\
destSI.stageInOrigin[0] = destSI.stageInOrigin[1] = destSI.stageInOrigin[2] = 0; \n\
destSI.stageInSize[0] = src.instanceCount * max(src.indexCount, outControlPointCount * destTE.patchCount); \n\
destSI.stageInSize[1] = destSI.stageInSize[2] = 1; \n\
destSI.stageInOrigin[0] = src.baseVertex; \n\
destSI.stageInOrigin[1] = src.baseInstance; \n\
destSI.stageInOrigin[2] = 0; \n\
destSI.stageInSize[0] = src.indexCount; \n\
destSI.stageInSize[1] = src.instanceCount; \n\
destSI.stageInSize[2] = 1; \n\
#endif \n\
} \n\
\n\
kernel void cmdDrawIndexedCopyIndex16Buffer(const device uint16_t* srcBuff [[buffer(0)]], \n\
device uint16_t* destBuff [[buffer(1)]], \n\
constant uint32_t& inControlPointCount [[buffer(2)]], \n\
constant uint32_t& outControlPointCount [[buffer(3)]], \n\
const device MTLDrawIndexedPrimitivesIndirectArguments& params [[buffer(4)]]) {\n\
uint patchCount = (params.indexCount + inControlPointCount - 1) / inControlPointCount; \n\
for (uint i = 0; i < params.instanceCount; i++) { \n\
for (uint j = 0; j < patchCount; j++) { \n\
for (uint k = 0; k < max(inControlPointCount, outControlPointCount); k++) { \n\
if (k < inControlPointCount) { \n\
destBuff[i * params.indexCount + j * outControlPointCount + k] = srcBuff[params.indexStart + j * inControlPointCount + k] + i * params.indexCount;\n\
} else { \n\
destBuff[i * params.indexCount + j * outControlPointCount + k] = 0; \n\
} \n\
} \n\
} \n\
} \n\
const device MTLDrawIndexedPrimitivesIndirectArguments& params [[buffer(2)]],\n\
uint i [[thread_position_in_grid]]) { \n\
destBuff[i] = srcBuff[params.indexStart + i]; \n\
} \n\
\n\
kernel void cmdDrawIndexedCopyIndex32Buffer(const device uint32_t* srcBuff [[buffer(0)]], \n\
device uint32_t* destBuff [[buffer(1)]], \n\
constant uint32_t& inControlPointCount [[buffer(2)]], \n\
constant uint32_t& outControlPointCount [[buffer(3)]], \n\
const device MTLDrawIndexedPrimitivesIndirectArguments& params [[buffer(4)]]) {\n\
uint patchCount = (params.indexCount + inControlPointCount - 1) / inControlPointCount; \n\
for (uint i = 0; i < params.instanceCount; i++) { \n\
for (uint j = 0; j < patchCount; j++) { \n\
for (uint k = 0; k < max(inControlPointCount, outControlPointCount); k++) { \n\
if (k < inControlPointCount) { \n\
destBuff[i * params.indexCount + j * outControlPointCount + k] = srcBuff[params.indexStart + j * inControlPointCount + k] + i * params.indexCount;\n\
} else { \n\
destBuff[i * params.indexCount + j * outControlPointCount + k] = 0; \n\
} \n\
} \n\
} \n\
} \n\
const device MTLDrawIndexedPrimitivesIndirectArguments& params [[buffer(2)]],\n\
uint i [[thread_position_in_grid]]) { \n\
destBuff[i] = srcBuff[params.indexStart + i]; \n\
} \n\
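The rewritten copy kernels are embarrassingly parallel: one thread copies one index, and the old per-instance/per-patch loops are gone. Since neither kernel guards its thread index, they presumably rely on an exactly sized grid; a hedged host-side sketch (names hypothetical, not MoltenVK's actual encoder code):

// One thread per index; an exact-size dispatch is assumed, e.g. via
// dispatchThreads:threadsPerThreadgroup: on hardware that supports
// non-uniform threadgroup sizes.
MTLSize grid   = MTLSizeMake(indexCount, 1, 1);
MTLSize tgSize = MTLSizeMake(copyPipelineState.threadExecutionWidth, 1, 1);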
\n\
#endif \n\


@ -1063,6 +1063,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
if (supportsMTLFeatureSet(iOS_GPUFamily4_v1)) {
_metalFeatures.postDepthCoverage = true;
_metalFeatures.nonUniformThreadgroups = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily5_v1)) {
@ -1110,6 +1111,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
_metalFeatures.arrayOfSamplers = true;
_metalFeatures.presentModeImmediate = true;
_metalFeatures.fences = true;
_metalFeatures.nonUniformThreadgroups = true;
}
if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
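The new nonUniformThreadgroups flag records whether the device can dispatch a grid that is not a whole multiple of the threadgroup size. The practical difference, sketched with illustrative values:

// Without the feature: pad the grid up and guard against overrun in the kernel.
uint32_t threadCount = 100, threadgroupSize = 32;
uint32_t groups = (threadCount + threadgroupSize - 1) / threadgroupSize;  // 4 groups = 128 threads
// -> dispatchThreadgroups:MTLSizeMake(groups, 1, 1) threadsPerThreadgroup:...
// With the feature: dispatch exactly 100 threads, no padding, no kernel-side guard.
// -> dispatchThreads:MTLSizeMake(threadCount, 1, 1) threadsPerThreadgroup:...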


@ -124,8 +124,7 @@ protected:
#pragma mark MVKPipeline
static const uint32_t kMVKTessCtlInputBufferIndex = 30;
static const uint32_t kMVKTessCtlIndexBufferIndex = 29;
static const uint32_t kMVKTessCtlNumReservedBuffers = 2;
static const uint32_t kMVKTessCtlNumReservedBuffers = 1;
static const uint32_t kMVKTessEvalInputBufferIndex = 30;
static const uint32_t kMVKTessEvalPatchInputBufferIndex = 29;
@ -228,6 +227,18 @@ public:
/** Returns the current tessellation level buffer binding for the tess. control shader. */
uint32_t getTessCtlLevelBufferIndex() { return _tessCtlLevelBufferIndex; }
/** Returns the MTLComputePipelineState object for the vertex stage of a tessellated draw with no indices. */
id<MTLComputePipelineState> getTessVertexStageState();
/** Returns the MTLComputePipelineState object for the vertex stage of a tessellated draw with 16-bit indices. */
id<MTLComputePipelineState> getTessVertexStageIndex16State();
/** Returns the MTLComputePipelineState object for the vertex stage of a tessellated draw with 32-bit indices. */
id<MTLComputePipelineState> getTessVertexStageIndex32State();
/** Returns the MTLComputePipelineState object for the tessellation control stage of a tessellated draw. */
id<MTLComputePipelineState> getTessControlStageState() { return _mtlTessControlStageState; }
/** Returns true if the vertex shader needs a buffer to store its output. */
bool needsVertexOutputBuffer() { return _needsVertexOutputBuffer; }
@ -261,17 +272,19 @@ protected:
void addVertexInputToShaderConverterContext(SPIRVToMSLConversionConfiguration& shaderContext, const VkGraphicsPipelineCreateInfo* pCreateInfo);
void addPrevStageOutputToShaderConverterContext(SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& outputs);
MTLRenderPipelineDescriptor* newMTLRenderPipelineDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData);
MTLRenderPipelineDescriptor* newMTLTessVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderContext);
MTLComputePipelineDescriptor* newMTLTessVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderContext);
MTLComputePipelineDescriptor* newMTLTessControlStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderContext);
MTLRenderPipelineDescriptor* newMTLTessRasterStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderContext);
bool addVertexShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext);
bool addVertexShaderToPipeline(MTLComputePipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext);
bool addTessCtlShaderToPipeline(MTLComputePipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput);
bool addTessEvalShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput);
bool addFragmentShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput);
bool addVertexInputToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkPipelineVertexInputStateCreateInfo* pVI, const SPIRVToMSLConversionConfiguration& shaderContext);
template<class T>
bool addVertexInputToPipeline(T* inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, const SPIRVToMSLConversionConfiguration& shaderContext);
void addTessellationToPipeline(MTLRenderPipelineDescriptor* plDesc, const SPIRVTessReflectionData& reflectData, const VkPipelineTessellationStateCreateInfo* pTS);
void addFragmentOutputToPipeline(MTLRenderPipelineDescriptor* plDesc, const SPIRVTessReflectionData& reflectData, const VkGraphicsPipelineCreateInfo* pCreateInfo, bool isTessellationVertexPipeline = false);
bool isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData);
void addFragmentOutputToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo);
bool isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo);
bool verifyImplicitBuffer(bool needsBuffer, MVKShaderImplicitRezBinding& index, MVKShaderStage stage, const char* name, uint32_t reservedBuffers);
uint32_t getTranslatedVertexBinding(uint32_t binding, uint32_t translationOffset, uint32_t maxBinding);
@ -288,12 +301,13 @@ protected:
MVKSmallVector<VkRect2D, kMVKCachedViewportScissorCount> _scissors;
MVKSmallVector<MVKTranslatedVertexBinding> _translatedVertexBindings;
MTLComputePipelineDescriptor* _mtlTessControlStageDesc = nil;
MTLComputePipelineDescriptor* _mtlTessVertexStageDesc = nil;
id<MTLFunction> _mtlTessVertexFunctions[3] = {nil, nil, nil};
id<MTLRenderPipelineState> _mtlTessVertexStageState = nil;
id<MTLComputePipelineState> _mtlTessVertexStageState = nil;
id<MTLComputePipelineState> _mtlTessVertexStageIndex16State = nil;
id<MTLComputePipelineState> _mtlTessVertexStageIndex32State = nil;
id<MTLComputePipelineState> _mtlTessControlStageState = nil;
id<MTLComputePipelineState> _mtlTessControlStageIndex16State = nil;
id<MTLComputePipelineState> _mtlTessControlStageIndex32State = nil;
id<MTLRenderPipelineState> _mtlPipelineState = nil;
MTLCullMode _mtlCullMode;
MTLWinding _mtlFrontWinding;
@ -316,7 +330,7 @@ protected:
bool _needsTessCtlBufferSizeBuffer = false;
bool _needsTessCtlOutputBuffer = false;
bool _needsTessCtlPatchOutputBuffer = false;
bool _needsTessCtlInput = false;
bool _needsTessCtlInputBuffer = false;
bool _needsTessEvalSwizzleBuffer = false;
bool _needsTessEvalBufferSizeBuffer = false;
bool _needsFragmentSwizzleBuffer = false;


@ -189,49 +189,41 @@ void MVKGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder, uint32_t stage)
if ( !_hasValidMTLPipelineStates ) { return; }
id<MTLRenderCommandEncoder> mtlCmdEnc = cmdEncoder->_mtlRenderEncoder;
id<MTLComputeCommandEncoder> tessCtlEnc;
if ( stage != kMVKGraphicsStageTessControl && !mtlCmdEnc ) { return; } // Pre-renderpass. Come back later.
switch (stage) {
case kMVKGraphicsStageVertex:
// Stage 1 of a tessellated draw: vertex-only pipeline with rasterization disabled.
[mtlCmdEnc setRenderPipelineState: _mtlTessVertexStageState];
break;
case kMVKGraphicsStageTessControl: {
// Stage 2 of a tessellated draw: compute pipeline to run the tess. control shader.
case kMVKGraphicsStageVertex: {
// Stage 1 of a tessellated draw: compute pipeline to run the vertex shader.
// N.B. This will prematurely terminate the current subpass. We'll have to remember to start it back up again.
// Due to yet another impedance mismatch between Metal and Vulkan, which pipeline
// state we use depends on whether or not we have an index buffer, and if we do,
// the kind of indices in it. Furthermore, to avoid fetching the wrong attribute
// data when there are more output vertices than input vertices, we use an
// indexed dispatch to force each instance to fetch the correct entry.
// the kind of indices in it.
id<MTLComputePipelineState> plState;
const char* compilerType = "Tessellation control stage pipeline";
const MVKIndexMTLBufferBinding& indexBuff = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding;
MTLComputePipelineDescriptor* plDesc = [_mtlTessControlStageDesc copy]; // temp retain a copy to be thread-safe.
if (!indexBuff.mtlBuffer && getInputControlPointCount() >= getOutputControlPointCount()) {
plState = getOrCompilePipeline(plDesc, _mtlTessControlStageState, compilerType);
if (!indexBuff.mtlBuffer) {
plState = getTessVertexStageState();
} else if (indexBuff.mtlIndexType == MTLIndexTypeUInt16) {
plDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt16;
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stepFunction = MTLStepFunctionThreadPositionInGridXIndexed;
plState = getOrCompilePipeline(plDesc, _mtlTessControlStageIndex16State, compilerType);
plState = getTessVertexStageIndex16State();
} else {
plDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt32;
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stepFunction = MTLStepFunctionThreadPositionInGridXIndexed;
plState = getOrCompilePipeline(plDesc, _mtlTessControlStageIndex32State, compilerType);
plState = getTessVertexStageIndex32State();
}
[plDesc release]; // temp release
if ( !_hasValidMTLPipelineStates ) { return; }
id<MTLComputeCommandEncoder> tessCtlEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
tessCtlEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
[tessCtlEnc setComputePipelineState: plState];
if (_needsTessCtlInput) {
[tessCtlEnc setThreadgroupMemoryLength: getDevice()->_pProperties->limits.maxTessellationControlPerVertexInputComponents * 4 * getInputControlPointCount() atIndex: 0];
}
break;
}
case kMVKGraphicsStageTessControl: {
// Stage 2 of a tessellated draw: compute pipeline to run the tess. control shader.
if ( !_mtlTessControlStageState ) { return; } // Abort if pipeline could not be created.
tessCtlEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
[tessCtlEnc setComputePipelineState: _mtlTessControlStageState];
break;
}
@ -287,6 +279,46 @@ bool MVKGraphicsPipeline::supportsDynamicState(VkDynamicState state) {
}
}
static const char vtxCompilerType[] = "Vertex stage pipeline for tessellation";
id<MTLComputePipelineState> MVKGraphicsPipeline::getTessVertexStageState() {
MTLComputePipelineDescriptor* plDesc = [_mtlTessVertexStageDesc copy]; // temp retain a copy to be thread-safe.
plDesc.computeFunction = _mtlTessVertexFunctions[0];
id<MTLComputePipelineState> plState = getOrCompilePipeline(plDesc, _mtlTessVertexStageState, vtxCompilerType);
[plDesc release]; // temp release
return plState;
}
id<MTLComputePipelineState> MVKGraphicsPipeline::getTessVertexStageIndex16State() {
MTLComputePipelineDescriptor* plDesc = [_mtlTessVertexStageDesc copy]; // temp retain a copy to be thread-safe.
plDesc.computeFunction = _mtlTessVertexFunctions[1];
plDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt16;
for (uint32_t i = 0; i < 31; i++) {
MTLBufferLayoutDescriptor* blDesc = plDesc.stageInputDescriptor.layouts[i];
if (blDesc.stepFunction == MTLStepFunctionThreadPositionInGridX) {
blDesc.stepFunction = MTLStepFunctionThreadPositionInGridXIndexed;
}
}
id<MTLComputePipelineState> plState = getOrCompilePipeline(plDesc, _mtlTessVertexStageIndex16State, vtxCompilerType);
[plDesc release]; // temp release
return plState;
}
id<MTLComputePipelineState> MVKGraphicsPipeline::getTessVertexStageIndex32State() {
MTLComputePipelineDescriptor* plDesc = [_mtlTessVertexStageDesc copy]; // temp retain a copy to be thread-safe.
plDesc.computeFunction = _mtlTessVertexFunctions[2];
plDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt32;
for (uint32_t i = 0; i < 31; i++) {
MTLBufferLayoutDescriptor* blDesc = plDesc.stageInputDescriptor.layouts[i];
if (blDesc.stepFunction == MTLStepFunctionThreadPositionInGridX) {
blDesc.stepFunction = MTLStepFunctionThreadPositionInGridXIndexed;
}
}
id<MTLComputePipelineState> plState = getOrCompilePipeline(plDesc, _mtlTessVertexStageIndex32State, vtxCompilerType);
[plDesc release]; // temp release
return plState;
}
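For context on the loops above: an X-indexed step function makes the compute stage-in machinery fetch attributes through the bound index buffer rather than directly by thread position, which is what lets one compiled kernel body serve all three draw flavors. Conceptually (an illustrative model, not Metal internals):

// Roughly what the stage-in fetch index becomes for thread x:
uint32_t stageInFetchIndex(uint32_t x, MTLStepFunction sf, const uint32_t* indexBuf) {
    if (sf == MTLStepFunctionThreadPositionInGridXIndexed) { return indexBuf[x]; }  // indexed draw
    return x;  // MTLStepFunctionThreadPositionInGridX: non-indexed draw
}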
#pragma mark Construction
@ -343,7 +375,7 @@ MVKGraphicsPipeline::MVKGraphicsPipeline(MVKDevice* device,
// Topology
_mtlPrimitiveType = MTLPrimitiveTypePoint;
if (pCreateInfo->pInputAssemblyState && !isRenderingPoints(pCreateInfo, reflectData)) {
if (pCreateInfo->pInputAssemblyState && !isRenderingPoints(pCreateInfo)) {
_mtlPrimitiveType = mvkMTLPrimitiveTypeFromVkPrimitiveTopology(pCreateInfo->pInputAssemblyState->topology);
}
@ -427,11 +459,12 @@ id<MTLComputePipelineState> MVKGraphicsPipeline::getOrCompilePipeline(MTLCompute
// Constructs the underlying Metal render pipeline.
void MVKGraphicsPipeline::initMTLRenderPipelineState(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData) {
_mtlTessVertexStageState = nil;
_mtlTessVertexStageIndex16State = nil;
_mtlTessVertexStageIndex32State = nil;
_mtlTessControlStageState = nil;
_mtlTessControlStageIndex16State = nil;
_mtlTessControlStageIndex32State = nil;
_mtlPipelineState = nil;
_mtlTessControlStageDesc = nil;
_mtlTessVertexStageDesc = nil;
for (uint32_t i = 0; i < 3; i++) { _mtlTessVertexFunctions[i] = nil; }
if (!isTessellationPipeline()) {
MTLRenderPipelineDescriptor* plDesc = newMTLRenderPipelineDescriptor(pCreateInfo, reflectData); // temp retain
if (plDesc) {
@ -440,20 +473,20 @@ void MVKGraphicsPipeline::initMTLRenderPipelineState(const VkGraphicsPipelineCre
[plDesc release]; // temp release
} else {
// In this case, we need to create three render pipelines. But, the way Metal handles
// index buffers for compute stage-in means we have to defer creation of stage 2 until
// index buffers for compute stage-in means we have to defer creation of stage 1 until
// draw time. In the meantime, we'll create and retain a descriptor for it.
SPIRVToMSLConversionConfiguration shaderContext;
initMVKShaderConverterContext(shaderContext, pCreateInfo, reflectData);
MTLRenderPipelineDescriptor* vtxPLDesc = newMTLTessVertexStageDescriptor(pCreateInfo, reflectData, shaderContext); // temp retain
_mtlTessControlStageDesc = newMTLTessControlStageDescriptor(pCreateInfo, reflectData, shaderContext); // retained
_mtlTessVertexStageDesc = newMTLTessVertexStageDescriptor(pCreateInfo, reflectData, shaderContext); // retained
MTLComputePipelineDescriptor* tcPLDesc = newMTLTessControlStageDescriptor(pCreateInfo, reflectData, shaderContext); // temp retained
MTLRenderPipelineDescriptor* rastPLDesc = newMTLTessRasterStageDescriptor(pCreateInfo, reflectData, shaderContext); // temp retained
if (vtxPLDesc && _mtlTessControlStageDesc && rastPLDesc) {
if (getOrCompilePipeline(vtxPLDesc, _mtlTessVertexStageState)) {
if (_mtlTessVertexStageDesc && tcPLDesc && rastPLDesc) {
if (getOrCompilePipeline(tcPLDesc, _mtlTessControlStageState, "Tessellation control")) {
getOrCompilePipeline(rastPLDesc, _mtlPipelineState);
}
}
[vtxPLDesc release]; // temp release
[tcPLDesc release]; // temp release
[rastPLDesc release]; // temp release
}
}
@ -479,13 +512,13 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLRenderPipelineDescriptor
// Vertex input
// This needs to happen before compiling the fragment shader, or we'll lose information on vertex attributes.
if (!addVertexInputToPipeline(plDesc, pCreateInfo->pVertexInputState, shaderContext)) { return nil; }
if (!addVertexInputToPipeline(plDesc.vertexDescriptor, pCreateInfo->pVertexInputState, shaderContext)) { return nil; }
// Fragment shader - only add if rasterization is enabled
if (!addFragmentShaderToPipeline(plDesc, pCreateInfo, shaderContext, vtxOutputs)) { return nil; }
// Output
addFragmentOutputToPipeline(plDesc, reflectData, pCreateInfo);
addFragmentOutputToPipeline(plDesc, pCreateInfo);
// Metal does not allow the name of the pipeline to be changed after it has been created,
// and we need to create the Metal pipeline immediately to provide error feedback to app.
@ -495,22 +528,27 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLRenderPipelineDescriptor
return plDesc;
}
// Returns a retained MTLRenderPipelineDescriptor for the vertex stage of a tessellated draw constructed from this instance, or nil if an error occurs.
// Returns a retained MTLComputePipelineDescriptor for the vertex stage of a tessellated draw constructed from this instance, or nil if an error occurs.
// It is the responsibility of the caller to release the returned descriptor.
MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo,
MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLTessVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo,
const SPIRVTessReflectionData& reflectData,
SPIRVToMSLConversionConfiguration& shaderContext) {
MTLRenderPipelineDescriptor* plDesc = [MTLRenderPipelineDescriptor new]; // retained
MTLComputePipelineDescriptor* plDesc = [MTLComputePipelineDescriptor new]; // retained
// Add shader stages.
if (!addVertexShaderToPipeline(plDesc, pCreateInfo, shaderContext)) { return nil; }
// Vertex input
if (!addVertexInputToPipeline(plDesc, pCreateInfo->pVertexInputState, shaderContext)) { return nil; }
plDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor];
if (!addVertexInputToPipeline(plDesc.stageInputDescriptor, pCreateInfo->pVertexInputState, shaderContext)) { return nil; }
plDesc.stageInputDescriptor.indexBufferIndex = _indirectParamsIndex.stages[kMVKShaderStageVertex];
// Even though this won't be used for rasterization, we still have to set up the rasterization state to
// match the render pass, or Metal will complain.
addFragmentOutputToPipeline(plDesc, reflectData, pCreateInfo, true);
plDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = YES;
// Metal does not allow the name of the pipeline to be changed after it has been created,
// and we need to create the Metal pipeline immediately to provide error feedback to app.
// The best we can do at this point is set the pipeline name from the layout.
setLabelIfNotNil(plDesc, ((MVKPipelineLayout*)pCreateInfo->layout)->getDebugName());
return plDesc;
}
@ -628,25 +666,6 @@ MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLTessControlStageDescrip
return nil;
}
// Stage input
plDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor];
uint32_t offset = 0;
for (const SPIRVShaderOutput& output : vtxOutputs) {
if (output.builtin == spv::BuiltInPointSize && !reflectData.pointMode) { continue; }
offset = (uint32_t)mvkAlignByteCount(offset, sizeOfOutput(output));
if (shaderContext.isShaderInputLocationUsed(output.location)) {
plDesc.stageInputDescriptor.attributes[output.location].bufferIndex = kMVKTessCtlInputBufferIndex;
plDesc.stageInputDescriptor.attributes[output.location].format = (MTLAttributeFormat)getPixelFormats()->getMTLVertexFormat(mvkFormatFromOutput(output));
plDesc.stageInputDescriptor.attributes[output.location].offset = offset;
}
offset += sizeOfOutput(output);
}
if (vtxOutputs.size() > 0) {
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stepFunction = MTLStepFunctionThreadPositionInGridX;
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stride = mvkAlignByteCount(offset, sizeOfOutput(vtxOutputs[0]));
}
plDesc.stageInputDescriptor.indexBufferIndex = kMVKTessCtlIndexBufferIndex;
// Metal does not allow the name of the pipeline to be changed after it has been created,
// and we need to create the Metal pipeline immediately to provide error feedback to app.
// The best we can do at this point is set the pipeline name from the layout.
@ -767,7 +786,7 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessRasterStageDescripto
addTessellationToPipeline(plDesc, reflectData, pCreateInfo->pTessellationState);
// Output
addFragmentOutputToPipeline(plDesc, reflectData, pCreateInfo);
addFragmentOutputToPipeline(plDesc, pCreateInfo);
return plDesc;
}
@ -833,6 +852,62 @@ bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLRenderPipelineDescriptor*
return true;
}
// Adds a vertex shader compiled as a compute kernel to the pipeline description.
bool MVKGraphicsPipeline::addVertexShaderToPipeline(MTLComputePipelineDescriptor* plDesc,
const VkGraphicsPipelineCreateInfo* pCreateInfo,
SPIRVToMSLConversionConfiguration& shaderContext) {
uint32_t vbCnt = pCreateInfo->pVertexInputState->vertexBindingDescriptionCount;
shaderContext.options.entryPointStage = spv::ExecutionModelVertex;
shaderContext.options.entryPointName = _pVertexSS->pName;
shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.shader_index_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.capture_output_to_buffer = true;
shaderContext.options.mslOptions.vertex_for_tessellation = true;
shaderContext.options.mslOptions.disable_rasterization = true;
addVertexInputToShaderConverterContext(shaderContext, pCreateInfo);
static const CompilerMSL::Options::IndexType indexTypes[] = {
CompilerMSL::Options::IndexType::None,
CompilerMSL::Options::IndexType::UInt16,
CompilerMSL::Options::IndexType::UInt32,
};
// We need to compile this function three times, with no indexing, 16-bit indices, and 32-bit indices.
for (uint32_t i = 0; i < sizeof(indexTypes)/sizeof(indexTypes[0]); i++) {
shaderContext.options.mslOptions.vertex_index_type = indexTypes[i];
MVKMTLFunction func = ((MVKShaderModule*)_pVertexSS->module)->getMTLFunction(&shaderContext, _pVertexSS->pSpecializationInfo, _pipelineCache);
id<MTLFunction> mtlFunc = func.getMTLFunction();
if ( !mtlFunc ) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Vertex shader function could not be compiled into pipeline. See previous logged error."));
return false;
}
_mtlTessVertexFunctions[i] = [mtlFunc retain];
auto& funcRslts = func.shaderConversionResults;
_needsVertexSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
_needsVertexOutputBuffer = funcRslts.needsOutputBuffer;
}
// If we need the swizzle buffer and there's no place to put it, we're in serious trouble.
if (!verifyImplicitBuffer(_needsVertexSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageVertex, "swizzle", vbCnt)) {
return false;
}
// Ditto buffer size buffer.
if (!verifyImplicitBuffer(_needsVertexBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageVertex, "buffer size", vbCnt)) {
return false;
}
// Ditto captured output buffer.
if (!verifyImplicitBuffer(_needsVertexOutputBuffer, _outputBufferIndex, kMVKShaderStageVertex, "output", vbCnt)) {
return false;
}
if (!verifyImplicitBuffer(!shaderContext.shaderInputs.empty(), _indirectParamsIndex, kMVKShaderStageVertex, "index", vbCnt)) {
return false;
}
return true;
}
bool MVKGraphicsPipeline::addTessCtlShaderToPipeline(MTLComputePipelineDescriptor* plDesc,
const VkGraphicsPipelineCreateInfo* pCreateInfo,
SPIRVToMSLConversionConfiguration& shaderContext,
@ -841,11 +916,13 @@ bool MVKGraphicsPipeline::addTessCtlShaderToPipeline(MTLComputePipelineDescripto
shaderContext.options.entryPointName = _pTessCtlSS->pName;
shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageTessCtl];
shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageTessCtl];
shaderContext.options.mslOptions.shader_input_buffer_index = kMVKTessCtlInputBufferIndex;
shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageTessCtl];
shaderContext.options.mslOptions.shader_patch_output_buffer_index = _tessCtlPatchOutputBufferIndex;
shaderContext.options.mslOptions.shader_tess_factor_buffer_index = _tessCtlLevelBufferIndex;
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageTessCtl];
shaderContext.options.mslOptions.capture_output_to_buffer = true;
shaderContext.options.mslOptions.multi_patch_workgroup = true;
addPrevStageOutputToShaderConverterContext(shaderContext, vtxOutputs);
MVKMTLFunction func = ((MVKShaderModule*)_pTessCtlSS->module)->getMTLFunction(&shaderContext, _pTessCtlSS->pSpecializationInfo, _pipelineCache);
@ -861,7 +938,7 @@ bool MVKGraphicsPipeline::addTessCtlShaderToPipeline(MTLComputePipelineDescripto
_needsTessCtlBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
_needsTessCtlOutputBuffer = funcRslts.needsOutputBuffer;
_needsTessCtlPatchOutputBuffer = funcRslts.needsPatchOutputBuffer;
_needsTessCtlInput = funcRslts.needsInputThreadgroupMem;
_needsTessCtlInputBuffer = funcRslts.needsInputThreadgroupMem;
if (!verifyImplicitBuffer(_needsTessCtlSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageTessCtl, "swizzle", kMVKTessCtlNumReservedBuffers)) {
return false;
@ -957,7 +1034,8 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto
return true;
}
bool MVKGraphicsPipeline::addVertexInputToPipeline(MTLRenderPipelineDescriptor* plDesc,
template<class T>
bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc,
const VkPipelineVertexInputStateCreateInfo* pVI,
const SPIRVToMSLConversionConfiguration& shaderContext) {
// Collect extension structures
@ -989,16 +1067,16 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(MTLRenderPipelineDescriptor*
maxBinding = max(pVKVB->binding, maxBinding);
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
auto vbDesc = inputDesc.layouts[vbIdx];
if (pVKVB->stride == 0) {
// Stride can't be 0; it will be set later to the attributes' maximum offset + size
// to prevent it from being larger than the underlying buffer permits.
vbDesc.stride = 0;
vbDesc.stepFunction = MTLVertexStepFunctionConstant;
vbDesc.stepFunction = (decltype(vbDesc.stepFunction))MTLStepFunctionConstant;
vbDesc.stepRate = 0;
} else {
vbDesc.stride = pVKVB->stride;
vbDesc.stepFunction = mvkMTLVertexStepFunctionFromVkVertexInputRate(pVKVB->inputRate);
vbDesc.stepFunction = (decltype(vbDesc.stepFunction))mvkMTLStepFunctionFromVkVertexInputRate(pVKVB->inputRate, isTessellationPipeline());
vbDesc.stepRate = 1;
}
}
@ -1011,11 +1089,11 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(MTLRenderPipelineDescriptor*
const VkVertexInputBindingDivisorDescriptionEXT* pVKVB = &pVertexInputDivisorState->pVertexBindingDivisors[i];
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
if (shaderContext.isVertexBufferUsed(vbIdx)) {
MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
if (vbDesc.stepFunction == MTLVertexStepFunctionPerInstance) {
if ((NSUInteger)inputDesc.layouts[vbIdx].stepFunction == MTLStepFunctionPerInstance ||
(NSUInteger)inputDesc.layouts[vbIdx].stepFunction == MTLStepFunctionThreadPositionInGridY) {
if (pVKVB->divisor == 0)
vbDesc.stepFunction = MTLVertexStepFunctionConstant;
vbDesc.stepRate = pVKVB->divisor;
inputDesc.layouts[vbIdx].stepFunction = (decltype(inputDesc.layouts[vbIdx].stepFunction))MTLStepFunctionConstant;
inputDesc.layouts[vbIdx].stepRate = pVKVB->divisor;
}
}
}
@ -1040,7 +1118,7 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(MTLRenderPipelineDescriptor*
// The step is set to constant, but we need to change the stride to be non-zero for Metal.
// Look for the maximum offset + size to set as the stride.
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
auto vbDesc = inputDesc.layouts[vbIdx];
uint32_t strideLowBound = vaOffset + attrSize;
if (vbDesc.stride < strideLowBound) vbDesc.stride = strideLowBound;
} else if (vaOffset + attrSize > pVKVB->stride) {
@ -1060,9 +1138,9 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(MTLRenderPipelineDescriptor*
}
}
MTLVertexAttributeDescriptor* vaDesc = plDesc.vertexDescriptor.attributes[pVKVA->location];
vaDesc.format = getPixelFormats()->getMTLVertexFormat(pVKVA->format);
vaDesc.bufferIndex = getMetalBufferIndexForVertexAttributeBinding(vaBinding);
auto vaDesc = inputDesc.attributes[pVKVA->location];
vaDesc.format = (decltype(vaDesc.format))getPixelFormats()->getMTLVertexFormat(pVKVA->format);
vaDesc.bufferIndex = (decltype(vaDesc.bufferIndex))getMetalBufferIndexForVertexAttributeBinding(vaBinding);
vaDesc.offset = vaOffset;
}
}
@ -1075,13 +1153,13 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(MTLRenderPipelineDescriptor*
uint32_t vbVACnt = shaderContext.countShaderInputsAt(pVKVB->binding);
if (vbVACnt > 0) {
uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
auto vbDesc = inputDesc.layouts[vbIdx];
uint32_t xldtVACnt = 0;
for (auto& xltdBind : _translatedVertexBindings) {
if (xltdBind.binding == pVKVB->binding) {
uint32_t vbXltdIdx = getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding);
MTLVertexBufferLayoutDescriptor* vbXltdDesc = plDesc.vertexDescriptor.layouts[vbXltdIdx];
auto vbXltdDesc = inputDesc.layouts[vbXltdIdx];
vbXltdDesc.stride = vbDesc.stride;
vbXltdDesc.stepFunction = vbDesc.stepFunction;
vbXltdDesc.stepRate = vbDesc.stepRate;
@ -1097,6 +1175,13 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(MTLRenderPipelineDescriptor*
return true;
}
template bool MVKGraphicsPipeline::addVertexInputToPipeline<MTLVertexDescriptor>(MTLVertexDescriptor* inputDesc,
const VkPipelineVertexInputStateCreateInfo* pVI,
const SPIRVToMSLConversionConfiguration& shaderContext);
template bool MVKGraphicsPipeline::addVertexInputToPipeline<MTLStageInputOutputDescriptor>(MTLStageInputOutputDescriptor* inputDesc,
const VkPipelineVertexInputStateCreateInfo* pVI,
const SPIRVToMSLConversionConfiguration& shaderContext);
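Explicit instantiation like the above forces the compiler to emit both specializations in this translation unit, the standard way to keep a template's definition out of the header. The pattern in miniature (toy names, not MoltenVK code):

template<typename T> T twice(T v) { return v + v; }  // definition kept out of the header
template int twice<int>(int);                        // emit the int specialization here
template double twice<double>(double);               // emit the double specialization here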
// Returns a translated binding for the existing binding and translation offset, creating it if needed.
uint32_t MVKGraphicsPipeline::getTranslatedVertexBinding(uint32_t binding, uint32_t translationOffset, uint32_t maxBinding) {
// See if a translated binding already exists (for example if more than one VA needs the same translation).
@ -1146,9 +1231,7 @@ void MVKGraphicsPipeline::addTessellationToPipeline(MTLRenderPipelineDescriptor*
}
void MVKGraphicsPipeline::addFragmentOutputToPipeline(MTLRenderPipelineDescriptor* plDesc,
const SPIRVTessReflectionData& reflectData,
const VkGraphicsPipelineCreateInfo* pCreateInfo,
bool isTessellationVertexPipeline) {
const VkGraphicsPipelineCreateInfo* pCreateInfo) {
// Retrieve the render subpass for which this pipeline is being constructed
MVKRenderPass* mvkRendPass = (MVKRenderPass*)pCreateInfo->renderPass;
@ -1156,7 +1239,7 @@ void MVKGraphicsPipeline::addFragmentOutputToPipeline(MTLRenderPipelineDescripto
// Topology
if (pCreateInfo->pInputAssemblyState) {
plDesc.inputPrimitiveTopologyMVK = (isTessellationVertexPipeline || isRenderingPoints(pCreateInfo, reflectData))
plDesc.inputPrimitiveTopologyMVK = isRenderingPoints(pCreateInfo)
? MTLPrimitiveTopologyClassPoint
: mvkMTLPrimitiveTopologyClassFromVkPrimitiveTopology(pCreateInfo->pInputAssemblyState->topology);
}
@ -1254,7 +1337,7 @@ void MVKGraphicsPipeline::initMVKShaderConverterContext(SPIRVToMSLConversionConf
}
shaderContext.options.mslOptions.texture_1D_as_2D = mvkTreatTexture1DAs2D();
shaderContext.options.mslOptions.enable_point_size_builtin = isRenderingPoints(pCreateInfo, reflectData);
shaderContext.options.mslOptions.enable_point_size_builtin = isRenderingPoints(pCreateInfo) || reflectData.pointMode;
shaderContext.options.mslOptions.enable_frag_depth_builtin = pixFmts->isDepthFormat(mtlDSFormat);
shaderContext.options.mslOptions.enable_frag_stencil_ref_builtin = pixFmts->isStencilFormat(mtlDSFormat);
shaderContext.options.shouldFlipVertexY = _device->_pMVKConfig->shaderConversionFlipVertexY;
@ -1324,6 +1407,8 @@ void MVKGraphicsPipeline::addPrevStageOutputToShaderConverterContext(SPIRVToMSLC
shaderContext.shaderInputs.clear();
uint32_t siCnt = (uint32_t)shaderOutputs.size();
for (uint32_t siIdx = 0; siIdx < siCnt; siIdx++) {
if (!shaderOutputs[siIdx].isUsed) { continue; }
mvk::MSLShaderInput si;
si.shaderInput.location = shaderOutputs[siIdx].location;
si.shaderInput.builtin = shaderOutputs[siIdx].builtin;
@ -1331,13 +1416,24 @@ void MVKGraphicsPipeline::addPrevStageOutputToShaderConverterContext(SPIRVToMSLC
switch (getPixelFormats()->getFormatType(mvkFormatFromOutput(shaderOutputs[siIdx]) ) ) {
case kMVKFormatColorUInt8:
si.shaderInput.format = MSL_VERTEX_FORMAT_UINT8;
si.shaderInput.format = MSL_SHADER_INPUT_FORMAT_UINT8;
break;
case kMVKFormatColorUInt16:
si.shaderInput.format = MSL_VERTEX_FORMAT_UINT16;
si.shaderInput.format = MSL_SHADER_INPUT_FORMAT_UINT16;
break;
case kMVKFormatColorHalf:
case kMVKFormatColorInt16:
si.shaderInput.format = MSL_SHADER_INPUT_FORMAT_ANY16;
break;
case kMVKFormatColorFloat:
case kMVKFormatColorInt32:
case kMVKFormatColorUInt32:
si.shaderInput.format = MSL_SHADER_INPUT_FORMAT_ANY32;
break;
default:
break;
}
@ -1347,20 +1443,21 @@ void MVKGraphicsPipeline::addPrevStageOutputToShaderConverterContext(SPIRVToMSLC
}
// We render points if either the topology or the polygon fill mode dictates it
bool MVKGraphicsPipeline::isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData) {
bool MVKGraphicsPipeline::isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo) {
return ((pCreateInfo->pInputAssemblyState && (pCreateInfo->pInputAssemblyState->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST)) ||
(pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT)) ||
(reflectData.pointMode));
(pCreateInfo->pRasterizationState && (pCreateInfo->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT)));
}
MVKGraphicsPipeline::~MVKGraphicsPipeline() {
[_mtlTessControlStageDesc release];
[_mtlTessVertexStageDesc release];
[_mtlTessVertexStageState release];
[_mtlTessVertexStageIndex16State release];
[_mtlTessVertexStageIndex32State release];
[_mtlTessControlStageState release];
[_mtlTessControlStageIndex16State release];
[_mtlTessControlStageIndex32State release];
[_mtlPipelineState release];
for (id<MTLFunction> func : _mtlTessVertexFunctions) { [func release]; }
}


@ -23,8 +23,11 @@
#include "MVKCommonEnvironment.h"
#include "mvk_vulkan.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <string>
#include <simd/simd.h>
#include <type_traits>
#pragma mark Math
@ -81,7 +84,7 @@ typedef enum : uint8_t {
kMVKCommandUseClearDepthStencilImage, /**< vkCmdClearDepthStencilImage. */
kMVKCommandUseResetQueryPool, /**< vkCmdResetQueryPool. */
kMVKCommandUseDispatch, /**< vkCmdDispatch. */
kMVKCommandUseTessellationControl, /**< vkCmdDraw* - tessellation control stage. */
kMVKCommandUseTessellationVertexTessCtl,/**< vkCmdDraw* - vertex and tessellation control stages. */
kMVKCommandUseCopyQueryPoolResults /**< vkCmdCopyQueryPoolResults. */
} MVKCommandUse;
@ -346,10 +349,49 @@ const T& mvkClamp(const T& val, const T& lower, const T& upper) {
}
/** Returns the result of a division, rounded up. */
template<typename T>
T mvkCeilingDivide(T numerator, T denominator) {
template<typename T, typename U>
constexpr typename std::common_type<T, U>::type mvkCeilingDivide(T numerator, U denominator) {
typedef typename std::common_type<T, U>::type R;
// Short-circuit the very common use case of dividing by one.
return (denominator == 1) ? numerator : (numerator + denominator - 1) / denominator;
return (denominator == 1) ? numerator : (R(numerator) + denominator - 1) / denominator;
}
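Since mvkCeilingDivide is now constexpr over mixed types, its behavior can be checked at compile time (illustrative asserts):

static_assert(mvkCeilingDivide(99, 32) == 4, "99 threads need 4 groups of 32");
static_assert(mvkCeilingDivide(64, 32) == 2, "exact multiples do not round up");
static_assert(mvkCeilingDivide(7u, uint64_t(1)) == 7, "dividing by 1 short-circuits");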
/** Returns the absolute value of a number. */
template<typename R, typename T, bool = std::is_signed<T>::value>
struct MVKAbs;
template<typename R, typename T>
struct MVKAbs<R, T, true> {
static constexpr R eval(T x) noexcept {
return x >= 0 ? x : (x == std::numeric_limits<T>::min() ? -static_cast<R>(x) : -x);
}
};
template<typename R, typename T>
struct MVKAbs<R, T, false> {
static constexpr R eval(T x) noexcept {
return x;
}
};
/** Returns the greatest common divisor of two numbers. */
template<typename T>
constexpr T mvkGreatestCommonDivisorImpl(T a, T b) {
return b == 0 ? a : mvkGreatestCommonDivisorImpl(b, a % b);
}
template<typename T, typename U>
constexpr typename std::common_type<T, U>::type mvkGreatestCommonDivisor(T a, U b) {
typedef typename std::common_type<T, U>::type R;
typedef typename std::make_unsigned<R>::type UI;
return static_cast<R>(mvkGreatestCommonDivisorImpl(static_cast<UI>(MVKAbs<R, T>::eval(a)), static_cast<UI>(MVKAbs<R, U>::eval(b))));
}
/** Returns the least common multiple of two numbers. */
template<typename T, typename U>
constexpr typename std::common_type<T, U>::type mvkLeastCommonMultiple(T a, U b) {
typedef typename std::common_type<T, U>::type R;
return (a == 0 && b == 0) ? 0 : MVKAbs<R, T>::eval(a) / mvkGreatestCommonDivisor(a, b) * MVKAbs<R, U>::eval(b);
}
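A plausible use of these helpers, given the tcWorkgroupSize constant the conversion kernels above consume (values illustrative): sizing the tessellation control workgroup so it always holds a whole number of patches.

uint32_t simdWidth = 32, outControlPointCount = 3;
uint32_t tcWorkgroupSize = mvkLeastCommonMultiple(simdWidth, outControlPointCount);  // 96
uint32_t patchesPerWorkgroup = tcWorkgroupSize / outControlPointCount;               // 32 whole patches, none straddling a SIMD-group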


@ -380,6 +380,17 @@ MVK_PUBLIC_SYMBOL MTLVertexStepFunction mvkMTLVertexStepFunctionFromVkVertexInpu
}
}
MVK_PUBLIC_SYMBOL MTLStepFunction mvkMTLStepFunctionFromVkVertexInputRate(VkVertexInputRate vkVtxStep, bool forTess) {
if (!forTess) {
return (MTLStepFunction)mvkMTLVertexStepFunctionFromVkVertexInputRate(vkVtxStep);
}
switch (vkVtxStep) {
case VK_VERTEX_INPUT_RATE_VERTEX: return MTLStepFunctionThreadPositionInGridX;
case VK_VERTEX_INPUT_RATE_INSTANCE: return MTLStepFunctionThreadPositionInGridY;
default: return MTLStepFunctionThreadPositionInGridX;
}
}
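The forTess mapping reflects how a compute grid stands in for a vertex draw: per-vertex data advances along grid X, per-instance data along grid Y. For example:

MTLStepFunction sfVtx  = mvkMTLStepFunctionFromVkVertexInputRate(VK_VERTEX_INPUT_RATE_VERTEX,   true);  // ThreadPositionInGridX
MTLStepFunction sfInst = mvkMTLStepFunctionFromVkVertexInputRate(VK_VERTEX_INPUT_RATE_INSTANCE, true);  // ThreadPositionInGridY
MTLStepFunction sfRast = mvkMTLStepFunctionFromVkVertexInputRate(VK_VERTEX_INPUT_RATE_VERTEX,   false); // falls back to the MTLVertexStepFunction mapping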
#undef mvkMTLPrimitiveTypeFromVkPrimitiveTopology
MVK_PUBLIC_SYMBOL MTLPrimitiveType mvkMTLPrimitiveTypeFromVkPrimitiveTopology(VkPrimitiveTopology vkTopology) {
return mvkMTLPrimitiveTypeFromVkPrimitiveTopologyInObj(vkTopology, nullptr);


@ -214,7 +214,7 @@ namespace mvk {
if (reflect.has_member_decoration(type->self, idx, spv::DecorationLocation)) {
memberLoc = reflect.get_member_decoration(type->self, idx, spv::DecorationLocation);
}
patch = reflect.has_member_decoration(type->self, idx, spv::DecorationPatch);
patch = patch || reflect.has_member_decoration(type->self, idx, spv::DecorationPatch);
if (reflect.has_member_decoration(type->self, idx, spv::DecorationBuiltIn)) {
biType = (spv::BuiltIn)reflect.get_member_decoration(type->self, idx, spv::DecorationBuiltIn);
isUsed = reflect.has_active_builtin(biType, var.storage);