Refactor public MVKDevice content into MVKDeviceTrackingMixin functions.
This is a non-functional code-maintenance change. Previously, MVKDevice exposed a significant amount of internal content publicly. This patch adds functions to MVKDeviceTrackingMixin to better encapsulate, consolidate & streamline access to this content.

- Make MVKDeviceTrackingMixin a friend of MVKDevice & MVKPhysicalDevice.
- Hide public MVKDevice content behind MVKDeviceTrackingMixin functions.
- Remove similar MVKDevice content pointers from MVKCommandEncoder.
- Remove MVKDevice getPhysicalDevice(), getPixelFormats() & getMTLDevice(), to focus access through MVKDeviceTrackingMixin.
- Move performance-tracking functions to MVKDeviceTrackingMixin, to remove the need to reference MVKDevice multiple times when marking performance values.
- Subclass MVKQueueSubmission, MVKMetalCompiler, MVKShaderLibrary, and MVKShaderLibraryCache from MVKBaseDeviceObject to make use of these changes.
commit e1baea9a95
parent 0d62a427d4
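In outline, the encapsulation pattern the commit describes looks roughly like the sketch below. This is an illustrative simplification, not the actual MoltenVK declarations: the type and member names (Device, DeviceTrackingMixin, Encoder, now(), addInterval()) are hypothetical stand-ins, and only the accessor names mirror calls visible in the diff (getMetalFeatures(), getDeviceProperties(), getPerformanceTimestamp(), addPerformanceInterval()).

#include <chrono>
#include <cstdint>

struct MetalFeatures { bool nonUniformThreadgroups = true; };   // stand-in for MVKPhysicalDeviceMetalFeatures
struct DeviceProperties { uint32_t maxViewports = 16; };        // stand-in for VkPhysicalDeviceProperties

class Device {
    friend class DeviceTrackingMixin;   // the commit's key move: the mixin is a friend,
    MetalFeatures _metalFeatures;       // so these members no longer need to be public
    DeviceProperties _properties;
public:
    uint64_t now() const {
        return (uint64_t)std::chrono::steady_clock::now().time_since_epoch().count();
    }
    void addInterval(uint64_t& accum, uint64_t start) { accum += now() - start; }
};

class DeviceTrackingMixin {
protected:
    explicit DeviceTrackingMixin(Device* device) : _device(device) {}

    // Consolidated accessors replacing direct pokes at device internals.
    const MetalFeatures& getMetalFeatures() const { return _device->_metalFeatures; }
    const DeviceProperties& getDeviceProperties() const { return _device->_properties; }

    // Performance tracking lives here, so call sites touch the device only once.
    uint64_t getPerformanceTimestamp() const { return _device->now(); }
    void addPerformanceInterval(uint64_t& tracker, uint64_t start) const {
        _device->addInterval(tracker, start);
    }

    Device* _device;
};

// Subclasses (MVKCommandEncoder, MVKQueueSubmission, MVKMetalCompiler, ...) then use
// the mixin accessors instead of reaching through getDevice()->_pMetalFeatures->...
class Encoder : DeviceTrackingMixin {
public:
    explicit Encoder(Device* d) : DeviceTrackingMixin(d) {}
    void encode(uint64_t& encodeTimeTracker) {
        uint64_t start = getPerformanceTimestamp();
        if (getMetalFeatures().nonUniformThreadgroups) { /* take the dispatchThreads path */ }
        addPerformanceInterval(encodeTimeTracker, start);   // single call, no repeated device refs
    }
};

The payoff is visible throughout the diff below: call sites such as cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups collapse to mtlFeats.nonUniformThreadgroups, and performance marking becomes a matched pair of mixin calls.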
@@ -97,7 +97,7 @@ VkResult MVKCmdDraw::setContent(MVKCommandBuffer* cmdBuff,
     _firstInstance = firstInstance;

     // Validate
-    if ((_firstInstance != 0) && !(cmdBuff->getDevice()->_pMetalFeatures->baseVertexInstanceDrawing)) {
+    if ((_firstInstance != 0) && !(cmdBuff->getMetalFeatures().baseVertexInstanceDrawing)) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDraw(): The current device does not support drawing with a non-zero base instance.");
     }

@@ -149,6 +149,8 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
     cmdEncoder->restartMetalRenderPassIfNeeded();

     auto* pipeline = cmdEncoder->getGraphicsPipeline();
+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
+    auto& dvcLimits = cmdEncoder->getDeviceProperties().limits;

     // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead.
     if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@@ -187,7 +189,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
         case kMVKGraphicsStageVertex: {
             mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
             if (pipeline->needsVertexOutputBuffer()) {
-                vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
+                vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * dvcLimits.maxVertexOutputComponents, true);
                 [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
                                       offset: vtxOutBuff->_offset
                                      atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
@@ -197,7 +199,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
             // _firstInstance * stride, since that is the expected behaviour for a divisor of 0.
             cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
             id<MTLComputePipelineState> vtxState = pipeline->getTessVertexStageState();
-            if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+            if (mtlFeats.nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
                 [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_vertexCount, _instanceCount, 1)
                              threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
@@ -217,13 +219,13 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
         case kMVKGraphicsStageTessControl: {
             mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
             if (pipeline->needsTessCtlOutputBuffer()) {
-                tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
+                tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true);
                 [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
                                       offset: tcOutBuff->_offset
                                      atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
             }
             if (pipeline->needsTessCtlPatchOutputBuffer()) {
-                tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
+                tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true);
                 [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
                                       offset: tcPatchOutBuff->_offset
                                      atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
@@ -244,11 +246,11 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {

             NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
             NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
-            while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
+            while (wgSize > dvcLimits.maxComputeWorkGroupSize[0]) {
                 sgSize >>= 1;
                 wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
             }
-            if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+            if (mtlFeats.nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
                 [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1)
                              threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
@@ -297,7 +299,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
             uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
             uint32_t instanceCount = _instanceCount * viewCount;
             cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
-            if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
+            if (mtlFeats.baseVertexInstanceDrawing) {
                 [cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
                                                   vertexStart: _firstVertex
                                                   vertexCount: _vertexCount
@@ -332,11 +334,11 @@ VkResult MVKCmdDrawIndexed::setContent(MVKCommandBuffer* cmdBuff,
     _firstInstance = firstInstance;

     // Validate
-    MVKDevice* mvkDvc = cmdBuff->getDevice();
-    if ((_firstInstance != 0) && !(mvkDvc->_pMetalFeatures->baseVertexInstanceDrawing)) {
+    auto& mtlFeats = cmdBuff->getMetalFeatures();
+    if ((_firstInstance != 0) && !(mtlFeats.baseVertexInstanceDrawing)) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexed(): The current device does not support drawing with a non-zero base instance.");
     }
-    if ((_vertexOffset != 0) && !(mvkDvc->_pMetalFeatures->baseVertexInstanceDrawing)) {
+    if ((_vertexOffset != 0) && !(mtlFeats.baseVertexInstanceDrawing)) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexed(): The current device does not support drawing with a non-zero base vertex.");
     }

@@ -373,6 +375,8 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
     cmdEncoder->restartMetalRenderPassIfNeeded();

     auto* pipeline = cmdEncoder->getGraphicsPipeline();
+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
+    auto& dvcLimits = cmdEncoder->getDeviceProperties().limits;

     // Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead.
     if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@@ -414,7 +418,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
         case kMVKGraphicsStageVertex: {
             mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
             if (pipeline->needsVertexOutputBuffer()) {
-                vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
+                vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * dvcLimits.maxVertexOutputComponents, true);
                 [mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
                                       offset: vtxOutBuff->_offset
                                      atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
@@ -427,7 +431,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
             // _firstInstance * stride, since that is the expected behaviour for a divisor of 0.
             cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
             id<MTLComputePipelineState> vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
-            if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+            if (mtlFeats.nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
                 [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_indexCount, _instanceCount, 1)
                              threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
@@ -447,13 +451,13 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
         case kMVKGraphicsStageTessControl: {
             mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
             if (pipeline->needsTessCtlOutputBuffer()) {
-                tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
+                tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true);
                 [mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
                                       offset: tcOutBuff->_offset
                                      atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
             }
             if (pipeline->needsTessCtlPatchOutputBuffer()) {
-                tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
+                tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true);
                 [mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
                                       offset: tcPatchOutBuff->_offset
                                      atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
@@ -475,11 +479,11 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
             // an index buffer here.
             NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
             NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
-            while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
+            while (wgSize > dvcLimits.maxComputeWorkGroupSize[0]) {
                 sgSize >>= 1;
                 wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
             }
-            if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+            if (mtlFeats.nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
                 [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1)
                              threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
@@ -530,7 +534,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
             uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
             uint32_t instanceCount = _instanceCount * viewCount;
             cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
-            if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
+            if (mtlFeats.baseVertexInstanceDrawing) {
                 [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_renderingState.getPrimitiveType()
                                                           indexCount: _indexCount
                                                            indexType: (MTLIndexType)ibb.mtlIndexType
@@ -575,11 +579,11 @@ VkResult MVKCmdDrawIndirect::setContent(MVKCommandBuffer* cmdBuff,
     _drawCount = drawCount;

     // Validate
-    MVKDevice* mvkDvc = cmdBuff->getDevice();
-    if ( !mvkDvc->_pMetalFeatures->indirectDrawing ) {
+    auto& mtlFeats = cmdBuff->getMetalFeatures();
+    if ( !mtlFeats.indirectDrawing ) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndirect(): The current device does not support indirect drawing.");
     }
-    if (cmdBuff->_lastTessellationPipeline && !mvkDvc->_pMetalFeatures->indirectTessellationDrawing) {
+    if (cmdBuff->_lastTessellationPipeline && !mtlFeats.indirectTessellationDrawing) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndirect(): The current device does not support indirect tessellated drawing.");
     }

@@ -623,7 +627,7 @@ void MVKCmdDrawIndirect::encodeIndexedIndirect(MVKCommandEncoder* cmdEncoder) {
     [mtlConvertEncoder setBuffer: ibb.mtlBuffer
                           offset: ibb.offset
                          atIndex: 4];
-    if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+    if (cmdEncoder->getMetalFeatures().nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
         [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
                      threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
@@ -650,6 +654,8 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
     cmdEncoder->restartMetalRenderPassIfNeeded();

     auto* pipeline = cmdEncoder->getGraphicsPipeline();
+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
+    auto& dvcLimits = cmdEncoder->getDeviceProperties().limits;

     // Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead.
     if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@@ -691,10 +697,10 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
         vertexCount = kMVKMaxDrawIndirectVertexCount;
         patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
         VkDeviceSize indirectSize = (2 * sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
-        if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
+        if (mtlFeats.mslVersion >= 20100) {
            indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
         }
-        paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
+        paramsIncr = std::max((size_t)dvcLimits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
         VkDeviceSize paramsSize = paramsIncr * _drawCount;
         tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
         mtlIndBuff = tempIndirectBuff->_mtlBuffer;
@@ -702,20 +708,20 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
         tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
         mtlParmBuffOfst = tcParamsBuff->_offset;
         if (pipeline->needsVertexOutputBuffer()) {
-            vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
+            vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * dvcLimits.maxVertexOutputComponents, true);
         }
         if (pipeline->needsTessCtlOutputBuffer()) {
-            tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
+            tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true);
         }
         if (pipeline->needsTessCtlPatchOutputBuffer()) {
-            tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
+            tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true);
         }
         tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);

         vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth;
         NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
         tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
-        while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
+        while (tcWorkgroupSize > dvcLimits.maxComputeWorkGroupSize[0]) {
             sgSize >>= 1;
             tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
         }
@@ -776,7 +782,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
                                       &tcWorkgroupSize,
                                       sizeof(tcWorkgroupSize),
                                       8);
-        if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+        if (mtlFeats.nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
             [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
                          threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
@@ -818,7 +824,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
                                       &viewCount,
                                       sizeof(viewCount),
                                       4);
-        if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+        if (mtlFeats.nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
             [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
                          threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
@@ -895,7 +901,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
                 break;
             case kMVKGraphicsStageRasterization:
                 if (pipeline->isTessellationPipeline()) {
-                    if (cmdEncoder->getDevice()->_pMetalFeatures->indirectTessellationDrawing) {
+                    if (mtlFeats.indirectTessellationDrawing) {
                         if (pipeline->needsTessCtlOutputBuffer()) {
                             [cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer
                                                                     offset: tcOutBuff->_offset
@@ -978,11 +984,11 @@ VkResult MVKCmdDrawIndexedIndirect::setContent(MVKCommandBuffer* cmdBuff,
     _directCmdFirstInstance = directCmdFirstInstance;

     // Validate
-    MVKDevice* mvkDvc = cmdBuff->getDevice();
-    if ( !mvkDvc->_pMetalFeatures->indirectDrawing ) {
+    auto& mtlFeats = cmdBuff->getMetalFeatures();
+    if ( !mtlFeats.indirectDrawing ) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexedIndirect(): The current device does not support indirect drawing.");
     }
-    if (cmdBuff->_lastTessellationPipeline && !mvkDvc->_pMetalFeatures->indirectTessellationDrawing) {
+    if (cmdBuff->_lastTessellationPipeline && !mtlFeats.indirectTessellationDrawing) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexedIndirect(): The current device does not support indirect tessellated drawing.");
     }

@@ -1001,6 +1007,8 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
     MVKIndexMTLBufferBinding ibb = ibbOrig;
     MVKIndexMTLBufferBinding ibbTriFan = ibb;
     auto* pipeline = cmdEncoder->getGraphicsPipeline();
+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
+    auto& dvcLimits = cmdEncoder->getDeviceProperties().limits;

     MVKVertexAdjustments vtxAdjmts;
     vtxAdjmts.mtlIndexType = ibb.mtlIndexType;
@@ -1040,10 +1048,10 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
         vertexCount = kMVKMaxDrawIndirectVertexCount;
         patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
         VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
-        if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
+        if (mtlFeats.mslVersion >= 20100) {
             indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
         }
-        paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
+        paramsIncr = std::max((size_t)dvcLimits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
         VkDeviceSize paramsSize = paramsIncr * _drawCount;
         tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
         mtlIndBuff = tempIndirectBuff->_mtlBuffer;
@@ -1051,13 +1059,13 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
         tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
         mtlParmBuffOfst = tcParamsBuff->_offset;
         if (pipeline->needsVertexOutputBuffer()) {
-            vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
+            vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * dvcLimits.maxVertexOutputComponents, true);
         }
         if (pipeline->needsTessCtlOutputBuffer()) {
-            tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
+            tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * dvcLimits.maxTessellationControlPerVertexOutputComponents, true);
         }
         if (pipeline->needsTessCtlPatchOutputBuffer()) {
-            tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
+            tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * dvcLimits.maxTessellationControlPerPatchOutputComponents, true);
         }
         tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
         vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true);
@@ -1068,7 +1076,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI

         NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
         tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
-        while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
+        while (tcWorkgroupSize > dvcLimits.maxComputeWorkGroupSize[0]) {
             sgSize >>= 1;
             tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
         }
@@ -1200,7 +1208,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
         [mtlConvertEncoder setBuffer: ibbTriFan.mtlBuffer
                               offset: ibbTriFan.offset
                              atIndex: 7];
-        if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+        if (mtlFeats.nonUniformThreadgroups) {
 #if MVK_MACOS_OR_IOS
             [mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
                          threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
@@ -1282,7 +1290,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
                 break;
             case kMVKGraphicsStageRasterization:
                 if (pipeline->isTessellationPipeline()) {
-                    if (cmdEncoder->getDevice()->_pMetalFeatures->indirectTessellationDrawing) {
+                    if (mtlFeats.indirectTessellationDrawing) {
                         if (pipeline->needsTessCtlOutputBuffer()) {
                             [cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer
                                                                     offset: tcOutBuff->_offset
@@ -111,10 +111,12 @@ VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
 template <size_t N>
 void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {

+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
+
 #if MVK_MACOS
     // Calls below invoke MTLBlitCommandEncoder so must apply this first.
     // Check if pipeline barriers are available and we are in a renderpass.
-    if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) {
+    if (mtlFeats.memoryBarriers && cmdEncoder->_mtlRenderEncoder) {
         for (auto& b : _barriers) {
             MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.srcStageMask, false);
             MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.dstStageMask, true);
@@ -161,7 +163,7 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
     // into separate Metal renderpasses. Since this is a potentially expensive operation,
     // verify that at least one attachment is being used both as an input and render attachment
     // by checking for a VK_IMAGE_LAYOUT_GENERAL layout.
-    if (cmdEncoder->_mtlRenderEncoder && cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) {
+    if (cmdEncoder->_mtlRenderEncoder && mtlFeats.tileBasedDeferredRendering) {
         bool needsRenderpassRestart = false;
         for (auto& b : _barriers) {
             if (b.type == MVKPipelineBarrier::Image && b.newLayout == VK_IMAGE_LAYOUT_GENERAL) {
@@ -388,7 +390,7 @@ VkResult MVKCmdPushDescriptorSet::setContent(MVKCommandBuffer* cmdBuff,
     _pipelineLayout->retain();

     // Add the descriptor writes
-    MVKDevice* mvkDvc = cmdBuff->getDevice();
+    auto& enabledExtns = cmdBuff->getEnabledExtensions();
     clearDescriptorWrites();    // Clear for reuse
     _descriptorWrites.reserve(descriptorWriteCount);
     for (uint32_t dwIdx = 0; dwIdx < descriptorWriteCount; dwIdx++) {
@@ -410,7 +412,7 @@ VkResult MVKCmdPushDescriptorSet::setContent(MVKCommandBuffer* cmdBuff,
             std::copy_n(descWrite.pTexelBufferView, descWrite.descriptorCount, pNewTexelBufferView);
             descWrite.pTexelBufferView = pNewTexelBufferView;
         }
-        if (mvkDvc->_enabledExtensions.vk_EXT_inline_uniform_block.enabled) {
+        if (enabledExtns.vk_EXT_inline_uniform_block.enabled) {
             const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock = nullptr;
             for (const auto* next = (VkBaseInStructure*)descWrite.pNext; next; next = next->pNext) {
                 switch (next->sType) {
@@ -319,7 +319,7 @@ VkResult MVKCmdBlitImage<N>::setContent(MVKCommandBuffer* cmdBuff,

     _filter = filter;

-    bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _dstImage->getIsLinear();
+    bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getMetalFeatures().renderLinearTextures && _dstImage->getIsLinear();

     _vkImageBlits.clear();    // Clear for reuse
     for (uint32_t rIdx = 0; rIdx < regionCount; rIdx++) {
@@ -350,7 +350,7 @@ VkResult MVKCmdBlitImage<N>::setContent(MVKCommandBuffer* cmdBuff,

     _filter = pBlitImageInfo->filter;

-    bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _dstImage->getIsLinear();
+    bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getMetalFeatures().renderLinearTextures && _dstImage->getIsLinear();

     _vkImageBlits.clear();    // Clear for reuse
     _vkImageBlits.reserve(pBlitImageInfo->regionCount);
@@ -457,6 +457,7 @@ void MVKCmdBlitImage<N>::populateVertices(MVKVertexPosTex* vertices, const VkIma
 template <size_t N>
 void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) {

+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
     size_t vkIBCnt = _vkImageBlits.size();
     VkImageCopy vkImageCopies[vkIBCnt];
     MVKImageBlitRender mvkBlitRenders[vkIBCnt];
@@ -507,7 +508,7 @@ void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com
     id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture(srcPlaneIndex);
     id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture(dstPlaneIndex);
     if (blitCnt && srcMTLTex && dstMTLTex) {
-        if (cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle &&
+        if (mtlFeats.nativeTextureSwizzle &&
             _srcImage->needsSwizzle()) {
             // Use a view that has a swizzle on it.
             srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat
@@ -564,7 +565,7 @@ void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com
         blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter);
         blitKey.srcAspect = mvkIBR.region.srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
         blitKey.dstSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_dstImage->getSampleCount());
-        if (!cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle &&
+        if (!mtlFeats.nativeTextureSwizzle &&
             _srcImage->needsSwizzle()) {
             VkComponentMapping vkMapping = _srcImage->getPixelFormats()->getVkComponentMapping(_srcImage->getVkFormat());
             blitKey.srcSwizzleR = vkMapping.r;
@@ -581,7 +582,7 @@ void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse com
         mtlDepthAttDesc.level = mvkIBR.region.dstSubresource.mipLevel;
         mtlStencilAttDesc.level = mvkIBR.region.dstSubresource.mipLevel;

-        bool isLayeredBlit = blitKey.dstSampleCount > 1 ? cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering : cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering;
+        bool isLayeredBlit = blitKey.dstSampleCount > 1 ? mtlFeats.multisampleLayeredRendering : mtlFeats.layeredRendering;

         uint32_t layCnt = mvkIBR.region.srcSubresource.layerCount;
         if (_dstImage->getMTLTextureType() == MTLTextureType3D) {
@@ -761,13 +762,14 @@ inline VkResult MVKCmdResolveImage<N>::validate(MVKCommandBuffer* cmdBuff, const
 template <size_t N>
 void MVKCmdResolveImage<N>::encode(MVKCommandEncoder* cmdEncoder) {

+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
     size_t vkIRCnt = _vkImageResolves.size();
     VkImageBlit expansionRegions[vkIRCnt];
     VkImageCopy copyRegions[vkIRCnt];

     // If we can do layered rendering to a multisample texture, I can resolve all the layers at once.
     uint32_t layerCnt = 0;
-    if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) {
+    if (mtlFeats.multisampleLayeredRendering) {
         layerCnt = (uint32_t)_vkImageResolves.size();
     } else {
         for (VkImageResolve2& vkIR : _vkImageResolves) { layerCnt += vkIR.dstSubresource.layerCount; }
@@ -820,7 +822,7 @@ void MVKCmdResolveImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
         // direct resolve, but that of the DESTINATION if we need a temporary transfer image.
         mtlResolveSlices[sliceCnt].dstSubresource = vkIR.dstSubresource;
         mtlResolveSlices[sliceCnt].srcSubresource = needXfrImage ? vkIR.dstSubresource : vkIR.srcSubresource;
-        if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) {
+        if (mtlFeats.multisampleLayeredRendering) {
             sliceCnt++;
         } else {
             uint32_t layCnt = vkIR.dstSubresource.layerCount;
@@ -961,7 +963,7 @@ void MVKCmdCopyBuffer<N>::encode(MVKCommandEncoder* cmdEncoder) {
     id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer();
     NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset();

-    VkDeviceSize buffAlign = cmdEncoder->getDevice()->_pMetalFeatures->mtlCopyBufferAlignment;
+    VkDeviceSize buffAlign = cmdEncoder->getMetalFeatures().mtlCopyBufferAlignment;

     for (const auto& cpyRgn : _bufferCopyRegions) {
         const bool useComputeCopy = buffAlign > 1 && (cpyRgn.srcOffset % buffAlign != 0 ||
@@ -1149,7 +1151,7 @@ void MVKCmdBufferImageCopy<N>::encode(MVKCommandEncoder* cmdEncoder) {
     // If we're copying to mip level 0, we can skip the copy and just decode
     // directly into the image. Otherwise, we need to use an intermediate buffer.
     if (_toImage && _image->getIsCompressed() && mtlTexture.textureType == MTLTextureType3D &&
-        !cmdEncoder->getDevice()->_pMetalFeatures->native3DCompressedTextures) {
+        !cmdEncoder->getMetalFeatures().native3DCompressedTextures) {

         MVKCmdCopyBufferToImageInfo info;
         info.srcRowStride = bytesPerRow & 0xffffffff;
@@ -1496,7 +1498,7 @@ void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
     // Apple GPUs do not support rendering/writing to an attachment and then reading from
     // that attachment within a single Metal renderpass. So, if any of the attachments just
     // cleared is an input attachment, we need to restart into separate Metal renderpasses.
-    if (cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) {
+    if (cmdEncoder->getMetalFeatures().tileBasedDeferredRendering) {
         bool needsRenderpassRestart = false;
         for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
             if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) {
@@ -1562,7 +1564,7 @@ VkResult MVKCmdClearImage<N>::setContent(MVKCommandBuffer* cmdBuff,

     // Validate
     MVKMTLFmtCaps mtlFmtCaps = cmdBuff->getPixelFormats()->getCapabilities(_image->getMTLPixelFormat(planeIndex));
-    bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _image->getIsLinear();
+    bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getMetalFeatures().renderLinearTextures && _image->getIsLinear();
     uint32_t reqCap = isDS ? kMVKMTLFmtCapsDSAtt : (isDestUnwritableLinear ? kMVKMTLFmtCapsWrite : kMVKMTLFmtCapsColorAtt);
     if (!mvkAreAllFlagsEnabled(mtlFmtCaps, reqCap)) {
         return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Format %s cannot be cleared on this device.", (isDS ? "DepthStencil" : "Color"), cmdBuff->getPixelFormats()->getName(_image->getVkFormat()));
@@ -1588,6 +1590,7 @@ void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {

     cmdEncoder->endCurrentMetalEncoding();

+    auto& mtlFeats = cmdEncoder->getMetalFeatures();
     MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
     for (auto& srRange : _subresourceRanges) {
         uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(srRange.aspectMask);
@@ -1595,7 +1598,7 @@ void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
         if ( !imgMTLTex ) { continue; }

 #if MVK_MACOS
-        if ( _image->getIsLinear() && !cmdEncoder->getDevice()->_pMetalFeatures->renderLinearTextures ) {
+        if (_image->getIsLinear() && !mtlFeats.renderLinearTextures) {
             // These images cannot be rendered. Instead, use a compute shader.
             // Luckily for us, linear images only have one mip and one array layer under Metal.
             assert( !isDS );
@@ -1608,7 +1611,7 @@ void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
             cmdEncoder->setComputeBytes(mtlComputeEnc, &_clearValue, sizeof(_clearValue), 0);
             MTLSize gridSize = mvkMTLSizeFromVkExtent3D(_image->getExtent3D());
             MTLSize tgSize = MTLSizeMake(mtlClearState.threadExecutionWidth, 1, 1);
-            if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+            if (mtlFeats.nonUniformThreadgroups) {
                 [mtlComputeEnc dispatchThreads: gridSize threadsPerThreadgroup: tgSize];
             } else {
                 MTLSize tgCount = MTLSizeMake(gridSize.width / tgSize.width, gridSize.height, gridSize.depth);
@@ -1681,8 +1684,7 @@ void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
         }

         // If we can do layered rendering, I can clear all the layers at once.
-        if (cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering &&
-            (_image->getSampleCount() == VK_SAMPLE_COUNT_1_BIT || cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering)) {
+        if (mtlFeats.layeredRendering && (mtlFeats.multisampleLayeredRendering || _image->getSampleCount() == VK_SAMPLE_COUNT_1_BIT)) {
             if (is3D) {
                 mtlRPCADesc.depthPlane = layerStart;
                 mtlRPDADesc.depthPlane = layerStart;
@@ -421,18 +421,6 @@ public:
     /** Context for tracking information across multiple encodings. */
     MVKCommandEncodingContext* _pEncodingContext;

-    /** A reference to the Metal features supported by the device. */
-    const MVKPhysicalDeviceMetalFeatures* _pDeviceMetalFeatures;
-
-    /** A reference to the Vulkan features supported by the device. */
-    const VkPhysicalDeviceFeatures* _pDeviceFeatures;
-
-    /** Pointer to the properties of the device. */
-    const VkPhysicalDeviceProperties* _pDeviceProperties;
-
-    /** Pointer to the memory properties of the device. */
-    const VkPhysicalDeviceMemoryProperties* _pDeviceMemoryProperties;
-
     /** The command buffer whose commands are being encoded. */
     MVKCommandBuffer* _cmdBuffer;

@@ -320,7 +320,7 @@ void MVKCommandBuffer::recordExecuteCommands(MVKArrayRef<MVKCommandBuffer*const>
 // Track whether a stage-based timestamp command has been added, so we know
 // to update the timestamp command fence when ending a Metal command encoder.
 void MVKCommandBuffer::recordTimestampCommand() {
-    _hasStageCounterTimestampCommand = mvkIsAnyFlagEnabled(_device->_pMetalFeatures->counterSamplingPoints, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE);
+    _hasStageCounterTimestampCommand = mvkIsAnyFlagEnabled(getMetalFeatures().counterSamplingPoints, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE);
 }


@@ -340,14 +340,13 @@ void MVKCommandBuffer::recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline) {
 // because that would include app time between command submissions.
 void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff,
                                MVKCommandEncodingContext* pEncodingContext) {
-    MVKDevice* mvkDev = getDevice();
-    uint64_t startTime = mvkDev->getPerformanceTimestamp();
+    uint64_t startTime = getPerformanceTimestamp();

     beginEncoding(mtlCmdBuff, pEncodingContext);
     encodeCommands(_cmdBuffer->_head);
     endEncoding();

-    mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime);
+    addPerformanceInterval(getPerformanceStats().queue.commandBufferEncoding, startTime);
 }

 void MVKCommandEncoder::beginEncoding(id<MTLCommandBuffer> mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) {
@@ -494,9 +493,8 @@ void MVKCommandEncoder::setSubpass(MVKCommand* subpassCmd,
     _renderSubpassIndex = subpassIndex;
     _multiviewPassIndex = 0;

-    _canUseLayeredRendering = (_device->_pMetalFeatures->layeredRendering &&
-                               (_device->_pMetalFeatures->multisampleLayeredRendering ||
-                                (getSubpass()->getSampleCount() == VK_SAMPLE_COUNT_1_BIT)));
+    auto& mtlFeats = getMetalFeatures();
+    _canUseLayeredRendering = mtlFeats.layeredRendering && (mtlFeats.multisampleLayeredRendering || getSubpass()->getSampleCount() == VK_SAMPLE_COUNT_1_BIT);

     beginMetalRenderPass(cmdUse);
 }
@@ -539,7 +537,7 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) {
                                   isRestart);
     if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
         if ( !_pEncodingContext->visibilityResultBuffer ) {
-            _pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
+            _pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(getMetalFeatures().maxQueryBufferSize, true, true);
         }
         mtlRPDesc.visibilityResultBuffer = _pEncodingContext->visibilityResultBuffer->_mtlBuffer;
     }
@@ -577,7 +575,7 @@ void MVKCommandEncoder::beginMetalRenderPass(MVKCommandUse cmdUse) {
     // If programmable sample positions are supported, set them into the render pass descriptor.
     // If no custom sample positions are established, size will be zero,
     // and Metal will default to using default sample postions.
-    if (_pDeviceMetalFeatures->programmableSamplePositions) {
+    if (getMetalFeatures().programmableSamplePositions) {
         auto sampPosns = _renderingState.getSamplePositions();
         [mtlRPDesc setSamplePositions: sampPosns.data() count: sampPosns.size()];
     }
@@ -892,7 +890,8 @@ void MVKCommandEncoder::setVertexBytes(id<MTLRenderCommandEncoder> mtlEncoder,
                                        NSUInteger length,
                                        uint32_t mtlBuffIndex,
                                        bool descOverride) {
-    if (_pDeviceMetalFeatures->dynamicMTLBufferSize && length <= _pDeviceMetalFeatures->dynamicMTLBufferSize) {
+    auto& mtlFeats = getMetalFeatures();
+    if (mtlFeats.dynamicMTLBufferSize && length <= mtlFeats.dynamicMTLBufferSize) {
         [mtlEncoder setVertexBytes: bytes length: length atIndex: mtlBuffIndex];
     } else {
         const MVKMTLBufferAllocation* mtlBuffAlloc = copyToTempMTLBufferAllocation(bytes, length);
@@ -905,7 +904,7 @@ void MVKCommandEncoder::setVertexBytes(id<MTLRenderCommandEncoder> mtlEncoder,
 }

 void MVKCommandEncoder::encodeVertexAttributeBuffer(MVKMTLBufferBinding& b, bool isDynamicStride) {
-    if (_device->_pMetalFeatures->dynamicVertexStride) {
+    if (getMetalFeatures().dynamicVertexStride) {
 #if MVK_XCODE_15
         NSUInteger mtlStride = isDynamicStride ? b.stride : MTLAttributeStrideStatic;
         if (b.isInline) {
@@ -945,7 +944,8 @@ void MVKCommandEncoder::setFragmentBytes(id<MTLRenderCommandEncoder> mtlEncoder,
                                          NSUInteger length,
                                          uint32_t mtlBuffIndex,
                                          bool descOverride) {
-    if (_pDeviceMetalFeatures->dynamicMTLBufferSize && length <= _pDeviceMetalFeatures->dynamicMTLBufferSize) {
+    auto& mtlFeats = getMetalFeatures();
+    if (mtlFeats.dynamicMTLBufferSize && length <= mtlFeats.dynamicMTLBufferSize) {
         [mtlEncoder setFragmentBytes: bytes length: length atIndex: mtlBuffIndex];
     } else {
         const MVKMTLBufferAllocation* mtlBuffAlloc = copyToTempMTLBufferAllocation(bytes, length);
@@ -962,7 +962,8 @@ void MVKCommandEncoder::setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder,
                                         NSUInteger length,
                                         uint32_t mtlBuffIndex,
                                         bool descOverride) {
-    if (_pDeviceMetalFeatures->dynamicMTLBufferSize && length <= _pDeviceMetalFeatures->dynamicMTLBufferSize) {
+    auto& mtlFeats = getMetalFeatures();
+    if (mtlFeats.dynamicMTLBufferSize && length <= mtlFeats.dynamicMTLBufferSize) {
         [mtlEncoder setBytes: bytes length: length atIndex: mtlBuffIndex];
     } else {
         const MVKMTLBufferAllocation* mtlBuffAlloc = copyToTempMTLBufferAllocation(bytes, length);
@@ -1035,7 +1036,7 @@ void MVKCommandEncoder::markTimestamp(MVKTimestampQueryPool* pQueryPool, uint32_
     addActivatedQueries(pQueryPool, query, queryCount);

     if (pQueryPool->hasMTLCounterBuffer()) {
-        MVKCounterSamplingFlags sampPts = _device->_pMetalFeatures->counterSamplingPoints;
+        MVKCounterSamplingFlags sampPts = getMetalFeatures().counterSamplingPoints;
         for (uint32_t qOfst = 0; qOfst < queryCount; qOfst++) {
             if (mvkIsAnyFlagEnabled(sampPts, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE)) {
                 _timestampStageCounterQueries.push_back({ pQueryPool, query + qOfst });
@@ -1155,10 +1156,6 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer,
     _computePushConstants(this, VK_SHADER_STAGE_COMPUTE_BIT),
     _prefillStyle(prefillStyle){

-    _pDeviceFeatures = &_device->_enabledFeatures;
-    _pDeviceMetalFeatures = _device->_pMetalFeatures;
-    _pDeviceProperties = _device->_pProperties;
-    _pDeviceMemoryProperties = _device->_pMemoryProperties;
     _pActivatedQueries = nullptr;
     _mtlCmdBuffer = nil;
     _mtlRenderEncoder = nil;
@@ -69,7 +69,7 @@ void MVKPushConstantsCommandEncoderState::setPushConstants(uint32_t offset, MVK
     // MSL structs can have a larger size than the equivalent C struct due to MSL alignment needs.
     // Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size.
     // Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader.
-    size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
+    size_t pcSizeAlign = _cmdEncoder->getMetalFeatures().pushConstantSizeAlignment;
     size_t pcSize = pushConstants.size();
     size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign);
     mvkEnsureSize(_pushConstants, pcBuffSize);
@@ -369,7 +369,7 @@ void MVKRenderingCommandEncoderState::setStencilReferenceValues(VkStencilFaceFla
 void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef<VkViewport> viewports,
                                                    uint32_t firstViewport,
                                                    bool isDynamic) {
-    uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports;
+    uint32_t maxViewports = _cmdEncoder->getDeviceProperties().limits.maxViewports;
     if (firstViewport >= maxViewports) { return; }

     MVKMTLViewports mtlViewports = isDynamic ? _mtlViewports[StateScope::Dynamic] : _mtlViewports[StateScope::Static];
@@ -384,7 +384,7 @@ void MVKRenderingCommandEncoderState::setViewports(const MVKArrayRef<VkViewport>
 void MVKRenderingCommandEncoderState::setScissors(const MVKArrayRef<VkRect2D> scissors,
                                                   uint32_t firstScissor,
                                                   bool isDynamic) {
-    uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports;
+    uint32_t maxScissors = _cmdEncoder->getDeviceProperties().limits.maxViewports;
     if (firstScissor >= maxScissors) { return; }

     MVKMTLScissors mtlScissors = isDynamic ? _mtlScissors[StateScope::Dynamic] : _mtlScissors[StateScope::Static];
@@ -538,6 +538,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) {
     if (stage != kMVKGraphicsStageRasterization) { return; }

     auto& rendEnc = _cmdEncoder->_mtlRenderEncoder;
+    auto& enabledFeats = _cmdEncoder->getEnabledFeatures();

     if (isDirty(PolygonMode)) { [rendEnc setTriangleFillMode: getMTLContent(PolygonMode)]; }
     if (isDirty(CullMode)) { [rendEnc setCullMode: getMTLContent(CullMode)]; }
@@ -566,13 +567,13 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) {
             [rendEnc setDepthBias: 0 slopeScale: 0 clamp: 0];
         }
     }
-    if (isDirty(DepthClipEnable) && _cmdEncoder->_pDeviceFeatures->depthClamp) {
+    if (isDirty(DepthClipEnable) && enabledFeats.depthClamp) {
         [rendEnc setDepthClipMode: getMTLContent(DepthClipEnable)];
     }

 #if MVK_USE_METAL_PRIVATE_API
     if (getMVKConfig().useMetalPrivateAPI && (isDirty(DepthBoundsTestEnable) || isDirty(DepthBounds)) &&
-        _cmdEncoder->_pDeviceFeatures->depthBounds) {
+        enabledFeats.depthBounds) {
         if (getMTLContent(DepthBoundsTestEnable)) {
             auto& db = getMTLContent(DepthBounds);
             [(id<MVKMTLRenderCommandEncoderDepthBoundsAMD>)_cmdEncoder->_mtlRenderEncoder setDepthBoundsTestAMD: YES
@@ -603,7 +604,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) {

     if (isDirty(Viewports)) {
         auto& mtlViewports = getMTLContent(Viewports);
-        if (_cmdEncoder->_pDeviceFeatures->multiViewport) {
+        if (enabledFeats.multiViewport) {
 #if MVK_MACOS_OR_IOS
             [rendEnc setViewports: mtlViewports.viewports count: mtlViewports.viewportCount];
 #endif
@@ -623,7 +624,7 @@ void MVKRenderingCommandEncoderState::encodeImpl(uint32_t stage) {
             mtlScissors.scissors[sIdx] = shouldDiscard ? zeroRect : _cmdEncoder->clipToRenderArea(mtlScissors.scissors[sIdx]);
         }

-        if (_cmdEncoder->_pDeviceFeatures->multiViewport) {
+        if (enabledFeats.multiViewport) {
 #if MVK_MACOS_OR_IOS
             [rendEnc setScissorRects: mtlScissors.scissors count: mtlScissors.scissorCount];
 #endif
@@ -949,7 +950,7 @@ void MVKGraphicsResourcesCommandEncoderState::markDirty() {
 void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {

     auto* pipeline = _cmdEncoder->getGraphicsPipeline();
-    bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle;
+    bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getMetalFeatures().nativeTextureSwizzle;
     bool forTessellation = pipeline->isTessellationPipeline();
     bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride);

@@ -1369,12 +1370,12 @@ void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
 // In most cases, a MTLCommandBuffer corresponds to a Vulkan command submit (VkSubmitInfo),
 // and so the error text is framed in terms of the Vulkan submit.
 void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
-    if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) {
-        bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
+    if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->getMetalFeatures().maxQueryBufferSize) {
+        bool shouldCount = _cmdEncoder->getEnabledFeatures().occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
         _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
         _mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset);
     } else {
-        reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize / kMVKQuerySlotSizeInBytes);
+        reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->getMetalFeatures().maxQueryBufferSize / kMVKQuerySlotSizeInBytes);
         _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
         _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset -= kMVKQuerySlotSizeInBytes;
     }
@@ -174,9 +174,9 @@ void MVKCommandEncodingPool::clear() {
 #pragma mark Construction

 MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool),
-    _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true),
-    _privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true, false, MTLStorageModePrivate),
-    _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true, MTLStorageModePrivate) {
+    _mtlBufferAllocator(commandPool->getDevice(), commandPool->getMetalFeatures().maxMTLBufferSize, true),
+    _privateMtlBufferAllocator(commandPool->getDevice(), commandPool->getMetalFeatures().maxMTLBufferSize, true, false, MTLStorageModePrivate),
+    _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getMetalFeatures().maxQueryBufferSize, true, true, MTLStorageModePrivate) {
 }

 MVKCommandEncodingPool::~MVKCommandEncodingPool() {
@ -32,7 +32,7 @@ using namespace std;
|
||||
|
||||
id<MTLRenderPipelineState> MVKCommandResourceFactory::newCmdBlitImageMTLRenderPipelineState(MVKRPSKeyBlitImg& blitKey,
|
||||
MVKVulkanAPIDeviceObject* owner) {
|
||||
bool isLayeredBlit = blitKey.dstSampleCount > 1 ? _device->_pMetalFeatures->multisampleLayeredRendering : _device->_pMetalFeatures->layeredRendering;
|
||||
bool isLayeredBlit = blitKey.dstSampleCount > 1 ? getMetalFeatures().multisampleLayeredRendering : getMetalFeatures().layeredRendering;
|
||||
id<MTLFunction> vtxFunc = newFunctionNamed(isLayeredBlit ? "vtxCmdBlitImageLayered" : "vtxCmdBlitImage"); // temp retain
|
||||
id<MTLFunction> fragFunc = newBlitFragFunction(blitKey); // temp retain
|
||||
MTLRenderPipelineDescriptor* plDesc = [MTLRenderPipelineDescriptor new]; // temp retain
@ -188,7 +188,7 @@ static void getSwizzleString(char swizzleStr[4], VkComponentMapping vkMapping) {

id<MTLFunction> MVKCommandResourceFactory::newBlitFragFunction(MVKRPSKeyBlitImg& blitKey) {
	@autoreleasepool {
		bool isLayeredBlit = blitKey.dstSampleCount > 1 ? _device->_pMetalFeatures->multisampleLayeredRendering : _device->_pMetalFeatures->layeredRendering;
		bool isLayeredBlit = blitKey.dstSampleCount > 1 ? getMetalFeatures().multisampleLayeredRendering : getMetalFeatures().layeredRendering;
		NSString* typeStr = getMTLFormatTypeString(blitKey.getSrcMTLPixelFormat());

		bool isArrayType = blitKey.isSrcArrayType();
@ -228,7 +228,7 @@ id<MTLFunction> MVKCommandResourceFactory::newBlitFragFunction(MVKRPSKeyBlitImg&
		}
		NSString* sliceArg = isArrayType ? (isLayeredBlit ? @", subRez.slice + varyings.v_layer" : @", subRez.slice") : @"";
		NSString* srcFilter = isLinearFilter ? @"linear" : @"nearest";
		if (!getDevice()->_pMetalFeatures->nativeTextureSwizzle) {
		if (!getMetalFeatures().nativeTextureSwizzle) {
			getSwizzleString(swizzleArg, blitKey.getSrcSwizzle());
		}

@ -620,11 +620,11 @@ id<MTLComputePipelineState> MVKCommandResourceFactory::newAccumulateOcclusionQue
// Returns the retained MTLFunction with the name.
// The caller is responsible for releasing the returned function object.
id<MTLFunction> MVKCommandResourceFactory::newFunctionNamed(const char* funcName) {
	uint64_t startTime = _device->getPerformanceTimestamp();
	uint64_t startTime = getPerformanceTimestamp();
	NSString* nsFuncName = [[NSString alloc] initWithUTF8String: funcName]; // temp retained
	id<MTLFunction> mtlFunc = [_mtlLibrary newFunctionWithName: nsFuncName]; // retained
	[nsFuncName release]; // temp release
	_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
	addPerformanceInterval(getPerformanceStats().shaderCompilation.functionRetrieval, startTime);
	return mtlFunc;
}

@ -633,20 +633,20 @@ id<MTLFunction> MVKCommandResourceFactory::newMTLFunction(NSString* mslSrcCode,
	id<MTLFunction> mtlFunc = nil;
	NSError* err = nil;

	uint64_t startTime = _device->getPerformanceTimestamp();
	uint64_t startTime = getPerformanceTimestamp();
	id<MTLLibrary> mtlLib = [getMTLDevice() newLibraryWithSource: mslSrcCode
	                                                     options: getDevice()->getMTLCompileOptions()
	                                                       error: &err]; // temp retain
	_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
	addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompile, startTime);

	if (err) {
		reportError(VK_ERROR_INITIALIZATION_FAILED,
		            "Could not compile support shader from MSL source (Error code %li):\n%s\n%s",
		            (long)err.code, mslSrcCode.UTF8String, err.localizedDescription.UTF8String);
	} else {
		startTime = _device->getPerformanceTimestamp();
		startTime = getPerformanceTimestamp();
		mtlFunc = [mtlLib newFunctionWithName: funcName];
		_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
		addPerformanceInterval(getPerformanceStats().shaderCompilation.functionRetrieval, startTime);
	}

	[mtlLib release]; // temp release
@ -685,12 +685,12 @@ MVKCommandResourceFactory::MVKCommandResourceFactory(MVKDevice* device) : MVKBas
void MVKCommandResourceFactory::initMTLLibrary() {
	@autoreleasepool {
		NSError* err = nil;
		uint64_t startTime = _device->getPerformanceTimestamp();
		uint64_t startTime = getPerformanceTimestamp();
		_mtlLibrary = [getMTLDevice() newLibraryWithSource: _MVKStaticCmdShaderSource
		                                           options: getDevice()->getMTLCompileOptions()
		                                             error: &err]; // retained
		MVKAssert( !err, "Could not compile command shaders (Error code %li):\n%s", (long)err.code, err.localizedDescription.UTF8String);
		_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
		addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompile, startTime);
	}
}
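
The old/new pairs above all apply the same mechanical substitution. In sketch form, using only names that appear in this commit:

	// before: every access routed explicitly through the MVKDevice members
	_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime);
	// after: the MVKDeviceTrackingMixin function reaches the same tracker directly
	addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompile, startTime);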

@ -45,7 +45,7 @@ MVKMTLBufferAllocation* MVKMTLBufferAllocationPool::newObject() {
// Adds a new MTLBuffer to the buffer pool and resets the next offset to the start of it
void MVKMTLBufferAllocationPool::addMTLBuffer() {
	MTLResourceOptions mbOpts = (_mtlStorageMode << MTLResourceStorageModeShift) | MTLResourceCPUCacheModeDefaultCache;
	_mtlBuffers.push_back({ [_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts], 0 });
	_mtlBuffers.push_back({ [getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts], 0 });
	_nextOffset = 0;
}

@ -120,7 +120,7 @@ MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger
	MVKAssert(length <= _maxAllocationLength, "This MVKMTLBufferAllocator has been configured to dispense MVKMTLBufferRegions no larger than %lu bytes.", (unsigned long)_maxAllocationLength);

	// Can't allocate a segment smaller than the minimum MTLBuffer alignment.
	length = std::max<NSUInteger>(length, _device->_pMetalFeatures->mtlBufferAlignment);
	length = std::max<NSUInteger>(length, getMetalFeatures().mtlBufferAlignment);

	// Convert max length to the next power-of-two exponent to use as a lookup
	NSUInteger p2Exp = mvkPowerOfTwoExponent(length);
@ -128,7 +128,7 @@ MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger
}

MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated, MTLStorageMode mtlStorageMode) : MVKBaseDeviceObject(device) {
	_maxAllocationLength = std::max<NSUInteger>(maxRegionLength, _device->_pMetalFeatures->mtlBufferAlignment);
	_maxAllocationLength = std::max<NSUInteger>(maxRegionLength, getMetalFeatures().mtlBufferAlignment);
	_isThreadSafe = makeThreadSafe;

	// Convert max length to the next power-of-two exponent
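
The power-of-two conversion above selects which pool serves a request. A worked sketch, assuming mvkPowerOfTwoExponent() returns the exponent of the next power of two at or above its argument, and a hypothetical 256-byte mtlBufferAlignment:

	NSUInteger length = 3000;                          // requested region size, in bytes
	length = std::max<NSUInteger>(length, 256);        // clamp up to the minimum MTLBuffer alignment
	NSUInteger p2Exp = mvkPowerOfTwoExponent(length);  // 12, since 2^12 = 4096 is the next power of two
	// p2Exp then indexes the pool that dispenses 4096-byte regions.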

@ -42,8 +42,9 @@ void MVKBuffer::propagateDebugName() {
#pragma mark Resource memory

VkResult MVKBuffer::getMemoryRequirements(VkMemoryRequirements* pMemoryRequirements) {
	if (_device->_pMetalFeatures->placementHeaps) {
		MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: getByteCount() options: MTLResourceStorageModePrivate];
	if (getMetalFeatures().placementHeaps) {
		MTLSizeAndAlign sizeAndAlign = [getMTLDevice() heapBufferSizeAndAlignWithLength: getByteCount()
		                                                                        options: MTLResourceStorageModePrivate];
		pMemoryRequirements->size = sizeAndAlign.size;
		pMemoryRequirements->alignment = sizeAndAlign.align;
	} else {
@ -82,7 +83,7 @@ VkResult MVKBuffer::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOf
#if MVK_MACOS
	if (_deviceMemory) {
		_isHostCoherentTexelBuffer = (!isUnifiedMemoryGPU() &&
		                              !_device->_pMetalFeatures->sharedLinearTextures &&
		                              !getMetalFeatures().sharedLinearTextures &&
		                              _deviceMemory->isMemoryHostCoherent() &&
		                              mvkIsAnyFlagEnabled(_usage, (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
		                                                           VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)));
@ -199,7 +200,7 @@ id<MTLBuffer> MVKBuffer::getMTLBufferCache() {
	lock_guard<mutex> lock(_lock);
	if (_mtlBufferCache) { return _mtlBufferCache; }

	_mtlBufferCache = [_device->getMTLDevice() newBufferWithLength: getByteCount()
	_mtlBufferCache = [getMTLDevice() newBufferWithLength: getByteCount()
	                                                       options: MTLResourceStorageModeManaged]; // retained
	flushToDevice(_deviceMemoryOffset, _byteCount);
}
@ -217,7 +218,7 @@ uint64_t MVKBuffer::getMTLBufferGPUAddress() {
#pragma mark Construction

MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) : MVKResource(device), _usage(pCreateInfo->usage) {
	_byteAlignment = _device->_pMetalFeatures->mtlBufferAlignment;
	_byteAlignment = getMetalFeatures().mtlBufferAlignment;
	_byteCount = pCreateInfo->size;

	for (const auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
@ -284,7 +285,8 @@ void MVKBufferView::propagateDebugName() {
#pragma mark Metal

id<MTLTexture> MVKBufferView::getMTLTexture() {
	if ( !_mtlTexture && _mtlPixelFormat && _device->_pMetalFeatures->texelBuffers) {
	auto& mtlFeats = getMetalFeatures();
	if ( !_mtlTexture && _mtlPixelFormat && mtlFeats.texelBuffers) {

		// Lock and check again in case another thread has created the texture.
		lock_guard<mutex> lock(_lock);
@ -300,7 +302,7 @@ id<MTLTexture> MVKBufferView::getMTLTexture() {
		}
		id<MTLBuffer> mtlBuff;
		VkDeviceSize mtlBuffOffset;
		if ( !_device->_pMetalFeatures->sharedLinearTextures && _buffer->isMemoryHostCoherent() ) {
		if ( !mtlFeats.sharedLinearTextures && _buffer->isMemoryHostCoherent() ) {
			mtlBuff = _buffer->getMTLBufferCache();
			mtlBuffOffset = _offset;
		} else {
@ -308,7 +310,7 @@ id<MTLTexture> MVKBufferView::getMTLTexture() {
			mtlBuffOffset = _buffer->getMTLBufferOffset() + _offset;
		}
		MTLTextureDescriptor* mtlTexDesc;
		if ( _device->_pMetalFeatures->textureBuffers ) {
		if ( mtlFeats.textureBuffers ) {
			mtlTexDesc = [MTLTextureDescriptor textureBufferDescriptorWithPixelFormat: _mtlPixelFormat
			                                                                    width: _textureSize.width
			                                                          resourceOptions: (mtlBuff.cpuCacheMode << MTLResourceCPUCacheModeShift) | (mtlBuff.storageMode << MTLResourceStorageModeShift)
@ -347,10 +349,11 @@ MVKBufferView::MVKBufferView(MVKDevice* device, const VkBufferViewCreateInfo* pC
	if (byteCount == VK_WHOLE_SIZE) { byteCount = _buffer->getByteCount() - pCreateInfo->offset; } // Remaining bytes in buffer
	size_t blockCount = byteCount / bytesPerBlock;

	if ( !_device->_pMetalFeatures->textureBuffers ) {
	auto& mtlFeats = getMetalFeatures();
	if ( !mtlFeats.textureBuffers ) {
		// But Metal requires the texture to be a 2D texture. Determine the number of 2D rows we need and their width.
		// Multiple rows will automatically align with PoT max texture dimension, but need to align upwards if less than full single row.
		size_t maxBlocksPerRow = _device->_pMetalFeatures->maxTextureDimension / fmtBlockSize.width;
		size_t maxBlocksPerRow = mtlFeats.maxTextureDimension / fmtBlockSize.width;
		size_t blocksPerRow = min(blockCount, maxBlocksPerRow);
		_mtlBytesPerRow = mvkAlignByteCount(blocksPerRow * bytesPerBlock, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this));

@ -367,7 +370,7 @@ MVKBufferView::MVKBufferView(MVKDevice* device, const VkBufferViewCreateInfo* pC
		_mtlBytesPerRow = mvkAlignByteCount(byteCount, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this));
	}

	if ( !_device->_pMetalFeatures->texelBuffers ) {
	if ( !mtlFeats.texelBuffers ) {
		setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Texel buffers are not supported on this device."));
	}
}
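
When texture buffers are unavailable, the fallback above packs the texel data into a regular 2D texture. A worked sketch of the row arithmetic, assuming a hypothetical 16384 maxTextureDimension, a 4-byte format with 1x1 blocks, and 20000 texels:

	size_t blockCount      = 20000;                                 // blocks (texels) in the view
	size_t maxBlocksPerRow = 16384 / 1;                             // maxTextureDimension / block width
	size_t blocksPerRow    = std::min(blockCount, maxBlocksPerRow); // 16384; the rest spill onto a second row
	// _mtlBytesPerRow is then 16384 * 4 bytes, aligned up to the texel-buffer alignment.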

@ -619,6 +619,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri
		} \
	} while(false)

	auto& mtlFeats = getMetalFeatures();
	MVKShaderStageResourceBinding& bindIdxs = _mtlResourceIndexOffsets.stages[stage];
	MVKShaderStageResourceBinding& dslCnts = _layout->_mtlResourceCounts.stages[stage];

@ -627,7 +628,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri
		case VK_DESCRIPTOR_TYPE_SAMPLER:
			setResourceIndexOffset(samplerIndex);

			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfSamplers) {
			if (pBinding->descriptorCount > 1 && !mtlFeats.arrayOfSamplers) {
				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName()));
			}
			break;
@ -637,10 +638,10 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri
			setResourceIndexOffset(samplerIndex);

			if (pBinding->descriptorCount > 1) {
				if ( !_device->_pMetalFeatures->arrayOfTextures ) {
				if ( !mtlFeats.arrayOfTextures ) {
					_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
				}
				if ( !_device->_pMetalFeatures->arrayOfSamplers ) {
				if ( !mtlFeats.arrayOfSamplers ) {
					_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName()));
				}
			}
@ -660,7 +661,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri
		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
			setResourceIndexOffset(textureIndex);

			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
			if (pBinding->descriptorCount > 1 && !mtlFeats.arrayOfTextures) {
				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
			}
			break;
@ -670,7 +671,7 @@ void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescri
			setResourceIndexOffset(textureIndex);
			if (!getPhysicalDevice()->useNativeTextureAtomics()) setResourceIndexOffset(bufferIndex);

			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
			if (pBinding->descriptorCount > 1 && !mtlFeats.arrayOfTextures) {
				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
			}
			break;

@ -98,6 +98,8 @@ void MVKDescriptorSetLayout::pushDescriptorSet(MVKCommandEncoder* cmdEncoder,
	if (!_isPushDescriptorLayout) return;

	if (!cmdEncoder) { clearConfigurationResult(); }

	auto& enabledExtns = getEnabledExtensions();
	for (const VkWriteDescriptorSet& descWrite : descriptorWrites) {
		uint32_t dstBinding = descWrite.dstBinding;
		uint32_t dstArrayElement = descWrite.dstArrayElement;
@ -106,7 +108,7 @@ void MVKDescriptorSetLayout::pushDescriptorSet(MVKCommandEncoder* cmdEncoder,
		const VkDescriptorBufferInfo* pBufferInfo = descWrite.pBufferInfo;
		const VkBufferView* pTexelBufferView = descWrite.pTexelBufferView;
		const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock = nullptr;
		if (_device->_enabledExtensions.vk_EXT_inline_uniform_block.enabled) {
		if (enabledExtns.vk_EXT_inline_uniform_block.enabled) {
			for (const auto* next = (VkBaseInStructure*)descWrite.pNext; next; next = next->pNext) {
				switch (next->sType) {
					case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT: {
@ -489,7 +491,7 @@ VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL
	VkResult rslt = VK_ERROR_OUT_OF_POOL_MEMORY;
	NSUInteger mtlArgBuffAllocSize = mvkDSL->getMTLArgumentEncoder().mtlArgumentEncoderSize;
	NSUInteger mtlArgBuffAlignedSize = mvkAlignByteCount(mtlArgBuffAllocSize,
	                                                     getDevice()->_pMetalFeatures->mtlBufferAlignment);
	                                                     getMetalFeatures().mtlBufferAlignment);

	size_t dsCnt = _descriptorSetAvailablility.size();
	_descriptorSetAvailablility.enumerateEnabledBits(true, [&](size_t dsIdx) {
@ -713,7 +715,7 @@ MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device, const VkDescriptorPoolCr
	_hasPooledDescriptors(getMVKConfig().preallocateDescriptors), // Set this first! Accessed by MVKDescriptorSet constructor and getPoolSize() in following lines.
	_descriptorSets(pCreateInfo->maxSets, MVKDescriptorSet(this)),
	_descriptorSetAvailablility(pCreateInfo->maxSets, true),
	_inlineBlockMTLBufferAllocator(device, device->_pMetalFeatures->dynamicMTLBufferSize, true),
	_inlineBlockMTLBufferAllocator(device, getMetalFeatures().dynamicMTLBufferSize, true),
	_uniformBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)),
	_storageBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)),
	_uniformBufferDynamicDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)),
@ -735,6 +737,7 @@ void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo

	if ( !isUsingDescriptorSetMetalArgumentBuffers() ) { return; }

	auto& mtlFeats = getMetalFeatures();
	@autoreleasepool {
		NSUInteger mtlBuffCnt = 0;
		NSUInteger mtlTexCnt = 0;
@ -803,14 +806,14 @@ void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo
		// the alignment of each descriptor set Metal argument buffer offset.
		NSUInteger overheadPerDescSet = (2 * getMetalArgumentBufferResourceStorageSize(1, 1, 1) -
		                                 getMetalArgumentBufferResourceStorageSize(2, 2, 2) +
		                                 _device->_pMetalFeatures->mtlBufferAlignment);
		                                 mtlFeats.mtlBufferAlignment);

		// Measure the size of an argument buffer that would hold all of the resources
		// managed in this pool, then add any overhead for all the descriptor sets.
		NSUInteger metalArgBuffSize = getMetalArgumentBufferResourceStorageSize(mtlBuffCnt, mtlTexCnt, mtlSampCnt);
		metalArgBuffSize += (overheadPerDescSet * (pCreateInfo->maxSets - 1)); // metalArgBuffSize already includes overhead for one descriptor set
		if (metalArgBuffSize) {
			NSUInteger maxMTLBuffSize = _device->_pMetalFeatures->maxMTLBufferSize;
			NSUInteger maxMTLBuffSize = mtlFeats.maxMTLBufferSize;
			if (metalArgBuffSize > maxMTLBuffSize) {
				setConfigurationResult(reportError(VK_ERROR_FRAGMENTATION, "vkCreateDescriptorPool(): The requested descriptor storage of %d MB is larger than the maximum descriptor storage of %d MB per VkDescriptorPool.", (uint32_t)(metalArgBuffSize / MEBI), (uint32_t)(maxMTLBuffSize / MEBI)));
				metalArgBuffSize = maxMTLBuffSize;
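
The overheadPerDescSet expression above isolates the fixed per-set cost of a Metal argument buffer. If the storage size is modeled as f(b, t, s) = overhead + b*B + t*T + s*S, with B, T and S the hypothetical per-buffer, per-texture and per-sampler costs, the algebra works out as:

	// 2*f(1,1,1) - f(2,2,2)
	//   = 2*(overhead + B + T + S) - (overhead + 2B + 2T + 2S)
	//   = overhead
	// mtlBufferAlignment is then added to cover rounding of each set's buffer offset.
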
@ -902,7 +905,7 @@ void mvkUpdateDescriptorSets(uint32_t writeCount,
		MVKDescriptorSet* dstSet = (MVKDescriptorSet*)pDescWrite->dstSet;

		const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock = nullptr;
		if (dstSet->getDevice()->_enabledExtensions.vk_EXT_inline_uniform_block.enabled) {
		if (dstSet->getEnabledExtensions().vk_EXT_inline_uniform_block.enabled) {
			for (const auto* next = (VkBaseInStructure*)pDescWrite->pNext; next; next = next->pNext) {
				switch (next->sType) {
					case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT: {

@ -474,16 +474,10 @@ public:
	VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT; }

	/** Returns a pointer to the Vulkan instance. */
	MVKInstance* getInstance() override { return _physicalDevice->getInstance(); }

	/** Returns the physical device underlying this logical device. */
	MVKPhysicalDevice* getPhysicalDevice() { return _physicalDevice; }

	/** Returns info about the pixel formats supported by the physical device. */
	MVKPixelFormats* getPixelFormats() { return &_physicalDevice->_pixelFormats; }
	MVKInstance* getInstance() override { return _physicalDevice->_mvkInstance; }

	/** Returns the name of this device. */
	const char* getName() { return _pProperties->deviceName; }
	const char* getName() { return _physicalDevice->_properties.deviceName; }

	/** Returns the common resource factory for creating command resources. */
	MVKCommandResourceFactory* getCommandResourceFactory() { return _commandResourceFactory; }
@ -698,44 +692,6 @@ public:
	                          MVKCommandEncoder* cmdEncoder,
	                          MVKCommandUse cmdUse);

	/**
	 * If performance is being tracked, returns a monotonic timestamp value for use in performance timestamping.
	 * The returned value corresponds to the number of CPU "ticks" since the app was initialized.
	 *
	 * Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds()
	 * to determine the number of nanoseconds or milliseconds between the two calls.
	 */
	uint64_t getPerformanceTimestamp() { return _isPerformanceTracking ? mvkGetTimestamp() : 0; }

	/**
	 * If performance is being tracked, adds the performance for an activity with a duration interval
	 * between the start and end times, measured in milliseconds, to the given performance statistics.
	 *
	 * If endTime is zero or not supplied, the current time is used.
	 */
	void addPerformanceInterval(MVKPerformanceTracker& perfTracker,
	                            uint64_t startTime, uint64_t endTime = 0) {
		if (_isPerformanceTracking) {
			updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime));
		}
	};

	/**
	 * If performance is being tracked, adds the performance for an activity
	 * with a kilobyte count to the given performance statistics.
	 */
	void addPerformanceByteCount(MVKPerformanceTracker& perfTracker, uint64_t byteCount) {
		if (_isPerformanceTracking) {
			updateActivityPerformance(perfTracker, double(byteCount / KIBI));
		}
	};

	/** Updates the given performance statistic. */
	void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue);

	/** Populates the specified statistics structure from the current activity performance statistics. */
	void getPerformanceStatistics(MVKPerformanceStatistics* pPerf);

	/** Invalidates the memory regions. */
	VkResult invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges);

@ -748,15 +704,15 @@ public:
	/** Returns the number of views to be rendered in the given multiview pass. */
	uint32_t getViewCountInMetalPass(uint32_t viewMask, uint32_t passIdx) const;

	/** Populates the specified statistics structure from the current activity performance statistics. */
	void getPerformanceStatistics(MVKPerformanceStatistics* pPerf);

	/** Log all performance statistics. */
	void logPerformanceSummary();


#pragma mark Metal

	/** Returns the underlying Metal device. */
	id<MTLDevice> getMTLDevice() { return _physicalDevice->getMTLDevice(); }

	/** Returns whether this device is using Metal argument buffers. */
	bool isUsingMetalArgumentBuffers() { return _isUsingMetalArgumentBuffers; };

@ -837,37 +793,6 @@ public:
	void getMetalObjects(VkExportMetalObjectsInfoEXT* pMetalObjectsInfo);


#pragma mark Properties directly accessible

	/** The list of Vulkan extensions, indicating whether each has been enabled by the app for this device. */
	MVKExtensionList _enabledExtensions;

	/** Device features available and enabled. */
	VkPhysicalDeviceFeatures _enabledFeatures;

	// List of extended device feature enabling structures, as public member variables.
#define MVK_DEVICE_FEATURE(structName, enumName, flagCount) \
	VkPhysicalDevice##structName##Features _enabled##structName##Features;
#define MVK_DEVICE_FEATURE_EXTN(structName, enumName, extnSfx, flagCount) \
	VkPhysicalDevice##structName##Features##extnSfx _enabled##structName##Features;
#include "MVKDeviceFeatureStructs.def"

	/** VkPhysicalDeviceVulkan12Features entries that did not originate in a prior extension available and enabled. */
	MVKPhysicalDeviceVulkan12FeaturesNoExt _enabledVulkan12FeaturesNoExt;

	/** Pointer to the Metal-specific features of the underlying physical device. */
	const MVKPhysicalDeviceMetalFeatures* _pMetalFeatures;

	/** Pointer to the properties of the underlying physical device. */
	const VkPhysicalDeviceProperties* _pProperties;

	/** Pointer to the memory properties of the underlying physical device. */
	const VkPhysicalDeviceMemoryProperties* _pMemoryProperties;

	/** Performance statistics. */
	MVKPerformanceStatistics _performanceStatistics;


#pragma mark Construction

	/** Constructs an instance on the specified physical device. */
@ -905,6 +830,7 @@ protected:
	template<typename S> void enableFeatures(S* pEnabled, const S* pRequested, const S* pAvailable, uint32_t count);
	template<typename S> void enableFeatures(S* pRequested, VkBool32* pEnabledBools, const VkBool32* pRequestedBools, const VkBool32* pAvailableBools, uint32_t count);
	void enableExtensions(const VkDeviceCreateInfo* pCreateInfo);
	void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue);
	const char* getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats);
	MVKActivityPerformanceValueType getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats);
	void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats);
@ -915,6 +841,18 @@ protected:
	                  VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport);

	MVKPhysicalDevice* _physicalDevice = nullptr;
	MVKExtensionList _enabledExtensions;
	VkPhysicalDeviceFeatures _enabledFeatures;
	MVKPhysicalDeviceVulkan12FeaturesNoExt _enabledVulkan12FeaturesNoExt;

	// List of extended device feature enabling structures, as member variables.
#define MVK_DEVICE_FEATURE(structName, enumName, flagCount) \
	VkPhysicalDevice##structName##Features _enabled##structName##Features;
#define MVK_DEVICE_FEATURE_EXTN(structName, enumName, extnSfx, flagCount) \
	VkPhysicalDevice##structName##Features##extnSfx _enabled##structName##Features;
#include "MVKDeviceFeatureStructs.def"

	MVKPerformanceStatistics _performanceStats;
	MVKCommandResourceFactory* _commandResourceFactory = nullptr;
	MVKSmallVector<MVKSmallVector<MVKQueue*, kMVKQueueCountPerQueueFamily>, kMVKQueueFamilyCount> _queuesByQueueFamilyIndex;
	MVKSmallVector<MVKResource*> _resources;
@ -956,28 +894,75 @@ public:
	MVKDevice* getDevice() { return _device; }

	/** Returns the physical device underlying this logical device. */
	MVKPhysicalDevice* getPhysicalDevice() { return _device->getPhysicalDevice(); }
	MVKPhysicalDevice* getPhysicalDevice() { return _device->_physicalDevice; }

	/** Returns the underlying Metal device. */
	id<MTLDevice> getMTLDevice() { return _device->getMTLDevice(); }
	id<MTLDevice> getMTLDevice() { return _device->_physicalDevice->_mtlDevice; }

	/** Returns whether the GPU is a unified memory device. */
	bool isUnifiedMemoryGPU() { return getPhysicalDevice()->_hasUnifiedMemory; }
	bool isUnifiedMemoryGPU() { return _device->_physicalDevice->_hasUnifiedMemory; }

	/** Returns whether the GPU is Apple Silicon. */
	bool isAppleGPU() { return getPhysicalDevice()->_isAppleGPU; }
	bool isAppleGPU() { return _device->_physicalDevice->_isAppleGPU; }

	/** Returns info about the pixel formats supported by the physical device. */
	MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }
	MVKPixelFormats* getPixelFormats() { return &_device->_physicalDevice->_pixelFormats; }

	/** Returns whether this device is using Metal argument buffers. */
	bool isUsingMetalArgumentBuffers() { return _device->isUsingMetalArgumentBuffers(); };
	bool isUsingMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers; };

	/** Returns whether this device is using one Metal argument buffer for each descriptor set, on multiple pipeline and pipeline stages. */
	bool isUsingDescriptorSetMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && _device->_pMetalFeatures->descriptorSetArgumentBuffers; };
	bool isUsingDescriptorSetMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers && getMetalFeatures().descriptorSetArgumentBuffers; };

	/** Returns whether this device is using one Metal argument buffer for each descriptor set-pipeline-stage combination. */
	bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; };
	bool isUsingPipelineStageMetalArgumentBuffers() { return _device->_isUsingMetalArgumentBuffers && !getMetalFeatures().descriptorSetArgumentBuffers; };

	/** The list of Vulkan extensions, indicating whether each has been enabled by the app for this device. */
	MVKExtensionList& getEnabledExtensions() { return _device->_enabledExtensions; }

	/** Device features available and enabled. */
	VkPhysicalDeviceFeatures& getEnabledFeatures() { return _device->_enabledFeatures; }

	// List of extended device feature enabling structures, as getEnabledXXXFeatures() functions.
#define MVK_DEVICE_FEATURE(structName, enumName, flagCount) \
	VkPhysicalDevice##structName##Features& getEnabled##structName##Features() { return _device->_enabled##structName##Features; }
#define MVK_DEVICE_FEATURE_EXTN(structName, enumName, extnSfx, flagCount) \
	VkPhysicalDevice##structName##Features##extnSfx& getEnabled##structName##Features() { return _device->_enabled##structName##Features; }
#include "MVKDeviceFeatureStructs.def"

	/** Returns the Metal-specific features of the underlying physical device. */
	const MVKPhysicalDeviceMetalFeatures& getMetalFeatures() { return _device->_physicalDevice->_metalFeatures; }

	/** Returns the properties of the underlying physical device. */
	const VkPhysicalDeviceProperties& getDeviceProperties() { return _device->_physicalDevice->_properties; }

	/** Returns the memory properties of the underlying physical device. */
	const VkPhysicalDeviceMemoryProperties& getDeviceMemoryProperties() { return _device->_physicalDevice->_memoryProperties; }

	/** Performance statistics. */
	MVKPerformanceStatistics& getPerformanceStats() { return _device->_performanceStats; }

	/**
	 * If performance is being tracked, returns a monotonic timestamp value for use in performance timestamping.
	 * The returned value corresponds to the number of CPU "ticks" since the app was initialized.
	 *
	 * Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds()
	 * to determine the number of nanoseconds or milliseconds between the two calls.
	 */
	uint64_t getPerformanceTimestamp() { return _device->_isPerformanceTracking ? mvkGetTimestamp() : 0; }

	/**
	 * If performance is being tracked, adds the performance for an activity with a duration interval
	 * between the start and end times, measured in milliseconds, to the given performance statistics.
	 *
	 * If endTime is zero or not supplied, the current time is used.
	 * If addAlways is true, the duration is tracked even if performance tracking is disabled.
	 */
	void addPerformanceInterval(MVKPerformanceTracker& perfTracker, uint64_t startTime, uint64_t endTime = 0, bool addAlways = false) {
		if (_device->_isPerformanceTracking || addAlways) {
			_device->updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime));
		}
	};
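
A minimal usage sketch of these two mixin functions, following the pattern the factory functions earlier in this commit now use (the activity being measured here is hypothetical):

	uint64_t startTime = getPerformanceTimestamp();  // returns 0 when tracking is disabled
	compileSomeShader();                             // hypothetical activity being timed
	addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompile, startTime);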

	/** Constructs an instance for the specified device. */
	MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); }

@ -1353,7 +1353,7 @@ VkResult MVKPhysicalDevice::getSurfaceSupport(uint32_t queueFamilyIndex,
	// Check whether this is a headless device
	bool isHeadless = false;
#if MVK_MACOS
	isHeadless = getMTLDevice().isHeadless;
	isHeadless = _mtlDevice.isHeadless;
#endif

	// If this device is headless, the surface must be headless.
@ -3743,42 +3743,42 @@ void MVKDevice::getDescriptorVariableDescriptorCountLayoutSupport(const VkDescri
	if (bindIdx == varBindingIdx) {
		requestedCount = std::max(pBind->descriptorCount, 1u);
	} else {
		auto& mtlFeats = _physicalDevice->_metalFeatures;
		switch (pBind->descriptorType) {
			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
				mtlBuffCnt += pBind->descriptorCount;
				maxVarDescCount = _pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt;
				maxVarDescCount = mtlFeats.maxPerStageBufferCount - mtlBuffCnt;
				break;
			case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
				maxVarDescCount = (uint32_t)min<VkDeviceSize>(_pMetalFeatures->maxMTLBufferSize, numeric_limits<uint32_t>::max());
				maxVarDescCount = (uint32_t)min<VkDeviceSize>(mtlFeats.maxMTLBufferSize, numeric_limits<uint32_t>::max());
				break;
			case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
			case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
			case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
				mtlTexCnt += pBind->descriptorCount;
				maxVarDescCount = _pMetalFeatures->maxPerStageTextureCount - mtlTexCnt;
				maxVarDescCount = mtlFeats.maxPerStageTextureCount - mtlTexCnt;
				break;
			case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
			case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
				mtlTexCnt += pBind->descriptorCount;

				if (getPhysicalDevice()->useNativeTextureAtomics())
				if (_physicalDevice->useNativeTextureAtomics()) {
					mtlBuffCnt += pBind->descriptorCount;

				maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt,
				                      _pMetalFeatures->maxPerStageBufferCount - mtlBuffCnt);
				}
				maxVarDescCount = min(mtlFeats.maxPerStageTextureCount - mtlTexCnt,
				                      mtlFeats.maxPerStageBufferCount - mtlBuffCnt);
				break;
			case VK_DESCRIPTOR_TYPE_SAMPLER:
				mtlSampCnt += pBind->descriptorCount;
				maxVarDescCount = _pMetalFeatures->maxPerStageSamplerCount - mtlSampCnt;
				maxVarDescCount = mtlFeats.maxPerStageSamplerCount - mtlSampCnt;
				break;
			case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
				mtlTexCnt += pBind->descriptorCount;
				mtlSampCnt += pBind->descriptorCount;
				maxVarDescCount = min(_pMetalFeatures->maxPerStageTextureCount - mtlTexCnt,
				                      _pMetalFeatures->maxPerStageSamplerCount - mtlSampCnt);
				maxVarDescCount = min(mtlFeats.maxPerStageTextureCount - mtlTexCnt,
				                      mtlFeats.maxPerStageSamplerCount - mtlSampCnt);
				break;
			default:
				break;
@ -3838,7 +3838,7 @@ void MVKDevice::getCalibratedTimestamps(uint32_t timestampCount,
	uint64_t cpuStart, cpuEnd;

	cpuStart = mvkGetContinuousNanoseconds();
	[getMTLDevice() sampleTimestamps: &cpuStamp gpuTimestamp: &gpuStamp];
	[_physicalDevice->_mtlDevice sampleTimestamps: &cpuStamp gpuTimestamp: &gpuStamp];
	// Sample again to calculate the maximum deviation. Note that the
	// -[MTLDevice sampleTimestamps:gpuTimestamp:] method guarantees that CPU
	// timestamps are in nanoseconds. We don't want to call the method again,
@ -3887,8 +3887,9 @@ uint32_t MVKDevice::getVulkanMemoryTypeIndex(MTLStorageMode mtlStorageMode) {
			break;
	}

	for (uint32_t mtIdx = 0; mtIdx < _pMemoryProperties->memoryTypeCount; mtIdx++) {
		if (_pMemoryProperties->memoryTypes[mtIdx].propertyFlags == vkMemFlags) { return mtIdx; }
	auto& memProps = _physicalDevice->_memoryProperties;
	for (uint32_t mtIdx = 0; mtIdx < memProps.memoryTypeCount; mtIdx++) {
		if (memProps.memoryTypes[mtIdx].propertyFlags == vkMemFlags) { return mtIdx; }
	}
	MVKAssert(false, "Could not find memory type corresponding to VkMemoryPropertyFlags %d", vkMemFlags);
	return 0;
@ -4010,7 +4011,7 @@ MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInf
	}

	if (pTypeCreateInfo && pTypeCreateInfo->semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE) {
		if (_pMetalFeatures->events) {
		if (_physicalDevice->_metalFeatures.events) {
			return new MVKTimelineSemaphoreMTLEvent(this, pCreateInfo, pTypeCreateInfo, pExportInfo, pImportInfo);
		} else {
			return new MVKTimelineSemaphoreEmulated(this, pCreateInfo, pTypeCreateInfo, pExportInfo, pImportInfo);
@ -4055,7 +4056,7 @@ MVKEvent* MVKDevice::createEvent(const VkEventCreateInfo* pCreateInfo,
		}
	}

	if (_pMetalFeatures->events) {
	if (_physicalDevice->_metalFeatures.events) {
		return new MVKEventNative(this, pCreateInfo, pExportInfo, pImportInfo);
	} else {
		return new MVKEventEmulated(this, pCreateInfo, pExportInfo, pImportInfo);
@ -4432,15 +4433,15 @@ void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, doubl
	activity.average = total / activity.count;

	if (_isPerformanceTracking && getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) {
		logActivityInline(activity, _performanceStatistics);
		logActivityInline(activity, _performanceStats);
	}
}

void MVKDevice::logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) {
	if (getActivityPerformanceValueType(activity, _performanceStatistics) == MVKActivityPerformanceValueTypeByteCount) {
		logActivityByteCount(activity, _performanceStatistics, true);
	if (getActivityPerformanceValueType(activity, _performanceStats) == MVKActivityPerformanceValueTypeByteCount) {
		logActivityByteCount(activity, _performanceStats, true);
	} else {
		logActivityDuration(activity, _performanceStatistics, true);
		logActivityDuration(activity, _performanceStats, true);
	}
}
void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) {
@ -4538,11 +4539,12 @@ MVKActivityPerformanceValueType MVKDevice::getActivityPerformanceValueType(MVKPe
}

void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) {
	addPerformanceByteCount(_performanceStatistics.device.gpuMemoryAllocated,
	                        _physicalDevice->getCurrentAllocatedSize());

	if (_isPerformanceTracking) {
		updateActivityPerformance(_performanceStats.device.gpuMemoryAllocated,
		                          double(_physicalDevice->getCurrentAllocatedSize() / KIBI));
	}
	lock_guard<mutex> lock(_perfLock);
	if (pPerf) { *pPerf = _performanceStatistics; }
	if (pPerf) { *pPerf = _performanceStats; }
}

VkResult MVKDevice::invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges) {
@ -4619,13 +4621,13 @@ uint32_t MVKDevice::getViewCountInMetalPass(uint32_t viewMask, uint32_t passIdx)
#pragma mark Metal

uint32_t MVKDevice::getMetalBufferIndexForVertexAttributeBinding(uint32_t binding) {
	return ((_pMetalFeatures->maxPerStageBufferCount - 1) - binding);
	return ((_physicalDevice->_metalFeatures.maxPerStageBufferCount - 1) - binding);
}
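
This function maps Vulkan vertex-attribute bindings onto Metal buffer indices from the top of the per-stage range downwards, which leaves the lower indices free for other buffer bindings. A worked sketch, assuming a hypothetical maxPerStageBufferCount of 31:

	// binding 0 -> Metal buffer index 30
	// binding 1 -> Metal buffer index 29
	uint32_t mtlBuffIdx = (31 - 1) - binding;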

VkDeviceSize MVKDevice::getVkFormatTexelBufferAlignment(VkFormat format, MVKBaseObject* mvkObj) {
	VkDeviceSize deviceAlignment = 0;
	id<MTLDevice> mtlDev = getMTLDevice();
	MVKPixelFormats* mvkPixFmts = getPixelFormats();
	id<MTLDevice> mtlDev = _physicalDevice->_mtlDevice;
	MVKPixelFormats* mvkPixFmts = &_physicalDevice->_pixelFormats;
	if ([mtlDev respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]) {
		MTLPixelFormat mtlPixFmt = mvkPixFmts->getMTLPixelFormat(format);
		if (mvkPixFmts->getChromaSubsamplingPlaneCount(format) >= 2) {
@ -4635,7 +4637,7 @@ VkDeviceSize MVKDevice::getVkFormatTexelBufferAlignment(VkFormat format, MVKBase
		}
		deviceAlignment = [mtlDev minimumLinearTextureAlignmentForPixelFormat: mtlPixFmt];
	}
	return deviceAlignment ? deviceAlignment : _pProperties->limits.minTexelBufferOffsetAlignment;
	return deviceAlignment ? deviceAlignment : _physicalDevice->_properties.limits.minTexelBufferOffsetAlignment;
}

id<MTLBuffer> MVKDevice::getGlobalVisibilityResultMTLBuffer() {
@ -4649,7 +4651,7 @@ uint32_t MVKDevice::expandVisibilityResultMTLBuffer(uint32_t queryCount) {
	// Ensure we don't overflow the maximum number of queries
	_globalVisibilityQueryCount += queryCount;
	VkDeviceSize reqBuffLen = (VkDeviceSize)_globalVisibilityQueryCount * kMVKQuerySlotSizeInBytes;
	VkDeviceSize maxBuffLen = _pMetalFeatures->maxQueryBufferSize;
	VkDeviceSize maxBuffLen = _physicalDevice->_metalFeatures.maxQueryBufferSize;
	VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen);
	_globalVisibilityQueryCount = uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes);

@ -4657,10 +4659,10 @@ uint32_t MVKDevice::expandVisibilityResultMTLBuffer(uint32_t queryCount) {
		reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): A maximum of %d total queries are available on this device in its current configuration. See the API notes for the MVKConfiguration.supportLargeQueryPools configuration parameter for more info.", _globalVisibilityQueryCount);
	}

	NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _pMetalFeatures->mtlBufferAlignment);
	NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _physicalDevice->_metalFeatures.mtlBufferAlignment);
	MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
	[_globalVisibilityResultMTLBuffer release];
	_globalVisibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained
	_globalVisibilityResultMTLBuffer = [_physicalDevice->_mtlDevice newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained

	return _globalVisibilityQueryCount - queryCount; // Might be lower than requested if an overflow occurred
}
@ -4674,7 +4676,7 @@ id<MTLSamplerState> MVKDevice::getDefaultMTLSamplerState() {
		@autoreleasepool {
			MTLSamplerDescriptor* mtlSampDesc = [[MTLSamplerDescriptor new] autorelease];
			mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers();
			_defaultMTLSamplerState = [getMTLDevice() newSamplerStateWithDescriptor: mtlSampDesc]; // retained
			_defaultMTLSamplerState = [_physicalDevice->_mtlDevice newSamplerStateWithDescriptor: mtlSampDesc]; // retained
		}
	}
}
@ -4688,7 +4690,7 @@ id<MTLBuffer> MVKDevice::getDummyBlitMTLBuffer() {
	lock_guard<mutex> lock(_rezLock);
	if ( !_dummyBlitMTLBuffer ) {
		@autoreleasepool {
			_dummyBlitMTLBuffer = [getMTLDevice() newBufferWithLength: 1 options: MTLResourceStorageModePrivate];
			_dummyBlitMTLBuffer = [_physicalDevice->_mtlDevice newBufferWithLength: 1 options: MTLResourceStorageModePrivate];
		}
	}
}
@ -4697,7 +4699,7 @@ id<MTLBuffer> MVKDevice::getDummyBlitMTLBuffer() {

MTLCompileOptions* MVKDevice::getMTLCompileOptions(bool requestFastMath, bool preserveInvariance) {
	MTLCompileOptions* mtlCompOpt = [MTLCompileOptions new];
	mtlCompOpt.languageVersion = _pMetalFeatures->mslVersionEnum;
	mtlCompOpt.languageVersion = _physicalDevice->_metalFeatures.mslVersionEnum;
	mtlCompOpt.fastMathEnabled = (getMVKConfig().fastMathEnabled == MVK_CONFIG_FAST_MATH_ALWAYS ||
	                              (getMVKConfig().fastMathEnabled == MVK_CONFIG_FAST_MATH_ON_DEMAND && requestFastMath));
#if MVK_XCODE_12
@ -4781,7 +4783,7 @@ void MVKDevice::getMetalObjects(VkExportMetalObjectsInfoEXT* pMetalObjectsInfo)
		switch (next->sType) {
			case VK_STRUCTURE_TYPE_EXPORT_METAL_DEVICE_INFO_EXT: {
				auto* pDvcInfo = (VkExportMetalDeviceInfoEXT*)next;
				pDvcInfo->mtlDevice = getMTLDevice();
				pDvcInfo->mtlDevice = _physicalDevice->_mtlDevice;
				break;
			}
			case VK_STRUCTURE_TYPE_EXPORT_METAL_COMMAND_QUEUE_INFO_EXT: {
@ -4859,8 +4861,9 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
	// In a multi-GPU system, if we are using the high-power GPU and want the window system
	// to also use that GPU to avoid copying content between GPUs, force the window system
	// to use the high-power GPU by calling the MTLCreateSystemDefaultDevice() function.
	id<MTLDevice> mtlDev = _physicalDevice->_mtlDevice;
	if (_enabledExtensions.vk_KHR_swapchain.enabled && getMVKConfig().switchSystemGPU &&
		!(_physicalDevice->_mtlDevice.isLowPower || _physicalDevice->_mtlDevice.isHeadless) ) {
		!(mtlDev.isLowPower || mtlDev.isHeadless) ) {
		MTLCreateSystemDefaultDevice();
	}
#endif
@ -4875,18 +4878,18 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo

	_commandResourceFactory = new MVKCommandResourceFactory(this);

	startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice());
	startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, _physicalDevice->_mtlDevice);

	MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s",
	           getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t\t", true).c_str());
}

// Perf stats that last the duration of the app process.
static MVKPerformanceStatistics _processPerformanceStatistics = {};
static MVKPerformanceStatistics _processPerformanceStats = {};

void MVKDevice::initPerformanceTracking() {
	_isPerformanceTracking = getMVKConfig().performanceTracking;
	_performanceStatistics = _processPerformanceStatistics;
	_performanceStats = _processPerformanceStats;
}

void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo) {
@ -4910,10 +4913,6 @@ void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDe
	else
		_physicalDevice = physicalDevice;

	_pMetalFeatures = _physicalDevice->getMetalFeatures();
	_pProperties = &_physicalDevice->_properties;
	_pMemoryProperties = &_physicalDevice->_memoryProperties;

	switch (_physicalDevice->_vkSemaphoreStyle) {
		case MVKSemaphoreStyleUseMTLEvent:
			MVKLogInfo("Vulkan semaphores using MTLEvent.");
@ -5191,7 +5190,7 @@ MVKDevice::~MVKDevice() {
	} else if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE) {
		MVKLogInfo("Process activity performance summary:");
		logPerformanceSummary();
		_processPerformanceStatistics = _performanceStatistics;
		_processPerformanceStats = _performanceStats;
	}
}
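
Taken together, the initPerformanceTracking() and ~MVKDevice() hunks above let statistics accumulate across successive VkDevice lifetimes within one app process: each new device seeds _performanceStats from the process-static copy, and writes it back at destruction when the MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE logging style is active.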

@ -139,7 +139,7 @@ VkResult MVKDeviceMemory::addBuffer(MVKBuffer* mvkBuff) {
	}

	if (!ensureMTLBuffer() ) {
		return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind a VkBuffer to a VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a VkDeviceMemory that supports a VkBuffer is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize);
		return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind a VkBuffer to a VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a VkDeviceMemory that supports a VkBuffer is %llu bytes.", _allocationSize, getMetalFeatures().maxMTLBufferSize);
	}

	// In the dedicated case, we already saved the buffer we're going to use.
@ -183,7 +183,7 @@ bool MVKDeviceMemory::ensureMTLHeap() {
	if (_isHostMemImported) { return true; }

	// Don't bother if we don't have placement heaps.
	if (!getDevice()->_pMetalFeatures->placementHeaps) { return true; }
	if (!getMetalFeatures().placementHeaps) { return true; }

	// Can't create MTLHeaps of zero size.
	if (_allocationSize == 0) { return true; }
@ -206,7 +206,7 @@ bool MVKDeviceMemory::ensureMTLHeap() {
	// to untracked, since Vulkan uses explicit barriers anyway.
	heapDesc.hazardTrackingMode = MTLHazardTrackingModeTracked;
	heapDesc.size = _allocationSize;
	_mtlHeap = [_device->getMTLDevice() newHeapWithDescriptor: heapDesc]; // retained
	_mtlHeap = [getMTLDevice() newHeapWithDescriptor: heapDesc]; // retained
	[heapDesc release];
	if (!_mtlHeap) { return false; }

@ -221,9 +221,9 @@ bool MVKDeviceMemory::ensureMTLBuffer() {

	if (_mtlBuffer) { return true; }

	NSUInteger memLen = mvkAlignByteCount(_allocationSize, _device->_pMetalFeatures->mtlBufferAlignment);
	NSUInteger memLen = mvkAlignByteCount(_allocationSize, getMetalFeatures().mtlBufferAlignment);

	if (memLen > _device->_pMetalFeatures->maxMTLBufferSize) { return false; }
	if (memLen > getMetalFeatures().maxMTLBufferSize) { return false; }

	// If host memory was already allocated, it is copied into the new MTLBuffer, and then released.
	if (_mtlHeap) {
@ -258,7 +258,7 @@ bool MVKDeviceMemory::ensureHostMemory() {
	if (_pMemory) { return true; }

	if ( !_pHostMemory) {
		size_t memAlign = _device->_pMetalFeatures->mtlBufferAlignment;
		size_t memAlign = getMetalFeatures().mtlBufferAlignment;
		NSUInteger memLen = mvkAlignByteCount(_allocationSize, memAlign);
		int err = posix_memalign(&_pHostMemory, memAlign, memLen);
		if (err) { return false; }
@ -284,7 +284,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device,
                                 const VkAllocationCallbacks* pAllocator) : MVKVulkanAPIDeviceObject(device) {
	// Set Metal memory parameters
	_vkMemAllocFlags = 0;
	_vkMemPropFlags = _device->_pMemoryProperties->memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;
	_vkMemPropFlags = getDeviceMemoryProperties().memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags;
	_mtlStorageMode = getPhysicalDevice()->getMTLStorageModeFromVkMemoryPropertyFlags(_vkMemPropFlags);
	_mtlCPUCacheMode = mvkMTLCPUCacheModeFromVkMemoryPropertyFlags(_vkMemPropFlags);

@ -359,13 +359,13 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device,
			if (!((MVKImage*)dedicatedImage)->_isLinear) {
				setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Host-coherent VkDeviceMemory objects cannot be associated with optimal-tiling images."));
			} else {
				if (!_device->_pMetalFeatures->sharedLinearTextures) {
				if (!getMetalFeatures().sharedLinearTextures) {
					// Need to use the managed mode for images.
					_mtlStorageMode = MTLStorageModeManaged;
				}
				// Nonetheless, we need a buffer to be able to map the memory at will.
				if (!ensureMTLBuffer() ) {
					setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize));
					setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, getMetalFeatures().maxMTLBufferSize));
				}
			}
		}
@ -392,7 +392,7 @@ MVKDeviceMemory::MVKDeviceMemory(MVKDevice* device,
	// If memory was imported, a MTLBuffer must be created on it.
	// Or if a MTLBuffer will be exported, ensure it exists.
	if ((isMemoryHostCoherent() || _isHostMemImported || willExportMTLBuffer) && !ensureMTLBuffer() ) {
		setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent or exportable VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize));
		setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkAllocateMemory(): Could not allocate a host-coherent or exportable VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, getMetalFeatures().maxMTLBufferSize));
	}
}

@ -37,7 +37,7 @@ id<MTLTexture> MVKFramebuffer::getDummyAttachmentMTLTexture(MVKRenderSubpass* su
	MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: MTLPixelFormatR8Unorm width: fbExtent.width height: fbExtent.height mipmapped: NO];
	if (subpass->isMultiview()) {
#if MVK_MACOS_OR_IOS
		if (sampleCount > 1 && getDevice()->_pMetalFeatures->multisampleLayeredRendering) {
		if (sampleCount > 1 && getMetalFeatures().multisampleLayeredRendering) {
			mtlTexDesc.textureType = MTLTextureType2DMultisampleArray;
			mtlTexDesc.sampleCount = sampleCount;
		} else {
@ -49,7 +49,7 @@ id<MTLTexture> MVKFramebuffer::getDummyAttachmentMTLTexture(MVKRenderSubpass* su
		mtlTexDesc.arrayLength = subpass->getViewCountInMetalPass(passIdx);
	} else if (fbLayerCount > 1) {
#if MVK_MACOS
		if (sampleCount > 1 && getDevice()->_pMetalFeatures->multisampleLayeredRendering) {
		if (sampleCount > 1 && getMetalFeatures().multisampleLayeredRendering) {
			mtlTexDesc.textureType = MTLTextureType2DMultisampleArray;
			mtlTexDesc.sampleCount = sampleCount;
		} else {
|
||||
|
@ -146,7 +146,7 @@ void MVKImagePlane::initSubresources(const VkImageCreateInfo* pCreateInfo) {
|
||||
|
||||
VkDeviceSize offset = 0;
|
||||
if (_planeIndex > 0 && _image->getMemoryBindingCount() == 1) {
|
||||
if (!_image->_isLinear && !_image->_isLinearForAtomics && _image->getDevice()->_pMetalFeatures->placementHeaps) {
|
||||
if (!_image->_isLinear && !_image->_isLinearForAtomics && _image->getMetalFeatures().placementHeaps) {
|
||||
// For textures allocated directly on the heap, we need to obey the size and alignment
|
||||
// requirements reported by the device.
|
||||
MTLTextureDescriptor* mtlTexDesc = _image->_planes[_planeIndex-1]->newMTLTextureDescriptor(); // temp retain
|
||||
@ -387,16 +387,17 @@ VkResult MVKImageMemoryBinding::getMemoryRequirements(VkMemoryRequirements* pMem
|
||||
}
|
||||
|
||||
VkResult MVKImageMemoryBinding::getMemoryRequirements(const void*, VkMemoryRequirements2* pMemoryRequirements) {
|
||||
auto& mtlFeats = getMetalFeatures();
|
||||
pMemoryRequirements->sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
|
||||
for (auto* next = (VkBaseOutStructure*)pMemoryRequirements->pNext; next; next = next->pNext) {
|
||||
switch (next->sType) {
|
||||
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
|
||||
auto* dedicatedReqs = (VkMemoryDedicatedRequirements*)next;
|
||||
bool writable = mvkIsAnyFlagEnabled(_image->getCombinedUsage(), VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
|
||||
bool canUseTexelBuffer = _device->_pMetalFeatures->texelBuffers && _image->_isLinear && !_image->getIsCompressed();
|
||||
bool canUseTexelBuffer = mtlFeats.texelBuffers && _image->_isLinear && !_image->getIsCompressed();
|
||||
dedicatedReqs->requiresDedicatedAllocation = _requiresDedicatedMemoryAllocation;
|
||||
dedicatedReqs->prefersDedicatedAllocation = (dedicatedReqs->requiresDedicatedAllocation ||
|
||||
(!canUseTexelBuffer && (writable || !_device->_pMetalFeatures->placementHeaps)));
|
||||
(!canUseTexelBuffer && (writable || !mtlFeats.placementHeaps)));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -411,13 +412,14 @@ VkResult MVKImageMemoryBinding::bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDevi
|
||||
if (_deviceMemory) { _deviceMemory->removeImageMemoryBinding(this); }
|
||||
MVKResource::bindDeviceMemory(mvkMem, memOffset);
|
||||
|
||||
bool usesTexelBuffer = _device->_pMetalFeatures->texelBuffers && _deviceMemory; // Texel buffers available
|
||||
usesTexelBuffer = usesTexelBuffer && (isMemoryHostAccessible() || _device->_pMetalFeatures->placementHeaps) && _image->_isLinear && !_image->getIsCompressed(); // Applicable memory layout
|
||||
auto& mtlFeats = getMetalFeatures();
|
||||
bool usesTexelBuffer = mtlFeats.texelBuffers && _deviceMemory; // Texel buffers available
|
||||
usesTexelBuffer = usesTexelBuffer && (isMemoryHostAccessible() || mtlFeats.placementHeaps) && _image->_isLinear && !_image->getIsCompressed(); // Applicable memory layout
|
||||
|
||||
// macOS before 10.15.5 cannot use shared memory for texel buffers.
|
||||
usesTexelBuffer = usesTexelBuffer && (_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent());
|
||||
usesTexelBuffer = usesTexelBuffer && (mtlFeats.sharedLinearTextures || !isMemoryHostCoherent());
|
||||
|
||||
if (_image->_isLinearForAtomics || (usesTexelBuffer && _device->_pMetalFeatures->placementHeaps)) {
|
||||
if (_image->_isLinearForAtomics || (usesTexelBuffer && mtlFeats.placementHeaps)) {
|
||||
if (usesTexelBuffer && _deviceMemory->ensureMTLBuffer()) {
|
||||
_mtlTexelBuffer = _deviceMemory->_mtlBuffer;
|
||||
_mtlTexelBufferOffset = getDeviceMemoryOffset();
@ -471,7 +473,7 @@ bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) {
#if MVK_MACOS
return ( !isUnifiedMemoryGPU() && (barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
isMemoryHostAccessible() && (!_device->_pMetalFeatures->sharedLinearTextures || !isMemoryHostCoherent()));
isMemoryHostAccessible() && (!getMetalFeatures().sharedLinearTextures || !isMemoryHostCoherent()));
#else
return false;
#endif
@ -953,7 +955,7 @@ VkResult MVKImage::setMTLTexture(uint8_t planeIndex, id<MTLTexture> mtlTexture)
_usage = getPixelFormats()->getVkImageUsageFlags(mtlTexture.usage, mtlTexture.pixelFormat);
_stencilUsage = _usage;

if (_device->_pMetalFeatures->ioSurfaces) {
if (getMetalFeatures().ioSurfaces) {
_ioSurface = mtlTexture.iosurface;
if (_ioSurface) { CFRetain(_ioSurface); }
}
@ -976,7 +978,7 @@ VkResult MVKImage::useIOSurface(IOSurfaceRef ioSurface) {
// Don't recreate existing. But special case of incoming nil if already nil means create a new IOSurface.
if (ioSurface && _ioSurface == ioSurface) { return VK_SUCCESS; }

if (!_device->_pMetalFeatures->ioSurfaces) { return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkUseIOSurfaceMVK() : IOSurfaces are not supported on this platform."); }
if (!getMetalFeatures().ioSurfaces) { return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkUseIOSurfaceMVK() : IOSurfaces are not supported on this platform."); }

#if MVK_SUPPORT_IOSURFACE_BOOL

@ -1053,7 +1055,7 @@ MTLStorageMode MVKImage::getMTLStorageMode() {
#if MVK_MACOS
// For macOS prior to 10.15.5, textures cannot use Shared storage mode, so change to Managed storage mode.
// All Apple GPUs support shared linear textures, so this only applies to other GPUs.
if (stgMode == MTLStorageModeShared && !_device->_pMetalFeatures->sharedLinearTextures) {
if (stgMode == MTLStorageModeShared && !getMetalFeatures().sharedLinearTextures) {
stgMode = MTLStorageModeManaged;
}
#endif
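
As a stand-alone restatement of the fallback above (the real code operates on Metal's MTLStorageMode; the local enum here is only for illustration):

enum class StorageMode { Shared, Managed, Private };

StorageMode pickLinearTextureStorageMode(StorageMode requested, bool sharedLinearTextures) {
	// Shared linear textures arrived with macOS 10.15.5; on older macOS and on
	// GPUs without shared linear texture support, a Shared request is downgraded to Managed.
	if (requested == StorageMode::Shared && !sharedLinearTextures) {
		return StorageMode::Managed;
	}
	return requested;
}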
@ -1144,6 +1146,7 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV
_mipLevels = validateMipLevels(pCreateInfo, isAttachment);
_isLinear = validateLinear(pCreateInfo, isAttachment);

auto& mtlFeats = getMetalFeatures();
MVKPixelFormats* pixFmts = getPixelFormats();
_vkFormat = pCreateInfo->format;
_isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
@ -1159,10 +1162,10 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV

_isLinearForAtomics = _shouldSupportAtomics && !getPhysicalDevice()->useNativeTextureAtomics() && _arrayLayers == 1 && getImageType() == VK_IMAGE_TYPE_2D;

_is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !_device->_pMetalFeatures->native3DCompressedTextures;
_is3DCompressed = (getImageType() == VK_IMAGE_TYPE_3D) && (pixFmts->getFormatType(pCreateInfo->format) == kMVKFormatCompressed) && !mtlFeats.native3DCompressedTextures;
_isDepthStencilAttachment = (mvkAreAllFlagsEnabled(pCreateInfo->usage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ||
mvkAreAllFlagsEnabled(pixFmts->getVkFormatProperties3(pCreateInfo->format).optimalTilingFeatures, VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT));
_canSupportMTLTextureView = !_isDepthStencilAttachment || _device->_pMetalFeatures->stencilViews;
_canSupportMTLTextureView = !_isDepthStencilAttachment || mtlFeats.stencilViews;
_rowByteAlignment = _isLinear || _isLinearForAtomics ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(pixFmts->getBytesPerBlock(pCreateInfo->format));

VkExtent2D blockTexelSizeOfPlane[3];
@ -1188,21 +1191,21 @@ MVKImage::MVKImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo) : MV
}
_planes[planeIndex]->initSubresources(pCreateInfo);
MVKImageMemoryBinding* memoryBinding = _planes[planeIndex]->getMemoryBinding();
if (!_isLinear && !_isLinearForAtomics && _device->_pMetalFeatures->placementHeaps) {
if (!_isLinear && !_isLinearForAtomics && mtlFeats.placementHeaps) {
MTLTextureDescriptor* mtlTexDesc = _planes[planeIndex]->newMTLTextureDescriptor(); // temp retain
MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapTextureSizeAndAlignWithDescriptor: mtlTexDesc];
MTLSizeAndAlign sizeAndAlign = [getMTLDevice() heapTextureSizeAndAlignWithDescriptor: mtlTexDesc];
[mtlTexDesc release];
// Textures allocated on heaps must be aligned to the alignment reported here,
// so make sure there's enough space to hold all the planes after alignment.
memoryBinding->_byteCount = mvkAlignByteRef(memoryBinding->_byteCount, sizeAndAlign.align) + sizeAndAlign.size;
memoryBinding->_byteAlignment = std::max(memoryBinding->_byteAlignment, (VkDeviceSize)sizeAndAlign.align);
} else if (_isLinearForAtomics && _device->_pMetalFeatures->placementHeaps) {
} else if (_isLinearForAtomics && mtlFeats.placementHeaps) {
NSUInteger bufferLength = 0;
for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
VkExtent3D mipExtent = getExtent3D(planeIndex, mipLvl);
bufferLength += getBytesPerLayer(planeIndex, mipLvl) * mipExtent.depth * _arrayLayers;
}
MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: bufferLength options: MTLResourceStorageModePrivate];
MTLSizeAndAlign sizeAndAlign = [getMTLDevice() heapBufferSizeAndAlignWithLength: bufferLength options: MTLResourceStorageModePrivate];
memoryBinding->_byteCount += sizeAndAlign.size;
memoryBinding->_byteAlignment = std::max(std::max(memoryBinding->_byteAlignment, _rowByteAlignment), (VkDeviceSize)sizeAndAlign.align);
} else {
@ -1269,7 +1272,7 @@ VkSampleCountFlagBits MVKImage::validateSamples(const VkImageCreateInfo* pCreate
validSamples = VK_SAMPLE_COUNT_1_BIT;
}

if (pCreateInfo->arrayLayers > 1 && !_device->_pMetalFeatures->multisampleArrayTextures ) {
if (pCreateInfo->arrayLayers > 1 && !getMetalFeatures().multisampleArrayTextures ) {
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : This device does not support multisampled array textures. Setting sample count to 1."));
validSamples = VK_SAMPLE_COUNT_1_BIT;
}
@ -1513,9 +1516,9 @@ id<CAMetalDrawable> MVKPresentableSwapchainImage::getCAMetalDrawable() {
bool hasInvalidFormat = false;
uint32_t attemptCnt = _swapchain->getImageCount(); // Attempt a reasonable number of times
for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) {
uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
_mtlDrawable = [_swapchain->getCAMetalLayer().nextDrawable retain]; // retained
_device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime);
addPerformanceInterval(getPerformanceStats().queue.retrieveCAMetalDrawable, startTime);
hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat;
if (hasInvalidFormat) { releaseMetalDrawable(); }
}
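
The timestamp/interval pairing that recurs throughout this diff can be sketched as follows. The types and clock choice here are assumed stand-ins; the real helpers live on MVKDeviceTrackingMixin and feed MVKPerformanceStatistics:

#include <chrono>
#include <cstdint>

struct PerformanceTracker { uint64_t count = 0; double totalNs = 0; };

uint64_t getPerformanceTimestamp() {
	using namespace std::chrono;
	return (uint64_t)duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()).count();
}

void addPerformanceInterval(PerformanceTracker& tracker, uint64_t startTime) {
	tracker.totalNs += (double)(getPerformanceTimestamp() - startTime);
	tracker.count++;
}

// Usage mirrors the hunk above: stamp before the operation, accumulate after it.
// uint64_t start = getPerformanceTimestamp();
// ... retrieve the drawable ...
// addPerformanceInterval(stats.queue.retrieveCAMetalDrawable, start);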
@ -1629,7 +1632,7 @@ void MVKPresentableSwapchainImage::addPresentedHandler(id<CAMetalDrawable> mtlDr
void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) {
retain();
_swapchain->beginPresentation(presentInfo);
_presentationStartTime = getDevice()->getPerformanceTimestamp();
_presentationStartTime = getPerformanceTimestamp();
}

void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo,
@ -1646,7 +1649,7 @@ void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& pr
// If I have become detached from the swapchain, it means the swapchain, and possibly the
// VkDevice, have been destroyed by the time of this callback, so do not reference them.
lock_guard<mutex> lock(_detachmentLock);
if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); }
if (_device) { addPerformanceInterval(getPerformanceStats().queue.presentSwapchains, _presentationStartTime); }
if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); }
}

@ -2124,7 +2127,7 @@ VkResult MVKImageViewPlane::initSwizzledMTLPixelFormat(const VkImageViewCreateIn

// Enable either native or shader swizzling, depending on what is available, preferring native, and return whether successful.
bool MVKImageViewPlane::enableSwizzling() {
_useNativeSwizzle = _device->_pMetalFeatures->nativeTextureSwizzle;
_useNativeSwizzle = getMetalFeatures().nativeTextureSwizzle;
_useShaderSwizzle = !_useNativeSwizzle && getMVKConfig().fullImageViewSwizzle;
return _useNativeSwizzle || _useShaderSwizzle;
}
@ -2261,15 +2264,16 @@ MVKImageView::MVKImageView(MVKDevice* device, const VkImageViewCreateInfo* pCrea
_subresourceRange.layerCount = _image->getLayerCount() - _subresourceRange.baseArrayLayer;
}

auto& mtlFeats = getMetalFeatures();
bool isAttachment = mvkIsAnyFlagEnabled(_usage, (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT));
if (isAttachment && _subresourceRange.layerCount > 1) {
if ( !_device->_pMetalFeatures->layeredRendering ) {
if ( !mtlFeats.layeredRendering ) {
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImageView() : This device does not support rendering to array (layered) attachments."));
}
if (_image->getSampleCount() != VK_SAMPLE_COUNT_1_BIT && !_device->_pMetalFeatures->multisampleLayeredRendering ) {
if (_image->getSampleCount() != VK_SAMPLE_COUNT_1_BIT && !mtlFeats.multisampleLayeredRendering ) {
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImageView() : This device does not support rendering to multisampled array (layered) attachments."));
}
}
@ -2414,8 +2418,9 @@ bool MVKSampler::getConstexprSampler(mvk::MSLResourceBinding& resourceBinding) {

// Ensure available Metal features.
MTLSamplerAddressMode MVKSampler::getMTLSamplerAddressMode(VkSamplerAddressMode vkMode) {
if ((vkMode == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER && !_device->_pMetalFeatures->samplerClampToBorder) ||
(vkMode == VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE && !_device->_pMetalFeatures->samplerMirrorClampToEdge)) {
auto& mtlFeats = getMetalFeatures();
if ((vkMode == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER && !mtlFeats.samplerClampToBorder) ||
(vkMode == VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE && !mtlFeats.samplerMirrorClampToEdge)) {
return MTLSamplerAddressModeClampToZero;
}
return mvkMTLSamplerAddressModeFromVkSamplerAddressMode(vkMode);
@ -2448,7 +2453,7 @@ MTLSamplerDescriptor* MVKSampler::newMTLSamplerDescriptor(const VkSamplerCreateI
mtlSampDesc.lodMinClamp = pCreateInfo->minLod;
mtlSampDesc.lodMaxClamp = pCreateInfo->maxLod;
mtlSampDesc.maxAnisotropy = (pCreateInfo->anisotropyEnable
? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, _device->_pProperties->limits.maxSamplerAnisotropy)
? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, getDeviceProperties().limits.maxSamplerAnisotropy)
: 1);
mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates;
mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers();
@ -2483,7 +2488,7 @@ MVKSampler::MVKSampler(MVKDevice* device, const VkSamplerCreateInfo* pCreateInfo
}
}

_requiresConstExprSampler = (pCreateInfo->compareEnable && !_device->_pMetalFeatures->depthSampleCompare) || _ycbcrConversion;
_requiresConstExprSampler = (pCreateInfo->compareEnable && !getMetalFeatures().depthSampleCompare) || _ycbcrConversion;

@autoreleasepool {
auto mtlDev = getMTLDevice();

@ -159,9 +159,6 @@ public:
/** Returns whether debug callbacks are being used. */
bool hasDebugCallbacks() { return _hasDebugReportCallbacks || _hasDebugUtilsMessengers; }

/** The list of Vulkan extensions, indicating whether each has been enabled by the app. */
const MVKExtensionList _enabledExtensions;


#pragma mark Object Creation

@ -198,6 +195,7 @@ protected:
void logVersions();
VkResult verifyLayers(uint32_t count, const char* const* names);

MVKExtensionList _enabledExtensions;
MVKConfiguration _mvkConfig;
VkApplicationInfo _appInfo;
MVKSmallVector<MVKPhysicalDevice*, 2> _physicalDevices;

@ -164,7 +164,7 @@ public:

/** Returns whether pipeline creation should fail if a pipeline compile is required. */
bool shouldFailOnPipelineCompileRequired() {
return (_device->_enabledPipelineCreationCacheControlFeatures.pipelineCreationCacheControl &&
return (getEnabledPipelineCreationCacheControlFeatures().pipelineCreationCacheControl &&
mvkIsAnyFlagEnabled(_flags, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT));
}

@ -590,7 +590,7 @@ public:

MVKRenderPipelineCompiler(MVKVulkanAPIDeviceObject* owner) : MVKMetalCompiler(owner) {
_compilerType = "Render pipeline";
_pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.pipelineCompile;
_pPerformanceTracker = &getPerformanceStats().shaderCompilation.pipelineCompile;
}

~MVKRenderPipelineCompiler() override;
@ -635,7 +635,7 @@ public:

MVKComputePipelineCompiler(MVKVulkanAPIDeviceObject* owner, const char* compilerType = nullptr) : MVKMetalCompiler(owner) {
_compilerType = compilerType ? compilerType : "Compute pipeline";
_pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.pipelineCompile;
_pPerformanceTracker = &getPerformanceStats().shaderCompilation.pipelineCompile;
}

~MVKComputePipelineCompiler() override;

@ -602,7 +602,7 @@ void MVKGraphicsPipeline::initDynamicState(const VkGraphicsPipelineCreateInfo* p
// Some dynamic states have other restrictions
switch (dynStateType) {
case VertexStride:
isDynamic = _device->_pMetalFeatures->dynamicVertexStride;
isDynamic = getMetalFeatures().dynamicVertexStride;
if ( !isDynamic ) { setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "This device and platform does not support VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE (macOS 14.0 or iOS/tvOS 17.0, plus either Apple4 or Mac2 GPU).")); }
break;
default:
@ -1216,7 +1216,7 @@ bool MVKGraphicsPipeline::addTessCtlShaderToPipeline(MTLComputePipelineDescripto
shaderConfig.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageTessCtl];
shaderConfig.options.mslOptions.capture_output_to_buffer = true;
shaderConfig.options.mslOptions.multi_patch_workgroup = true;
shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pTessCtlSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pTessCtlSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : getMetalFeatures().maxSubgroupSize;
addPrevStageOutputToShaderConversionConfig(shaderConfig, vtxOutputs);
addNextStageInputToShaderConversionConfig(shaderConfig, teInputs);

@ -1318,6 +1318,7 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto
SPIRVShaderOutputs& shaderOutputs,
const VkPipelineShaderStageCreateInfo* pFragmentSS,
VkPipelineCreationFeedback* pFragmentFB) {
auto& mtlFeats = getMetalFeatures();
if (pFragmentSS) {
shaderConfig.options.entryPointStage = spv::ExecutionModelFragment;
shaderConfig.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageFragment];
@ -1326,9 +1327,9 @@ bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescripto
shaderConfig.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageFragment];
shaderConfig.options.entryPointName = pFragmentSS->pName;
shaderConfig.options.mslOptions.capture_output_to_buffer = false;
shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pFragmentSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pFragmentSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : mtlFeats.maxSubgroupSize;
shaderConfig.options.mslOptions.check_discarded_frag_stores = true;
if (_device->_pMetalFeatures->needsSampleDrefLodArrayWorkaround) {
if (mtlFeats.needsSampleDrefLodArrayWorkaround) {
shaderConfig.options.mslOptions.sample_dref_lod_array_as_grad = true;
}
if (_isRasterizing && pCreateInfo->pMultisampleState) { // Must ignore allowed bad pMultisampleState pointer if rasterization disabled
@ -1407,8 +1408,8 @@ bool MVKGraphicsPipeline::addVertexInputToPipeline(T* inputDesc,
if (shaderConfig.isVertexBufferUsed(pVKVB->binding)) {

// Vulkan allows any stride, but Metal requires multiples of 4 on older GPUs.
if (isVtxStrideStatic && (pVKVB->stride % _device->_pMetalFeatures->vertexStrideAlignment) != 0) {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", _device->_pMetalFeatures->vertexStrideAlignment));
if (isVtxStrideStatic && (pVKVB->stride % getMetalFeatures().vertexStrideAlignment) != 0) {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute binding strides must be aligned to %llu bytes.", getMetalFeatures().vertexStrideAlignment));
return false;
}

@ -1599,7 +1600,7 @@ void MVKGraphicsPipeline::addTessellationToPipeline(MTLRenderPipelineDescriptor*
}
}

plDesc.maxTessellationFactor = _device->_pProperties->limits.maxTessellationGenerationLevel;
plDesc.maxTessellationFactor = getDeviceProperties().limits.maxTessellationGenerationLevel;
plDesc.tessellationFactorFormat = MTLTessellationFactorFormatHalf; // FIXME Use Float when it becomes available
plDesc.tessellationFactorStepFunction = MTLTessellationFactorStepFunctionPerPatch;
plDesc.tessellationOutputWindingOrder = mvkMTLWindingFromSpvExecutionMode(reflectData.windingOrder);
@ -1676,7 +1677,7 @@ void MVKGraphicsPipeline::addFragmentOutputToPipeline(MTLRenderPipelineDescripto

// In Vulkan, it's perfectly valid to render without any attachments. In Metal, if that
// isn't supported, and we have no attachments, then we have to add a dummy attachment.
if (!getDevice()->_pMetalFeatures->renderWithoutAttachments &&
if (!getMetalFeatures().renderWithoutAttachments &&
!caCnt && !pRendInfo->depthAttachmentFormat && !pRendInfo->stencilAttachmentFormat) {

MTLRenderPipelineColorAttachmentDescriptor* colorDesc = plDesc.colorAttachments[0];
@ -1721,16 +1722,17 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu
}
}

shaderConfig.options.mslOptions.msl_version = _device->_pMetalFeatures->mslVersion;
shaderConfig.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension;
auto& mtlFeats = getMetalFeatures();
shaderConfig.options.mslOptions.msl_version = mtlFeats.mslVersion;
shaderConfig.options.mslOptions.texel_buffer_texture_width = mtlFeats.maxTextureDimension;
shaderConfig.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this);
shaderConfig.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
shaderConfig.options.mslOptions.texture_buffer_native = mtlFeats.textureBuffers;

bool useMetalArgBuff = isUsingMetalArgumentBuffers();
shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff;
shaderConfig.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff;
shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff;
shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = _device->_pMetalFeatures->needsCubeGradWorkaround;
shaderConfig.options.mslOptions.agx_manual_cube_grad_fixup = mtlFeats.needsCubeGradWorkaround;

MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
layout->populateShaderConversionConfig(shaderConfig);
@ -1775,24 +1777,24 @@ void MVKGraphicsPipeline::initShaderConversionConfig(SPIRVToMSLConversionConfigu
}
}

shaderConfig.options.mslOptions.ios_support_base_vertex_instance = getDevice()->_pMetalFeatures->baseVertexInstanceDrawing;
shaderConfig.options.mslOptions.ios_support_base_vertex_instance = mtlFeats.baseVertexInstanceDrawing;
shaderConfig.options.mslOptions.texture_1D_as_2D = getMVKConfig().texture1DAs2D;
shaderConfig.options.mslOptions.enable_point_size_builtin = isRenderingPoints(pCreateInfo) || reflectData.pointMode;
shaderConfig.options.mslOptions.enable_frag_depth_builtin = pixFmts->isDepthFormat(pixFmts->getMTLPixelFormat(pRendInfo->depthAttachmentFormat));
shaderConfig.options.mslOptions.enable_frag_stencil_ref_builtin = pixFmts->isStencilFormat(pixFmts->getMTLPixelFormat(pRendInfo->stencilAttachmentFormat));
shaderConfig.options.shouldFlipVertexY = getMVKConfig().shaderConversionFlipVertexY;
shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle;
shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !mtlFeats.nativeTextureSwizzle;
shaderConfig.options.mslOptions.tess_domain_origin_lower_left = pTessDomainOriginState && pTessDomainOriginState->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
shaderConfig.options.mslOptions.multiview = mvkIsMultiview(pRendInfo->viewMask);
shaderConfig.options.mslOptions.multiview_layered_rendering = getPhysicalDevice()->canUseInstancingForMultiview();
shaderConfig.options.mslOptions.view_index_from_device_index = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT);
shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0);
#if MVK_MACOS
shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.simdPermute;
#endif
#if MVK_IOS_OR_TVOS
shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute;
shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute;
shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.quadPermute;
shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!mtlFeats.simdPermute;
#endif

shaderConfig.options.tessPatchKind = reflectData.patchKind;
@ -2136,16 +2138,17 @@ MVKComputePipeline::MVKComputePipeline(MVKDevice* device,
pPipelineFB->duration = mvkGetElapsedNanoseconds(pipelineStart);
}

if (_needsSwizzleBuffer && _swizzleBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
auto& mtlFeats = getMetalFeatures();
if (_needsSwizzleBuffer && _swizzleBufferIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires swizzle buffer, but there is no free slot to pass it."));
}
if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires buffer size buffer, but there is no free slot to pass it."));
}
if (_needsDynamicOffsetBuffer && _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
if (_needsDynamicOffsetBuffer && _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dynamic offset buffer, but there is no free slot to pass it."));
}
if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > mtlFeats.maxPerStageBufferCount) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dispatch base buffer, but there is no free slot to pass it."));
}
}
@ -2157,17 +2160,18 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI
const VkPipelineShaderStageCreateInfo* pSS = &pCreateInfo->stage;
if ( !mvkAreAllFlagsEnabled(pSS->stage, VK_SHADER_STAGE_COMPUTE_BIT) ) { return MVKMTLFunctionNull; }

auto& mtlFeats = getMetalFeatures();
SPIRVToMSLConversionConfiguration shaderConfig;
shaderConfig.options.entryPointName = pCreateInfo->stage.pName;
shaderConfig.options.entryPointStage = spv::ExecutionModelGLCompute;
shaderConfig.options.mslOptions.msl_version = _device->_pMetalFeatures->mslVersion;
shaderConfig.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension;
shaderConfig.options.mslOptions.msl_version = mtlFeats.mslVersion;
shaderConfig.options.mslOptions.texel_buffer_texture_width = mtlFeats.maxTextureDimension;
shaderConfig.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this);
shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle;
shaderConfig.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
shaderConfig.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !mtlFeats.nativeTextureSwizzle;
shaderConfig.options.mslOptions.texture_buffer_native = mtlFeats.textureBuffers;
shaderConfig.options.mslOptions.dispatch_base = _allowsDispatchBase;
shaderConfig.options.mslOptions.texture_1D_as_2D = getMVKConfig().texture1DAs2D;
shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
shaderConfig.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : mtlFeats.maxSubgroupSize;

bool useMetalArgBuff = isUsingMetalArgumentBuffers();
shaderConfig.options.mslOptions.argument_buffers = useMetalArgBuff;
@ -2175,11 +2179,11 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI
shaderConfig.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff;

#if MVK_MACOS
shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.simdPermute;
#endif
#if MVK_IOS_OR_TVOS
shaderConfig.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->quadPermute;
shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!_device->_pMetalFeatures->simdPermute;
shaderConfig.options.mslOptions.emulate_subgroups = !mtlFeats.quadPermute;
shaderConfig.options.mslOptions.ios_use_simdgroup_functions = !!mtlFeats.simdPermute;
#endif
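
The platform split above can be restated as a small illustration, with plain booleans standing in for the Metal feature queries (the names and packaging here are assumed, not MoltenVK's):

struct SubgroupOptions { bool emulate_subgroups = false; bool ios_use_simdgroup_functions = false; };

SubgroupOptions configureSubgroups(bool isMacOS, bool simdPermute, bool quadPermute) {
	SubgroupOptions opts;
	if (isMacOS) {
		opts.emulate_subgroups = !simdPermute;            // no SIMD-group permute: emulate subgroups
	} else {
		opts.emulate_subgroups = !quadPermute;            // iOS/tvOS: quad permute is the baseline
		opts.ios_use_simdgroup_functions = simdPermute;   // prefer SIMD-group functions when present
	}
	return opts;
}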

MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
@ -2224,7 +2228,7 @@ MVKMTLFunction MVKComputePipeline::getMTLFunction(const VkComputePipelineCreateI
}

uint32_t MVKComputePipeline::getImplicitBufferIndex(uint32_t bufferIndexOffset) {
return _device->_pMetalFeatures->maxPerStageBufferCount - (bufferIndexOffset + 1);
return getMetalFeatures().maxPerStageBufferCount - (bufferIndexOffset + 1);
}
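
As a worked example of the index arithmetic above: implicit buffers are handed out from the top of the per-stage buffer slots downward, so with maxPerStageBufferCount == 31 (a common Metal limit), offsets map to indices 30, 29, 28, and so on:

// Stand-alone restatement for illustration only.
uint32_t implicitBufferIndex(uint32_t maxPerStageBufferCount, uint32_t bufferIndexOffset) {
	return maxPerStageBufferCount - (bufferIndexOffset + 1);   // offset 0 -> 30, offset 1 -> 29, ...
}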

bool MVKComputePipeline::usesPhysicalStorageBufferAddressesCapability(MVKShaderStage stage) {
@ -2367,19 +2371,19 @@ VkResult MVKPipelineCache::writeDataImpl(size_t* pDataSize, void* pData) {
void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) {
#if MVK_USE_CEREAL
MVKPerformanceTracker& perfTracker = isCounting
? _device->_performanceStatistics.pipelineCache.sizePipelineCache
: _device->_performanceStatistics.pipelineCache.writePipelineCache;
? getPerformanceStats().pipelineCache.sizePipelineCache
: getPerformanceStats().pipelineCache.writePipelineCache;

uint32_t cacheEntryType;
cereal::BinaryOutputArchive writer(outstream);

// Write the data header...after ensuring correct byte-order.
const VkPhysicalDeviceProperties* pDevProps = _device->_pProperties;
auto& devProps = getDeviceProperties();
writer(NSSwapHostIntToLittle(kDataHeaderSize));
writer(NSSwapHostIntToLittle(VK_PIPELINE_CACHE_HEADER_VERSION_ONE));
writer(NSSwapHostIntToLittle(pDevProps->vendorID));
writer(NSSwapHostIntToLittle(pDevProps->deviceID));
writer(pDevProps->pipelineCacheUUID);
writer(NSSwapHostIntToLittle(devProps.vendorID));
writer(NSSwapHostIntToLittle(devProps.deviceID));
writer(devProps.pipelineCacheUUID);
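
A sketch of the header being serialized above. Field meanings follow the standard Vulkan pipeline-cache header, with integers written little-endian; the struct name and the assumption that kDataHeaderSize equals this struct's 32-byte size are illustrative, not taken from the source:

#include <cstdint>
#include <cstring>

struct PipelineCacheDataHeader {
	uint32_t headerSize;              // kDataHeaderSize
	uint32_t headerVersion;           // VK_PIPELINE_CACHE_HEADER_VERSION_ONE (== 1)
	uint32_t vendorID;
	uint32_t deviceID;
	uint8_t  pipelineCacheUUID[16];   // VK_UUID_SIZE
};

// On read, a cache is accepted only if every identity field matches the device,
// which is exactly what the readData() hunk below checks field by field.
bool cacheMatchesDevice(const PipelineCacheDataHeader& hdr,
                        uint32_t vendorID, uint32_t deviceID, const uint8_t uuid[16]) {
	return hdr.headerVersion == 1
		&& hdr.vendorID == vendorID
		&& hdr.deviceID == deviceID
		&& std::memcmp(hdr.pipelineCacheUUID, uuid, 16) == 0;
}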

// Shader libraries
// Output a cache entry for each shader library, including the shader module key in each entry.
@ -2388,13 +2392,13 @@ void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) {
MVKShaderModuleKey smKey = scPair.first;
MVKShaderCacheIterator cacheIter(scPair.second);
while (cacheIter.next()) {
uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
writer(cacheEntryType);
writer(smKey);
writer(cacheIter.getShaderConversionConfig());
writer(cacheIter.getShaderConversionResultInfo());
writer(cacheIter.getCompressedMSL());
_device->addPerformanceInterval(perfTracker, startTime);
addPerformanceInterval(perfTracker, startTime);
}
}

@ -2425,7 +2429,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) {
// Read the data header...and ensure correct byte-order.
uint32_t hdrComponent;
uint8_t pcUUID[VK_UUID_SIZE];
const VkPhysicalDeviceProperties* pDevProps = _device->_pProperties;
auto& dvcProps = getDeviceProperties();

reader(hdrComponent); // Header size
if (NSSwapLittleIntToHost(hdrComponent) != kDataHeaderSize) { return; }
@ -2434,20 +2438,20 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) {
if (NSSwapLittleIntToHost(hdrComponent) != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) { return; }

reader(hdrComponent); // Vendor ID
if (NSSwapLittleIntToHost(hdrComponent) != pDevProps->vendorID) { return; }
if (NSSwapLittleIntToHost(hdrComponent) != dvcProps.vendorID) { return; }

reader(hdrComponent); // Device ID
if (NSSwapLittleIntToHost(hdrComponent) != pDevProps->deviceID) { return; }
if (NSSwapLittleIntToHost(hdrComponent) != dvcProps.deviceID) { return; }

reader(pcUUID); // Pipeline cache UUID
if ( !mvkAreEqual(pcUUID, pDevProps->pipelineCacheUUID, VK_UUID_SIZE) ) { return; }
if ( !mvkAreEqual(pcUUID, dvcProps.pipelineCacheUUID, VK_UUID_SIZE) ) { return; }

bool done = false;
while ( !done ) {
reader(cacheEntryType);
switch (cacheEntryType) {
case MVKPipelineCacheEntryTypeShaderLibrary: {
uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();

MVKShaderModuleKey smKey;
reader(smKey);
@ -2463,7 +2467,7 @@ void MVKPipelineCache::readData(const VkPipelineCacheCreateInfo* pCreateInfo) {

// Add the shader library to the staging cache.
MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey);
_device->addPerformanceInterval(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime);
addPerformanceInterval(getPerformanceStats().pipelineCache.readPipelineCache, startTime);
slCache->addShaderLibrary(&shaderConversionConfig, resultInfo, compressedMSL);

break;
@ -2726,7 +2730,7 @@ void serialize(Archive & archive, MVKCompressor<C>& comp) {

MVKPipelineCache::MVKPipelineCache(MVKDevice* device, const VkPipelineCacheCreateInfo* pCreateInfo) :
MVKVulkanAPIDeviceObject(device),
_isExternallySynchronized(device->_enabledPipelineCreationCacheControlFeatures.pipelineCreationCacheControl &&
_isExternallySynchronized(getEnabledPipelineCreationCacheControlFeatures().pipelineCreationCacheControl &&
mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT)) {

readData(pCreateInfo);
@ -2745,7 +2749,7 @@ id<MTLRenderPipelineState> MVKRenderPipelineCompiler::newMTLRenderPipelineState(
unique_lock<mutex> lock(_completionLock);

compile(lock, ^{
auto mtlDev = _owner->getMTLDevice();
auto mtlDev = getMTLDevice();
@synchronized (mtlDev) {
[mtlDev newRenderPipelineStateWithDescriptor: mtlRPLDesc
completionHandler: ^(id<MTLRenderPipelineState> ps, NSError* error) {
@ -2779,7 +2783,7 @@ id<MTLComputePipelineState> MVKComputePipelineCompiler::newMTLComputePipelineSta
unique_lock<mutex> lock(_completionLock);

compile(lock, ^{
auto mtlDev = _owner->getMTLDevice();
auto mtlDev = getMTLDevice();
@synchronized (mtlDev) {
[mtlDev newComputePipelineStateWithFunction: mtlFunction
completionHandler: ^(id<MTLComputePipelineState> ps, NSError* error) {
@ -2796,7 +2800,7 @@ id<MTLComputePipelineState> MVKComputePipelineCompiler::newMTLComputePipelineSta
unique_lock<mutex> lock(_completionLock);

compile(lock, ^{
auto mtlDev = _owner->getMTLDevice();
auto mtlDev = getMTLDevice();
@synchronized (mtlDev) {
[mtlDev newComputePipelineStateWithDescriptor: plDesc
options: MTLPipelineOptionNone

@ -293,7 +293,7 @@ void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer*
// In multiview passes, one query is used for each view.
NSUInteger queryCount = cmdBuffer->getViewCount();
NSUInteger offset = getVisibilityResultOffset(query);
NSUInteger maxOffset = getDevice()->_pMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes * queryCount;
NSUInteger maxOffset = getMetalFeatures().maxQueryBufferSize - kMVKQuerySlotSizeInBytes * queryCount;
if (offset > maxOffset) {
cmdBuffer->setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The query offset value %lu is larger than the maximum offset value %lu available on this device.", offset, maxOffset));
}
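
A worked example of the bound being checked, using hypothetical numbers:

// With a 65536-byte query buffer, 8-byte visibility-result slots, and a
// multiview pass spanning 2 views:
//   maxOffset = 65536 - 8 * 2 = 65520
// so a query whose visibility-result offset exceeds 65520 is rejected above.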
@ -311,8 +311,9 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device,
_queryIndexOffset = 0;

// Ensure we don't overflow the maximum number of queries
auto& mtlFeats = getMetalFeatures();
VkDeviceSize reqBuffLen = (VkDeviceSize)pCreateInfo->queryCount * kMVKQuerySlotSizeInBytes;
VkDeviceSize maxBuffLen = _device->_pMetalFeatures->maxQueryBufferSize;
VkDeviceSize maxBuffLen = mtlFeats.maxQueryBufferSize;
VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen);

if (reqBuffLen > maxBuffLen) {
@ -321,7 +322,7 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device,
uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes));
}

NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, mtlFeats.mtlBufferAlignment);
MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
_visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained

@ -463,7 +464,7 @@ MVKTimestampQueryPool::MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoo

MVKPipelineStatisticsQueryPool::MVKPipelineStatisticsQueryPool(MVKDevice* device,
const VkQueryPoolCreateInfo* pCreateInfo) : MVKGPUCounterQueryPool(device, pCreateInfo) {
if ( !_device->_enabledFeatures.pipelineStatisticsQuery ) {
if ( !getEnabledFeatures().pipelineStatisticsQuery ) {
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateQueryPool: VK_QUERY_TYPE_PIPELINE_STATISTICS is not supported."));
}
}

@ -184,7 +184,7 @@ public:
} MVKSemaphoreSubmitInfo;

/** This is an abstract class for an operation that can be submitted to an MVKQueue. */
class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin {
class MVKQueueSubmission : public MVKBaseDeviceObject, public MVKConfigurableMixin {

public:

@ -154,8 +154,7 @@ VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) {

id<MTLCommandBuffer> MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) {
id<MTLCommandBuffer> mtlCmdBuff = nil;
MVKDevice* mvkDev = getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
#if MVK_XCODE_12
if ([_mtlQueue respondsToSelector: @selector(commandBufferWithDescriptor:)]) {
MTLCommandBufferDescriptor* mtlCmdBuffDesc = [MTLCommandBufferDescriptor new]; // temp retain
@ -172,7 +171,7 @@ id<MTLCommandBuffer> MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool re
} else {
mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences];
}
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime);
addPerformanceInterval(getPerformanceStats().queue.retrieveMTLCommandBuffer, startTime);
NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse);
setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel);
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mtlCB) { handleMTLCommandBufferError(mtlCB); }];
@ -412,11 +411,12 @@ MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(VkCommandBuffer commandBu

MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
uint32_t waitSemaphoreInfoCount,
const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) {
_queue = queue;
_queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) :
MVKBaseDeviceObject(queue->getDevice()),
_queue(queue) {

_creationTime = getDevice()->getPerformanceTimestamp(); // call getDevice() only after _queue is defined
_queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
_creationTime = getPerformanceTimestamp();

_waitSemaphores.reserve(waitSemaphoreInfoCount);
for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) {
@ -427,11 +427,12 @@ MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
uint32_t waitSemaphoreCount,
const VkSemaphore* pWaitSemaphores,
const VkPipelineStageFlags* pWaitDstStageMask) {
_queue = queue;
_queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
const VkPipelineStageFlags* pWaitDstStageMask) :
MVKBaseDeviceObject(queue->getDevice()),
_queue(queue) {

_creationTime = getDevice()->getPerformanceTimestamp(); // call getDevice() only after _queue is defined
_queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish().
_creationTime = getPerformanceTimestamp();

_waitSemaphores.reserve(waitSemaphoreCount);
for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
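
The constructor changes above fix an initialization-ordering hazard, which a simplified sketch makes explicit. All types here are assumed stand-ins (the real classes are MVKQueue, MVKDevice, and MVKBaseDeviceObject):

#include <chrono>
#include <cstdint>

class Device {};
class Queue {
public:
	Device* getDevice() { return &_device; }
	void retain() { ++_refCount; }
private:
	Device _device;
	int _refCount = 0;
};

inline uint64_t getPerformanceTimestamp() {
	return (uint64_t)std::chrono::steady_clock::now().time_since_epoch().count();
}

class BaseDeviceObject {
public:
	explicit BaseDeviceObject(Device* device) : _device(device) {}
protected:
	Device* _device;
};

class QueueSubmission : public BaseDeviceObject {
public:
	explicit QueueSubmission(Queue* queue)
		: BaseDeviceObject(queue->getDevice()),   // base is initialized first...
		  _queue(queue) {
		_queue->retain();
		_creationTime = getPerformanceTimestamp();   // ...so no member-order hazard here
	}
protected:
	Queue*   _queue;
	uint64_t _creationTime;
};

Because the base class captures the device in the initializer list, device-derived helpers are usable immediately in the constructor body, and the old "call getDevice() only after _queue is defined" caveat disappears.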
@ -455,8 +456,7 @@ VkResult MVKQueueCommandBufferSubmission::execute() {
for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); }

// Wait time from an async vkQueueSubmit() call to starting submit and encoding of the command buffers
MVKDevice* mvkDev = getDevice();
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, _creationTime);
addPerformanceInterval(_queue->getPerformanceStats().queue.waitSubmitCommandBuffers, _creationTime);

// Submit each command buffer.
submitCommandBuffers();
@ -518,10 +518,9 @@ VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool sign
id<MTLCommandBuffer> mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer;
_activeMTLCommandBuffer = nil;

MVKDevice* mvkDev = getDevice();
uint64_t startTime = mvkDev->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mtlCB) {
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime);
addPerformanceInterval(getPerformanceStats().queue.mtlCommandBufferExecution, startTime);
if (signalCompletion) { this->finish(); } // Must be the last thing the completion callback does.
|
||||
}];
|
||||
|
||||
@ -638,12 +637,11 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
|
||||
|
||||
template <size_t N>
|
||||
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
|
||||
MVKDevice* mvkDev = getDevice();
|
||||
uint64_t startTime = mvkDev->getPerformanceTimestamp();
|
||||
uint64_t startTime = getPerformanceTimestamp();
|
||||
|
||||
for (auto& cbInfo : _cmdBuffers) { cbInfo.commandBuffer->submit(this, &_encodingContext); }
|
||||
|
||||
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime);
|
||||
addPerformanceInterval(getPerformanceStats().queue.submitCommandBuffers, startTime);
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
@ -699,8 +697,7 @@ VkResult MVKQueuePresentSurfaceSubmission::execute() {
|
||||
}
|
||||
|
||||
// Wait time from an async vkQueuePresentKHR() call to starting presentation of the swapchains
|
||||
MVKDevice* mvkDev = getDevice();
|
||||
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, _creationTime);
|
||||
addPerformanceInterval(getPerformanceStats().queue.waitPresentSwapchains, _creationTime);
|
||||
|
||||
for (int i = 0; i < _presentInfo.size(); i++ ) {
|
||||
setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]));
|
||||
|
@ -263,7 +263,7 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor*
|
||||
// Vulkan supports rendering without attachments, but older Metal does not.
|
||||
// If Metal does not support rendering without attachments, create a dummy attachment to pass Metal validation.
|
||||
if (caUsedCnt == 0 && depthRPAttIdx == VK_ATTACHMENT_UNUSED && stencilRPAttIdx == VK_ATTACHMENT_UNUSED) {
|
||||
if (_renderPass->getDevice()->_pMetalFeatures->renderWithoutAttachments) {
|
||||
if (_renderPass->getMetalFeatures().renderWithoutAttachments) {
|
||||
mtlRPDesc.defaultRasterSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_defaultSampleCount);
|
||||
} else {
|
||||
MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPDesc.colorAttachments[0];
|
||||
@ -282,7 +282,7 @@ void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder,
|
||||
MVKArrayRef<MVKImageView*const> attachments,
|
||||
bool storeOverride) {
|
||||
if (!cmdEncoder->_mtlRenderEncoder) { return; }
|
||||
if (!_renderPass->getDevice()->_pMetalFeatures->deferredStoreActions) { return; }
|
||||
if (!_renderPass->getMetalFeatures().deferredStoreActions) { return; }
|
||||
|
||||
MVKPixelFormats* pixFmts = _renderPass->getPixelFormats();
|
||||
uint32_t caCnt = getColorAttachmentCount();
|
||||
@ -414,7 +414,7 @@ void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEnco
|
||||
[mtlComputeEnc setTexture: caImgView->getMTLTexture() atIndex: 1];
|
||||
MTLSize gridSize = mvkMTLSizeFromVkExtent3D(raImgView->getExtent3D());
|
||||
MTLSize tgSize = MTLSizeMake(mtlRslvState.threadExecutionWidth, 1, 1);
|
||||
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
|
||||
if (cmdEncoder->getMetalFeatures().nonUniformThreadgroups) {
|
||||
[mtlComputeEnc dispatchThreads: gridSize threadsPerThreadgroup: tgSize];
|
||||
} else {
|
||||
MTLSize tgCount = MTLSizeMake(gridSize.width / tgSize.width, gridSize.height, gridSize.depth);
|
||||
@ -676,7 +676,7 @@ bool MVKAttachmentDescription::populateMTLRenderPassAttachmentDescriptor(MTLRend
|
||||
// If the device supports late-specified store actions, we'll use those, and then set them later.
|
||||
// That way, if we wind up doing a tessellated draw, we can set the store action to store then,
|
||||
// and then when the render pass actually ends, we can use the true store action.
|
||||
if ( _renderPass->getDevice()->_pMetalFeatures->deferredStoreActions ) {
|
||||
if (_renderPass->getMetalFeatures().deferredStoreActions) {
|
||||
mtlAttDesc.storeAction = MTLStoreActionUnknown;
|
||||
} else {
|
||||
// For a combined depth-stencil format in an attachment with VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
@ -777,7 +777,7 @@ MTLStoreAction MVKAttachmentDescription::getMTLStoreAction(MVKRenderSubpass* sub
|
||||
}
|
||||
|
||||
// If a resolve attachment exists, this attachment must resolve once complete.
|
||||
if (hasResolveAttachment && canResolveFormat && !_renderPass->getDevice()->_pMetalFeatures->combinedStoreResolveAction) {
|
||||
if (hasResolveAttachment && canResolveFormat && !_renderPass->getMetalFeatures().combinedStoreResolveAction) {
|
||||
return MTLStoreActionMultisampleResolve;
|
||||
}
|
||||
// Memoryless can't be stored.
|
||||
@ -926,7 +926,7 @@ MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency2& spDep, co
|
||||
viewOffset(spDep.viewOffset) {}
|
||||
|
||||
VkExtent2D MVKRenderPass::getRenderAreaGranularity() {
|
||||
if (_device->_pMetalFeatures->tileBasedDeferredRendering) {
|
||||
if (getMetalFeatures().tileBasedDeferredRendering) {
|
||||
// This is the tile area.
|
||||
// FIXME: We really ought to use MTLRenderCommandEncoder.tile{Width,Height}, but that requires
|
||||
// creating a command buffer.
|
||||
|
@ -60,7 +60,7 @@ private:
|
||||
const MVKMTLFunction MVKMTLFunctionNull(nil, SPIRVToMSLConversionResultInfo(), MTLSizeMake(1, 1, 1));
|
||||
|
||||
/** Wraps a single MTLLibrary. */
|
||||
class MVKShaderLibrary : public MVKBaseObject {
|
||||
class MVKShaderLibrary : public MVKBaseDeviceObject {
|
||||
|
||||
public:
|
||||
|
||||
@ -128,7 +128,7 @@ protected:
|
||||
#pragma mark MVKShaderLibraryCache
|
||||
|
||||
/** Represents a cache of shader libraries for one shader module. */
|
||||
class MVKShaderLibraryCache : public MVKBaseObject {
|
||||
class MVKShaderLibraryCache : public MVKBaseDeviceObject {
|
||||
|
||||
public:
|
||||
|
||||
@ -149,7 +149,7 @@ public:
|
||||
bool* pWasAdded, VkPipelineCreationFeedback* pShaderFeedback,
|
||||
uint64_t startTime = 0);
|
||||
|
||||
MVKShaderLibraryCache(MVKVulkanAPIDeviceObject* owner) : _owner(owner) {};
|
||||
MVKShaderLibraryCache(MVKVulkanAPIDeviceObject* owner) : MVKBaseDeviceObject(owner->getDevice()), _owner(owner) {};
|
||||
|
||||
~MVKShaderLibraryCache() override;
|
||||
|
||||
@ -273,7 +273,7 @@ public:
|
||||
|
||||
MVKShaderLibraryCompiler(MVKVulkanAPIDeviceObject* owner) : MVKMetalCompiler(owner) {
|
||||
_compilerType = "Shader library";
|
||||
_pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.mslCompile;
|
||||
_pPerformanceTracker = &getPerformanceStats().shaderCompilation.mslCompile;
|
||||
}
|
||||
|
||||
~MVKShaderLibraryCompiler() override;
|
||||
@ -311,7 +311,7 @@ public:
|
||||
|
||||
MVKFunctionSpecializer(MVKVulkanAPIDeviceObject* owner) : MVKMetalCompiler(owner) {
|
||||
_compilerType = "Function specialization";
|
||||
_pPerformanceTracker = &_owner->getDevice()->_performanceStatistics.shaderCompilation.functionSpecialization;
|
||||
_pPerformanceTracker = &getPerformanceStats().shaderCompilation.functionSpecialization;
|
||||
}
|
||||
|
||||
~MVKFunctionSpecializer() override;
|
||||
|
@ -75,14 +75,13 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe
|
||||
|
||||
if ( !_mtlLibrary ) { return MVKMTLFunctionNull; }
|
||||
|
||||
@synchronized (_owner->getMTLDevice()) {
|
||||
@synchronized (getMTLDevice()) {
|
||||
@autoreleasepool {
|
||||
NSString* mtlFuncName = @(_shaderConversionResultInfo.entryPoint.mtlFunctionName.c_str());
|
||||
MVKDevice* mvkDev = _owner->getDevice();
|
||||
|
||||
uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : mvkDev->getPerformanceTimestamp();
|
||||
uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : getPerformanceTimestamp();
|
||||
id<MTLFunction> mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease];
|
||||
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime);
|
||||
addPerformanceInterval(getPerformanceStats().shaderCompilation.functionRetrieval, startTime);
|
||||
if (pShaderFeedback) {
|
||||
if (mtlFunc) {
|
||||
mvkEnableFlags(pShaderFeedback->flags, VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT);
|
||||
@ -94,7 +93,7 @@ MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpe
|
||||
// If the Metal device supports shader specialization, and the Metal function expects to be specialized,
|
||||
// populate Metal function constant values from the Vulkan specialization info, and compile a specialized
|
||||
// Metal function, otherwise simply use the unspecialized Metal function.
|
||||
if (mvkDev->_pMetalFeatures->shaderSpecialization) {
|
||||
if (getMetalFeatures().shaderSpecialization) {
|
||||
NSArray<MTLFunctionConstant*>* mtlFCs = mtlFunc.functionConstantsDictionary.allValues;
|
||||
if (mtlFCs.count > 0) {
|
||||
// The Metal shader contains function constants and expects to be specialized.
|
||||
@ -155,22 +154,23 @@ void MVKShaderLibrary::setWorkgroupSize(uint32_t x, uint32_t y, uint32_t z) {
|
||||
|
||||
// Sets the cached MSL source code, after first compressing it.
|
||||
void MVKShaderLibrary::compressMSL(const string& msl) {
|
||||
MVKDevice* mvkDev = _owner->getDevice();
|
||||
uint64_t startTime = mvkDev->getPerformanceTimestamp();
|
||||
uint64_t startTime = getPerformanceTimestamp();
|
||||
_compressedMSL.compress(msl, getMVKConfig().shaderSourceCompressionAlgorithm);
|
||||
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime);
|
||||
addPerformanceInterval(getPerformanceStats().shaderCompilation.mslCompress, startTime);
|
||||
}
|
||||
|
||||
// Decompresses the cached MSL into the string.
|
||||
void MVKShaderLibrary::decompressMSL(string& msl) {
|
||||
MVKDevice* mvkDev = _owner->getDevice();
|
||||
uint64_t startTime = mvkDev->getPerformanceTimestamp();
|
||||
uint64_t startTime = getPerformanceTimestamp();
|
||||
_compressedMSL.decompress(msl);
|
||||
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime);
|
||||
addPerformanceInterval(getPerformanceStats().shaderCompilation.mslDecompress, startTime);
|
||||
}
|
||||
|
||||
MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
|
||||
const SPIRVToMSLConversionResult& conversionResult) : _owner(owner) {
|
||||
const SPIRVToMSLConversionResult& conversionResult) :
|
||||
MVKBaseDeviceObject(owner->getDevice()),
|
||||
_owner(owner) {
|
||||
|
||||
_shaderConversionResultInfo = conversionResult.resultInfo;
|
||||
compressMSL(conversionResult.msl);
|
||||
compileLibrary(conversionResult.msl);
|
||||
@ -178,7 +178,10 @@ MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
|
||||
|
||||
MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
|
||||
const SPIRVToMSLConversionResultInfo& resultInfo,
|
||||
const MVKCompressor<std::string> compressedMSL) : _owner(owner) {
|
||||
const MVKCompressor<std::string> compressedMSL) :
|
||||
MVKBaseDeviceObject(owner->getDevice()),
|
||||
_owner(owner) {
|
||||
|
||||
_shaderConversionResultInfo = resultInfo;
|
||||
_compressedMSL = compressedMSL;
|
||||
string msl;
|
||||
@ -196,24 +199,28 @@ void MVKShaderLibrary::compileLibrary(const string& msl) {
|
||||
|
||||
MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
|
||||
const void* mslCompiledCodeData,
|
||||
size_t mslCompiledCodeLength) : _owner(owner) {
|
||||
MVKDevice* mvkDev = _owner->getDevice();
|
||||
uint64_t startTime = mvkDev->getPerformanceTimestamp();
|
||||
size_t mslCompiledCodeLength) :
|
||||
MVKBaseDeviceObject(owner->getDevice()),
|
||||
_owner(owner) {
|
||||
|
||||
uint64_t startTime = getPerformanceTimestamp();
|
||||
@autoreleasepool {
|
||||
dispatch_data_t shdrData = dispatch_data_create(mslCompiledCodeData,
|
||||
mslCompiledCodeLength,
|
||||
NULL,
|
||||
DISPATCH_DATA_DESTRUCTOR_DEFAULT);
|
||||
NSError* err = nil;
|
||||
_mtlLibrary = [mvkDev->getMTLDevice() newLibraryWithData: shdrData error: &err]; // retained
|
||||
_mtlLibrary = [getMTLDevice() newLibraryWithData: shdrData error: &err]; // retained
|
||||
handleCompilationError(err, "Compiled shader module creation");
|
||||
[shdrData release];
|
||||
}
|
||||
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime);
|
||||
addPerformanceInterval(getPerformanceStats().shaderCompilation.mslLoad, startTime);
|
||||
}
|
||||
|
||||
MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) {
|
||||
_owner = other._owner;
|
||||
MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) :
|
||||
MVKBaseDeviceObject(other._device),
|
||||
_owner(other._owner) {
|
||||
|
||||
_mtlLibrary = [other._mtlLibrary retain];
|
||||
_shaderConversionResultInfo = other._shaderConversionResultInfo;
|
||||
_compressedMSL = other._compressedMSL;
|
||||
@ -284,8 +291,7 @@ MVKShaderLibrary* MVKShaderLibraryCache::findShaderLibrary(SPIRVToMSLConversionC
for (auto& slPair : _shaderLibraries) {
if (slPair.first.matches(*pShaderConfig)) {
pShaderConfig->alignWith(slPair.first);
MVKDevice* mvkDev = _owner->getDevice();
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime);
addPerformanceInterval(getPerformanceStats().shaderCompilation.shaderLibraryFromCache, startTime);
if (pShaderFeedback) {
pShaderFeedback->duration += mvkGetElapsedNanoseconds(startTime);
}
@ -337,7 +343,7 @@ MVKMTLFunction MVKShaderModule::getMTLFunction(SPIRVToMSLConversionConfiguration
VkPipelineCreationFeedback* pShaderFeedback) {
MVKShaderLibrary* mvkLib = _directMSLLibrary;
if ( !mvkLib ) {
uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : _device->getPerformanceTimestamp();
uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : getPerformanceTimestamp();
MVKPipelineCache* pipelineCache = pipeline->getPipelineCache();
if (pipelineCache) {
mvkLib = pipelineCache->getShaderLibrary(pShaderConfig, this, pipeline, pShaderFeedback, startTime);
@ -363,9 +369,9 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig,
if ( !_spvConverter.hasSPIRV() && _glslConverter.hasGLSL() ) {

GLSLToSPIRVConversionResult glslConversionResult;
uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
bool wasConverted = _glslConverter.convert(getMVKGLSLConversionShaderStage(pShaderConfig), glslConversionResult, shouldLogCode, false);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime);
addPerformanceInterval(getPerformanceStats().shaderCompilation.glslToSPRIV, startTime);

if (wasConverted) {
if (shouldLogCode) { MVKLogInfo("%s", glslConversionResult.resultLog.c_str()); }
@ -376,9 +382,9 @@ bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig,
shouldLogEstimatedGLSL = false;
}

uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
bool wasConverted = _spvConverter.convert(*pShaderConfig, conversionResult, shouldLogCode, shouldLogCode, shouldLogEstimatedGLSL);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime);
addPerformanceInterval(getPerformanceStats().shaderCompilation.spirvToMSL, startTime);

const char* dumpDir = getMVKConfig().shaderDumpDir;
if (dumpDir && *dumpDir) {
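The two-line timing pattern repeated through these hunks (getPerformanceTimestamp() before the work, addPerformanceInterval() after) previously required three separate trips through _device. As a rough illustration of how helpers of this shape can live on a mixin, here is a hedged sketch; the tracker layout and the running-average math are illustrative assumptions, not MoltenVK's actual bookkeeping:

#include <chrono>
#include <cstdint>

struct MVKPerformanceTrackerSketch { double average = 0.0; uint64_t count = 0; };	// assumed layout

// Assumed mixin-side helpers: stamp a start time, then fold the elapsed
// interval into a tracker as a running mean of milliseconds.
struct PerfMixinSketch {
	uint64_t getPerformanceTimestamp() const {
		using namespace std::chrono;
		return (uint64_t)duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()).count();
	}
	void addPerformanceInterval(MVKPerformanceTrackerSketch& tracker, uint64_t startTime) const {
		double ms = double(getPerformanceTimestamp() - startTime) * 1.0e-6;	// ns to ms
		tracker.average += (ms - tracker.average) / double(++tracker.count);	// incremental mean
	}
};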
@ -472,9 +478,9 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
case kMVKMagicNumberSPIRVCode: { // SPIR-V code
size_t spvCount = (codeSize + 3) >> 2; // Round up if byte length not exactly on uint32_t boundary

uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
codeHash = mvkHash(pCreateInfo->pCode, spvCount);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime);

_spvConverter.setSPIRV(pCreateInfo->pCode, spvCount);

@ -485,10 +491,10 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
char* pMSLCode = (char*)(uintptr_t(pCreateInfo->pCode) + hdrSize);
size_t mslCodeLen = codeSize - hdrSize;

uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
codeHash = mvkHash(&magicNum);
codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime);

SPIRVToMSLConversionResult conversionResult;
conversionResult.msl = pMSLCode;
@ -501,23 +507,23 @@ MVKShaderModule::MVKShaderModule(MVKDevice* device,
char* pMSLCode = (char*)(uintptr_t(pCreateInfo->pCode) + hdrSize);
size_t mslCodeLen = codeSize - hdrSize;

uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
codeHash = mvkHash(&magicNum);
codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime);

_directMSLLibrary = new MVKShaderLibrary(this, (void*)(pMSLCode), mslCodeLen);

break;
}
default: // Could be GLSL source code
if (_device->_enabledExtensions.vk_NV_glsl_shader.enabled) {
if (getEnabledExtensions().vk_NV_glsl_shader.enabled) {
const char* pGLSL = (char*)pCreateInfo->pCode;
size_t glslLen = codeSize - 1;

uint64_t startTime = _device->getPerformanceTimestamp();
uint64_t startTime = getPerformanceTimestamp();
codeHash = mvkHash(pGLSL, codeSize);
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime);
addPerformanceInterval(getPerformanceStats().shaderCompilation.hashShaderCode, startTime);

_glslConverter.setGLSL(pGLSL, glslLen);
} else {
@ -542,9 +548,9 @@ id<MTLLibrary> MVKShaderLibraryCompiler::newMTLLibrary(NSString* mslSourceCode,
unique_lock<mutex> lock(_completionLock);

compile(lock, ^{
auto mtlDev = _owner->getMTLDevice();
auto mtlDev = getMTLDevice();
@synchronized (mtlDev) {
auto mtlCompileOptions = _owner->getDevice()->getMTLCompileOptions(shaderConversionResults.entryPoint.supportsFastMath,
auto mtlCompileOptions = getDevice()->getMTLCompileOptions(shaderConversionResults.entryPoint.supportsFastMath,
shaderConversionResults.isPositionInvariant);
MVKLogInfoIf(getMVKConfig().debugMode, "Compiling Metal shader%s.", mtlCompileOptions.fastMathEnabled ? " with FastMath enabled" : "");
[mtlDev newLibraryWithSource: mslSourceCode
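One detail worth noting in the newMTLLibrary() hunk above: the compile still happens inside @synchronized (mtlDev), so switching to the mixin's getMTLDevice() keeps serializing library creation on the same MTLDevice object. A standalone Objective-C++ illustration of that locking shape (not MoltenVK code; newLibraryWithSource:options:error: is the standard synchronous Metal API):

#import <Metal/Metal.h>

// Serializes shader compilation per device: @synchronized locks on the
// MTLDevice object itself, so two threads compiling against the same
// device take turns, while separate devices can proceed in parallel.
static id<MTLLibrary> compileSerialized(id<MTLDevice> mtlDev, NSString* src, NSError** err) {
	@synchronized (mtlDev) {
		return [mtlDev newLibraryWithSource: src options: nil error: err];
	}
}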
@ -171,7 +171,7 @@ void MVKSwapchain::markFrameInterval() {

if (prevFrameTime == 0) { return; } // First frame starts at first presentation

_device->updateActivityPerformance(_device->_performanceStatistics.queue.frameInterval, mvkGetElapsedMilliseconds(prevFrameTime, _lastFrameTime));
addPerformanceInterval(getPerformanceStats().queue.frameInterval, prevFrameTime, _lastFrameTime, true);

auto& mvkCfg = getMVKConfig();
bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT;
@ -179,7 +179,7 @@ void MVKSwapchain::markFrameInterval() {
_currentPerfLogFrameCount = 0;
MVKLogInfo("Performance statistics reporting every: %d frames, avg FPS: %.2f, elapsed time: %.3f seconds:",
mvkCfg.performanceLoggingFrameCount,
(1000.0 / _device->_performanceStatistics.queue.frameInterval.average),
(1000.0 / getPerformanceStats().queue.frameInterval.average),
mvkGetElapsedMilliseconds() / 1000.0);
if (getMVKConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) {
_device->logPerformanceSummary();
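The avg FPS figure in the log line above is plain arithmetic on the tracked interval: the tracker's average is milliseconds per frame, so frames per second is simply 1000 divided by that average. A tiny worked example of the same computation:

#include <cstdio>

int main() {
	double avgFrameIntervalMs = 16.7;	// tracked average, in ms per frame
	double avgFPS = 1000.0 / avgFrameIntervalMs;	// about 59.88 FPS, as logged above
	std::printf("avg FPS: %.2f\n", avgFPS);
	return 0;
}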
@ -419,9 +419,10 @@ MVKSwapchain::MVKSwapchain(MVKDevice* device, const VkSwapchainCreateInfoKHR* pC
}
}

auto& mtlFeats = getMetalFeatures();
uint32_t imgCnt = mvkClamp(pCreateInfo->minImageCount,
_device->_pMetalFeatures->minSwapchainImageCount,
_device->_pMetalFeatures->maxSwapchainImageCount);
mtlFeats.minSwapchainImageCount,
mtlFeats.maxSwapchainImageCount);
initCAMetalLayer(pCreateInfo, pScalingInfo, imgCnt);
initSurfaceImages(pCreateInfo, imgCnt); // After initCAMetalLayer()
}
@ -605,7 +605,7 @@ VkResult mvkWaitSemaphores(MVKDevice* device,
*
* Instances of this class are one-shot, and can only be used for a single compilation.
*/
class MVKMetalCompiler : public MVKBaseObject {
class MVKMetalCompiler : public MVKBaseDeviceObject {

public:

@ -618,7 +618,7 @@ public:

#pragma mark Construction

MVKMetalCompiler(MVKVulkanAPIDeviceObject* owner) : _owner(owner) {}
MVKMetalCompiler(MVKVulkanAPIDeviceObject* owner) : MVKBaseDeviceObject(owner->getDevice()), _owner(owner) {}

~MVKMetalCompiler() override;

@ -141,10 +141,10 @@ MVKSemaphoreMTLEvent::MVKSemaphoreMTLEvent(MVKDevice* device,
_mtlEvent = [pImportInfo->mtlSharedEvent retain]; // retained
_mtlEventValue = pImportInfo->mtlSharedEvent.signaledValue + 1;
} else if (pExportInfo && pExportInfo->exportObjectType == VK_EXPORT_METAL_OBJECT_TYPE_METAL_SHARED_EVENT_BIT_EXT) {
_mtlEvent = [device->getMTLDevice() newSharedEvent]; //retained
_mtlEvent = [getMTLDevice() newSharedEvent]; //retained
_mtlEventValue = ((id<MTLSharedEvent>)_mtlEvent).signaledValue + 1;
} else {
_mtlEvent = [device->getMTLDevice() newEvent]; //retained
_mtlEvent = [getMTLDevice() newEvent]; //retained
_mtlEventValue = 1;
}
}
@ -241,7 +241,7 @@ MVKTimelineSemaphoreMTLEvent::MVKTimelineSemaphoreMTLEvent(MVKDevice* device,
// Import or create a Metal event
_mtlEvent = (pImportInfo && pImportInfo->mtlSharedEvent
? [pImportInfo->mtlSharedEvent retain]
: [device->getMTLDevice() newSharedEvent]); //retained
: [getMTLDevice() newSharedEvent]); //retained

if (pTypeCreateInfo) {
_mtlEvent.signaledValue = pTypeCreateInfo->initialValue;
@ -423,7 +423,7 @@ MVKEventNative::MVKEventNative(MVKDevice* device,
// Import or create a Metal event
_mtlEvent = (pImportInfo
? [pImportInfo->mtlSharedEvent retain]
: [device->getMTLDevice() newSharedEvent]); //retained
: [getMTLDevice() newSharedEvent]); //retained
}

MVKEventNative::~MVKEventNative() {
@ -570,8 +570,7 @@ VkResult mvkWaitSemaphores(MVKDevice* device,
void MVKMetalCompiler::compile(unique_lock<mutex>& lock, dispatch_block_t block) {
MVKAssert( _startTime == 0, "%s compile occurred already in this instance. Instances of %s should only be used for a single compile activity.", _compilerType.c_str(), getClassName().c_str());

MVKDevice* mvkDev = _owner->getDevice();
_startTime = mvkDev->getPerformanceTimestamp();
_startTime = getPerformanceTimestamp();

dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ @autoreleasepool { block(); } });

@ -588,7 +587,7 @@ void MVKMetalCompiler::compile(unique_lock<mutex>& lock, dispatch_block_t block)

if (_compileError) { handleError(); }

mvkDev->addPerformanceInterval(*_pPerformanceTracker, _startTime);
addPerformanceInterval(*_pPerformanceTracker, _startTime);
}

void MVKMetalCompiler::handleError() {
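The compile() hunks above touch only the timing calls, but the surrounding flow is worth spelling out: the compiler stamps _startTime (which also serves as the one-shot guard asserted at entry), dispatches the block onto a global queue, and the caller's lock/condition pair waits for completion before the interval is recorded. A hedged Objective-C++ sketch of that dispatch-and-wait shape; the member names and the completion signaling here are assumptions, not the real MVKMetalCompiler internals:

#import <dispatch/dispatch.h>
#include <condition_variable>
#include <mutex>

// One-shot async compile: run the block on a background queue, then
// block the calling thread until the work signals completion.
struct OneShotCompileSketch {
	std::mutex completionLock;
	std::condition_variable completionCV;
	bool compileDone = false;

	void compile(dispatch_block_t block) {
		dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
			@autoreleasepool { block(); }
			{ std::lock_guard<std::mutex> g(completionLock); compileDone = true; }
			completionCV.notify_all();
		});
		std::unique_lock<std::mutex> lock(completionLock);
		completionCV.wait(lock, [this]{ return compileDone; });
	}
};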