Ensure buffers available for buffer addresses in push constants.

Fix an issue where push constants contain a buffer address, but no descriptor
resources are encoded, resulting in the addressed buffer not being resident on the GPU.

- Add MVKGPUAddressableBuffersCommandEncoderState to track when the
  GPU-addressable buffer usage needs to be encoded, and only encode
  them once per Metal renderpass.
- Set MVKGPUAddressableBuffersCommandEncoderState dirty whenever
  a buffer is bound for a descriptor or a push constant.

Unrelated changes:
- MVKCommandEncoder::finalizeDrawState() reorders the encoding of encoder
  states so that resource and push-constant encoder states are encoded together.
- Move getGraphicsPipeline() and getComputePipeline() to MVKCommandEncoder.
- MVKDevice: remove preallocation on _resources and _gpuAddressableBuffers.
- Align conditions for VkPhysicalDeviceVulkan12Features::bufferDeviceAddress
  to those for VK_KHR_buffer_device_address/VK_EXT_buffer_device_address.
This commit is contained in:
Bill Hollings 2024-01-13 13:38:06 -05:00
parent 41ed2bec36
commit ccf68f4aac
9 changed files with 96 additions and 28 deletions

View File

@ -19,6 +19,7 @@ MoltenVK 1.2.8
Released TBD
- Fix potential crash when using multi-planar images.
- Ensure buffers available for buffer addresses in push constants.

View File

@ -46,7 +46,7 @@ void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) {
MTLRegion mtlThreadgroupCount = MTLRegionMake3D(_baseGroupX, _baseGroupY, _baseGroupZ, _groupCountX, _groupCountY, _groupCountZ);
cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal
id<MTLComputeCommandEncoder> mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
auto* pipeline = cmdEncoder->_computePipelineState.getComputePipeline();
auto* pipeline = cmdEncoder->getComputePipeline();
if (pipeline->allowsDispatchBase()) {
if ([mtlEncoder respondsToSelector: @selector(setStageInRegion:)]) {
// We'll use the stage-input region to pass the base along to the shader.

View File

@ -148,7 +148,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->restartMetalRenderPassIfNeeded();
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
auto* pipeline = cmdEncoder->getGraphicsPipeline();
// Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead.
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@ -372,7 +372,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->restartMetalRenderPassIfNeeded();
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
auto* pipeline = cmdEncoder->getGraphicsPipeline();
// Metal doesn't support triangle fans, so encode it as triangles via an indexed indirect triangles command instead.
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@ -649,7 +649,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->restartMetalRenderPassIfNeeded();
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
auto* pipeline = cmdEncoder->getGraphicsPipeline();
// Metal doesn't support triangle fans, so encode it as indexed indirect triangles instead.
if (pipeline->getVkPrimitiveTopology() == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
@ -1000,7 +1000,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder, const MVKI
MVKIndexMTLBufferBinding ibb = ibbOrig;
MVKIndexMTLBufferBinding ibbTriFan = ibb;
auto* pipeline = cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
auto* pipeline = cmdEncoder->getGraphicsPipeline();
MVKVertexAdjustments vtxAdjmts;
vtxAdjmts.mtlIndexType = ibb.mtlIndexType;

View File

@ -357,6 +357,12 @@ public:
*/
id<MTLCommandEncoder> getMTLEncoder();
/** Returns the graphics pipeline. */
MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)_graphicsPipelineState.getPipeline(); }
/** Returns the compute pipeline. */
MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)_computePipelineState.getPipeline(); }
/** Returns the push constants associated with the specified shader stage. */
MVKPushConstantsCommandEncoderState* getPushConstants(VkShaderStageFlagBits shaderStage);
@ -448,6 +454,9 @@ public:
/** Tracks the current compute resources state of the encoder. */
MVKComputeResourcesCommandEncoderState _computeResourcesState;
/** Tracks whether the GPU-addressable buffers need to be used. */
MVKGPUAddressableBuffersCommandEncoderState _gpuAddressableBuffersState;
/** Tracks the current depth stencil state of the encoder. */
MVKDepthStencilCommandEncoderState _depthStencilState;

View File

@ -709,14 +709,15 @@ void MVKCommandEncoder::finalizeDrawState(MVKGraphicsStage stage) {
// Must happen before switching encoders.
encodeStoreActions(true);
}
_graphicsPipelineState.encode(stage); // Must do first..it sets others
_graphicsResourcesState.encode(stage); // Before push constants, to allow them to override.
_graphicsPipelineState.encode(stage); // Must do first..it sets others
_depthStencilState.encode(stage);
_renderingState.encode(stage);
_graphicsResourcesState.encode(stage); // Before push constants, to allow them to override.
_vertexPushConstants.encode(stage);
_tessCtlPushConstants.encode(stage);
_tessEvalPushConstants.encode(stage);
_fragmentPushConstants.encode(stage);
_gpuAddressableBuffersState.encode(stage); // After resources and push constants
_renderingState.encode(stage);
_occlusionQueryState.encode(stage);
}
@ -771,9 +772,10 @@ void MVKCommandEncoder::beginMetalComputeEncoding(MVKCommandUse cmdUse) {
}
void MVKCommandEncoder::finalizeDispatchState() {
_computePipelineState.encode(); // Must do first..it sets others
_computeResourcesState.encode(); // Before push constants, to allow them to override.
_computePipelineState.encode(); // Must do first..it sets others
_computeResourcesState.encode(); // Before push constants, to allow them to override.
_computePushConstants.encode();
_gpuAddressableBuffersState.encode(); // After resources and push constants
}
void MVKCommandEncoder::endRendering() {
@ -1142,6 +1144,7 @@ MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer,
_graphicsResourcesState(this),
_computePipelineState(this),
_computeResourcesState(this),
_gpuAddressableBuffersState(this),
_depthStencilState(this),
_renderingState(this),
_occlusionQueryState(this),

View File

@ -129,8 +129,6 @@ public:
void bindPipeline(MVKPipeline* pipeline);
MVKPipeline* getPipeline();
MVKGraphicsPipeline* getGraphicsPipeline() { return (MVKGraphicsPipeline*)getPipeline(); }
MVKComputePipeline* getComputePipeline() { return (MVKComputePipeline*)getPipeline(); }
MVKPipelineCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
@ -641,6 +639,26 @@ protected:
};
#pragma mark -
#pragma mark MVKGPUAddressableBuffersCommandEncoderState
/**
 * Tracks whether GPU-addressable buffers may be needed by any shader stage,
 * so their residency can be encoded once per Metal renderpass, after the
 * resource and push-constant encoder states have been encoded.
 */
class MVKGPUAddressableBuffersCommandEncoderState : public MVKCommandEncoderState {
public:
/** Marks that GPU addressable buffers may be needed in the specified shader stage. */
void useGPUAddressableBuffersInStage(MVKShaderStage shaderStage);
/** Constructs this instance for the specified command encoder. */
MVKGPUAddressableBuffersCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
protected:
void encodeImpl(uint32_t stage) override;
// Per-stage flags; an entry is set when that stage may reference a buffer by GPU address.
bool _usageStages[kMVKShaderStageCount] = {};
};
#pragma mark -
#pragma mark MVKOcclusionQueryCommandEncoderState

View File

@ -36,7 +36,7 @@ MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEn
MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); }
bool MVKCommandEncoderState::isDynamicState(MVKRenderStateType state) {
auto* gpl = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
auto* gpl = _cmdEncoder->getGraphicsPipeline();
return !gpl || gpl->isDynamicState(state);
}
@ -100,12 +100,14 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex, true);
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageVertex);
_isDirty = false; // Okay, I changed the encoder
} else if (!isTessellating() && stage == kMVKGraphicsStageRasterization) {
_cmdEncoder->setVertexBytes(_cmdEncoder->_mtlRenderEncoder,
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex, true);
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageVertex);
_isDirty = false; // Okay, I changed the encoder
}
break;
@ -115,6 +117,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex, true);
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageTessCtl);
_isDirty = false; // Okay, I changed the encoder
}
break;
@ -124,6 +127,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex, true);
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageTessEval);
_isDirty = false; // Okay, I changed the encoder
}
break;
@ -133,6 +137,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex, true);
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageFragment);
_isDirty = false; // Okay, I changed the encoder
}
break;
@ -141,6 +146,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex, true);
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageCompute);
_isDirty = false; // Okay, I changed the encoder
break;
default:
@ -150,7 +156,7 @@ void MVKPushConstantsCommandEncoderState::encodeImpl(uint32_t stage) {
}
bool MVKPushConstantsCommandEncoderState::isTessellating() {
auto* gp = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
auto* gp = _cmdEncoder->getGraphicsPipeline();
return gp ? gp->isTessellationPipeline() : false;
}
@ -835,11 +841,6 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag
encodeMetalArgumentBuffer(stage);
MVKPipeline* pipeline = getPipeline();
if (pipeline && pipeline->usesPhysicalStorageBufferAddressesCapability(stage)) {
getDevice()->encodeGPUAddressableBuffers(this, stage);
}
auto& shaderStage = _shaderStageResourceBindings[stage];
if (shaderStage.swizzleBufferBinding.isDirty) {
@ -873,9 +874,15 @@ void MVKGraphicsResourcesCommandEncoderState::encodeBindings(MVKShaderStage stag
bindImplicitBuffer(_cmdEncoder, shaderStage.viewRangeBufferBinding, viewRange.contents());
}
bool wereBufferBindingsDirty = shaderStage.areBufferBindingsDirty;
encodeBinding<MVKMTLBufferBinding>(shaderStage.bufferBindings, shaderStage.areBufferBindingsDirty, bindBuffer);
encodeBinding<MVKMTLTextureBinding>(shaderStage.textureBindings, shaderStage.areTextureBindingsDirty, bindTexture);
encodeBinding<MVKMTLSamplerStateBinding>(shaderStage.samplerStateBindings, shaderStage.areSamplerStateBindingsDirty, bindSampler);
// If any buffers have been bound, mark the GPU addressable buffers as needed.
if (wereBufferBindingsDirty && !shaderStage.areBufferBindingsDirty ) {
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(MVKShaderStage(stage));
}
}
void MVKGraphicsResourcesCommandEncoderState::offsetZeroDivisorVertexBuffers(MVKGraphicsStage stage,
@ -923,7 +930,7 @@ static const NSUInteger MTLAttributeStrideStatic = NSUIntegerMax;
void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
auto* pipeline = _cmdEncoder->_graphicsPipelineState.getGraphicsPipeline();
auto* pipeline = _cmdEncoder->getGraphicsPipeline();
bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle;
bool forTessellation = pipeline->isTessellationPipeline();
bool isDynamicVertexStride = pipeline->isDynamicState(VertexStride);
@ -1181,11 +1188,6 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
encodeMetalArgumentBuffer(kMVKShaderStageCompute);
MVKPipeline* pipeline = getPipeline();
if (pipeline && pipeline->usesPhysicalStorageBufferAddressesCapability(kMVKShaderStageCompute)) {
getDevice()->encodeGPUAddressableBuffers(this, kMVKShaderStageCompute);
}
if (_resourceBindings.swizzleBufferBinding.isDirty) {
for (auto& b : _resourceBindings.textureBindings) {
if (b.isDirty) { updateImplicitBuffer(_resourceBindings.swizzleConstants, b.index, b.swizzle); }
@ -1197,6 +1199,7 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
_resourceBindings.swizzleBufferBinding.index);
} else {
MVKPipeline* pipeline = getPipeline();
bool fullImageViewSwizzle = pipeline ? pipeline->fullImageViewSwizzle() : false;
assertMissingSwizzles(_resourceBindings.needsSwizzle && !fullImageViewSwizzle, "compute", _resourceBindings.textureBindings.contents());
}
@ -1221,6 +1224,7 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
}
bool wereBufferBindingsDirty = _resourceBindings.areBufferBindingsDirty;
encodeBinding<MVKMTLBufferBinding>(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty,
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
if (b.isInline) {
@ -1251,6 +1255,11 @@ void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) setSamplerState: b.mtlSamplerState
atIndex: b.index];
});
// If any buffers have been bound, mark the GPU addressable buffers as needed.
if (wereBufferBindingsDirty && !_resourceBindings.areBufferBindingsDirty ) {
_cmdEncoder->_gpuAddressableBuffersState.useGPUAddressableBuffersInStage(kMVKShaderStageCompute);
}
}
MVKPipeline* MVKComputeResourcesCommandEncoderState::getPipeline() {
@ -1280,6 +1289,34 @@ void MVKComputeResourcesCommandEncoderState::markOverriddenBufferIndexesDirty()
}
#pragma mark -
#pragma mark MVKGPUAddressableBuffersCommandEncoderState
// Marks that GPU-addressable buffers may be needed in the specified shader stage.
// Only takes effect when the currently bound pipeline for that stage actually uses
// the PhysicalStorageBufferAddresses capability; otherwise no encoding is needed.
void MVKGPUAddressableBuffersCommandEncoderState::useGPUAddressableBuffersInStage(MVKShaderStage shaderStage) {
	MVKPipeline* mvkPipe;
	if (shaderStage == kMVKShaderStageCompute) {
		mvkPipe = (MVKPipeline*)_cmdEncoder->getComputePipeline();
	} else {
		mvkPipe = (MVKPipeline*)_cmdEncoder->getGraphicsPipeline();
	}
	if ( !mvkPipe || !mvkPipe->usesPhysicalStorageBufferAddressesCapability(shaderStage) ) { return; }

	_usageStages[shaderStage] = true;
	markDirty();
}
// Encodes residency of the GPU-addressable buffers for each stage that was flagged,
// then clears the per-stage flags. The stage parameter is not used by this state.
void MVKGPUAddressableBuffersCommandEncoderState::encodeImpl(uint32_t stage) {
	MVKDevice* mvkDev = getDevice();
	for (uint32_t stgIdx = kMVKShaderStageVertex; stgIdx < kMVKShaderStageCount; stgIdx++) {
		auto stg = MVKShaderStage(stgIdx);
		if ( !_usageStages[stg] ) { continue; }

		// Compute uses its own resources state; all graphics stages share one.
		MVKResourcesCommandEncoderState* rezState;
		if (stg == kMVKShaderStageCompute) {
			rezState = (MVKResourcesCommandEncoderState*)&_cmdEncoder->_computeResourcesState;
		} else {
			rezState = (MVKResourcesCommandEncoderState*)&_cmdEncoder->_graphicsResourcesState;
		}
		mvkDev->encodeGPUAddressableBuffers(rezState, stg);
	}
	mvkClear(_usageStages);
}
#pragma mark -
#pragma mark MVKOcclusionQueryCommandEncoderState

View File

@ -908,8 +908,8 @@ protected:
MVKPhysicalDevice* _physicalDevice = nullptr;
MVKCommandResourceFactory* _commandResourceFactory = nullptr;
MVKSmallVector<MVKSmallVector<MVKQueue*, kMVKQueueCountPerQueueFamily>, kMVKQueueFamilyCount> _queuesByQueueFamilyIndex;
MVKSmallVector<MVKResource*, 256> _resources;
MVKSmallVector<MVKBuffer*, 8> _gpuAddressableBuffers;
MVKSmallVector<MVKResource*> _resources;
MVKSmallVector<MVKBuffer*> _gpuAddressableBuffers;
MVKSmallVector<MVKPrivateDataSlot*> _privateDataSlots;
MVKSmallVector<bool> _privateDataSlotsAvailability;
MVKSmallVector<MVKSemaphoreImpl*> _awaitingSemaphores;

View File

@ -153,7 +153,7 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
.separateDepthStencilLayouts = true,
.hostQueryReset = true,
.timelineSemaphore = true,
.bufferDeviceAddress = mvkOSVersionIsAtLeast(12.05, 16.0, 1.0),
.bufferDeviceAddress = mvkOSVersionIsAtLeast(13.0, 16.0, 1.0),
.bufferDeviceAddressCaptureReplay = false,
.bufferDeviceAddressMultiDevice = false,
.vulkanMemoryModel = false,