Reinstate memory barriers on non-Apple GPUs.

Ensure non-Apple GPUs enable memory barriers.
A previous commit inadvertently disabled GPU memory barriers.

Change the memory barrier tests from a build-time test for Apple Silicon to a
runtime test for an Apple GPU, to accommodate running under Rosetta 2, and
refactor the tests for Apple Silicon and OS version on some macOS GPU feature settings.
This commit is contained in:
Bill Hollings 2022-03-08 16:57:15 -05:00
parent c09bcd0972
commit 6327b767e0
5 changed files with 41 additions and 40 deletions

View File

@ -18,6 +18,7 @@ MoltenVK 1.1.9
Released TBD
- Reinstate memory barriers on non-Apple GPUs, which were inadvertently disabled in an earlier update.
- Update *glslang* version, to use `python3` in *glslang* scripts, to replace missing `python` on *macOS 12.3*.
- Fix alignment of outputs and inputs between shader stages when using nested structures.
- Fix issue where the depth component of a stencil-only renderpass attachment was incorrectly attempting to be stored.

View File

@ -113,14 +113,13 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
}
#endif
#if MVK_APPLE_SILICON
// Apple GPUs do not support renderpass barriers, and do not support rendering/writing
// to an attachment and then reading from that attachment within a single renderpass.
// So, in the case where we are inside a Metal renderpass, we need to split those activities
// into separate Metal renderpasses. Since this is a potentially expensive operation,
// verify that at least one attachment is being used both as an input and render attachment
// by checking for a VK_IMAGE_LAYOUT_GENERAL layout.
if (cmdEncoder->_mtlRenderEncoder) {
if (cmdEncoder->_mtlRenderEncoder && cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) {
bool needsRenderpassRestart = false;
for (auto& b : _barriers) {
if (b.type == MVKPipelineBarrier::Image && b.newLayout == VK_IMAGE_LAYOUT_GENERAL) {
@ -133,7 +132,6 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
}
}
#endif
MVKDevice* mvkDvc = cmdEncoder->getDevice();
MVKCommandUse cmdUse = kMVKCommandUsePipelineBarrier;

View File

@ -1310,22 +1310,22 @@ void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangle vertexStart: 0 vertexCount: vtxCnt];
[mtlRendEnc popDebugGroup];
#if MVK_APPLE_SILICON
// Apple GPUs do not support rendering/writing to an attachment and then reading from
// that attachment within a single Metal renderpass. So, if any of the attachments just
// cleared is an input attachment, we need to restart into separate Metal renderpasses.
bool needsRenderpassRestart = false;
for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) {
needsRenderpassRestart = true;
break;
if (cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) {
bool needsRenderpassRestart = false;
for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) {
needsRenderpassRestart = true;
break;
}
}
if (needsRenderpassRestart) {
cmdEncoder->encodeStoreActions(true);
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
}
}
if (needsRenderpassRestart) {
cmdEncoder->encodeStoreActions(true);
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
}
#endif
// Return to the previous rendering state on the next render activity
cmdEncoder->_graphicsPipelineState.markDirty();

View File

@ -1478,38 +1478,42 @@ void MVKPhysicalDevice::initMetalFeatures() {
}
#if MVK_XCODE_12
if ( mvkOSVersionIsAtLeast(10.16) ) {
if ( mvkOSVersionIsAtLeast(11.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_3;
if (supportsMTLGPUFamily(Apple5)) {
// This is an Apple GPU--treat it accordingly.
_metalFeatures.mtlCopyBufferAlignment = 1;
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
_metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount;
_metalFeatures.postDepthCoverage = true;
_metalFeatures.renderLinearTextures = true;
_metalFeatures.tileBasedDeferredRendering = true;
if (supportsMTLGPUFamily(Apple6)) {
_metalFeatures.astcHDRTextures = true;
}
if (supportsMTLGPUFamily(Apple7)) {
_metalFeatures.maxQueryBufferSize = (256 * KIBI);
}
}
} else
}
#endif
{
#if MVK_XCODE_13
if ( mvkOSVersionIsAtLeast(12.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_4;
}
#endif
// This is an Apple GPU--treat it accordingly.
if (supportsMTLGPUFamily(Apple1)) {
_metalFeatures.mtlCopyBufferAlignment = 1;
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
_metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount;
_metalFeatures.postDepthCoverage = true;
_metalFeatures.renderLinearTextures = true;
_metalFeatures.tileBasedDeferredRendering = true;
if (supportsMTLGPUFamily(Apple6)) {
_metalFeatures.astcHDRTextures = true;
}
if (supportsMTLGPUFamily(Apple7)) {
_metalFeatures.maxQueryBufferSize = (256 * KIBI);
}
}
// Don't use barriers in render passes on Apple GPUs. Apple GPUs don't support them,
// and in fact Metal's validation layer will complain if you try to use them.
if ( !supportsMTLGPUFamily(Apple1) ) {
if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
_metalFeatures.memoryBarriers = true;
}
_metalFeatures.textureBarriers = true;
}
#if MVK_XCODE_13
if ( mvkOSVersionIsAtLeast(12.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_4;
}
#endif
#endif
// Note the selector name, which is different from the property name.

View File

@ -809,14 +809,12 @@ MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass,
#pragma mark MVKRenderPass
// Returns the render area granularity reported for this render pass.
// When MVK_APPLE_SILICON is enabled at build time and the device's Metal features flag
// tileBasedDeferredRendering, a fixed 32x32 tile is reported; every other case reports 1x1.
// NOTE(review): this listing is a rendered diff, and the #if/#endif pair below looks like
// the two lines this commit removes (the hunk shrinks from 14 to 12 lines) — confirm
// against the repository before relying on the build-time guard.
VkExtent2D MVKRenderPass::getRenderAreaGranularity() {
#if MVK_APPLE_SILICON
if (_device->_pMetalFeatures->tileBasedDeferredRendering) {
// This is the tile area.
// FIXME: We really ought to use MTLRenderCommandEncoder.tile{Width,Height}, but that requires
// creating a command buffer.
return { 32, 32 };
}
#endif
// Fallback when the tile-based check above does not apply.
return { 1, 1 };
}