Reinstate memory barriers on non-Apple GPUs.
Ensure non-Apple GPUs enable memory barriers. A previous commit inadvertently disabled GPU memory barriers. Change the tests for memory barriers to a runtime test for an Apple GPU, instead of a build-time test for Apple Silicon, to accommodate running under Rosetta 2, and refactor the tests for Apple Silicon and OS version on some macOS GPU feature settings.
This commit is contained in:
parent
c09bcd0972
commit
6327b767e0
@ -18,6 +18,7 @@ MoltenVK 1.1.9
|
||||
|
||||
Released TBD
|
||||
|
||||
- Reinstate memory barriers on non-Apple GPUs, which were inadvertently disabled in an earlier update.
|
||||
- Update *glslang* version, to use `python3` in *glslang* scripts, to replace missing `python` on *macOS 12.3*.
|
||||
- Fix alignment of outputs and inputs between shader stages when using nested structures.
|
||||
- Fix issue where the depth component of a stencil-only renderpass attachment was incorrectly attempting to be stored.
|
||||
|
@ -113,14 +113,13 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if MVK_APPLE_SILICON
|
||||
// Apple GPUs do not support renderpass barriers, and do not support rendering/writing
|
||||
// to an attachment and then reading from that attachment within a single renderpass.
|
||||
// So, in the case where we are inside a Metal renderpass, we need to split those activities
|
||||
// into separate Metal renderpasses. Since this is a potentially expensive operation,
|
||||
// verify that at least one attachment is being used both as an input and render attachment
|
||||
// by checking for a VK_IMAGE_LAYOUT_GENERAL layout.
|
||||
if (cmdEncoder->_mtlRenderEncoder) {
|
||||
if (cmdEncoder->_mtlRenderEncoder && cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) {
|
||||
bool needsRenderpassRestart = false;
|
||||
for (auto& b : _barriers) {
|
||||
if (b.type == MVKPipelineBarrier::Image && b.newLayout == VK_IMAGE_LAYOUT_GENERAL) {
|
||||
@ -133,7 +132,6 @@ void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
MVKDevice* mvkDvc = cmdEncoder->getDevice();
|
||||
MVKCommandUse cmdUse = kMVKCommandUsePipelineBarrier;
|
||||
|
@ -1310,22 +1310,22 @@ void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangle vertexStart: 0 vertexCount: vtxCnt];
|
||||
[mtlRendEnc popDebugGroup];
|
||||
|
||||
#if MVK_APPLE_SILICON
|
||||
// Apple GPUs do not support rendering/writing to an attachment and then reading from
|
||||
// that attachment within a single Metal renderpass. So, if any of the attachments just
|
||||
// cleared is an input attachment, we need to restart into separate Metal renderpasses.
|
||||
bool needsRenderpassRestart = false;
|
||||
for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
|
||||
if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) {
|
||||
needsRenderpassRestart = true;
|
||||
break;
|
||||
if (cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) {
|
||||
bool needsRenderpassRestart = false;
|
||||
for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
|
||||
if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) {
|
||||
needsRenderpassRestart = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (needsRenderpassRestart) {
|
||||
cmdEncoder->encodeStoreActions(true);
|
||||
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
|
||||
}
|
||||
}
|
||||
if (needsRenderpassRestart) {
|
||||
cmdEncoder->encodeStoreActions(true);
|
||||
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Return to the previous rendering state on the next render activity
|
||||
cmdEncoder->_graphicsPipelineState.markDirty();
|
||||
|
@ -1478,38 +1478,42 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
}
|
||||
|
||||
#if MVK_XCODE_12
|
||||
if ( mvkOSVersionIsAtLeast(10.16) ) {
|
||||
if ( mvkOSVersionIsAtLeast(11.0) ) {
|
||||
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_3;
|
||||
if (supportsMTLGPUFamily(Apple5)) {
|
||||
// This is an Apple GPU--treat it accordingly.
|
||||
_metalFeatures.mtlCopyBufferAlignment = 1;
|
||||
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
|
||||
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
|
||||
_metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount;
|
||||
_metalFeatures.postDepthCoverage = true;
|
||||
_metalFeatures.renderLinearTextures = true;
|
||||
_metalFeatures.tileBasedDeferredRendering = true;
|
||||
if (supportsMTLGPUFamily(Apple6)) {
|
||||
_metalFeatures.astcHDRTextures = true;
|
||||
}
|
||||
if (supportsMTLGPUFamily(Apple7)) {
|
||||
_metalFeatures.maxQueryBufferSize = (256 * KIBI);
|
||||
}
|
||||
}
|
||||
} else
|
||||
}
|
||||
#endif
|
||||
{
|
||||
#if MVK_XCODE_13
|
||||
if ( mvkOSVersionIsAtLeast(12.0) ) {
|
||||
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_4;
|
||||
}
|
||||
#endif
|
||||
|
||||
// This is an Apple GPU--treat it accordingly.
|
||||
if (supportsMTLGPUFamily(Apple1)) {
|
||||
_metalFeatures.mtlCopyBufferAlignment = 1;
|
||||
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
|
||||
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
|
||||
_metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount;
|
||||
_metalFeatures.postDepthCoverage = true;
|
||||
_metalFeatures.renderLinearTextures = true;
|
||||
_metalFeatures.tileBasedDeferredRendering = true;
|
||||
if (supportsMTLGPUFamily(Apple6)) {
|
||||
_metalFeatures.astcHDRTextures = true;
|
||||
}
|
||||
if (supportsMTLGPUFamily(Apple7)) {
|
||||
_metalFeatures.maxQueryBufferSize = (256 * KIBI);
|
||||
}
|
||||
}
|
||||
|
||||
// Don't use barriers in render passes on Apple GPUs. Apple GPUs don't support them,
|
||||
// and in fact Metal's validation layer will complain if you try to use them.
|
||||
if ( !supportsMTLGPUFamily(Apple1) ) {
|
||||
if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
|
||||
_metalFeatures.memoryBarriers = true;
|
||||
}
|
||||
_metalFeatures.textureBarriers = true;
|
||||
}
|
||||
|
||||
#if MVK_XCODE_13
|
||||
if ( mvkOSVersionIsAtLeast(12.0) ) {
|
||||
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_4;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Note the selector name, which is different from the property name.
|
||||
|
@ -809,14 +809,12 @@ MVKRenderPassAttachment::MVKRenderPassAttachment(MVKRenderPass* renderPass,
|
||||
#pragma mark MVKRenderPass
|
||||
|
||||
VkExtent2D MVKRenderPass::getRenderAreaGranularity() {
|
||||
#if MVK_APPLE_SILICON
|
||||
if (_device->_pMetalFeatures->tileBasedDeferredRendering) {
|
||||
// This is the tile area.
|
||||
// FIXME: We really ought to use MTLRenderCommandEncoder.tile{Width,Height}, but that requires
|
||||
// creating a command buffer.
|
||||
return { 32, 32 };
|
||||
}
|
||||
#endif
|
||||
return { 1, 1 };
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user