Merge branch 'master' of https://github.com/billhollings/MoltenVK into fastmath

2021-01-29 17:30:23 -05:00 · 2021-01-29 17:30:23 -05:00 · 4083dd1229
commit 4083dd1229
parent 7ce4e0ccbb 05fe007de8
15 changed files with 167 additions and 52 deletions
--- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
@ -92,7 +92,7 @@ void MVKCmdWriteTimestamp::encode(MVKCommandEncoder* cmdEncoder) {
    uint32_t query = _query;
    if (cmdEncoder->getMultiviewPassIndex() > 0)
        query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1);
-    cmdEncoder->markTimestamp(_queryPool, query);
+    _queryPool->endQuery(query, cmdEncoder);
 }


@ -112,6 +112,7 @@ VkResult MVKCmdResetQueryPool::setContent(MVKCommandBuffer* cmdBuff,
 }

 void MVKCmdResetQueryPool::encode(MVKCommandEncoder* cmdEncoder) {
+    cmdEncoder->resetQueries(_queryPool, _query, _queryCount);
    _queryPool->resetResults(_query, _queryCount, cmdEncoder);
 }

--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@ -92,10 +92,9 @@ public:
     * Metal requires that a visibility buffer is established when a render pass is created, 
     * but Vulkan permits it to be set during a render pass. When the first occlusion query
     * command is added, it sets this value so that it can be applied when the first renderpass
-     * is begun. The execution of subsequent occlusion query commands may change the visibility
-     * buffer during command execution, and begin a new Metal renderpass.
+     * is begun.
     */
-    id<MTLBuffer> _initialVisibilityResultMTLBuffer;
+    bool _needsVisibilityResultMTLBuffer;

 	/** Called when a MVKCmdExecuteCommands is added to this command buffer. */
 	void recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers);
@ -387,6 +386,9 @@ public:
    /** Marks a timestamp for the specified query. */
    void markTimestamp(MVKQueryPool* pQueryPool, uint32_t query);

+    /** Reset a range of queries. */
+    void resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount);
+
 #pragma mark Dynamic encoding state accessed directly

    /** A reference to the Metal features supported by the device. */
@ -413,6 +415,9 @@ public:
 	/** The current Metal render encoder. */
 	id<MTLRenderCommandEncoder> _mtlRenderEncoder;

+	/** The buffer used to hold occlusion query results in this render pass. */
+	id<MTLBuffer> _visibilityResultMTLBuffer;
+
    /** Tracks the current graphics pipeline bound to the encoder. */
    MVKPipelineCommandEncoderState _graphicsPipelineState;

@ -461,7 +466,7 @@ public:
 	MVKCommandEncoder(MVKCommandBuffer* cmdBuffer);

 protected:
-    void addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query);
+    void addActivatedQueries(MVKQueryPool* pQueryPool, uint32_t query, uint32_t queryCount);
    void finishQueries();
 	void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
 	void clearRenderArea();
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@ -74,7 +74,7 @@ VkResult MVKCommandBuffer::reset(VkCommandBufferResetFlags flags) {
 	_wasExecuted = false;
 	_isExecutingNonConcurrently.clear();
 	_commandCount = 0;
-	_initialVisibilityResultMTLBuffer = nil;		// not retained
+	_needsVisibilityResultMTLBuffer = false;
 	_lastTessellationPipeline = nullptr;
 	_lastMultiviewSubpass = nullptr;
 	setConfigurationResult(VK_NOT_READY);
@ -198,10 +198,10 @@ MVKCommandBuffer::~MVKCommandBuffer() {
 // found among any of the secondary command buffers, to support the case where a render pass is started in
 // the primary command buffer but the visibility query is started inside one of the secondary command buffers.
 void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers) {
-	if (_initialVisibilityResultMTLBuffer == nil) {
+	if (!_needsVisibilityResultMTLBuffer) {
 		for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) {
-			if (cmdBuff->_initialVisibilityResultMTLBuffer) {
-				_initialVisibilityResultMTLBuffer = cmdBuff->_initialVisibilityResultMTLBuffer;
+			if (cmdBuff->_needsVisibilityResultMTLBuffer) {
+				_needsVisibilityResultMTLBuffer = true;
 				break;
 			}
 		}
@ -334,7 +334,21 @@ void MVKCommandEncoder::beginMetalRenderPass(bool loadOverride) {

    MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor];
    getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
-    mtlRPDesc.visibilityResultBuffer = _occlusionQueryState.getVisibilityResultMTLBuffer();
+    if (_occlusionQueryState.getNeedsVisibilityResultMTLBuffer()) {
+        if (!_visibilityResultMTLBuffer) {
+            // Unfortunately, the temp buffer mechanism tends to allocate large buffers and return offsets into them.
+            // This won't work with visibility buffers, particularly if the offset is greater than the maximum supported
+            // by the device. So we can't use that.
+            // Use a local variable to make sure it gets copied.
+            id<MTLBuffer> visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: _pDeviceMetalFeatures->maxQueryBufferSize options: MTLResourceStorageModePrivate];    // not retained
+            [visibilityResultMTLBuffer setPurgeableState: MTLPurgeableStateVolatile];
+            [_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
+                [visibilityResultMTLBuffer release];
+            }];
+            _visibilityResultMTLBuffer = visibilityResultMTLBuffer;
+        }
+        mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer;
+    }

    VkExtent2D fbExtent = _framebuffer->getExtent2D();
    mtlRPDesc.renderTargetWidthMVK = max(min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width), 1u);
@ -525,8 +539,24 @@ void MVKCommandEncoder::endRenderpass() {

 void MVKCommandEncoder::endMetalRenderEncoding() {
 //    MVKLogDebugIf(_mtlRenderEncoder, "Render subpass end MTLRenderCommandEncoder.");
+    if (_mtlRenderEncoder == nil) { return; }
+
    [_mtlRenderEncoder endEncoding];
 	_mtlRenderEncoder = nil;    // not retained
+
+    _graphicsPipelineState.endMetalRenderPass();
+    _graphicsResourcesState.endMetalRenderPass();
+    _viewportState.endMetalRenderPass();
+    _scissorState.endMetalRenderPass();
+    _depthBiasState.endMetalRenderPass();
+    _blendColorState.endMetalRenderPass();
+    _vertexPushConstants.endMetalRenderPass();
+    _tessCtlPushConstants.endMetalRenderPass();
+    _tessEvalPushConstants.endMetalRenderPass();
+    _fragmentPushConstants.endMetalRenderPass();
+    _depthStencilState.endMetalRenderPass();
+    _stencilReferenceValueState.endMetalRenderPass();
+    _occlusionQueryState.endMetalRenderPass();
 }

 void MVKCommandEncoder::endCurrentMetalEncoding() {
@ -655,7 +685,11 @@ const MVKMTLBufferAllocation* MVKCommandEncoder::copyToTempMTLBufferAllocation(c

 void MVKCommandEncoder::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
    _occlusionQueryState.beginOcclusionQuery(pQueryPool, query, flags);
-    addActivatedQuery(pQueryPool, query);
+    uint32_t queryCount = 1;
+    if (_renderPass && getSubpass()->isMultiview()) {
+        queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
+    }
+    addActivatedQueries(pQueryPool, query, queryCount);
 }

 void MVKCommandEncoder::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
@ -663,16 +697,21 @@ void MVKCommandEncoder::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uin
 }

 void MVKCommandEncoder::markTimestamp(MVKQueryPool* pQueryPool, uint32_t query) {
-    addActivatedQuery(pQueryPool, query);
+    uint32_t queryCount = 1;
+    if (_renderPass && getSubpass()->isMultiview()) {
+        queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
+    }
+    addActivatedQueries(pQueryPool, query, queryCount);
 }

-// Marks the specified query as activated
-void MVKCommandEncoder::addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query) {
+void MVKCommandEncoder::resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount) {
+    addActivatedQueries(pQueryPool, firstQuery, queryCount);
+}
+
+// Marks the specified queries as activated
+void MVKCommandEncoder::addActivatedQueries(MVKQueryPool* pQueryPool, uint32_t query, uint32_t queryCount) {
    if ( !_pActivatedQueries ) { _pActivatedQueries = new MVKActivatedQueries(); }
-    uint32_t endQuery = query + 1;
-    if (_renderPass && getSubpass()->isMultiview()) {
-        endQuery = query + getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
-    }
+    uint32_t endQuery = query + queryCount;
    while (query < endQuery) {
        (*_pActivatedQueries)[pQueryPool].push_back(query++);
    }
@ -698,6 +737,7 @@ void MVKCommandEncoder::finishQueries() {

 MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
        _cmdBuffer(cmdBuffer),
+        _visibilityResultMTLBuffer(nil),
        _graphicsPipelineState(this),
        _computePipelineState(this),
        _viewportState(this),
@ -772,6 +812,7 @@ NSString* mvkMTLComputeCommandEncoderLabel(MVKCommandUse cmdUse) {
        case kMVKCommandUseTessellationVertexTessCtl: return @"vkCmdDraw (vertex and tess control stages) ComputeEncoder";
        case kMVKCommandUseMultiviewInstanceCountAdjust: return @"vkCmdDraw (multiview instance count adjustment) ComputeEncoder";
        case kMVKCommandUseCopyQueryPoolResults:return @"vkCmdCopyQueryPoolResults ComputeEncoder";
+        case kMVKCommandUseAccumOcclusionQuery: return @"Post-render-pass occlusion query accumulation ComputeEncoder";
        default:                                return @"Unknown Use ComputeEncoder";
    }
 }
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@ -66,6 +66,11 @@ public:
     */
 	virtual void beginMetalRenderPass() { if (_isModified) { markDirty(); } }

+	/**
+	 * Called automatically when a Metal render pass ends.
+	 */
+	virtual void endMetalRenderPass() { }
+
    /**
     * If the content of this instance is dirty, marks this instance as no longer dirty
     * and calls the encodeImpl() function to encode the content onto the Metal encoder.
@ -572,14 +577,16 @@ class MVKOcclusionQueryCommandEncoderState : public MVKCommandEncoderState {

 public:

+	void endMetalRenderPass() override;
+
    /** Begins an occlusion query. */
    void beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags);

    /** Ends an occlusion query. */
    void endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query);

-    /** Returns the MTLBuffer used to hold occlusion query results. */
-    id<MTLBuffer> getVisibilityResultMTLBuffer();
+    /** Returns whether an MTLBuffer is needed to hold occlusion query results. */
+    bool getNeedsVisibilityResultMTLBuffer();

    /** Constructs this instance for the specified command encoder. */
    MVKOcclusionQueryCommandEncoderState(MVKCommandEncoder* cmdEncoder);
@ -588,11 +595,10 @@ protected:
    void encodeImpl(uint32_t) override;
    void resetImpl() override;

-    id<MTLBuffer> _visibilityResultMTLBuffer = nil;
+    bool _needsVisibilityResultMTLBuffer = false;
    MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
    NSUInteger _mtlVisibilityResultOffset = 0;
-	std::unordered_map<MVKQuerySpec, id<MTLRenderCommandEncoder>> _mtlEncodersUsed;
-	MVKQuerySpec _currentQuery;
+	MVKSmallVector<std::pair<MVKQuerySpec, NSUInteger>> _mtlRenderPassQueries;
 };


--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@ -916,18 +916,43 @@ void MVKComputeResourcesCommandEncoderState::resetImpl() {
 #pragma mark -
 #pragma mark MVKOcclusionQueryCommandEncoderState

+void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
+
+    if (_mtlRenderPassQueries.empty()) { return; }  // Nothing to do.
+
+    id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
+    id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
+    [mtlAccumEncoder setComputePipelineState: mtlAccumState];
+    for (auto& query : _mtlRenderPassQueries) {
+        // Accumulate the current results to the query pool's buffer.
+        auto* pQueryPool = (MVKOcclusionQueryPool*)query.first.queryPool;
+        [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
+                            offset: pQueryPool->getVisibilityResultOffset(query.first.query)
+                           atIndex: 0];
+        [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer
+                            offset: query.second
+                           atIndex: 1];
+        [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
+                        threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
+    }
+    _cmdEncoder->endCurrentMetalEncoding();
+    _mtlRenderPassQueries.clear();
+}
+
 void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {

-	_currentQuery.set(pQueryPool, query);
-
-    NSUInteger offset = pQueryPool->getVisibilityResultOffset(query);
+    MVKQuerySpec querySpec;
+    querySpec.set(pQueryPool, query);
+    NSUInteger offset = _mtlRenderPassQueries.empty() ? 0 : _mtlVisibilityResultOffset + 8;
    NSUInteger maxOffset = _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes;
+    offset = min(offset, maxOffset);
+    _mtlRenderPassQueries.push_back(make_pair(querySpec, offset));

    bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
    _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
-    _mtlVisibilityResultOffset = min(offset, maxOffset);
+    _mtlVisibilityResultOffset = offset;

-    _visibilityResultMTLBuffer = pQueryPool->getVisibilityResultMTLBuffer();    // not retained
+    _needsVisibilityResultMTLBuffer = true;

    markDirty();
 }
@ -936,31 +961,17 @@ void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPo
 	reset();
 }

-id<MTLBuffer> MVKOcclusionQueryCommandEncoderState::getVisibilityResultMTLBuffer() { return _visibilityResultMTLBuffer; }
+bool MVKOcclusionQueryCommandEncoderState::getNeedsVisibilityResultMTLBuffer() { return _needsVisibilityResultMTLBuffer; }

 void MVKOcclusionQueryCommandEncoderState::encodeImpl(uint32_t stage) {
 	if (stage != kMVKGraphicsStageRasterization) { return; }

-	// Metal does not allow a query to be run twice on a single render encoder.
-	// If the query is active and was already used for the current Metal render encoder,
-	// log an error and terminate the current query. Remember which MTLRenderEncoder
-	// was used for this query to test for this situation on future queries.
-	if (_mtlVisibilityResultMode != MTLVisibilityResultModeDisabled) {
-		id<MTLRenderCommandEncoder> currMTLRendEnc = _cmdEncoder->_mtlRenderEncoder;
-		if (currMTLRendEnc == _mtlEncodersUsed[_currentQuery]) {
-			MVKLogError("vkCmdBeginQuery(): Metal does not support using the same occlusion query more than once within a single Vulkan render subpass.");
-			resetImpl();
-		}
-		_mtlEncodersUsed[_currentQuery] = currMTLRendEnc;
-	}
-
 	[_cmdEncoder->_mtlRenderEncoder setVisibilityResultMode: _mtlVisibilityResultMode
 													 offset: _mtlVisibilityResultOffset];
 }

 void MVKOcclusionQueryCommandEncoderState::resetImpl() {
-	_currentQuery.reset();
-    _visibilityResultMTLBuffer = _cmdEncoder->_cmdBuffer->_initialVisibilityResultMTLBuffer;
+    _needsVisibilityResultMTLBuffer = _cmdEncoder->_cmdBuffer->_needsVisibilityResultMTLBuffer;
    _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
    _mtlVisibilityResultOffset = 0;
 }
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@ -129,6 +129,9 @@ public:
 	/** Returns a MTLComputePipelineState for copying query results to a buffer. */
 	id<MTLComputePipelineState> getCmdCopyQueryPoolResultsMTLComputePipelineState();

+	/** Returns a MTLComputePipelineState for accumulating occlusion query results over multiple render passes. */
+	id<MTLComputePipelineState> getAccumulateOcclusionQueryResultsMTLComputePipelineState();
+
 	/** Deletes all the internal resources. */
 	void clear();

@ -164,5 +167,6 @@ protected:
 	id<MTLComputePipelineState> _mtlDrawIndirectTessConvertBuffersComputePipelineState[2] = {nil, nil};
 	id<MTLComputePipelineState> _mtlDrawIndexedCopyIndexBufferComputePipelineState[2] = {nil, nil};
 	id<MTLComputePipelineState> _mtlCopyQueryPoolResultsComputePipelineState = nil;
+	id<MTLComputePipelineState> _mtlAccumOcclusionQueryResultsComputePipelineState = nil;
 };

--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@ -146,6 +146,10 @@ id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdCopyQueryPoolResultsMT
 	MVK_ENC_REZ_ACCESS(_mtlCopyQueryPoolResultsComputePipelineState, newCmdCopyQueryPoolResultsMTLComputePipelineState(_commandPool));
 }

+id<MTLComputePipelineState> MVKCommandEncodingPool::getAccumulateOcclusionQueryResultsMTLComputePipelineState() {
+	MVK_ENC_REZ_ACCESS(_mtlAccumOcclusionQueryResultsComputePipelineState, newAccumulateOcclusionQueryResultsMTLComputePipelineState(_commandPool));
+}
+
 void MVKCommandEncodingPool::clear() {
 	lock_guard<mutex> lock(_lock);
 	destroyMetalResources();
@ -233,5 +237,8 @@ void MVKCommandEncodingPool::destroyMetalResources() {

    [_mtlCopyQueryPoolResultsComputePipelineState release];
    _mtlCopyQueryPoolResultsComputePipelineState = nil;
+
+    [_mtlAccumOcclusionQueryResultsComputePipelineState release];
+    _mtlAccumOcclusionQueryResultsComputePipelineState = nil;
 }

--- a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
@ -376,5 +376,13 @@ kernel void cmdCopyQueryPoolResultsToBuffer(const device VisibilityBuffer* src [
    }                                                                                                           \n\
 }                                                                                                               \n\
                                                                                                                \n\
+kernel void accumulateOcclusionQueryResults(device VisibilityBuffer& dest [[buffer(0)]],                        \n\
+                                            const device VisibilityBuffer& src [[buffer(1)]]) {                 \n\
+    uint32_t oldDestCount = dest.count;                                                                         \n\
+    dest.count += src.count;                                                                                    \n\
+    dest.countHigh += src.countHigh;                                                                            \n\
+    if (dest.count < max(oldDestCount, src.count)) { dest.countHigh++; }                                        \n\
+}                                                                                                               \n\
+                                                                                                                \n\
 ";

--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
@ -449,6 +449,9 @@ public:
 	/** Returns a new MTLComputePipelineState for copying query results to a buffer. */
 	id<MTLComputePipelineState> newCmdCopyQueryPoolResultsMTLComputePipelineState(MVKVulkanAPIDeviceObject* owner);

+	/** Returns a new MTLComputePipelineState for accumulating occlusion query results to a buffer. */
+	id<MTLComputePipelineState> newAccumulateOcclusionQueryResultsMTLComputePipelineState(MVKVulkanAPIDeviceObject* owner);
+

 #pragma mark Construction

--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
@ -554,6 +554,10 @@ id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdCopyQueryPoolResult
 	return newMTLComputePipelineState("cmdCopyQueryPoolResultsToBuffer", owner);
 }

+id<MTLComputePipelineState> MVKCommandResourceFactory::newAccumulateOcclusionQueryResultsMTLComputePipelineState(MVKVulkanAPIDeviceObject* owner) {
+	return newMTLComputePipelineState("accumulateOcclusionQueryResults", owner);
+}
+

 #pragma mark Support methods

--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@ -1396,7 +1396,6 @@ void MVKPhysicalDevice::initMetalFeatures() {
        _metalFeatures.multisampleArrayTextures = true;
 		_metalFeatures.events = true;
        _metalFeatures.textureBuffers = true;
-		_metalFeatures.quadPermute = true;
 		_metalFeatures.simdPermute = true;
    }

@ -1405,6 +1404,7 @@ void MVKPhysicalDevice::initMetalFeatures() {
 		_metalFeatures.stencilFeedback = true;
 		_metalFeatures.depthResolve = true;
 		_metalFeatures.stencilResolve = true;
+		_metalFeatures.quadPermute = true;
 		_metalFeatures.simdReduction = true;
 	}

@ -1412,13 +1412,13 @@ void MVKPhysicalDevice::initMetalFeatures() {
 		_metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
 		_metalFeatures.maxQueryBufferSize = (256 * KIBI);
 		_metalFeatures.native3DCompressedTextures = true;
-        _metalFeatures.renderWithoutAttachments = true;
        if ( mvkOSVersionIsAtLeast(mvkMakeOSVersion(10, 15, 6)) ) {
            _metalFeatures.sharedLinearTextures = true;
        }
 		if (supportsMTLGPUFamily(Mac2)) {
 			_metalFeatures.nativeTextureSwizzle = true;
 			_metalFeatures.placementHeaps = mvkGetMVKConfiguration()->useMTLHeap;
+			_metalFeatures.renderWithoutAttachments = true;
 		}
 	}

@ -1580,7 +1580,11 @@ void MVKPhysicalDevice::initFeatures() {
 #if MVK_TVOS
    _features.textureCompressionETC2 = true;
    _features.textureCompressionASTC_LDR = true;
+#if MVK_XCODE_12
 	_features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3) && supportsMTLGPUFamily(Apple3);
+#else
+	_features.shaderInt64 = false;
+#endif

 	if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) {
 		_features.dualSrcBlend = true;
@ -1597,7 +1601,11 @@ void MVKPhysicalDevice::initFeatures() {

 #if MVK_IOS
    _features.textureCompressionETC2 = true;
+#if MVK_XCODE_12
 	_features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3) && supportsMTLGPUFamily(Apple3);
+#else
+	_features.shaderInt64 = false;
+#endif

    if (supportsMTLFeatureSet(iOS_GPUFamily2_v1)) {
        _features.textureCompressionASTC_LDR = true;
@ -1640,7 +1648,11 @@ void MVKPhysicalDevice::initFeatures() {
    _features.depthClamp = true;
    _features.vertexPipelineStoresAndAtomics = true;
    _features.fragmentStoresAndAtomics = true;
+#if MVK_XCODE_12
 	_features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3);
+#else
+	_features.shaderInt64 = false;
+#endif

    _features.shaderStorageImageArrayDynamicIndexing = _metalFeatures.arrayOfTextures;

--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h
@ -143,6 +143,7 @@ protected:
 class MVKTimestampQueryPool : public MVKQueryPool {

 public:
+    void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override;
    void finishQueries(const MVKArrayRef<uint32_t>& queries) override;


--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
@ -31,6 +31,7 @@ using namespace std;

 void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
    uint32_t queryCount = cmdEncoder->isInRenderPass() ? cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
+    queryCount = max(queryCount, 1u);
    lock_guard<mutex> lock(_availabilityLock);
    for (uint32_t i = query; i < query + queryCount; ++i) {
        _availability[i] = DeviceAvailable;
@ -52,7 +53,11 @@ void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
 // Mark queries as available
 void MVKQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
    lock_guard<mutex> lock(_availabilityLock);
-    for (uint32_t qry : queries) { _availability[qry] = Available; }
+    for (uint32_t qry : queries) {
+        if (_availability[qry] == DeviceAvailable) {
+            _availability[qry] = Available;
+        }
+    }
    _availabilityBlocker.notify_all();      // Predicate of each wait() call will check whether all required queries are available
 }

@ -192,6 +197,11 @@ void MVKQueryPool::deferCopyResults(uint32_t firstQuery,
 #pragma mark -
 #pragma mark MVKTimestampQueryPool

+void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
+    cmdEncoder->markTimestamp(this, query);
+    MVKQueryPool::endQuery(query, cmdEncoder);
+}
+
 // Update timestamp values, then mark queries as available
 void MVKTimestampQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
    uint64_t ts = mvkGetTimestamp();
@ -306,9 +316,7 @@ void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer*
        cmdBuffer->setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The query offset value %lu is larger than the maximum offset value %lu available on this device.", offset, maxOffset));
    }

-    if (cmdBuffer->_initialVisibilityResultMTLBuffer == nil) {
-        cmdBuffer->_initialVisibilityResultMTLBuffer = getVisibilityResultMTLBuffer();
-    }
+    cmdBuffer->_needsVisibilityResultMTLBuffer = true;
 }


--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@ -87,7 +87,8 @@ typedef enum : uint8_t {
    kMVKCommandUseDispatch,                 /**< vkCmdDispatch. */
    kMVKCommandUseTessellationVertexTessCtl,/**< vkCmdDraw* - vertex and tessellation control stages. */
 	kMVKCommandUseMultiviewInstanceCountAdjust,/**< vkCmdDrawIndirect* - adjust instance count for multiview. */
-    kMVKCommandUseCopyQueryPoolResults      /**< vkCmdCopyQueryPoolResults. */
+    kMVKCommandUseCopyQueryPoolResults,     /**< vkCmdCopyQueryPoolResults. */
+    kMVKCommandUseAccumOcclusionQuery       /**< Any command terminating a Metal render pass with active visibility buffer. */
 } MVKCommandUse;

 /** Represents a given stage of a graphics pipeline. */
--- a/MoltenVKShaderConverter/MoltenVKShaderConverterTool/OSSupport.mm
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverterTool/OSSupport.mm
@ -71,9 +71,12 @@ bool mvk::compile(const string& mslSourceCode,
 #define mslVer(MJ, MN, PT)	mslVersionMajor == MJ && mslVersionMinor == MN && mslVersionPoint == PT

 	MTLLanguageVersion mslVerEnum = (MTLLanguageVersion)0;
+#if MVK_XCODE_12
 	if (mslVer(2, 3, 0)) {
 		mslVerEnum = MTLLanguageVersion2_3;
-	} else if (mslVer(2, 2, 0)) {
+	} else
+#endif
+	if (mslVer(2, 2, 0)) {
 		mslVerEnum = MTLLanguageVersion2_2;
 	} else if (mslVer(2, 1, 0)) {
 		mslVerEnum = MTLLanguageVersion2_1;