Merge pull request #1404 from billhollings/timestamp-using-metal-gpu-counters

Vulkan timestamp query pools use Metal GPU counters when available.
This commit is contained in:
Bill Hollings 2021-07-26 18:15:25 -04:00 committed by GitHub
commit 7a2def4604
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 472 additions and 186 deletions

View File

@ -18,11 +18,13 @@ MoltenVK 1.1.5
Released TBD Released TBD
- Vulkan timestamp query pools use Metal GPU counters when available.
- Fix incorrect translation of clear color values on Apple Silicon. - Fix incorrect translation of clear color values on Apple Silicon.
- Fix swizzle of depth and stencil values into RGBA (`float4`) variable in shaders. - Fix swizzle of depth and stencil values into RGBA (`float4`) variable in shaders.
- Disable `VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT` for - Disable `VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT` for
`VK_FORMAT_E5B9G9R9_UFLOAT_PACK32` on macOS Apple Silicon. `VK_FORMAT_E5B9G9R9_UFLOAT_PACK32` on macOS Apple Silicon.
- Support alpha-to-coverage without a color attachment. - Support alpha-to-coverage without a color attachment.
- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to `32`.
- Update to latest SPIRV-Cross version: - Update to latest SPIRV-Cross version:
- MSL: Adjust `gl_SampleMaskIn` for sample-shading and/or fixed sample mask. - MSL: Adjust `gl_SampleMaskIn` for sample-shading and/or fixed sample mask.
- MSL: Fix setting `SPIRVCrossDecorationInterpolantComponentExpr` decoration. - MSL: Fix setting `SPIRVCrossDecorationInterpolantComponentExpr` decoration.

View File

@ -835,6 +835,16 @@ typedef enum MVKFloatRounding {
MVK_FLOAT_ROUNDING_UP_MAX_ENUM = 0x7FFFFFFF MVK_FLOAT_ROUNDING_UP_MAX_ENUM = 0x7FFFFFFF
} MVKFloatRounding; } MVKFloatRounding;
/** Identifies the pipeline points where GPU counter sampling can occur. Maps to MTLCounterSamplingPoint. */
typedef enum MVKCounterSamplingBits {
MVK_COUNTER_SAMPLING_AT_DRAW = 0x00000001,
MVK_COUNTER_SAMPLING_AT_DISPATCH = 0x00000002,
MVK_COUNTER_SAMPLING_AT_BLIT = 0x00000004,
MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE = 0x00000008,
MVK_COUNTER_SAMPLING_MAX_ENUM = 0X7FFFFFFF
} MVKCounterSamplingBits;
typedef VkFlags MVKCounterSamplingFlags;
/** /**
* Features provided by the current implementation of Metal on the current device. You can * Features provided by the current implementation of Metal on the current device. You can
* retrieve a copy of this structure using the vkGetPhysicalDeviceMetalFeaturesMVK() function. * retrieve a copy of this structure using the vkGetPhysicalDeviceMetalFeaturesMVK() function.
@ -915,6 +925,7 @@ typedef struct {
VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported. */ VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported. */
VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffer can be assigned to a descriptor set, and used on any pipeline and pipeline stage. If false, a different Metal argument buffer must be used for each pipeline-stage/descriptor-set combination. */ VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffer can be assigned to a descriptor set, and used on any pipeline and pipeline stage. If false, a different Metal argument buffer must be used for each pipeline-stage/descriptor-set combination. */
MVKFloatRounding clearColorFloatRounding; /**< Identifies the type of rounding Metal uses for MTLClearColor float to integer conversions. */ MVKFloatRounding clearColorFloatRounding; /**< Identifies the type of rounding Metal uses for MTLClearColor float to integer conversions. */
MVKCounterSamplingFlags counterSamplingPoints; /**< Identifies the points where pipeline GPU counter sampling may occur. */
} MVKPhysicalDeviceMetalFeatures; } MVKPhysicalDeviceMetalFeatures;
/** MoltenVK performance of a particular type of activity. */ /** MoltenVK performance of a particular type of activity. */

View File

@ -410,7 +410,7 @@ public:
void endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query); void endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query);
/** Marks a timestamp for the specified query. */ /** Marks a timestamp for the specified query. */
void markTimestamp(MVKQueryPool* pQueryPool, uint32_t query); void markTimestamp(MVKTimestampQueryPool* pQueryPool, uint32_t query);
/** Reset a range of queries. */ /** Reset a range of queries. */
void resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount); void resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount);
@ -499,6 +499,13 @@ protected:
void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex); void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
void clearRenderArea(); void clearRenderArea();
NSString* getMTLRenderCommandEncoderName(); NSString* getMTLRenderCommandEncoderName();
void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints);
void encodeTimestampStageCounterSamples();
typedef struct GPUCounterQuery {
MVKGPUCounterQueryPool* queryPool = nullptr;
uint32_t query = 0;
} GPUCounterQuery;
VkSubpassContents _subpassContents; VkSubpassContents _subpassContents;
MVKRenderPass* _renderPass; MVKRenderPass* _renderPass;
@ -507,6 +514,7 @@ protected:
uint32_t _multiviewPassIndex; uint32_t _multiviewPassIndex;
VkRect2D _renderArea; VkRect2D _renderArea;
MVKActivatedQueries* _pActivatedQueries; MVKActivatedQueries* _pActivatedQueries;
MVKSmallVector<GPUCounterQuery, 16> _timestampStageCounterQueries;
MVKSmallVector<VkClearValue, kMVKDefaultAttachmentCount> _clearValues; MVKSmallVector<VkClearValue, kMVKDefaultAttachmentCount> _clearValues;
MVKSmallVector<MVKImageView*, kMVKDefaultAttachmentCount> _attachments; MVKSmallVector<MVKImageView*, kMVKDefaultAttachmentCount> _attachments;
id<MTLComputeCommandEncoder> _mtlComputeEncoder; id<MTLComputeCommandEncoder> _mtlComputeEncoder;

View File

@ -614,6 +614,8 @@ void MVKCommandEncoder::endCurrentMetalEncoding() {
[_mtlBlitEncoder endEncoding]; [_mtlBlitEncoder endEncoding];
_mtlBlitEncoder = nil; // not retained _mtlBlitEncoder = nil; // not retained
_mtlBlitEncoderUse = kMVKCommandUseNone; _mtlBlitEncoderUse = kMVKCommandUseNone;
encodeTimestampStageCounterSamples();
} }
id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) { id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) {
@ -720,6 +722,23 @@ const MVKMTLBufferAllocation* MVKCommandEncoder::copyToTempMTLBufferAllocation(c
#pragma mark Queries #pragma mark Queries
// Only executes on immediate-mode GPUs. Encode a GPU counter sample command on whichever Metal
// encoder is currently in use, creating a temporary BLIT encoder if no encoder is currently active.
// We only encode the GPU sample if the platform allows encoding at the associated pipeline point.
void MVKCommandEncoder::encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints){
if (_mtlRenderEncoder) {
if (mvkIsAnyFlagEnabled(samplingPoints, MVK_COUNTER_SAMPLING_AT_DRAW)) {
[_mtlRenderEncoder sampleCountersInBuffer: mvkQryPool->getMTLCounterBuffer() atSampleIndex: sampleIndex withBarrier: NO];
}
} else if (_mtlComputeEncoder) {
if (mvkIsAnyFlagEnabled(samplingPoints, MVK_COUNTER_SAMPLING_AT_DISPATCH)) {
[_mtlComputeEncoder sampleCountersInBuffer: mvkQryPool->getMTLCounterBuffer() atSampleIndex: sampleIndex withBarrier: NO];
}
} else if (mvkIsAnyFlagEnabled(samplingPoints, MVK_COUNTER_SAMPLING_AT_BLIT)) {
[getMTLBlitEncoder(kMVKCommandUseRecordGPUCounterSample) sampleCountersInBuffer: mvkQryPool->getMTLCounterBuffer() atSampleIndex: sampleIndex withBarrier: NO];
}
}
void MVKCommandEncoder::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) { void MVKCommandEncoder::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
_occlusionQueryState.beginOcclusionQuery(pQueryPool, query, flags); _occlusionQueryState.beginOcclusionQuery(pQueryPool, query, flags);
uint32_t queryCount = 1; uint32_t queryCount = 1;
@ -733,14 +752,67 @@ void MVKCommandEncoder::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uin
_occlusionQueryState.endOcclusionQuery(pQueryPool, query); _occlusionQueryState.endOcclusionQuery(pQueryPool, query);
} }
void MVKCommandEncoder::markTimestamp(MVKQueryPool* pQueryPool, uint32_t query) { void MVKCommandEncoder::markTimestamp(MVKTimestampQueryPool* pQueryPool, uint32_t query) {
uint32_t queryCount = 1; uint32_t queryCount = 1;
if (_renderPass && getSubpass()->isMultiview()) { if (_renderPass && getSubpass()->isMultiview()) {
queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex); queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
} }
addActivatedQueries(pQueryPool, query, queryCount); addActivatedQueries(pQueryPool, query, queryCount);
MVKCounterSamplingFlags sampPts = _device->_pMetalFeatures->counterSamplingPoints;
if (sampPts) {
for (uint32_t qOfst = 0; qOfst < queryCount; qOfst++) {
if (mvkIsAnyFlagEnabled(sampPts, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE)) {
_timestampStageCounterQueries.push_back({ pQueryPool, query + qOfst });
} else {
encodeGPUCounterSample(pQueryPool, query + qOfst, sampPts);
}
}
}
} }
#if MVK_XCODE_12
// Metal stage GPU counters need to be configured in a Metal render, compute, or BLIT encoder, meaning that the
// Metal encoder needs to know about any Vulkan timestamp commands that will be executed during the execution
// of a renderpass, or set of Vulkan dispatch or BLIT commands. In addition, there are a very small number of
// staged timestamps (4) that can be tracked in any single render, compute, or BLIT pass, meaning a renderpass
// that timestamped after each of many draw calls, would not be trackable. Finally, stage counters are only
// available on tile-based GPU's, which means draw or dispatch calls cannot be individually timestamped.
// We avoid dealing with all this complexity and mismatch between how Vulkan and Metal stage counters operate
// by deferring all timestamps to the end of any batch of Metal encoding, and add a lightweight Metal encoder
// that does minimal work (it won't timestamp if completely empty), and timestamps that work into all of the
// Vulkan timestamp queries that have been executed during the execution of the previous Metal encoder.
void MVKCommandEncoder::encodeTimestampStageCounterSamples() {
size_t qCnt = _timestampStageCounterQueries.size();
uint32_t qIdx = 0;
while (qIdx < qCnt) {
// With each BLIT pass, consume as many outstanding timestamp queries as possible.
// Attach an query result to each of the available sample buffer attachments in the BLIT pass descriptor.
auto* bpDesc = [[[MTLBlitPassDescriptor alloc] init] autorelease];
for (uint32_t attIdx = 0; attIdx < MTLMaxBlitPassSampleBuffers && qIdx < qCnt; attIdx++, qIdx++) {
auto* sbAttDesc = bpDesc.sampleBufferAttachments[attIdx];
auto& tsQry = _timestampStageCounterQueries[qIdx];
// We actually only need to use startOfEncoderSampleIndex, but apparently,
// and contradicting docs, Metal hits an unexpected validation error if
// endOfEncoderSampleIndex is left at MTLCounterDontSample.
sbAttDesc.startOfEncoderSampleIndex = tsQry.query;
sbAttDesc.endOfEncoderSampleIndex = tsQry.query;
sbAttDesc.sampleBuffer = tsQry.queryPool->getMTLCounterBuffer();
}
auto* mtlEnc = [_mtlCmdBuffer blitCommandEncoderWithDescriptor: bpDesc];
setLabelIfNotNil(mtlEnc, mvkMTLBlitCommandEncoderLabel(kMVKCommandUseRecordGPUCounterSample));
[mtlEnc fillBuffer: _device->getDummyBlitMTLBuffer() range: NSMakeRange(0, 1) value: 0];
[mtlEnc endEncoding];
}
_timestampStageCounterQueries.clear();
}
#else
void MVKCommandEncoder::encodeTimestampStageCounterSamples() {}
#endif
void MVKCommandEncoder::resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount) { void MVKCommandEncoder::resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount) {
addActivatedQueries(pQueryPool, firstQuery, queryCount); addActivatedQueries(pQueryPool, firstQuery, queryCount);
} }
@ -847,6 +919,7 @@ NSString* mvkMTLBlitCommandEncoderLabel(MVKCommandUse cmdUse) {
case kMVKCommandUseUpdateBuffer: return @"vkCmdUpdateBuffer BlitEncoder"; case kMVKCommandUseUpdateBuffer: return @"vkCmdUpdateBuffer BlitEncoder";
case kMVKCommandUseResetQueryPool: return @"vkCmdResetQueryPool BlitEncoder"; case kMVKCommandUseResetQueryPool: return @"vkCmdResetQueryPool BlitEncoder";
case kMVKCommandUseCopyQueryPoolResults: return @"vkCmdCopyQueryPoolResults BlitEncoder"; case kMVKCommandUseCopyQueryPoolResults: return @"vkCmdCopyQueryPoolResults BlitEncoder";
case kMVKCommandUseRecordGPUCounterSample: return @"Record GPU Counter Sample BlitEncoder";
default: return @"Unknown Use BlitEncoder"; default: return @"Unknown Use BlitEncoder";
} }
} }

View File

@ -324,10 +324,10 @@ public:
} }
/** Returns whether the MSL version is supported on this device. */ /** Returns whether the MSL version is supported on this device. */
inline bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; } bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; }
/** Returns whether this device is using Metal argument buffers. */ /** Returns whether this device is using Metal argument buffers. */
inline bool isUsingMetalArgumentBuffers() const { return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers; }; bool isUsingMetalArgumentBuffers() const { return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers; };
#pragma mark Construction #pragma mark Construction
@ -371,6 +371,7 @@ protected:
uint32_t getMaxSamplerCount(); uint32_t getMaxSamplerCount();
void initExternalMemoryProperties(); void initExternalMemoryProperties();
void initExtensions(); void initExtensions();
void initCounterSets();
MVKArrayRef<MVKQueueFamily*> getQueueFamilies(); MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
void initPipelineCacheUUID(); void initPipelineCacheUUID();
uint32_t getHighestMTLFeatureSet(); uint32_t getHighestMTLFeatureSet();
@ -388,6 +389,7 @@ protected:
VkPhysicalDeviceMemoryProperties _memoryProperties; VkPhysicalDeviceMemoryProperties _memoryProperties;
MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies; MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies;
MVKPixelFormats _pixelFormats; MVKPixelFormats _pixelFormats;
id<MTLCounterSet> _timestampMTLCounterSet;
uint32_t _allMemoryTypes; uint32_t _allMemoryTypes;
uint32_t _hostVisibleMemoryTypes; uint32_t _hostVisibleMemoryTypes;
uint32_t _hostCoherentMemoryTypes; uint32_t _hostCoherentMemoryTypes;
@ -684,12 +686,21 @@ public:
*/ */
uint32_t expandVisibilityResultMTLBuffer(uint32_t queryCount); uint32_t expandVisibilityResultMTLBuffer(uint32_t queryCount);
/** Returns the GPU sample counter used for timestamps. */
id<MTLCounterSet> getTimestampMTLCounterSet() { return _physicalDevice->_timestampMTLCounterSet; }
/** Returns the memory type index corresponding to the specified Metal memory storage mode. */ /** Returns the memory type index corresponding to the specified Metal memory storage mode. */
uint32_t getVulkanMemoryTypeIndex(MTLStorageMode mtlStorageMode); uint32_t getVulkanMemoryTypeIndex(MTLStorageMode mtlStorageMode);
/** Returns a default MTLSamplerState to populate empty array element descriptors. */ /** Returns a default MTLSamplerState to populate empty array element descriptors. */
id<MTLSamplerState> getDefaultMTLSamplerState(); id<MTLSamplerState> getDefaultMTLSamplerState();
/**
* Returns a MTLBuffer of length one that can be used as a dummy to
* create a no-op BLIT encoder based on filling this single-byte buffer.
*/
id<MTLBuffer> getDummyBlitMTLBuffer();
/** /**
* Returns whether MTLCommandBuffers can be prefilled. * Returns whether MTLCommandBuffers can be prefilled.
* *
@ -818,6 +829,7 @@ protected:
std::mutex _perfLock; std::mutex _perfLock;
id<MTLBuffer> _globalVisibilityResultMTLBuffer; id<MTLBuffer> _globalVisibilityResultMTLBuffer;
id<MTLSamplerState> _defaultMTLSamplerState; id<MTLSamplerState> _defaultMTLSamplerState;
id<MTLBuffer> _dummyBlitMTLBuffer;
uint32_t _globalVisibilityQueryCount; uint32_t _globalVisibilityQueryCount;
std::mutex _vizLock; std::mutex _vizLock;
bool _useMTLFenceForSemaphores; bool _useMTLFenceForSemaphores;

View File

@ -1150,6 +1150,7 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
initExtensions(); initExtensions();
initMemoryProperties(); initMemoryProperties();
initExternalMemoryProperties(); initExternalMemoryProperties();
initCounterSets();
logGPUInfo(); logGPUInfo();
} }
@ -1594,6 +1595,27 @@ void MVKPhysicalDevice::initMetalFeatures() {
// Currently, if we don't support descriptor set argument buffers, we can't support argument buffers. // Currently, if we don't support descriptor set argument buffers, we can't support argument buffers.
_metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers; _metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers;
#define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \
if ([_mtlDevice respondsToSelector: @selector(supportsCounterSampling:)] && \
[_mtlDevice supportsCounterSampling: MTLCounterSamplingPointAt ##mtlSP ##Boundary]) { \
_metalFeatures.counterSamplingPoints |= MVK_COUNTER_SAMPLING_AT_ ##mvkSP; \
}
#if MVK_XCODE_12
checkSupportsMTLCounterSamplingPoint(Draw, DRAW);
checkSupportsMTLCounterSamplingPoint(Dispatch, DISPATCH);
checkSupportsMTLCounterSamplingPoint(Blit, BLIT);
checkSupportsMTLCounterSamplingPoint(Stage, PIPELINE_STAGE);
#endif
#if !MVK_APPLE_SILICON
// On macOS, if we couldn't query supported sample points (on macOS 11),
// but the platform can support immediate-mode sample points, indicate that here.
if (!_metalFeatures.counterSamplingPoints && mvkOSVersionIsAtLeast(10.15)) { \
_metalFeatures.counterSamplingPoints = MVK_COUNTER_SAMPLING_AT_DRAW | MVK_COUNTER_SAMPLING_AT_DISPATCH | MVK_COUNTER_SAMPLING_AT_BLIT; \
}
#endif
} }
// Initializes the physical device features of this instance. // Initializes the physical device features of this instance.
@ -2726,6 +2748,28 @@ void MVKPhysicalDevice::initExtensions() {
#endif #endif
} }
void MVKPhysicalDevice::initCounterSets() {
_timestampMTLCounterSet = nil;
@autoreleasepool {
if (_metalFeatures.counterSamplingPoints) {
NSArray<id<MTLCounterSet>>* counterSets = _mtlDevice.counterSets;
for (id<MTLCounterSet> cs in counterSets){
NSString* csName = cs.name;
if ( [csName caseInsensitiveCompare: MTLCommonCounterSetTimestamp] == NSOrderedSame) {
NSArray<id<MTLCounter>>* countersInSet = cs.counters;
for(id<MTLCounter> ctr in countersInSet) {
if ( [ctr.name caseInsensitiveCompare: MTLCommonCounterTimestamp] == NSOrderedSame) {
_timestampMTLCounterSet = [cs retain]; // retained
break;
}
}
break;
}
}
}
}
}
void MVKPhysicalDevice::logGPUInfo() { void MVKPhysicalDevice::logGPUInfo() {
string devTypeStr; string devTypeStr;
switch (_properties.deviceType) { switch (_properties.deviceType) {
@ -2838,6 +2882,7 @@ void MVKPhysicalDevice::logGPUInfo() {
MVKPhysicalDevice::~MVKPhysicalDevice() { MVKPhysicalDevice::~MVKPhysicalDevice() {
mvkDestroyContainerContents(_queueFamilies); mvkDestroyContainerContents(_queueFamilies);
[_timestampMTLCounterSet release];
[_mtlDevice release]; [_mtlDevice release];
} }
@ -3712,6 +3757,20 @@ id<MTLSamplerState> MVKDevice::getDefaultMTLSamplerState() {
return _defaultMTLSamplerState; return _defaultMTLSamplerState;
} }
id<MTLBuffer> MVKDevice::getDummyBlitMTLBuffer() {
if ( !_dummyBlitMTLBuffer ) {
// Lock and check again in case another thread has created the buffer.
lock_guard<mutex> lock(_rezLock);
if ( !_dummyBlitMTLBuffer ) {
@autoreleasepool {
_dummyBlitMTLBuffer = [getMTLDevice() newBufferWithLength: 1 options: MTLResourceStorageModePrivate];
}
}
}
return _dummyBlitMTLBuffer;
}
MTLCompileOptions* MVKDevice::getMTLCompileOptions(bool useFastMath, bool preserveInvariance) { MTLCompileOptions* MVKDevice::getMTLCompileOptions(bool useFastMath, bool preserveInvariance) {
MTLCompileOptions* mtlCompOpt = [MTLCompileOptions new]; MTLCompileOptions* mtlCompOpt = [MTLCompileOptions new];
mtlCompOpt.languageVersion = _pMetalFeatures->mslVersionEnum; mtlCompOpt.languageVersion = _pMetalFeatures->mslVersionEnum;
@ -3833,6 +3892,7 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
_globalVisibilityQueryCount = 0; _globalVisibilityQueryCount = 0;
_defaultMTLSamplerState = nil; _defaultMTLSamplerState = nil;
_dummyBlitMTLBuffer = nil;
_commandResourceFactory = new MVKCommandResourceFactory(this); _commandResourceFactory = new MVKCommandResourceFactory(this);
@ -4200,6 +4260,7 @@ MVKDevice::~MVKDevice() {
[_globalVisibilityResultMTLBuffer release]; [_globalVisibilityResultMTLBuffer release];
[_defaultMTLSamplerState release]; [_defaultMTLSamplerState release];
[_dummyBlitMTLBuffer release];
stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE); stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE);

View File

@ -38,7 +38,6 @@ class MVKCommandEncoder;
/** /**
* Abstract class representing a Vulkan query pool. * Abstract class representing a Vulkan query pool.
* Subclasses are specialized for specific query types. * Subclasses are specialized for specific query types.
* Subclasses will generally override the beginQuery(), endQuery(), and getResult(uint32_t, void*, bool) member functions.
*/ */
class MVKQueryPool : public MVKVulkanAPIDeviceObject { class MVKQueryPool : public MVKVulkanAPIDeviceObject {
@ -106,10 +105,12 @@ public:
protected: protected:
bool areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuery); bool areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuery);
VkResult getResult(uint32_t query, void* pQryData, VkQueryResultFlags flags); virtual NSData* getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) { return nil; }
virtual void getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) {} VkResult getResult(uint32_t query, NSData* srcData, uint32_t srcDataQueryOffset, void* pDstData, VkQueryResultFlags flags);
virtual id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) { return nil; } virtual id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) { return nil; }
virtual void encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) {} virtual id<MTLComputeCommandEncoder> encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) { return nil; }
virtual void encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount,
MVKBuffer* destBuffer, VkDeviceSize destOffset, VkDeviceSize stride);
struct DeferredCopy { struct DeferredCopy {
uint32_t firstQuery; uint32_t firstQuery;
@ -136,31 +137,6 @@ protected:
}; };
#pragma mark -
#pragma mark MVKTimestampQueryPool
/** A Vulkan query pool for timestamp queries. */
class MVKTimestampQueryPool : public MVKQueryPool {
public:
void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override;
void finishQueries(const MVKArrayRef<uint32_t>& queries) override;
#pragma mark Construction
MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
protected:
void propagateDebugName() override {}
void getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) override;
id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override;
void encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override;
MVKSmallVector<uint64_t, kMVKDefaultQueryCount> _timestamps;
};
#pragma mark - #pragma mark -
#pragma mark MVKOcclusionQueryPool #pragma mark MVKOcclusionQueryPool
@ -189,20 +165,73 @@ public:
protected: protected:
void propagateDebugName() override; void propagateDebugName() override;
void getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) override; NSData* getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) override;
id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override; id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override;
void encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override; id<MTLComputeCommandEncoder> encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override;
id<MTLBuffer> _visibilityResultMTLBuffer; id<MTLBuffer> _visibilityResultMTLBuffer;
uint32_t _queryIndexOffset; uint32_t _queryIndexOffset;
}; };
#pragma mark -
#pragma mark MVKGPUCounterQueryPool
/** An abstract parent class for query pools that use Metal GPU counters if they are supported on the platform. */
class MVKGPUCounterQueryPool : public MVKQueryPool {
public:
/**
* Returns the MTLCounterBuffer being used by this query pool,
* or returns nil if GPU counters are not supported.
* */
id<MTLCounterSampleBuffer> getMTLCounterBuffer() { return _mtlCounterBuffer; }
MVKGPUCounterQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
~MVKGPUCounterQueryPool() override;
protected:
void initMTLCounterSampleBuffer(const VkQueryPoolCreateInfo* pCreateInfo,
id<MTLCounterSet> mtlCounterSet,
const char* queryTypeName);
id<MTLCounterSampleBuffer> _mtlCounterBuffer;
};
#pragma mark -
#pragma mark MVKTimestampQueryPool
/** A Vulkan query pool for timestamp queries. */
class MVKTimestampQueryPool : public MVKGPUCounterQueryPool {
public:
void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override;
void finishQueries(const MVKArrayRef<uint32_t>& queries) override;
#pragma mark Construction
MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
protected:
void propagateDebugName() override {}
NSData* getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) override;
id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override;
id<MTLComputeCommandEncoder> encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override;
void encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount,
MVKBuffer* destBuffer, VkDeviceSize destOffset, VkDeviceSize stride) override;
MVKSmallVector<uint64_t> _timestamps;
};
#pragma mark - #pragma mark -
#pragma mark MVKPipelineStatisticsQueryPool #pragma mark MVKPipelineStatisticsQueryPool
/** A Vulkan query pool for a query pool type that tracks pipeline statistics. */ /** A Vulkan query pool for a query pool type that tracks pipeline statistics. */
class MVKPipelineStatisticsQueryPool : public MVKQueryPool { class MVKPipelineStatisticsQueryPool : public MVKGPUCounterQueryPool {
public: public:
MVKPipelineStatisticsQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo); MVKPipelineStatisticsQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);

View File

@ -89,11 +89,14 @@ VkResult MVKQueryPool::getResults(uint32_t firstQuery,
} }
VkResult rqstRslt = VK_SUCCESS; VkResult rqstRslt = VK_SUCCESS;
uintptr_t pQryData = (uintptr_t)pData; @autoreleasepool {
for (uint32_t query = firstQuery; query < endQuery; query++, pQryData += stride) { NSData* srcData = getQuerySourceData(firstQuery, queryCount);
VkResult qryRslt = getResult(query, (void*)pQryData, flags); uintptr_t pDstData = (uintptr_t)pData;
for (uint32_t query = firstQuery; query < endQuery; query++, pDstData += stride) {
VkResult qryRslt = getResult(query, srcData, firstQuery, (void*)pDstData, flags);
if (rqstRslt == VK_SUCCESS) { rqstRslt = qryRslt; } if (rqstRslt == VK_SUCCESS) { rqstRslt = qryRslt; }
} }
}
return rqstRslt; return rqstRslt;
} }
@ -114,7 +117,7 @@ bool MVKQueryPool::areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuer
return true; return true;
} }
VkResult MVKQueryPool::getResult(uint32_t query, void* pQryData, VkQueryResultFlags flags) { VkResult MVKQueryPool::getResult(uint32_t query, NSData* srcData, uint32_t srcDataQueryOffset, void* pDstData, VkQueryResultFlags flags) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
@ -123,15 +126,22 @@ VkResult MVKQueryPool::getResult(uint32_t query, void* pQryData, VkQueryResultFl
bool shouldOutput64Bit = mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_64_BIT); bool shouldOutput64Bit = mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_64_BIT);
// Output the results of this query // Output the results of this query
if (shouldOutput) { getResult(query, pQryData, shouldOutput64Bit); } if (shouldOutput) {
uint64_t rsltVal = ((uint64_t*)srcData.bytes)[query - srcDataQueryOffset];
if (shouldOutput64Bit) {
*(uint64_t*)pDstData = rsltVal;
} else {
*(uint32_t*)pDstData = (uint32_t)rsltVal;
}
}
// If requested, output the availability bit // If requested, output the availability bit
if (mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) { if (mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) {
if (shouldOutput64Bit) { if (shouldOutput64Bit) {
uintptr_t pAvailability = (uintptr_t)pQryData + (_queryElementCount * sizeof(uint64_t)); uintptr_t pAvailability = (uintptr_t)pDstData + (_queryElementCount * sizeof(uint64_t));
*(uint64_t*)pAvailability = isAvailable; *(uint64_t*)pAvailability = isAvailable;
} else { } else {
uintptr_t pAvailability = (uintptr_t)pQryData + (_queryElementCount * sizeof(uint32_t)); uintptr_t pAvailability = (uintptr_t)pDstData + (_queryElementCount * sizeof(uint32_t));
*(uint32_t*)pAvailability = isAvailable; *(uint32_t*)pAvailability = isAvailable;
} }
} }
@ -154,20 +164,12 @@ void MVKQueryPool::encodeCopyResults(MVKCommandEncoder* cmdEncoder,
stride == _queryElementCount * sizeof(uint64_t) && stride == _queryElementCount * sizeof(uint64_t) &&
areQueriesDeviceAvailable(firstQuery, queryCount)) { areQueriesDeviceAvailable(firstQuery, queryCount)) {
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults); encodeDirectCopyResults(cmdEncoder, firstQuery, queryCount, destBuffer, destOffset, stride);
NSUInteger srcOffset;
id<MTLBuffer> srcBuff = getResultBuffer(cmdEncoder, firstQuery, queryCount, srcOffset);
[mtlBlitCmdEnc copyFromBuffer: srcBuff
sourceOffset: srcOffset
toBuffer: destBuffer->getMTLBuffer()
destinationOffset: destBuffer->getMTLBufferOffset() + destOffset
size: stride * queryCount];
// TODO: In the case where none of the queries is ready, we can fill with 0. // TODO: In the case where none of the queries is ready, we can fill with 0.
} else { } else {
id<MTLComputeCommandEncoder> mtlComputeCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
id<MTLComputePipelineState> mtlCopyResultsState = cmdEncoder->getCommandEncodingPool()->getCmdCopyQueryPoolResultsMTLComputePipelineState(); id<MTLComputePipelineState> mtlCopyResultsState = cmdEncoder->getCommandEncodingPool()->getCmdCopyQueryPoolResultsMTLComputePipelineState();
id<MTLComputeCommandEncoder> mtlComputeCmdEnc = encodeComputeCopyResults(cmdEncoder, firstQuery, queryCount, 0);
[mtlComputeCmdEnc setComputePipelineState: mtlCopyResultsState]; [mtlComputeCmdEnc setComputePipelineState: mtlCopyResultsState];
encodeSetResultBuffer(cmdEncoder, firstQuery, queryCount, 0);
[mtlComputeCmdEnc setBuffer: destBuffer->getMTLBuffer() [mtlComputeCmdEnc setBuffer: destBuffer->getMTLBuffer()
offset: destBuffer->getMTLBufferOffset() + destOffset offset: destBuffer->getMTLBufferOffset() + destOffset
atIndex: 1]; atIndex: 1];
@ -183,6 +185,24 @@ void MVKQueryPool::encodeCopyResults(MVKCommandEncoder* cmdEncoder,
} }
} }
// If this asked for 64-bit results with no availability and packed stride, then we can do a straight copy.
void MVKQueryPool::encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder,
uint32_t firstQuery,
uint32_t queryCount,
MVKBuffer* destBuffer,
VkDeviceSize destOffset,
VkDeviceSize stride) {
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults);
NSUInteger srcOffset;
id<MTLBuffer> srcBuff = getResultBuffer(cmdEncoder, firstQuery, queryCount, srcOffset);
[mtlBlitCmdEnc copyFromBuffer: srcBuff
sourceOffset: srcOffset
toBuffer: destBuffer->getMTLBuffer()
destinationOffset: destBuffer->getMTLBufferOffset() + destOffset
size: stride * queryCount];
}
void MVKQueryPool::deferCopyResults(uint32_t firstQuery, void MVKQueryPool::deferCopyResults(uint32_t firstQuery,
uint32_t queryCount, uint32_t queryCount,
MVKBuffer* destBuffer, MVKBuffer* destBuffer,
@ -195,53 +215,6 @@ void MVKQueryPool::deferCopyResults(uint32_t firstQuery,
} }
#pragma mark -
#pragma mark MVKTimestampQueryPool
void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
cmdEncoder->markTimestamp(this, query);
MVKQueryPool::endQuery(query, cmdEncoder);
}
// Update timestamp values, then mark queries as available
void MVKTimestampQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
uint64_t ts = mvkGetTimestamp();
for (uint32_t qry : queries) { _timestamps[qry] = ts; }
MVKQueryPool::finishQueries(queries);
}
void MVKTimestampQueryPool::getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) {
if (shouldOutput64Bit) {
*(uint64_t*)pQryData = _timestamps[query];
} else {
*(uint32_t*)pQryData = (uint32_t)_timestamps[query];
}
}
id<MTLBuffer> MVKTimestampQueryPool::getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) {
const MVKMTLBufferAllocation* tempBuff = cmdEncoder->getTempMTLBuffer(queryCount * _queryElementCount * sizeof(uint64_t));
void* pBuffData = tempBuff->getContents();
size_t size = queryCount * _queryElementCount * sizeof(uint64_t);
memcpy(pBuffData, &_timestamps[firstQuery], size);
offset = tempBuff->_offset;
return tempBuff->_mtlBuffer;
}
void MVKTimestampQueryPool::encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) {
// No need to create a temp buffer here.
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults), &_timestamps[firstQuery], queryCount * _queryElementCount * sizeof(uint64_t), index);
}
#pragma mark Construction
MVKTimestampQueryPool::MVKTimestampQueryPool(MVKDevice* device,
const VkQueryPoolCreateInfo* pCreateInfo) :
MVKQueryPool(device, pCreateInfo, 1), _timestamps(pCreateInfo->queryCount, 0) {
}
#pragma mark - #pragma mark -
#pragma mark MVKOcclusionQueryPool #pragma mark MVKOcclusionQueryPool
@ -285,15 +258,11 @@ void MVKOcclusionQueryPool::resetResults(uint32_t firstQuery, uint32_t queryCoun
} }
} }
void MVKOcclusionQueryPool::getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) { NSData* MVKOcclusionQueryPool::getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) {
NSUInteger mtlBuffOffset = getVisibilityResultOffset(query); id<MTLBuffer> vizBuff = getVisibilityResultMTLBuffer();
uint64_t* pData = (uint64_t*)((uintptr_t)getVisibilityResultMTLBuffer().contents + mtlBuffOffset); return [NSData dataWithBytesNoCopy: (void*)((uintptr_t)vizBuff.contents + getVisibilityResultOffset(firstQuery))
length: queryCount * kMVKQuerySlotSizeInBytes
if (shouldOutput64Bit) { freeWhenDone: false];
*(uint64_t*)pQryData = *pData;
} else {
*(uint32_t*)pQryData = (uint32_t)(*pData);
}
} }
id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_t firstQuery, uint32_t, NSUInteger& offset) { id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_t firstQuery, uint32_t, NSUInteger& offset) {
@ -301,10 +270,10 @@ id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_
return getVisibilityResultMTLBuffer(); return getVisibilityResultMTLBuffer();
} }
void MVKOcclusionQueryPool::encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t, uint32_t index) { id<MTLComputeCommandEncoder> MVKOcclusionQueryPool::encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t, uint32_t index) {
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults) setBuffer: getVisibilityResultMTLBuffer() id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
offset: getVisibilityResultOffset(firstQuery) [mtlCmdEnc setBuffer: getVisibilityResultMTLBuffer() offset: getVisibilityResultOffset(firstQuery) atIndex: index];
atIndex: index]; return mtlCmdEnc;
} }
void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer* cmdBuffer) { void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer* cmdBuffer) {
@ -332,14 +301,12 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device,
_queryIndexOffset = 0; _queryIndexOffset = 0;
// Ensure we don't overflow the maximum number of queries // Ensure we don't overflow the maximum number of queries
uint32_t queryCount = pCreateInfo->queryCount; VkDeviceSize reqBuffLen = (VkDeviceSize)pCreateInfo->queryCount * kMVKQuerySlotSizeInBytes;
VkDeviceSize reqBuffLen = (VkDeviceSize)queryCount * kMVKQuerySlotSizeInBytes;
VkDeviceSize maxBuffLen = _device->_pMetalFeatures->maxQueryBufferSize; VkDeviceSize maxBuffLen = _device->_pMetalFeatures->maxQueryBufferSize;
VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen); VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen);
queryCount = uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes);
if (reqBuffLen > maxBuffLen) { if (reqBuffLen > maxBuffLen) {
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): Each query pool can support a maximum of %d queries.", queryCount); reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): Each query pool can support a maximum of %d queries.", uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes));
} }
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment); NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
@ -357,11 +324,133 @@ MVKOcclusionQueryPool::~MVKOcclusionQueryPool() {
}; };
#pragma mark -
#pragma mark MVKGPUCounterQueryPool
MVKGPUCounterQueryPool::MVKGPUCounterQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo) :
MVKQueryPool(device, pCreateInfo, 1), _mtlCounterBuffer(nil) {}
// To establish the Metal counter sample buffer, this must be called from the construtors
// of subclasses, because the type of MTLCounterSet is determined by the subclass.
void MVKGPUCounterQueryPool::initMTLCounterSampleBuffer(const VkQueryPoolCreateInfo* pCreateInfo,
id<MTLCounterSet> mtlCounterSet,
const char* queryTypeName) {
if ( !_device->_pMetalFeatures->counterSamplingPoints ) { return; }
@autoreleasepool {
MTLCounterSampleBufferDescriptor* tsDesc = [[[MTLCounterSampleBufferDescriptor alloc] init] autorelease];
tsDesc.counterSet = mtlCounterSet;
tsDesc.storageMode = MTLStorageModeShared;
tsDesc.sampleCount = pCreateInfo->queryCount;
NSError* err = nil;
_mtlCounterBuffer = [getMTLDevice() newCounterSampleBufferWithDescriptor: tsDesc error: &err];
if (err) {
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED,
"Could not create MTLCounterSampleBuffer for query pool of type %s. Reverting to emulated behavior. (Error code %li): %s",
queryTypeName, (long)err.code, err.localizedDescription.UTF8String));
}
}
};
MVKGPUCounterQueryPool::~MVKGPUCounterQueryPool() {
[_mtlCounterBuffer release];
}
#pragma mark -
#pragma mark MVKTimestampQueryPool
void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
cmdEncoder->markTimestamp(this, query);
MVKQueryPool::endQuery(query, cmdEncoder);
}
// If not using MTLCounterSampleBuffer, update timestamp values, then mark queries as available
void MVKTimestampQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
if ( !_mtlCounterBuffer ) {
uint64_t ts = mvkGetTimestamp();
for (uint32_t qry : queries) { _timestamps[qry] = ts; }
}
MVKQueryPool::finishQueries(queries);
}
NSData* MVKTimestampQueryPool::getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) {
if (_mtlCounterBuffer) {
return [_mtlCounterBuffer resolveCounterRange: NSMakeRange(firstQuery, queryCount)];
} else {
return [NSData dataWithBytesNoCopy: (void*)&_timestamps[firstQuery]
length: queryCount * kMVKQuerySlotSizeInBytes
freeWhenDone: false];
}
}
void MVKTimestampQueryPool::encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder,
uint32_t firstQuery,
uint32_t queryCount,
MVKBuffer* destBuffer,
VkDeviceSize destOffset,
VkDeviceSize stride) {
if (_mtlCounterBuffer) {
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults);
[mtlBlitCmdEnc resolveCounters: _mtlCounterBuffer
inRange: NSMakeRange(firstQuery, queryCount)
destinationBuffer: destBuffer->getMTLBuffer()
destinationOffset: destBuffer->getMTLBufferOffset() + destOffset];
} else {
MVKQueryPool::encodeDirectCopyResults(cmdEncoder, firstQuery, queryCount, destBuffer, destOffset, stride);
}
}
id<MTLBuffer> MVKTimestampQueryPool::getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) {
const MVKMTLBufferAllocation* tempBuff = cmdEncoder->getTempMTLBuffer(queryCount * _queryElementCount * sizeof(uint64_t));
void* pBuffData = tempBuff->getContents();
size_t size = queryCount * _queryElementCount * sizeof(uint64_t);
memcpy(pBuffData, &_timestamps[firstQuery], size);
offset = tempBuff->_offset;
return tempBuff->_mtlBuffer;
}
id<MTLComputeCommandEncoder> MVKTimestampQueryPool::encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) {
if (_mtlCounterBuffer) {
// We first need to resolve from the MTLCounterSampleBuffer into a temp buffer using a
// MTLBlitCommandEncoder, before creating the compute encoder and set that temp buffer into it.
const MVKMTLBufferAllocation* tempBuff = cmdEncoder->getTempMTLBuffer(queryCount * _queryElementCount * sizeof(uint64_t));
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults);
[mtlBlitCmdEnc resolveCounters: _mtlCounterBuffer
inRange: NSMakeRange(firstQuery, queryCount)
destinationBuffer: tempBuff->_mtlBuffer
destinationOffset: tempBuff->_offset];
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
[mtlCmdEnc setBuffer: tempBuff->_mtlBuffer offset: tempBuff->_offset atIndex: index];
return mtlCmdEnc;
} else {
// We can set the timestamp bytes into the compute encoder.
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
cmdEncoder->setComputeBytes(mtlCmdEnc, &_timestamps[firstQuery], queryCount * _queryElementCount * sizeof(uint64_t), index);
return mtlCmdEnc;
}
}
#pragma mark Construction
MVKTimestampQueryPool::MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo) :
MVKGPUCounterQueryPool(device, pCreateInfo) {
initMTLCounterSampleBuffer(pCreateInfo, _device->getTimestampMTLCounterSet(), "VK_QUERY_TYPE_TIMESTAMP");
// If we don't use a MTLCounterSampleBuffer, allocate memory to hold the timestamps.
if ( !_mtlCounterBuffer ) { _timestamps.resize(pCreateInfo->queryCount, 0); }
}
#pragma mark - #pragma mark -
#pragma mark MVKPipelineStatisticsQueryPool #pragma mark MVKPipelineStatisticsQueryPool
MVKPipelineStatisticsQueryPool::MVKPipelineStatisticsQueryPool(MVKDevice* device, MVKPipelineStatisticsQueryPool::MVKPipelineStatisticsQueryPool(MVKDevice* device,
const VkQueryPoolCreateInfo* pCreateInfo) : MVKQueryPool(device, pCreateInfo, 1) { const VkQueryPoolCreateInfo* pCreateInfo) : MVKGPUCounterQueryPool(device, pCreateInfo) {
if ( !_device->_enabledFeatures.pipelineStatisticsQuery ) { if ( !_device->_enabledFeatures.pipelineStatisticsQuery ) {
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateQueryPool: VK_QUERY_TYPE_PIPELINE_STATISTICS is not supported.")); setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateQueryPool: VK_QUERY_TYPE_PIPELINE_STATISTICS is not supported."));
} }

View File

@ -91,7 +91,8 @@ typedef enum : uint8_t {
kMVKCommandUseTessellationVertexTessCtl, /**< vkCmdDraw* - vertex and tessellation control stages. */ kMVKCommandUseTessellationVertexTessCtl, /**< vkCmdDraw* - vertex and tessellation control stages. */
kMVKCommandUseMultiviewInstanceCountAdjust, /**< vkCmdDrawIndirect* - adjust instance count for multiview. */ kMVKCommandUseMultiviewInstanceCountAdjust, /**< vkCmdDrawIndirect* - adjust instance count for multiview. */
kMVKCommandUseCopyQueryPoolResults, /**< vkCmdCopyQueryPoolResults. */ kMVKCommandUseCopyQueryPoolResults, /**< vkCmdCopyQueryPoolResults. */
kMVKCommandUseAccumOcclusionQuery /**< Any command terminating a Metal render pass with active visibility buffer. */ kMVKCommandUseAccumOcclusionQuery, /**< Any command terminating a Metal render pass with active visibility buffer. */
kMVKCommandUseRecordGPUCounterSample /**< Any command triggering the recording of a GPU counter sample. */
} MVKCommandUse; } MVKCommandUse;
/** Represents a given stage of a graphics pipeline. */ /** Represents a given stage of a graphics pipeline. */