Merge pull request #1404 from billhollings/timestamp-using-metal-gpu-counters
Vulkan timestamp query pools use Metal GPU counters when available.
This commit is contained in:
commit
7a2def4604
@ -18,11 +18,13 @@ MoltenVK 1.1.5
|
|||||||
|
|
||||||
Released TBD
|
Released TBD
|
||||||
|
|
||||||
|
- Vulkan timestamp query pools use Metal GPU counters when available.
|
||||||
- Fix incorrect translation of clear color values on Apple Silicon.
|
- Fix incorrect translation of clear color values on Apple Silicon.
|
||||||
- Fix swizzle of depth and stencil values into RGBA (`float4`) variable in shaders.
|
- Fix swizzle of depth and stencil values into RGBA (`float4`) variable in shaders.
|
||||||
- Disable `VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT` for
|
- Disable `VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT` for
|
||||||
`VK_FORMAT_E5B9G9R9_UFLOAT_PACK32` on macOS Apple Silicon.
|
`VK_FORMAT_E5B9G9R9_UFLOAT_PACK32` on macOS Apple Silicon.
|
||||||
- Support alpha-to-coverage without a color attachment.
|
- Support alpha-to-coverage without a color attachment.
|
||||||
|
- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to `32`.
|
||||||
- Update to latest SPIRV-Cross version:
|
- Update to latest SPIRV-Cross version:
|
||||||
- MSL: Adjust `gl_SampleMaskIn` for sample-shading and/or fixed sample mask.
|
- MSL: Adjust `gl_SampleMaskIn` for sample-shading and/or fixed sample mask.
|
||||||
- MSL: Fix setting `SPIRVCrossDecorationInterpolantComponentExpr` decoration.
|
- MSL: Fix setting `SPIRVCrossDecorationInterpolantComponentExpr` decoration.
|
||||||
|
@ -835,6 +835,16 @@ typedef enum MVKFloatRounding {
|
|||||||
MVK_FLOAT_ROUNDING_UP_MAX_ENUM = 0x7FFFFFFF
|
MVK_FLOAT_ROUNDING_UP_MAX_ENUM = 0x7FFFFFFF
|
||||||
} MVKFloatRounding;
|
} MVKFloatRounding;
|
||||||
|
|
||||||
|
/** Identifies the pipeline points where GPU counter sampling can occur. Maps to MTLCounterSamplingPoint. */
|
||||||
|
typedef enum MVKCounterSamplingBits {
|
||||||
|
MVK_COUNTER_SAMPLING_AT_DRAW = 0x00000001,
|
||||||
|
MVK_COUNTER_SAMPLING_AT_DISPATCH = 0x00000002,
|
||||||
|
MVK_COUNTER_SAMPLING_AT_BLIT = 0x00000004,
|
||||||
|
MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE = 0x00000008,
|
||||||
|
MVK_COUNTER_SAMPLING_MAX_ENUM = 0X7FFFFFFF
|
||||||
|
} MVKCounterSamplingBits;
|
||||||
|
typedef VkFlags MVKCounterSamplingFlags;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Features provided by the current implementation of Metal on the current device. You can
|
* Features provided by the current implementation of Metal on the current device. You can
|
||||||
* retrieve a copy of this structure using the vkGetPhysicalDeviceMetalFeaturesMVK() function.
|
* retrieve a copy of this structure using the vkGetPhysicalDeviceMetalFeaturesMVK() function.
|
||||||
@ -915,6 +925,7 @@ typedef struct {
|
|||||||
VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported. */
|
VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported. */
|
||||||
VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffer can be assigned to a descriptor set, and used on any pipeline and pipeline stage. If false, a different Metal argument buffer must be used for each pipeline-stage/descriptor-set combination. */
|
VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffer can be assigned to a descriptor set, and used on any pipeline and pipeline stage. If false, a different Metal argument buffer must be used for each pipeline-stage/descriptor-set combination. */
|
||||||
MVKFloatRounding clearColorFloatRounding; /**< Identifies the type of rounding Metal uses for MTLClearColor float to integer conversions. */
|
MVKFloatRounding clearColorFloatRounding; /**< Identifies the type of rounding Metal uses for MTLClearColor float to integer conversions. */
|
||||||
|
MVKCounterSamplingFlags counterSamplingPoints; /**< Identifies the points where pipeline GPU counter sampling may occur. */
|
||||||
} MVKPhysicalDeviceMetalFeatures;
|
} MVKPhysicalDeviceMetalFeatures;
|
||||||
|
|
||||||
/** MoltenVK performance of a particular type of activity. */
|
/** MoltenVK performance of a particular type of activity. */
|
||||||
|
@ -410,7 +410,7 @@ public:
|
|||||||
void endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query);
|
void endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query);
|
||||||
|
|
||||||
/** Marks a timestamp for the specified query. */
|
/** Marks a timestamp for the specified query. */
|
||||||
void markTimestamp(MVKQueryPool* pQueryPool, uint32_t query);
|
void markTimestamp(MVKTimestampQueryPool* pQueryPool, uint32_t query);
|
||||||
|
|
||||||
/** Reset a range of queries. */
|
/** Reset a range of queries. */
|
||||||
void resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount);
|
void resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount);
|
||||||
@ -499,6 +499,13 @@ protected:
|
|||||||
void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
|
void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
|
||||||
void clearRenderArea();
|
void clearRenderArea();
|
||||||
NSString* getMTLRenderCommandEncoderName();
|
NSString* getMTLRenderCommandEncoderName();
|
||||||
|
void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints);
|
||||||
|
void encodeTimestampStageCounterSamples();
|
||||||
|
|
||||||
|
typedef struct GPUCounterQuery {
|
||||||
|
MVKGPUCounterQueryPool* queryPool = nullptr;
|
||||||
|
uint32_t query = 0;
|
||||||
|
} GPUCounterQuery;
|
||||||
|
|
||||||
VkSubpassContents _subpassContents;
|
VkSubpassContents _subpassContents;
|
||||||
MVKRenderPass* _renderPass;
|
MVKRenderPass* _renderPass;
|
||||||
@ -507,6 +514,7 @@ protected:
|
|||||||
uint32_t _multiviewPassIndex;
|
uint32_t _multiviewPassIndex;
|
||||||
VkRect2D _renderArea;
|
VkRect2D _renderArea;
|
||||||
MVKActivatedQueries* _pActivatedQueries;
|
MVKActivatedQueries* _pActivatedQueries;
|
||||||
|
MVKSmallVector<GPUCounterQuery, 16> _timestampStageCounterQueries;
|
||||||
MVKSmallVector<VkClearValue, kMVKDefaultAttachmentCount> _clearValues;
|
MVKSmallVector<VkClearValue, kMVKDefaultAttachmentCount> _clearValues;
|
||||||
MVKSmallVector<MVKImageView*, kMVKDefaultAttachmentCount> _attachments;
|
MVKSmallVector<MVKImageView*, kMVKDefaultAttachmentCount> _attachments;
|
||||||
id<MTLComputeCommandEncoder> _mtlComputeEncoder;
|
id<MTLComputeCommandEncoder> _mtlComputeEncoder;
|
||||||
|
@ -614,6 +614,8 @@ void MVKCommandEncoder::endCurrentMetalEncoding() {
|
|||||||
[_mtlBlitEncoder endEncoding];
|
[_mtlBlitEncoder endEncoding];
|
||||||
_mtlBlitEncoder = nil; // not retained
|
_mtlBlitEncoder = nil; // not retained
|
||||||
_mtlBlitEncoderUse = kMVKCommandUseNone;
|
_mtlBlitEncoderUse = kMVKCommandUseNone;
|
||||||
|
|
||||||
|
encodeTimestampStageCounterSamples();
|
||||||
}
|
}
|
||||||
|
|
||||||
id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) {
|
id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) {
|
||||||
@ -720,6 +722,23 @@ const MVKMTLBufferAllocation* MVKCommandEncoder::copyToTempMTLBufferAllocation(c
|
|||||||
|
|
||||||
#pragma mark Queries
|
#pragma mark Queries
|
||||||
|
|
||||||
|
// Only executes on immediate-mode GPUs. Encode a GPU counter sample command on whichever Metal
|
||||||
|
// encoder is currently in use, creating a temporary BLIT encoder if no encoder is currently active.
|
||||||
|
// We only encode the GPU sample if the platform allows encoding at the associated pipeline point.
|
||||||
|
void MVKCommandEncoder::encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints){
|
||||||
|
if (_mtlRenderEncoder) {
|
||||||
|
if (mvkIsAnyFlagEnabled(samplingPoints, MVK_COUNTER_SAMPLING_AT_DRAW)) {
|
||||||
|
[_mtlRenderEncoder sampleCountersInBuffer: mvkQryPool->getMTLCounterBuffer() atSampleIndex: sampleIndex withBarrier: NO];
|
||||||
|
}
|
||||||
|
} else if (_mtlComputeEncoder) {
|
||||||
|
if (mvkIsAnyFlagEnabled(samplingPoints, MVK_COUNTER_SAMPLING_AT_DISPATCH)) {
|
||||||
|
[_mtlComputeEncoder sampleCountersInBuffer: mvkQryPool->getMTLCounterBuffer() atSampleIndex: sampleIndex withBarrier: NO];
|
||||||
|
}
|
||||||
|
} else if (mvkIsAnyFlagEnabled(samplingPoints, MVK_COUNTER_SAMPLING_AT_BLIT)) {
|
||||||
|
[getMTLBlitEncoder(kMVKCommandUseRecordGPUCounterSample) sampleCountersInBuffer: mvkQryPool->getMTLCounterBuffer() atSampleIndex: sampleIndex withBarrier: NO];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void MVKCommandEncoder::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
|
void MVKCommandEncoder::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
|
||||||
_occlusionQueryState.beginOcclusionQuery(pQueryPool, query, flags);
|
_occlusionQueryState.beginOcclusionQuery(pQueryPool, query, flags);
|
||||||
uint32_t queryCount = 1;
|
uint32_t queryCount = 1;
|
||||||
@ -733,14 +752,67 @@ void MVKCommandEncoder::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uin
|
|||||||
_occlusionQueryState.endOcclusionQuery(pQueryPool, query);
|
_occlusionQueryState.endOcclusionQuery(pQueryPool, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MVKCommandEncoder::markTimestamp(MVKQueryPool* pQueryPool, uint32_t query) {
|
void MVKCommandEncoder::markTimestamp(MVKTimestampQueryPool* pQueryPool, uint32_t query) {
|
||||||
uint32_t queryCount = 1;
|
uint32_t queryCount = 1;
|
||||||
if (_renderPass && getSubpass()->isMultiview()) {
|
if (_renderPass && getSubpass()->isMultiview()) {
|
||||||
queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
|
queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
|
||||||
}
|
}
|
||||||
addActivatedQueries(pQueryPool, query, queryCount);
|
addActivatedQueries(pQueryPool, query, queryCount);
|
||||||
|
|
||||||
|
MVKCounterSamplingFlags sampPts = _device->_pMetalFeatures->counterSamplingPoints;
|
||||||
|
if (sampPts) {
|
||||||
|
for (uint32_t qOfst = 0; qOfst < queryCount; qOfst++) {
|
||||||
|
if (mvkIsAnyFlagEnabled(sampPts, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE)) {
|
||||||
|
_timestampStageCounterQueries.push_back({ pQueryPool, query + qOfst });
|
||||||
|
} else {
|
||||||
|
encodeGPUCounterSample(pQueryPool, query + qOfst, sampPts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MVK_XCODE_12
|
||||||
|
// Metal stage GPU counters need to be configured in a Metal render, compute, or BLIT encoder, meaning that the
|
||||||
|
// Metal encoder needs to know about any Vulkan timestamp commands that will be executed during the execution
|
||||||
|
// of a renderpass, or set of Vulkan dispatch or BLIT commands. In addition, there are a very small number of
|
||||||
|
// staged timestamps (4) that can be tracked in any single render, compute, or BLIT pass, meaning a renderpass
|
||||||
|
// that timestamped after each of many draw calls, would not be trackable. Finally, stage counters are only
|
||||||
|
// available on tile-based GPU's, which means draw or dispatch calls cannot be individually timestamped.
|
||||||
|
// We avoid dealing with all this complexity and mismatch between how Vulkan and Metal stage counters operate
|
||||||
|
// by deferring all timestamps to the end of any batch of Metal encoding, and add a lightweight Metal encoder
|
||||||
|
// that does minimal work (it won't timestamp if completely empty), and timestamps that work into all of the
|
||||||
|
// Vulkan timestamp queries that have been executed during the execution of the previous Metal encoder.
|
||||||
|
void MVKCommandEncoder::encodeTimestampStageCounterSamples() {
|
||||||
|
size_t qCnt = _timestampStageCounterQueries.size();
|
||||||
|
uint32_t qIdx = 0;
|
||||||
|
while (qIdx < qCnt) {
|
||||||
|
|
||||||
|
// With each BLIT pass, consume as many outstanding timestamp queries as possible.
|
||||||
|
// Attach an query result to each of the available sample buffer attachments in the BLIT pass descriptor.
|
||||||
|
auto* bpDesc = [[[MTLBlitPassDescriptor alloc] init] autorelease];
|
||||||
|
for (uint32_t attIdx = 0; attIdx < MTLMaxBlitPassSampleBuffers && qIdx < qCnt; attIdx++, qIdx++) {
|
||||||
|
auto* sbAttDesc = bpDesc.sampleBufferAttachments[attIdx];
|
||||||
|
auto& tsQry = _timestampStageCounterQueries[qIdx];
|
||||||
|
|
||||||
|
// We actually only need to use startOfEncoderSampleIndex, but apparently,
|
||||||
|
// and contradicting docs, Metal hits an unexpected validation error if
|
||||||
|
// endOfEncoderSampleIndex is left at MTLCounterDontSample.
|
||||||
|
sbAttDesc.startOfEncoderSampleIndex = tsQry.query;
|
||||||
|
sbAttDesc.endOfEncoderSampleIndex = tsQry.query;
|
||||||
|
sbAttDesc.sampleBuffer = tsQry.queryPool->getMTLCounterBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto* mtlEnc = [_mtlCmdBuffer blitCommandEncoderWithDescriptor: bpDesc];
|
||||||
|
setLabelIfNotNil(mtlEnc, mvkMTLBlitCommandEncoderLabel(kMVKCommandUseRecordGPUCounterSample));
|
||||||
|
[mtlEnc fillBuffer: _device->getDummyBlitMTLBuffer() range: NSMakeRange(0, 1) value: 0];
|
||||||
|
[mtlEnc endEncoding];
|
||||||
|
}
|
||||||
|
_timestampStageCounterQueries.clear();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
void MVKCommandEncoder::encodeTimestampStageCounterSamples() {}
|
||||||
|
#endif
|
||||||
|
|
||||||
void MVKCommandEncoder::resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount) {
|
void MVKCommandEncoder::resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount) {
|
||||||
addActivatedQueries(pQueryPool, firstQuery, queryCount);
|
addActivatedQueries(pQueryPool, firstQuery, queryCount);
|
||||||
}
|
}
|
||||||
@ -847,6 +919,7 @@ NSString* mvkMTLBlitCommandEncoderLabel(MVKCommandUse cmdUse) {
|
|||||||
case kMVKCommandUseUpdateBuffer: return @"vkCmdUpdateBuffer BlitEncoder";
|
case kMVKCommandUseUpdateBuffer: return @"vkCmdUpdateBuffer BlitEncoder";
|
||||||
case kMVKCommandUseResetQueryPool: return @"vkCmdResetQueryPool BlitEncoder";
|
case kMVKCommandUseResetQueryPool: return @"vkCmdResetQueryPool BlitEncoder";
|
||||||
case kMVKCommandUseCopyQueryPoolResults: return @"vkCmdCopyQueryPoolResults BlitEncoder";
|
case kMVKCommandUseCopyQueryPoolResults: return @"vkCmdCopyQueryPoolResults BlitEncoder";
|
||||||
|
case kMVKCommandUseRecordGPUCounterSample: return @"Record GPU Counter Sample BlitEncoder";
|
||||||
default: return @"Unknown Use BlitEncoder";
|
default: return @"Unknown Use BlitEncoder";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -324,10 +324,10 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Returns whether the MSL version is supported on this device. */
|
/** Returns whether the MSL version is supported on this device. */
|
||||||
inline bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; }
|
bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; }
|
||||||
|
|
||||||
/** Returns whether this device is using Metal argument buffers. */
|
/** Returns whether this device is using Metal argument buffers. */
|
||||||
inline bool isUsingMetalArgumentBuffers() const { return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers; };
|
bool isUsingMetalArgumentBuffers() const { return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers; };
|
||||||
|
|
||||||
|
|
||||||
#pragma mark Construction
|
#pragma mark Construction
|
||||||
@ -371,6 +371,7 @@ protected:
|
|||||||
uint32_t getMaxSamplerCount();
|
uint32_t getMaxSamplerCount();
|
||||||
void initExternalMemoryProperties();
|
void initExternalMemoryProperties();
|
||||||
void initExtensions();
|
void initExtensions();
|
||||||
|
void initCounterSets();
|
||||||
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
|
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
|
||||||
void initPipelineCacheUUID();
|
void initPipelineCacheUUID();
|
||||||
uint32_t getHighestMTLFeatureSet();
|
uint32_t getHighestMTLFeatureSet();
|
||||||
@ -388,6 +389,7 @@ protected:
|
|||||||
VkPhysicalDeviceMemoryProperties _memoryProperties;
|
VkPhysicalDeviceMemoryProperties _memoryProperties;
|
||||||
MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies;
|
MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies;
|
||||||
MVKPixelFormats _pixelFormats;
|
MVKPixelFormats _pixelFormats;
|
||||||
|
id<MTLCounterSet> _timestampMTLCounterSet;
|
||||||
uint32_t _allMemoryTypes;
|
uint32_t _allMemoryTypes;
|
||||||
uint32_t _hostVisibleMemoryTypes;
|
uint32_t _hostVisibleMemoryTypes;
|
||||||
uint32_t _hostCoherentMemoryTypes;
|
uint32_t _hostCoherentMemoryTypes;
|
||||||
@ -684,12 +686,21 @@ public:
|
|||||||
*/
|
*/
|
||||||
uint32_t expandVisibilityResultMTLBuffer(uint32_t queryCount);
|
uint32_t expandVisibilityResultMTLBuffer(uint32_t queryCount);
|
||||||
|
|
||||||
|
/** Returns the GPU sample counter used for timestamps. */
|
||||||
|
id<MTLCounterSet> getTimestampMTLCounterSet() { return _physicalDevice->_timestampMTLCounterSet; }
|
||||||
|
|
||||||
/** Returns the memory type index corresponding to the specified Metal memory storage mode. */
|
/** Returns the memory type index corresponding to the specified Metal memory storage mode. */
|
||||||
uint32_t getVulkanMemoryTypeIndex(MTLStorageMode mtlStorageMode);
|
uint32_t getVulkanMemoryTypeIndex(MTLStorageMode mtlStorageMode);
|
||||||
|
|
||||||
/** Returns a default MTLSamplerState to populate empty array element descriptors. */
|
/** Returns a default MTLSamplerState to populate empty array element descriptors. */
|
||||||
id<MTLSamplerState> getDefaultMTLSamplerState();
|
id<MTLSamplerState> getDefaultMTLSamplerState();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a MTLBuffer of length one that can be used as a dummy to
|
||||||
|
* create a no-op BLIT encoder based on filling this single-byte buffer.
|
||||||
|
*/
|
||||||
|
id<MTLBuffer> getDummyBlitMTLBuffer();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether MTLCommandBuffers can be prefilled.
|
* Returns whether MTLCommandBuffers can be prefilled.
|
||||||
*
|
*
|
||||||
@ -818,6 +829,7 @@ protected:
|
|||||||
std::mutex _perfLock;
|
std::mutex _perfLock;
|
||||||
id<MTLBuffer> _globalVisibilityResultMTLBuffer;
|
id<MTLBuffer> _globalVisibilityResultMTLBuffer;
|
||||||
id<MTLSamplerState> _defaultMTLSamplerState;
|
id<MTLSamplerState> _defaultMTLSamplerState;
|
||||||
|
id<MTLBuffer> _dummyBlitMTLBuffer;
|
||||||
uint32_t _globalVisibilityQueryCount;
|
uint32_t _globalVisibilityQueryCount;
|
||||||
std::mutex _vizLock;
|
std::mutex _vizLock;
|
||||||
bool _useMTLFenceForSemaphores;
|
bool _useMTLFenceForSemaphores;
|
||||||
|
@ -1150,6 +1150,7 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
|
|||||||
initExtensions();
|
initExtensions();
|
||||||
initMemoryProperties();
|
initMemoryProperties();
|
||||||
initExternalMemoryProperties();
|
initExternalMemoryProperties();
|
||||||
|
initCounterSets();
|
||||||
logGPUInfo();
|
logGPUInfo();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1594,6 +1595,27 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
|||||||
// Currently, if we don't support descriptor set argument buffers, we can't support argument buffers.
|
// Currently, if we don't support descriptor set argument buffers, we can't support argument buffers.
|
||||||
_metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers;
|
_metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers;
|
||||||
|
|
||||||
|
#define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \
|
||||||
|
if ([_mtlDevice respondsToSelector: @selector(supportsCounterSampling:)] && \
|
||||||
|
[_mtlDevice supportsCounterSampling: MTLCounterSamplingPointAt ##mtlSP ##Boundary]) { \
|
||||||
|
_metalFeatures.counterSamplingPoints |= MVK_COUNTER_SAMPLING_AT_ ##mvkSP; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#if MVK_XCODE_12
|
||||||
|
checkSupportsMTLCounterSamplingPoint(Draw, DRAW);
|
||||||
|
checkSupportsMTLCounterSamplingPoint(Dispatch, DISPATCH);
|
||||||
|
checkSupportsMTLCounterSamplingPoint(Blit, BLIT);
|
||||||
|
checkSupportsMTLCounterSamplingPoint(Stage, PIPELINE_STAGE);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !MVK_APPLE_SILICON
|
||||||
|
// On macOS, if we couldn't query supported sample points (on macOS 11),
|
||||||
|
// but the platform can support immediate-mode sample points, indicate that here.
|
||||||
|
if (!_metalFeatures.counterSamplingPoints && mvkOSVersionIsAtLeast(10.15)) { \
|
||||||
|
_metalFeatures.counterSamplingPoints = MVK_COUNTER_SAMPLING_AT_DRAW | MVK_COUNTER_SAMPLING_AT_DISPATCH | MVK_COUNTER_SAMPLING_AT_BLIT; \
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initializes the physical device features of this instance.
|
// Initializes the physical device features of this instance.
|
||||||
@ -2726,6 +2748,28 @@ void MVKPhysicalDevice::initExtensions() {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MVKPhysicalDevice::initCounterSets() {
|
||||||
|
_timestampMTLCounterSet = nil;
|
||||||
|
@autoreleasepool {
|
||||||
|
if (_metalFeatures.counterSamplingPoints) {
|
||||||
|
NSArray<id<MTLCounterSet>>* counterSets = _mtlDevice.counterSets;
|
||||||
|
for (id<MTLCounterSet> cs in counterSets){
|
||||||
|
NSString* csName = cs.name;
|
||||||
|
if ( [csName caseInsensitiveCompare: MTLCommonCounterSetTimestamp] == NSOrderedSame) {
|
||||||
|
NSArray<id<MTLCounter>>* countersInSet = cs.counters;
|
||||||
|
for(id<MTLCounter> ctr in countersInSet) {
|
||||||
|
if ( [ctr.name caseInsensitiveCompare: MTLCommonCounterTimestamp] == NSOrderedSame) {
|
||||||
|
_timestampMTLCounterSet = [cs retain]; // retained
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void MVKPhysicalDevice::logGPUInfo() {
|
void MVKPhysicalDevice::logGPUInfo() {
|
||||||
string devTypeStr;
|
string devTypeStr;
|
||||||
switch (_properties.deviceType) {
|
switch (_properties.deviceType) {
|
||||||
@ -2838,6 +2882,7 @@ void MVKPhysicalDevice::logGPUInfo() {
|
|||||||
|
|
||||||
MVKPhysicalDevice::~MVKPhysicalDevice() {
|
MVKPhysicalDevice::~MVKPhysicalDevice() {
|
||||||
mvkDestroyContainerContents(_queueFamilies);
|
mvkDestroyContainerContents(_queueFamilies);
|
||||||
|
[_timestampMTLCounterSet release];
|
||||||
[_mtlDevice release];
|
[_mtlDevice release];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3712,6 +3757,20 @@ id<MTLSamplerState> MVKDevice::getDefaultMTLSamplerState() {
|
|||||||
return _defaultMTLSamplerState;
|
return _defaultMTLSamplerState;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
id<MTLBuffer> MVKDevice::getDummyBlitMTLBuffer() {
|
||||||
|
if ( !_dummyBlitMTLBuffer ) {
|
||||||
|
|
||||||
|
// Lock and check again in case another thread has created the buffer.
|
||||||
|
lock_guard<mutex> lock(_rezLock);
|
||||||
|
if ( !_dummyBlitMTLBuffer ) {
|
||||||
|
@autoreleasepool {
|
||||||
|
_dummyBlitMTLBuffer = [getMTLDevice() newBufferWithLength: 1 options: MTLResourceStorageModePrivate];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return _dummyBlitMTLBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
MTLCompileOptions* MVKDevice::getMTLCompileOptions(bool useFastMath, bool preserveInvariance) {
|
MTLCompileOptions* MVKDevice::getMTLCompileOptions(bool useFastMath, bool preserveInvariance) {
|
||||||
MTLCompileOptions* mtlCompOpt = [MTLCompileOptions new];
|
MTLCompileOptions* mtlCompOpt = [MTLCompileOptions new];
|
||||||
mtlCompOpt.languageVersion = _pMetalFeatures->mslVersionEnum;
|
mtlCompOpt.languageVersion = _pMetalFeatures->mslVersionEnum;
|
||||||
@ -3833,6 +3892,7 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
|
|||||||
_globalVisibilityQueryCount = 0;
|
_globalVisibilityQueryCount = 0;
|
||||||
|
|
||||||
_defaultMTLSamplerState = nil;
|
_defaultMTLSamplerState = nil;
|
||||||
|
_dummyBlitMTLBuffer = nil;
|
||||||
|
|
||||||
_commandResourceFactory = new MVKCommandResourceFactory(this);
|
_commandResourceFactory = new MVKCommandResourceFactory(this);
|
||||||
|
|
||||||
@ -4200,6 +4260,7 @@ MVKDevice::~MVKDevice() {
|
|||||||
|
|
||||||
[_globalVisibilityResultMTLBuffer release];
|
[_globalVisibilityResultMTLBuffer release];
|
||||||
[_defaultMTLSamplerState release];
|
[_defaultMTLSamplerState release];
|
||||||
|
[_dummyBlitMTLBuffer release];
|
||||||
|
|
||||||
stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE);
|
stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE);
|
||||||
|
|
||||||
|
@ -38,7 +38,6 @@ class MVKCommandEncoder;
|
|||||||
/**
|
/**
|
||||||
* Abstract class representing a Vulkan query pool.
|
* Abstract class representing a Vulkan query pool.
|
||||||
* Subclasses are specialized for specific query types.
|
* Subclasses are specialized for specific query types.
|
||||||
* Subclasses will generally override the beginQuery(), endQuery(), and getResult(uint32_t, void*, bool) member functions.
|
|
||||||
*/
|
*/
|
||||||
class MVKQueryPool : public MVKVulkanAPIDeviceObject {
|
class MVKQueryPool : public MVKVulkanAPIDeviceObject {
|
||||||
|
|
||||||
@ -106,10 +105,12 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuery);
|
bool areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuery);
|
||||||
VkResult getResult(uint32_t query, void* pQryData, VkQueryResultFlags flags);
|
virtual NSData* getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) { return nil; }
|
||||||
virtual void getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) {}
|
VkResult getResult(uint32_t query, NSData* srcData, uint32_t srcDataQueryOffset, void* pDstData, VkQueryResultFlags flags);
|
||||||
virtual id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) { return nil; }
|
virtual id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) { return nil; }
|
||||||
virtual void encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) {}
|
virtual id<MTLComputeCommandEncoder> encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) { return nil; }
|
||||||
|
virtual void encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount,
|
||||||
|
MVKBuffer* destBuffer, VkDeviceSize destOffset, VkDeviceSize stride);
|
||||||
|
|
||||||
struct DeferredCopy {
|
struct DeferredCopy {
|
||||||
uint32_t firstQuery;
|
uint32_t firstQuery;
|
||||||
@ -136,31 +137,6 @@ protected:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#pragma mark -
|
|
||||||
#pragma mark MVKTimestampQueryPool
|
|
||||||
|
|
||||||
/** A Vulkan query pool for timestamp queries. */
|
|
||||||
class MVKTimestampQueryPool : public MVKQueryPool {
|
|
||||||
|
|
||||||
public:
|
|
||||||
void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override;
|
|
||||||
void finishQueries(const MVKArrayRef<uint32_t>& queries) override;
|
|
||||||
|
|
||||||
|
|
||||||
#pragma mark Construction
|
|
||||||
|
|
||||||
MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
void propagateDebugName() override {}
|
|
||||||
void getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) override;
|
|
||||||
id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override;
|
|
||||||
void encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override;
|
|
||||||
|
|
||||||
MVKSmallVector<uint64_t, kMVKDefaultQueryCount> _timestamps;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#pragma mark -
|
#pragma mark -
|
||||||
#pragma mark MVKOcclusionQueryPool
|
#pragma mark MVKOcclusionQueryPool
|
||||||
|
|
||||||
@ -189,20 +165,73 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
void propagateDebugName() override;
|
void propagateDebugName() override;
|
||||||
void getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) override;
|
NSData* getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) override;
|
||||||
id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override;
|
id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override;
|
||||||
void encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override;
|
id<MTLComputeCommandEncoder> encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override;
|
||||||
|
|
||||||
id<MTLBuffer> _visibilityResultMTLBuffer;
|
id<MTLBuffer> _visibilityResultMTLBuffer;
|
||||||
uint32_t _queryIndexOffset;
|
uint32_t _queryIndexOffset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#pragma mark -
|
||||||
|
#pragma mark MVKGPUCounterQueryPool
|
||||||
|
|
||||||
|
/** An abstract parent class for query pools that use Metal GPU counters if they are supported on the platform. */
|
||||||
|
class MVKGPUCounterQueryPool : public MVKQueryPool {
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the MTLCounterBuffer being used by this query pool,
|
||||||
|
* or returns nil if GPU counters are not supported.
|
||||||
|
* */
|
||||||
|
id<MTLCounterSampleBuffer> getMTLCounterBuffer() { return _mtlCounterBuffer; }
|
||||||
|
|
||||||
|
MVKGPUCounterQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
|
||||||
|
|
||||||
|
~MVKGPUCounterQueryPool() override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void initMTLCounterSampleBuffer(const VkQueryPoolCreateInfo* pCreateInfo,
|
||||||
|
id<MTLCounterSet> mtlCounterSet,
|
||||||
|
const char* queryTypeName);
|
||||||
|
|
||||||
|
id<MTLCounterSampleBuffer> _mtlCounterBuffer;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#pragma mark -
|
||||||
|
#pragma mark MVKTimestampQueryPool
|
||||||
|
|
||||||
|
/** A Vulkan query pool for timestamp queries. */
|
||||||
|
class MVKTimestampQueryPool : public MVKGPUCounterQueryPool {
|
||||||
|
|
||||||
|
public:
|
||||||
|
void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override;
|
||||||
|
void finishQueries(const MVKArrayRef<uint32_t>& queries) override;
|
||||||
|
|
||||||
|
#pragma mark Construction
|
||||||
|
|
||||||
|
MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void propagateDebugName() override {}
|
||||||
|
NSData* getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) override;
|
||||||
|
id<MTLBuffer> getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) override;
|
||||||
|
id<MTLComputeCommandEncoder> encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) override;
|
||||||
|
void encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount,
|
||||||
|
MVKBuffer* destBuffer, VkDeviceSize destOffset, VkDeviceSize stride) override;
|
||||||
|
|
||||||
|
MVKSmallVector<uint64_t> _timestamps;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
#pragma mark -
|
#pragma mark -
|
||||||
#pragma mark MVKPipelineStatisticsQueryPool
|
#pragma mark MVKPipelineStatisticsQueryPool
|
||||||
|
|
||||||
/** A Vulkan query pool for a query pool type that tracks pipeline statistics. */
|
/** A Vulkan query pool for a query pool type that tracks pipeline statistics. */
|
||||||
class MVKPipelineStatisticsQueryPool : public MVKQueryPool {
|
class MVKPipelineStatisticsQueryPool : public MVKGPUCounterQueryPool {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MVKPipelineStatisticsQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
|
MVKPipelineStatisticsQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo);
|
||||||
|
@ -89,11 +89,14 @@ VkResult MVKQueryPool::getResults(uint32_t firstQuery,
|
|||||||
}
|
}
|
||||||
|
|
||||||
VkResult rqstRslt = VK_SUCCESS;
|
VkResult rqstRslt = VK_SUCCESS;
|
||||||
uintptr_t pQryData = (uintptr_t)pData;
|
@autoreleasepool {
|
||||||
for (uint32_t query = firstQuery; query < endQuery; query++, pQryData += stride) {
|
NSData* srcData = getQuerySourceData(firstQuery, queryCount);
|
||||||
VkResult qryRslt = getResult(query, (void*)pQryData, flags);
|
uintptr_t pDstData = (uintptr_t)pData;
|
||||||
|
for (uint32_t query = firstQuery; query < endQuery; query++, pDstData += stride) {
|
||||||
|
VkResult qryRslt = getResult(query, srcData, firstQuery, (void*)pDstData, flags);
|
||||||
if (rqstRslt == VK_SUCCESS) { rqstRslt = qryRslt; }
|
if (rqstRslt == VK_SUCCESS) { rqstRslt = qryRslt; }
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return rqstRslt;
|
return rqstRslt;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,7 +117,7 @@ bool MVKQueryPool::areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuer
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult MVKQueryPool::getResult(uint32_t query, void* pQryData, VkQueryResultFlags flags) {
|
VkResult MVKQueryPool::getResult(uint32_t query, NSData* srcData, uint32_t srcDataQueryOffset, void* pDstData, VkQueryResultFlags flags) {
|
||||||
|
|
||||||
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
|
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
|
||||||
|
|
||||||
@ -123,15 +126,22 @@ VkResult MVKQueryPool::getResult(uint32_t query, void* pQryData, VkQueryResultFl
|
|||||||
bool shouldOutput64Bit = mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_64_BIT);
|
bool shouldOutput64Bit = mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_64_BIT);
|
||||||
|
|
||||||
// Output the results of this query
|
// Output the results of this query
|
||||||
if (shouldOutput) { getResult(query, pQryData, shouldOutput64Bit); }
|
if (shouldOutput) {
|
||||||
|
uint64_t rsltVal = ((uint64_t*)srcData.bytes)[query - srcDataQueryOffset];
|
||||||
|
if (shouldOutput64Bit) {
|
||||||
|
*(uint64_t*)pDstData = rsltVal;
|
||||||
|
} else {
|
||||||
|
*(uint32_t*)pDstData = (uint32_t)rsltVal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If requested, output the availability bit
|
// If requested, output the availability bit
|
||||||
if (mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) {
|
if (mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) {
|
||||||
if (shouldOutput64Bit) {
|
if (shouldOutput64Bit) {
|
||||||
uintptr_t pAvailability = (uintptr_t)pQryData + (_queryElementCount * sizeof(uint64_t));
|
uintptr_t pAvailability = (uintptr_t)pDstData + (_queryElementCount * sizeof(uint64_t));
|
||||||
*(uint64_t*)pAvailability = isAvailable;
|
*(uint64_t*)pAvailability = isAvailable;
|
||||||
} else {
|
} else {
|
||||||
uintptr_t pAvailability = (uintptr_t)pQryData + (_queryElementCount * sizeof(uint32_t));
|
uintptr_t pAvailability = (uintptr_t)pDstData + (_queryElementCount * sizeof(uint32_t));
|
||||||
*(uint32_t*)pAvailability = isAvailable;
|
*(uint32_t*)pAvailability = isAvailable;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -154,20 +164,12 @@ void MVKQueryPool::encodeCopyResults(MVKCommandEncoder* cmdEncoder,
|
|||||||
stride == _queryElementCount * sizeof(uint64_t) &&
|
stride == _queryElementCount * sizeof(uint64_t) &&
|
||||||
areQueriesDeviceAvailable(firstQuery, queryCount)) {
|
areQueriesDeviceAvailable(firstQuery, queryCount)) {
|
||||||
|
|
||||||
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults);
|
encodeDirectCopyResults(cmdEncoder, firstQuery, queryCount, destBuffer, destOffset, stride);
|
||||||
NSUInteger srcOffset;
|
|
||||||
id<MTLBuffer> srcBuff = getResultBuffer(cmdEncoder, firstQuery, queryCount, srcOffset);
|
|
||||||
[mtlBlitCmdEnc copyFromBuffer: srcBuff
|
|
||||||
sourceOffset: srcOffset
|
|
||||||
toBuffer: destBuffer->getMTLBuffer()
|
|
||||||
destinationOffset: destBuffer->getMTLBufferOffset() + destOffset
|
|
||||||
size: stride * queryCount];
|
|
||||||
// TODO: In the case where none of the queries is ready, we can fill with 0.
|
// TODO: In the case where none of the queries is ready, we can fill with 0.
|
||||||
} else {
|
} else {
|
||||||
id<MTLComputeCommandEncoder> mtlComputeCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
|
|
||||||
id<MTLComputePipelineState> mtlCopyResultsState = cmdEncoder->getCommandEncodingPool()->getCmdCopyQueryPoolResultsMTLComputePipelineState();
|
id<MTLComputePipelineState> mtlCopyResultsState = cmdEncoder->getCommandEncodingPool()->getCmdCopyQueryPoolResultsMTLComputePipelineState();
|
||||||
|
id<MTLComputeCommandEncoder> mtlComputeCmdEnc = encodeComputeCopyResults(cmdEncoder, firstQuery, queryCount, 0);
|
||||||
[mtlComputeCmdEnc setComputePipelineState: mtlCopyResultsState];
|
[mtlComputeCmdEnc setComputePipelineState: mtlCopyResultsState];
|
||||||
encodeSetResultBuffer(cmdEncoder, firstQuery, queryCount, 0);
|
|
||||||
[mtlComputeCmdEnc setBuffer: destBuffer->getMTLBuffer()
|
[mtlComputeCmdEnc setBuffer: destBuffer->getMTLBuffer()
|
||||||
offset: destBuffer->getMTLBufferOffset() + destOffset
|
offset: destBuffer->getMTLBufferOffset() + destOffset
|
||||||
atIndex: 1];
|
atIndex: 1];
|
||||||
@ -183,6 +185,24 @@ void MVKQueryPool::encodeCopyResults(MVKCommandEncoder* cmdEncoder,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If this asked for 64-bit results with no availability and packed stride, then we can do a straight copy.
|
||||||
|
void MVKQueryPool::encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder,
|
||||||
|
uint32_t firstQuery,
|
||||||
|
uint32_t queryCount,
|
||||||
|
MVKBuffer* destBuffer,
|
||||||
|
VkDeviceSize destOffset,
|
||||||
|
VkDeviceSize stride) {
|
||||||
|
|
||||||
|
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults);
|
||||||
|
NSUInteger srcOffset;
|
||||||
|
id<MTLBuffer> srcBuff = getResultBuffer(cmdEncoder, firstQuery, queryCount, srcOffset);
|
||||||
|
[mtlBlitCmdEnc copyFromBuffer: srcBuff
|
||||||
|
sourceOffset: srcOffset
|
||||||
|
toBuffer: destBuffer->getMTLBuffer()
|
||||||
|
destinationOffset: destBuffer->getMTLBufferOffset() + destOffset
|
||||||
|
size: stride * queryCount];
|
||||||
|
}
|
||||||
|
|
||||||
void MVKQueryPool::deferCopyResults(uint32_t firstQuery,
|
void MVKQueryPool::deferCopyResults(uint32_t firstQuery,
|
||||||
uint32_t queryCount,
|
uint32_t queryCount,
|
||||||
MVKBuffer* destBuffer,
|
MVKBuffer* destBuffer,
|
||||||
@ -195,53 +215,6 @@ void MVKQueryPool::deferCopyResults(uint32_t firstQuery,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#pragma mark -
|
|
||||||
#pragma mark MVKTimestampQueryPool
|
|
||||||
|
|
||||||
void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
|
|
||||||
cmdEncoder->markTimestamp(this, query);
|
|
||||||
MVKQueryPool::endQuery(query, cmdEncoder);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update timestamp values, then mark queries as available
|
|
||||||
void MVKTimestampQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
|
|
||||||
uint64_t ts = mvkGetTimestamp();
|
|
||||||
for (uint32_t qry : queries) { _timestamps[qry] = ts; }
|
|
||||||
|
|
||||||
MVKQueryPool::finishQueries(queries);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MVKTimestampQueryPool::getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) {
|
|
||||||
if (shouldOutput64Bit) {
|
|
||||||
*(uint64_t*)pQryData = _timestamps[query];
|
|
||||||
} else {
|
|
||||||
*(uint32_t*)pQryData = (uint32_t)_timestamps[query];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
id<MTLBuffer> MVKTimestampQueryPool::getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) {
|
|
||||||
const MVKMTLBufferAllocation* tempBuff = cmdEncoder->getTempMTLBuffer(queryCount * _queryElementCount * sizeof(uint64_t));
|
|
||||||
void* pBuffData = tempBuff->getContents();
|
|
||||||
size_t size = queryCount * _queryElementCount * sizeof(uint64_t);
|
|
||||||
memcpy(pBuffData, &_timestamps[firstQuery], size);
|
|
||||||
offset = tempBuff->_offset;
|
|
||||||
return tempBuff->_mtlBuffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MVKTimestampQueryPool::encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) {
|
|
||||||
// No need to create a temp buffer here.
|
|
||||||
cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults), &_timestamps[firstQuery], queryCount * _queryElementCount * sizeof(uint64_t), index);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#pragma mark Construction
|
|
||||||
|
|
||||||
MVKTimestampQueryPool::MVKTimestampQueryPool(MVKDevice* device,
|
|
||||||
const VkQueryPoolCreateInfo* pCreateInfo) :
|
|
||||||
MVKQueryPool(device, pCreateInfo, 1), _timestamps(pCreateInfo->queryCount, 0) {
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#pragma mark -
|
#pragma mark -
|
||||||
#pragma mark MVKOcclusionQueryPool
|
#pragma mark MVKOcclusionQueryPool
|
||||||
|
|
||||||
@ -285,15 +258,11 @@ void MVKOcclusionQueryPool::resetResults(uint32_t firstQuery, uint32_t queryCoun
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MVKOcclusionQueryPool::getResult(uint32_t query, void* pQryData, bool shouldOutput64Bit) {
|
NSData* MVKOcclusionQueryPool::getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) {
|
||||||
NSUInteger mtlBuffOffset = getVisibilityResultOffset(query);
|
id<MTLBuffer> vizBuff = getVisibilityResultMTLBuffer();
|
||||||
uint64_t* pData = (uint64_t*)((uintptr_t)getVisibilityResultMTLBuffer().contents + mtlBuffOffset);
|
return [NSData dataWithBytesNoCopy: (void*)((uintptr_t)vizBuff.contents + getVisibilityResultOffset(firstQuery))
|
||||||
|
length: queryCount * kMVKQuerySlotSizeInBytes
|
||||||
if (shouldOutput64Bit) {
|
freeWhenDone: false];
|
||||||
*(uint64_t*)pQryData = *pData;
|
|
||||||
} else {
|
|
||||||
*(uint32_t*)pQryData = (uint32_t)(*pData);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_t firstQuery, uint32_t, NSUInteger& offset) {
|
id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_t firstQuery, uint32_t, NSUInteger& offset) {
|
||||||
@ -301,10 +270,10 @@ id<MTLBuffer> MVKOcclusionQueryPool::getResultBuffer(MVKCommandEncoder*, uint32_
|
|||||||
return getVisibilityResultMTLBuffer();
|
return getVisibilityResultMTLBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MVKOcclusionQueryPool::encodeSetResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t, uint32_t index) {
|
id<MTLComputeCommandEncoder> MVKOcclusionQueryPool::encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t, uint32_t index) {
|
||||||
[cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults) setBuffer: getVisibilityResultMTLBuffer()
|
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
|
||||||
offset: getVisibilityResultOffset(firstQuery)
|
[mtlCmdEnc setBuffer: getVisibilityResultMTLBuffer() offset: getVisibilityResultOffset(firstQuery) atIndex: index];
|
||||||
atIndex: index];
|
return mtlCmdEnc;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer* cmdBuffer) {
|
void MVKOcclusionQueryPool::beginQueryAddedTo(uint32_t query, MVKCommandBuffer* cmdBuffer) {
|
||||||
@ -332,14 +301,12 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device,
|
|||||||
_queryIndexOffset = 0;
|
_queryIndexOffset = 0;
|
||||||
|
|
||||||
// Ensure we don't overflow the maximum number of queries
|
// Ensure we don't overflow the maximum number of queries
|
||||||
uint32_t queryCount = pCreateInfo->queryCount;
|
VkDeviceSize reqBuffLen = (VkDeviceSize)pCreateInfo->queryCount * kMVKQuerySlotSizeInBytes;
|
||||||
VkDeviceSize reqBuffLen = (VkDeviceSize)queryCount * kMVKQuerySlotSizeInBytes;
|
|
||||||
VkDeviceSize maxBuffLen = _device->_pMetalFeatures->maxQueryBufferSize;
|
VkDeviceSize maxBuffLen = _device->_pMetalFeatures->maxQueryBufferSize;
|
||||||
VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen);
|
VkDeviceSize newBuffLen = min(reqBuffLen, maxBuffLen);
|
||||||
queryCount = uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes);
|
|
||||||
|
|
||||||
if (reqBuffLen > maxBuffLen) {
|
if (reqBuffLen > maxBuffLen) {
|
||||||
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): Each query pool can support a maximum of %d queries.", queryCount);
|
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): Each query pool can support a maximum of %d queries.", uint32_t(newBuffLen / kMVKQuerySlotSizeInBytes));
|
||||||
}
|
}
|
||||||
|
|
||||||
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
|
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
|
||||||
@ -357,11 +324,133 @@ MVKOcclusionQueryPool::~MVKOcclusionQueryPool() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#pragma mark -
|
||||||
|
#pragma mark MVKGPUCounterQueryPool
|
||||||
|
|
||||||
|
MVKGPUCounterQueryPool::MVKGPUCounterQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo) :
|
||||||
|
MVKQueryPool(device, pCreateInfo, 1), _mtlCounterBuffer(nil) {}
|
||||||
|
|
||||||
|
// To establish the Metal counter sample buffer, this must be called from the construtors
|
||||||
|
// of subclasses, because the type of MTLCounterSet is determined by the subclass.
|
||||||
|
void MVKGPUCounterQueryPool::initMTLCounterSampleBuffer(const VkQueryPoolCreateInfo* pCreateInfo,
|
||||||
|
id<MTLCounterSet> mtlCounterSet,
|
||||||
|
const char* queryTypeName) {
|
||||||
|
if ( !_device->_pMetalFeatures->counterSamplingPoints ) { return; }
|
||||||
|
|
||||||
|
@autoreleasepool {
|
||||||
|
MTLCounterSampleBufferDescriptor* tsDesc = [[[MTLCounterSampleBufferDescriptor alloc] init] autorelease];
|
||||||
|
tsDesc.counterSet = mtlCounterSet;
|
||||||
|
tsDesc.storageMode = MTLStorageModeShared;
|
||||||
|
tsDesc.sampleCount = pCreateInfo->queryCount;
|
||||||
|
|
||||||
|
NSError* err = nil;
|
||||||
|
_mtlCounterBuffer = [getMTLDevice() newCounterSampleBufferWithDescriptor: tsDesc error: &err];
|
||||||
|
if (err) {
|
||||||
|
setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED,
|
||||||
|
"Could not create MTLCounterSampleBuffer for query pool of type %s. Reverting to emulated behavior. (Error code %li): %s",
|
||||||
|
queryTypeName, (long)err.code, err.localizedDescription.UTF8String));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
MVKGPUCounterQueryPool::~MVKGPUCounterQueryPool() {
|
||||||
|
[_mtlCounterBuffer release];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#pragma mark -
|
||||||
|
#pragma mark MVKTimestampQueryPool
|
||||||
|
|
||||||
|
void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
|
||||||
|
cmdEncoder->markTimestamp(this, query);
|
||||||
|
MVKQueryPool::endQuery(query, cmdEncoder);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If not using MTLCounterSampleBuffer, update timestamp values, then mark queries as available
|
||||||
|
void MVKTimestampQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
|
||||||
|
if ( !_mtlCounterBuffer ) {
|
||||||
|
uint64_t ts = mvkGetTimestamp();
|
||||||
|
for (uint32_t qry : queries) { _timestamps[qry] = ts; }
|
||||||
|
}
|
||||||
|
MVKQueryPool::finishQueries(queries);
|
||||||
|
}
|
||||||
|
|
||||||
|
NSData* MVKTimestampQueryPool::getQuerySourceData(uint32_t firstQuery, uint32_t queryCount) {
|
||||||
|
if (_mtlCounterBuffer) {
|
||||||
|
return [_mtlCounterBuffer resolveCounterRange: NSMakeRange(firstQuery, queryCount)];
|
||||||
|
} else {
|
||||||
|
return [NSData dataWithBytesNoCopy: (void*)&_timestamps[firstQuery]
|
||||||
|
length: queryCount * kMVKQuerySlotSizeInBytes
|
||||||
|
freeWhenDone: false];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MVKTimestampQueryPool::encodeDirectCopyResults(MVKCommandEncoder* cmdEncoder,
|
||||||
|
uint32_t firstQuery,
|
||||||
|
uint32_t queryCount,
|
||||||
|
MVKBuffer* destBuffer,
|
||||||
|
VkDeviceSize destOffset,
|
||||||
|
VkDeviceSize stride) {
|
||||||
|
if (_mtlCounterBuffer) {
|
||||||
|
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults);
|
||||||
|
[mtlBlitCmdEnc resolveCounters: _mtlCounterBuffer
|
||||||
|
inRange: NSMakeRange(firstQuery, queryCount)
|
||||||
|
destinationBuffer: destBuffer->getMTLBuffer()
|
||||||
|
destinationOffset: destBuffer->getMTLBufferOffset() + destOffset];
|
||||||
|
} else {
|
||||||
|
MVKQueryPool::encodeDirectCopyResults(cmdEncoder, firstQuery, queryCount, destBuffer, destOffset, stride);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
id<MTLBuffer> MVKTimestampQueryPool::getResultBuffer(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, NSUInteger& offset) {
|
||||||
|
const MVKMTLBufferAllocation* tempBuff = cmdEncoder->getTempMTLBuffer(queryCount * _queryElementCount * sizeof(uint64_t));
|
||||||
|
void* pBuffData = tempBuff->getContents();
|
||||||
|
size_t size = queryCount * _queryElementCount * sizeof(uint64_t);
|
||||||
|
memcpy(pBuffData, &_timestamps[firstQuery], size);
|
||||||
|
offset = tempBuff->_offset;
|
||||||
|
return tempBuff->_mtlBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
id<MTLComputeCommandEncoder> MVKTimestampQueryPool::encodeComputeCopyResults(MVKCommandEncoder* cmdEncoder, uint32_t firstQuery, uint32_t queryCount, uint32_t index) {
|
||||||
|
if (_mtlCounterBuffer) {
|
||||||
|
// We first need to resolve from the MTLCounterSampleBuffer into a temp buffer using a
|
||||||
|
// MTLBlitCommandEncoder, before creating the compute encoder and set that temp buffer into it.
|
||||||
|
const MVKMTLBufferAllocation* tempBuff = cmdEncoder->getTempMTLBuffer(queryCount * _queryElementCount * sizeof(uint64_t));
|
||||||
|
id<MTLBlitCommandEncoder> mtlBlitCmdEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyQueryPoolResults);
|
||||||
|
[mtlBlitCmdEnc resolveCounters: _mtlCounterBuffer
|
||||||
|
inRange: NSMakeRange(firstQuery, queryCount)
|
||||||
|
destinationBuffer: tempBuff->_mtlBuffer
|
||||||
|
destinationOffset: tempBuff->_offset];
|
||||||
|
|
||||||
|
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
|
||||||
|
[mtlCmdEnc setBuffer: tempBuff->_mtlBuffer offset: tempBuff->_offset atIndex: index];
|
||||||
|
return mtlCmdEnc;
|
||||||
|
} else {
|
||||||
|
// We can set the timestamp bytes into the compute encoder.
|
||||||
|
id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyQueryPoolResults);
|
||||||
|
cmdEncoder->setComputeBytes(mtlCmdEnc, &_timestamps[firstQuery], queryCount * _queryElementCount * sizeof(uint64_t), index);
|
||||||
|
return mtlCmdEnc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#pragma mark Construction
|
||||||
|
|
||||||
|
MVKTimestampQueryPool::MVKTimestampQueryPool(MVKDevice* device, const VkQueryPoolCreateInfo* pCreateInfo) :
|
||||||
|
MVKGPUCounterQueryPool(device, pCreateInfo) {
|
||||||
|
|
||||||
|
initMTLCounterSampleBuffer(pCreateInfo, _device->getTimestampMTLCounterSet(), "VK_QUERY_TYPE_TIMESTAMP");
|
||||||
|
|
||||||
|
// If we don't use a MTLCounterSampleBuffer, allocate memory to hold the timestamps.
|
||||||
|
if ( !_mtlCounterBuffer ) { _timestamps.resize(pCreateInfo->queryCount, 0); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#pragma mark -
|
#pragma mark -
|
||||||
#pragma mark MVKPipelineStatisticsQueryPool
|
#pragma mark MVKPipelineStatisticsQueryPool
|
||||||
|
|
||||||
MVKPipelineStatisticsQueryPool::MVKPipelineStatisticsQueryPool(MVKDevice* device,
|
MVKPipelineStatisticsQueryPool::MVKPipelineStatisticsQueryPool(MVKDevice* device,
|
||||||
const VkQueryPoolCreateInfo* pCreateInfo) : MVKQueryPool(device, pCreateInfo, 1) {
|
const VkQueryPoolCreateInfo* pCreateInfo) : MVKGPUCounterQueryPool(device, pCreateInfo) {
|
||||||
if ( !_device->_enabledFeatures.pipelineStatisticsQuery ) {
|
if ( !_device->_enabledFeatures.pipelineStatisticsQuery ) {
|
||||||
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateQueryPool: VK_QUERY_TYPE_PIPELINE_STATISTICS is not supported."));
|
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateQueryPool: VK_QUERY_TYPE_PIPELINE_STATISTICS is not supported."));
|
||||||
}
|
}
|
||||||
|
@ -91,7 +91,8 @@ typedef enum : uint8_t {
|
|||||||
kMVKCommandUseTessellationVertexTessCtl, /**< vkCmdDraw* - vertex and tessellation control stages. */
|
kMVKCommandUseTessellationVertexTessCtl, /**< vkCmdDraw* - vertex and tessellation control stages. */
|
||||||
kMVKCommandUseMultiviewInstanceCountAdjust, /**< vkCmdDrawIndirect* - adjust instance count for multiview. */
|
kMVKCommandUseMultiviewInstanceCountAdjust, /**< vkCmdDrawIndirect* - adjust instance count for multiview. */
|
||||||
kMVKCommandUseCopyQueryPoolResults, /**< vkCmdCopyQueryPoolResults. */
|
kMVKCommandUseCopyQueryPoolResults, /**< vkCmdCopyQueryPoolResults. */
|
||||||
kMVKCommandUseAccumOcclusionQuery /**< Any command terminating a Metal render pass with active visibility buffer. */
|
kMVKCommandUseAccumOcclusionQuery, /**< Any command terminating a Metal render pass with active visibility buffer. */
|
||||||
|
kMVKCommandUseRecordGPUCounterSample /**< Any command triggering the recording of a GPU counter sample. */
|
||||||
} MVKCommandUse;
|
} MVKCommandUse;
|
||||||
|
|
||||||
/** Represents a given stage of a graphics pipeline. */
|
/** Represents a given stage of a graphics pipeline. */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user