Fix sync delay between calls to vkQueueSubmit() on non-Apple-Silicon devices.
The [MTLDevice sampleTimestamps:gpuTimestamp:] function turns out to be synchronized with other queue activities, and can block GPU execution if it is called between MTLCommandBuffer submissions. On non-Apple-Silicon devices, it was called before and after every vkQueueSubmit() submission, to track the correlation between GPU and CPU timestamps, and was delaying the start of GPU work on the next submission (on Apple Silicon, both CPU & GPU timestamps are specified in nanoseconds, and the call was bypassed). Move timestamp correlation from vkQueueSubmit() to vkGetPhysicalDeviceProperties(), where it is used to update VkPhysicalDeviceLimits::timestampPeriod on non-Apple-Silicon devices. Delegate MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2*) to MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties*), plus minimize wasted effort if pNext is empty (unrelated). Move the declaration of several MVKPhysicalDevice member structs to potentially reduce member spacing (unrelated).
This commit is contained in:
parent
d9b32e0c75
commit
dd31587337
@ -363,6 +363,7 @@ Released 2022/02/07
|
|||||||
- Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*.
|
- Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*.
|
||||||
- Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments.
|
- Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments.
|
||||||
- Fix Metal object retain-release errors in assignment operators.
|
- Fix Metal object retain-release errors in assignment operators.
|
||||||
|
- Fix sync delay between calls to `vkQueueSubmit()` on non-Apple-Silicon devices.
|
||||||
- Fix use of GPU counter sets on older versions of iOS running on the simulator.
|
- Fix use of GPU counter sets on older versions of iOS running on the simulator.
|
||||||
- `mvk::getShaderOutputs()` in `SPRIVReflection.h` support flattening nested structures.
|
- `mvk::getShaderOutputs()` in `SPRIVReflection.h` support flattening nested structures.
|
||||||
- Replaced ASL logging levels with `MVKConfigLogLevel`.
|
- Replaced ASL logging levels with `MVKConfigLogLevel`.
|
||||||
|
@ -356,20 +356,6 @@ public:
|
|||||||
return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER;
|
return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the start timestamps of a timestamp correlation.
|
|
||||||
* The returned values should be later passed back to updateTimestampPeriod().
|
|
||||||
*/
|
|
||||||
void startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Updates the current value of VkPhysicalDeviceLimits::timestampPeriod, based on the
|
|
||||||
* correlation between the CPU time tickes and GPU time ticks, from the specified start
|
|
||||||
* values, to the current values. The cpuStart and gpuStart values should have been
|
|
||||||
* retrieved from a prior call to startTimestampCorrelation().
|
|
||||||
*/
|
|
||||||
void updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart);
|
|
||||||
|
|
||||||
|
|
||||||
#pragma mark Construction
|
#pragma mark Construction
|
||||||
|
|
||||||
@ -416,6 +402,7 @@ protected:
|
|||||||
void initExtensions();
|
void initExtensions();
|
||||||
void initCounterSets();
|
void initCounterSets();
|
||||||
bool needsCounterSetRetained();
|
bool needsCounterSetRetained();
|
||||||
|
void updateTimestampsAndPeriod();
|
||||||
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
|
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
|
||||||
void initPipelineCacheUUID();
|
void initPipelineCacheUUID();
|
||||||
uint32_t getHighestGPUCapability();
|
uint32_t getHighestGPUCapability();
|
||||||
@ -435,16 +422,18 @@ protected:
|
|||||||
VkPhysicalDeviceMemoryProperties _memoryProperties;
|
VkPhysicalDeviceMemoryProperties _memoryProperties;
|
||||||
MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies;
|
MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies;
|
||||||
MVKPixelFormats _pixelFormats;
|
MVKPixelFormats _pixelFormats;
|
||||||
|
VkExternalMemoryProperties _hostPointerExternalMemoryProperties;
|
||||||
|
VkExternalMemoryProperties _mtlBufferExternalMemoryProperties;
|
||||||
|
VkExternalMemoryProperties _mtlTextureExternalMemoryProperties;
|
||||||
id<MTLCounterSet> _timestampMTLCounterSet;
|
id<MTLCounterSet> _timestampMTLCounterSet;
|
||||||
MVKSemaphoreStyle _vkSemaphoreStyle;
|
MVKSemaphoreStyle _vkSemaphoreStyle;
|
||||||
|
MTLTimestamp _prevCPUTimestamp = 0;
|
||||||
|
MTLTimestamp _prevGPUTimestamp = 0;
|
||||||
uint32_t _allMemoryTypes;
|
uint32_t _allMemoryTypes;
|
||||||
uint32_t _hostVisibleMemoryTypes;
|
uint32_t _hostVisibleMemoryTypes;
|
||||||
uint32_t _hostCoherentMemoryTypes;
|
uint32_t _hostCoherentMemoryTypes;
|
||||||
uint32_t _privateMemoryTypes;
|
uint32_t _privateMemoryTypes;
|
||||||
uint32_t _lazilyAllocatedMemoryTypes;
|
uint32_t _lazilyAllocatedMemoryTypes;
|
||||||
VkExternalMemoryProperties _hostPointerExternalMemoryProperties;
|
|
||||||
VkExternalMemoryProperties _mtlBufferExternalMemoryProperties;
|
|
||||||
VkExternalMemoryProperties _mtlTextureExternalMemoryProperties;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -451,10 +451,17 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) {
|
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) {
|
||||||
|
updateTimestampsAndPeriod();
|
||||||
*properties = _properties;
|
*properties = _properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
|
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
|
||||||
|
|
||||||
|
properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||||
|
getProperties(&properties->properties);
|
||||||
|
|
||||||
|
if ( !properties->pNext ) { return; }
|
||||||
|
|
||||||
uint32_t uintMax = std::numeric_limits<uint32_t>::max();
|
uint32_t uintMax = std::numeric_limits<uint32_t>::max();
|
||||||
uint32_t maxSamplerCnt = getMaxSamplerCount();
|
uint32_t maxSamplerCnt = getMaxSamplerCount();
|
||||||
bool isTier2 = supportsMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2);
|
bool isTier2 = supportsMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2);
|
||||||
@ -536,8 +543,6 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
|
|||||||
supportedProps12.maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max();
|
supportedProps12.maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max();
|
||||||
supportedProps12.framebufferIntegerColorSampleCounts = _metalFeatures.supportedSampleCounts;
|
supportedProps12.framebufferIntegerColorSampleCounts = _metalFeatures.supportedSampleCounts;
|
||||||
|
|
||||||
properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
|
||||||
properties->properties = _properties;
|
|
||||||
for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) {
|
for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) {
|
||||||
switch ((uint32_t)next->sType) {
|
switch ((uint32_t)next->sType) {
|
||||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES: {
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES: {
|
||||||
@ -1562,23 +1567,17 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
|
|||||||
return rslt;
|
return rslt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps
|
// Mark both CPU and GPU timestamps, and if needed, update the timestamp period for this device.
|
||||||
// are the same, or if we're not using GPU timestamp counters.
|
// On Apple GPUs, the CPU & GPU timestamps are the same, and the timestamp period never changes.
|
||||||
void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) {
|
void MVKPhysicalDevice::updateTimestampsAndPeriod() {
|
||||||
if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }
|
if (_properties.vendorID == kAppleVendorId) {
|
||||||
[_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart];
|
_prevGPUTimestamp = _prevCPUTimestamp = mvkGetElapsedNanoseconds();
|
||||||
}
|
} else {
|
||||||
|
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
|
||||||
// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps
|
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
|
||||||
// are the same, or if we're not using GPU timestamp counters.
|
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
|
||||||
void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) {
|
_properties.limits.timestampPeriod = (double)(_prevCPUTimestamp - earlierCPUTs) / (double)(_prevGPUTimestamp - earlierGPUTs);
|
||||||
if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }
|
}
|
||||||
|
|
||||||
MTLTimestamp cpuEnd;
|
|
||||||
MTLTimestamp gpuEnd;
|
|
||||||
[_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd];
|
|
||||||
|
|
||||||
_properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2606,7 +2605,7 @@ void MVKPhysicalDevice::initLimits() {
|
|||||||
_properties.limits.optimalBufferCopyRowPitchAlignment = 1;
|
_properties.limits.optimalBufferCopyRowPitchAlignment = 1;
|
||||||
|
|
||||||
_properties.limits.timestampComputeAndGraphics = VK_TRUE;
|
_properties.limits.timestampComputeAndGraphics = VK_TRUE;
|
||||||
_properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod();
|
_properties.limits.timestampPeriod = mvkGetTimestampPeriod(); // Will be 1.0 on Apple Silicon
|
||||||
|
|
||||||
_properties.limits.pointSizeRange[0] = 1;
|
_properties.limits.pointSizeRange[0] = 1;
|
||||||
switch (_properties.vendorID) {
|
switch (_properties.vendorID) {
|
||||||
|
@ -255,11 +255,8 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
void submitCommandBuffers() override;
|
void submitCommandBuffers() override;
|
||||||
void finish() override;
|
|
||||||
|
|
||||||
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
|
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
|
||||||
MTLTimestamp _cpuStart = 0;
|
|
||||||
MTLTimestamp _gpuStart = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -524,16 +524,9 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
|
|||||||
|
|
||||||
template <size_t N>
|
template <size_t N>
|
||||||
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
|
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
|
||||||
_queue->getPhysicalDevice()->startTimestampCorrelation(_cpuStart, _gpuStart);
|
|
||||||
for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); }
|
for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); }
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t N>
|
|
||||||
void MVKQueueFullCommandBufferSubmission<N>::finish() {
|
|
||||||
_queue->getPhysicalDevice()->updateTimestampPeriod(_cpuStart, _gpuStart);
|
|
||||||
MVKQueueCommandBufferSubmission::finish();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#pragma mark -
|
#pragma mark -
|
||||||
#pragma mark MVKQueuePresentSurfaceSubmission
|
#pragma mark MVKQueuePresentSurfaceSubmission
|
||||||
|
Loading…
x
Reference in New Issue
Block a user