Merge pull request #1992 from billhollings/fix-submit-sync-delay
Fix sync delay between calls to vkQueueSubmit() on non-Apple-Silicon devices.
This commit is contained in:
commit
8518a4f8db
@ -363,6 +363,7 @@ Released 2022/02/07
|
||||
- Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*.
|
||||
- Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments.
|
||||
- Fix Metal object retain-release errors in assignment operators.
|
||||
- Fix sync delay between calls to `vkQueueSubmit()` on non-Apple-Silicon devices.
|
||||
- Fix use of GPU counter sets on older versions of iOS running on the simulator.
|
||||
- `mvk::getShaderOutputs()` in `SPRIVReflection.h` support flattening nested structures.
|
||||
- Replaced ASL logging levels with `MVKConfigLogLevel`.
|
||||
|
@ -356,20 +356,6 @@ public:
|
||||
return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the start timestamps of a timestamp correlation.
|
||||
* The returned values should be later passed back to updateTimestampPeriod().
|
||||
*/
|
||||
void startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart);
|
||||
|
||||
/**
|
||||
* Updates the current value of VkPhysicalDeviceLimits::timestampPeriod, based on the
|
||||
* correlation between the CPU time tickes and GPU time ticks, from the specified start
|
||||
* values, to the current values. The cpuStart and gpuStart values should have been
|
||||
* retrieved from a prior call to startTimestampCorrelation().
|
||||
*/
|
||||
void updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart);
|
||||
|
||||
|
||||
#pragma mark Construction
|
||||
|
||||
@ -416,6 +402,7 @@ protected:
|
||||
void initExtensions();
|
||||
void initCounterSets();
|
||||
bool needsCounterSetRetained();
|
||||
void updateTimestampsAndPeriod();
|
||||
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
|
||||
void initPipelineCacheUUID();
|
||||
uint32_t getHighestGPUCapability();
|
||||
@ -435,16 +422,18 @@ protected:
|
||||
VkPhysicalDeviceMemoryProperties _memoryProperties;
|
||||
MVKSmallVector<MVKQueueFamily*, kMVKQueueFamilyCount> _queueFamilies;
|
||||
MVKPixelFormats _pixelFormats;
|
||||
VkExternalMemoryProperties _hostPointerExternalMemoryProperties;
|
||||
VkExternalMemoryProperties _mtlBufferExternalMemoryProperties;
|
||||
VkExternalMemoryProperties _mtlTextureExternalMemoryProperties;
|
||||
id<MTLCounterSet> _timestampMTLCounterSet;
|
||||
MVKSemaphoreStyle _vkSemaphoreStyle;
|
||||
MTLTimestamp _prevCPUTimestamp = 0;
|
||||
MTLTimestamp _prevGPUTimestamp = 0;
|
||||
uint32_t _allMemoryTypes;
|
||||
uint32_t _hostVisibleMemoryTypes;
|
||||
uint32_t _hostCoherentMemoryTypes;
|
||||
uint32_t _privateMemoryTypes;
|
||||
uint32_t _lazilyAllocatedMemoryTypes;
|
||||
VkExternalMemoryProperties _hostPointerExternalMemoryProperties;
|
||||
VkExternalMemoryProperties _mtlBufferExternalMemoryProperties;
|
||||
VkExternalMemoryProperties _mtlTextureExternalMemoryProperties;
|
||||
};
|
||||
|
||||
|
||||
|
@ -451,10 +451,17 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {
|
||||
}
|
||||
|
||||
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) {
|
||||
updateTimestampsAndPeriod();
|
||||
*properties = _properties;
|
||||
}
|
||||
|
||||
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
|
||||
|
||||
properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||
getProperties(&properties->properties);
|
||||
|
||||
if ( !properties->pNext ) { return; }
|
||||
|
||||
uint32_t uintMax = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t maxSamplerCnt = getMaxSamplerCount();
|
||||
bool isTier2 = supportsMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2);
|
||||
@ -536,8 +543,6 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
|
||||
supportedProps12.maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max();
|
||||
supportedProps12.framebufferIntegerColorSampleCounts = _metalFeatures.supportedSampleCounts;
|
||||
|
||||
properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||
properties->properties = _properties;
|
||||
for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) {
|
||||
switch ((uint32_t)next->sType) {
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES: {
|
||||
@ -1562,23 +1567,17 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
|
||||
return rslt;
|
||||
}
|
||||
|
||||
// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps
|
||||
// are the same, or if we're not using GPU timestamp counters.
|
||||
void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) {
|
||||
if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }
|
||||
[_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart];
|
||||
}
|
||||
|
||||
// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps
|
||||
// are the same, or if we're not using GPU timestamp counters.
|
||||
void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) {
|
||||
if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }
|
||||
|
||||
MTLTimestamp cpuEnd;
|
||||
MTLTimestamp gpuEnd;
|
||||
[_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd];
|
||||
|
||||
_properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart);
|
||||
// Mark both CPU and GPU timestamps, and if needed, update the timestamp period for this device.
|
||||
// On Apple GPUs, the CPU & GPU timestamps are the same, and the timestamp period never changes.
|
||||
void MVKPhysicalDevice::updateTimestampsAndPeriod() {
|
||||
if (_properties.vendorID == kAppleVendorId) {
|
||||
_prevGPUTimestamp = _prevCPUTimestamp = mvkGetElapsedNanoseconds();
|
||||
} else {
|
||||
MTLTimestamp earlierCPUTs = _prevCPUTimestamp;
|
||||
MTLTimestamp earlierGPUTs = _prevGPUTimestamp;
|
||||
[_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp];
|
||||
_properties.limits.timestampPeriod = (double)(_prevCPUTimestamp - earlierCPUTs) / (double)(_prevGPUTimestamp - earlierGPUTs);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -2606,7 +2605,7 @@ void MVKPhysicalDevice::initLimits() {
|
||||
_properties.limits.optimalBufferCopyRowPitchAlignment = 1;
|
||||
|
||||
_properties.limits.timestampComputeAndGraphics = VK_TRUE;
|
||||
_properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod();
|
||||
_properties.limits.timestampPeriod = mvkGetTimestampPeriod(); // Will be 1.0 on Apple Silicon
|
||||
|
||||
_properties.limits.pointSizeRange[0] = 1;
|
||||
switch (_properties.vendorID) {
|
||||
|
@ -255,11 +255,8 @@ public:
|
||||
|
||||
protected:
|
||||
void submitCommandBuffers() override;
|
||||
void finish() override;
|
||||
|
||||
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
|
||||
MTLTimestamp _cpuStart = 0;
|
||||
MTLTimestamp _gpuStart = 0;
|
||||
};
|
||||
|
||||
|
||||
|
@ -524,16 +524,9 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
|
||||
|
||||
template <size_t N>
|
||||
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
|
||||
_queue->getPhysicalDevice()->startTimestampCorrelation(_cpuStart, _gpuStart);
|
||||
for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); }
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
void MVKQueueFullCommandBufferSubmission<N>::finish() {
|
||||
_queue->getPhysicalDevice()->updateTimestampPeriod(_cpuStart, _gpuStart);
|
||||
MVKQueueCommandBufferSubmission::finish();
|
||||
}
|
||||
|
||||
|
||||
#pragma mark -
|
||||
#pragma mark MVKQueuePresentSurfaceSubmission
|
||||
|
Loading…
x
Reference in New Issue
Block a user