diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 9e0fc42a..d473ae91 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -363,6 +363,7 @@ Released 2022/02/07 - Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*. - Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments. - Fix Metal object retain-release errors in assignment operators. +- Fix sync delay between calls to `vkQueueSubmit()` on non-Apple-Silicon devices. - Fix use of GPU counter sets on older versions of iOS running on the simulator. - `mvk::getShaderOutputs()` in `SPRIVReflection.h` support flattening nested structures. - Replaced ASL logging levels with `MVKConfigLogLevel`. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 5186fe4a..450fad66 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -356,20 +356,6 @@ public: return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER; }; - /** - * Returns the start timestamps of a timestamp correlation. - * The returned values should be later passed back to updateTimestampPeriod(). - */ - void startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart); - - /** - * Updates the current value of VkPhysicalDeviceLimits::timestampPeriod, based on the - * correlation between the CPU time tickes and GPU time ticks, from the specified start - * values, to the current values. The cpuStart and gpuStart values should have been - * retrieved from a prior call to startTimestampCorrelation(). - */ - void updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart); - #pragma mark Construction @@ -416,6 +402,7 @@ protected: void initExtensions(); void initCounterSets(); bool needsCounterSetRetained(); + void updateTimestampsAndPeriod(); MVKArrayRef getQueueFamilies(); void initPipelineCacheUUID(); uint32_t getHighestGPUCapability(); @@ -435,16 +422,18 @@ protected: VkPhysicalDeviceMemoryProperties _memoryProperties; MVKSmallVector _queueFamilies; MVKPixelFormats _pixelFormats; + VkExternalMemoryProperties _hostPointerExternalMemoryProperties; + VkExternalMemoryProperties _mtlBufferExternalMemoryProperties; + VkExternalMemoryProperties _mtlTextureExternalMemoryProperties; id _timestampMTLCounterSet; MVKSemaphoreStyle _vkSemaphoreStyle; + MTLTimestamp _prevCPUTimestamp = 0; + MTLTimestamp _prevGPUTimestamp = 0; uint32_t _allMemoryTypes; uint32_t _hostVisibleMemoryTypes; uint32_t _hostCoherentMemoryTypes; uint32_t _privateMemoryTypes; uint32_t _lazilyAllocatedMemoryTypes; - VkExternalMemoryProperties _hostPointerExternalMemoryProperties; - VkExternalMemoryProperties _mtlBufferExternalMemoryProperties; - VkExternalMemoryProperties _mtlTextureExternalMemoryProperties; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 3c77519d..59f90df3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -451,10 +451,17 @@ void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { } void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) { + updateTimestampsAndPeriod(); *properties = _properties; } void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { + + properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + getProperties(&properties->properties); + + if ( !properties->pNext ) { return; } + uint32_t uintMax = std::numeric_limits::max(); uint32_t maxSamplerCnt = getMaxSamplerCount(); bool isTier2 = supportsMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2); @@ -536,8 +543,6 @@ void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { supportedProps12.maxTimelineSemaphoreValueDifference = std::numeric_limits::max(); supportedProps12.framebufferIntegerColorSampleCounts = _metalFeatures.supportedSampleCounts; - properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - properties->properties = _properties; for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) { switch ((uint32_t)next->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES: { @@ -1562,23 +1567,17 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount, return rslt; } -// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps -// are the same, or if we're not using GPU timestamp counters. -void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) { - if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; } - [_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart]; -} - -// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps -// are the same, or if we're not using GPU timestamp counters. -void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) { - if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; } - - MTLTimestamp cpuEnd; - MTLTimestamp gpuEnd; - [_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd]; - - _properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart); +// Mark both CPU and GPU timestamps, and if needed, update the timestamp period for this device. +// On Apple GPUs, the CPU & GPU timestamps are the same, and the timestamp period never changes. +void MVKPhysicalDevice::updateTimestampsAndPeriod() { + if (_properties.vendorID == kAppleVendorId) { + _prevGPUTimestamp = _prevCPUTimestamp = mvkGetElapsedNanoseconds(); + } else { + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; + MTLTimestamp earlierGPUTs = _prevGPUTimestamp; + [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; + _properties.limits.timestampPeriod = (double)(_prevCPUTimestamp - earlierCPUTs) / (double)(_prevGPUTimestamp - earlierGPUTs); + } } @@ -2606,7 +2605,7 @@ void MVKPhysicalDevice::initLimits() { _properties.limits.optimalBufferCopyRowPitchAlignment = 1; _properties.limits.timestampComputeAndGraphics = VK_TRUE; - _properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod(); + _properties.limits.timestampPeriod = mvkGetTimestampPeriod(); // Will be 1.0 on Apple Silicon _properties.limits.pointSizeRange[0] = 1; switch (_properties.vendorID) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index 96d77bc1..bcefd2f3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -255,11 +255,8 @@ public: protected: void submitCommandBuffers() override; - void finish() override; MVKSmallVector _cmdBuffers; - MTLTimestamp _cpuStart = 0; - MTLTimestamp _gpuStart = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 96b34cc3..0ad14307 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -524,16 +524,9 @@ MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() { template void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { - _queue->getPhysicalDevice()->startTimestampCorrelation(_cpuStart, _gpuStart); for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } } -template -void MVKQueueFullCommandBufferSubmission::finish() { - _queue->getPhysicalDevice()->updateTimestampPeriod(_cpuStart, _gpuStart); - MVKQueueCommandBufferSubmission::finish(); -} - #pragma mark - #pragma mark MVKQueuePresentSurfaceSubmission