diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index fd790514..3239b8d3 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -21,6 +21,7 @@ Released TBD - Add support for extensions: - `VK_KHR_device_group` - Add support for `VkEvent`, using either native `MTLEvent` or emulation when `MTLEvent` not available. +- `vkInvalidateMappedMemoryRanges()` synchronizes managed device memory to CPU. - Revert to supporting host-coherent memory for linear images on macOS. - Ensure Vulkan loader magic number is set every time before returning any dispatchable Vulkan handle. - Fix crash when `VkDeviceCreateInfo` specifies queue families out of numerical order. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 5f267df8..b4a7461e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -360,6 +360,11 @@ protected: #pragma mark - #pragma mark MVKDevice +typedef struct { + id mtlBlitEncoder = nil; + id mtlCmdBuffer = nil; +} MVKMTLBlitEncoder; + /** Represents a Vulkan logical GPU device, associated with a physical device. */ class MVKDevice : public MVKDispatchableVulkanAPIObject { @@ -387,7 +392,7 @@ public: PFN_vkVoidFunction getProcAddr(const char* pName); /** Retrieves a queue at the specified index within the specified family. */ - MVKQueue* getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex); + MVKQueue* getQueue(uint32_t queueFamilyIndex = 0, uint32_t queueIndex = 0); /** Block the current thread until all queues in this device are idle. */ VkResult waitIdle(); @@ -528,6 +533,9 @@ public: void freeMemory(MVKDeviceMemory* mvkDevMem, const VkAllocationCallbacks* pAllocator); + +#pragma mark Operations + /** Applies the specified global memory barrier to all resource issued by this device. 
*/ void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -565,6 +573,9 @@ public: /** Populates the specified statistics structure from the current activity performance statistics. */ void getPerformanceStatistics(MVKPerformanceStatistics* pPerf); + /** Invalidates the memory regions. */ + VkResult invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges); + #pragma mark Metal diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index ff3bf4c6..170a6c4a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -2122,6 +2122,9 @@ void MVKDevice::freeMemory(MVKDeviceMemory* mvkDevMem, mvkDevMem->destroy(); } + +#pragma mark Operations + // Adds the specified resource for tracking, and returns the added resource. MVKResource* MVKDevice::addResource(MVKResource* rez) { lock_guard lock(_rezLock); @@ -2197,6 +2200,25 @@ void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) { if (pPerf) { *pPerf = _performanceStatistics; } } +VkResult MVKDevice::invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges) { + @autoreleasepool { + VkResult rslt = VK_SUCCESS; + MVKMTLBlitEncoder mvkBlitEnc; + for (uint32_t i = 0; i < memRangeCount; i++) { + const VkMappedMemoryRange* pMem = &pMemRanges[i]; + MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)pMem->memory; + VkResult r = mvkMem->pullFromDevice(pMem->offset, pMem->size, false, &mvkBlitEnc); + if (rslt == VK_SUCCESS) { rslt = r; } + } + if (mvkBlitEnc.mtlBlitEncoder) { [mvkBlitEnc.mtlBlitEncoder endEncoding]; } + if (mvkBlitEnc.mtlCmdBuffer) { + [mvkBlitEnc.mtlCmdBuffer commit]; + [mvkBlitEnc.mtlCmdBuffer waitUntilCompleted]; + } + return rslt; + } +} + #pragma mark Metal diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h index 9d468ef4..557c51c0 100644 --- 
a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h @@ -85,8 +85,18 @@ public: * If this memory is host-visible, pulls the specified memory range from the device. * Normally, pulling will only occur if the device memory is non-coherent, but pulling * to coherent memory can be forced by setting evenIfCoherent to true. + * + * If pBlitEnc is not null, it points to a holder for a MTLBlitCommandEncoder and its + * associated MTLCommandBuffer. If this instance has a MTLBuffer using managed memory, + * this function may call synchronizeResource: on the MTLBlitCommandEncoder to + * synchronize the GPU contents to the CPU. If the contents of the pBlitEnc do not + * include a MTLBlitCommandEncoder and MTLCommandBuffer, this function will create + * them and populate the contents into the MVKMTLBlitEncoder struct. */ - VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent = false); + VkResult pullFromDevice(VkDeviceSize offset, + VkDeviceSize size, + bool evenIfCoherent = false, + MVKMTLBlitEncoder* pBlitEnc = nullptr); #pragma mark Metal diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm index ee4aedf6..5d5dd092 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm @@ -19,6 +19,7 @@ #include "MVKDeviceMemory.h" #include "MVKBuffer.h" #include "MVKImage.h" +#include "MVKQueue.h" #include "MVKEnvironment.h" #include "mvk_datatypes.hpp" #include "MVKFoundation.h" @@ -91,12 +92,24 @@ VkResult MVKDeviceMemory::flushToDevice(VkDeviceSize offset, VkDeviceSize size, return VK_SUCCESS; } -VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent) { +VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset, + VkDeviceSize size, + bool evenIfCoherent, + MVKMTLBlitEncoder* pBlitEnc) { // Coherent memory is flushed on unmap(), so it is only 
flushed if forced VkDeviceSize memSize = adjustMemorySize(size, offset); if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) ) { lock_guard lock(_rezLock); for (auto& img : _images) { img->pullFromDevice(offset, memSize); } + +#if MVK_MACOS + if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) { + if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences]; } + if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; } + [pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer]; + } +#endif + } return VK_SUCCESS; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index e1e6467e..b10aac6d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -166,14 +166,10 @@ void MVKSwapchain::signalWhenAvailable(uint32_t imageIndex, MVKSemaphore* semaph signal(signaler); if (_device->_useMTLEventsForSemaphores) { // Unfortunately, we can't assume we have an MTLSharedEvent here. - // This means we need to execute a command on the device to signal - // the semaphore. Alternatively, we could always use an MTLSharedEvent, - // but that might impose unacceptable performance costs just to handle - // this one case. - MVKQueue* queue = _device->getQueue(0, 0); - id mtlQ = queue->getMTLCommandQueue(); - id mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences]; - [mtlCmdBuff enqueue]; + // This means we need to execute a command on the device to signal the semaphore. + // Alternatively, we could always use an MTLSharedEvent, but that might impose + // unacceptable performance costs just to handle this one case. 
+ id mtlCmdBuff = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences]; signaler.first->encodeSignal(mtlCmdBuff); [mtlCmdBuff commit]; } diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index e44f3684..a1e2fc9a 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -429,15 +429,10 @@ MVK_PUBLIC_SYMBOL VkResult vkInvalidateMappedMemoryRanges( VkDevice device, uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges) { - + MVKTraceVulkanCallStart(); - VkResult rslt = VK_SUCCESS; - for (uint32_t i = 0; i < memRangeCount; i++) { - const VkMappedMemoryRange* pMem = &pMemRanges[i]; - MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)pMem->memory; - VkResult r = mvkMem->pullFromDevice(pMem->offset, pMem->size); - if (rslt == VK_SUCCESS) { rslt = r; } - } + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + VkResult rslt = mvkDev->invalidateMappedMemoryRanges(memRangeCount, pMemRanges); MVKTraceVulkanCallEnd(); return rslt; }