VK_EXT_host_image_copy: On discrete GPUs, sync managed-memory textures before copying.

Discrete GPUs use managed-memory textures, whose content must be synchronized
from GPU memory before it can be host-copied to memory using the CPU.
Metal automatically handles the reverse sync when copying from memory to a texture.
Bill Hollings 2024-04-23 14:51:31 -04:00
parent 2290e86cd9
commit b6735878f3
7 changed files with 60 additions and 22 deletions
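
For context, the Metal rule behind this change: on a device without unified memory, a texture in
MTLStorageModeManaged that has been written by the GPU must be explicitly synchronized back to
system memory before the CPU can read its content. A minimal standalone sketch of that pattern
(illustrative only, not MoltenVK code; the helper name is hypothetical):

static void syncManagedTextureForCPURead(id<MTLDevice> mtlDev,
                                         id<MTLCommandQueue> mtlQueue,
                                         id<MTLTexture> mtlTex) {
    // Nothing to do on unified-memory devices, or for textures not using managed storage.
    if (mtlDev.hasUnifiedMemory || mtlTex.storageMode != MTLStorageModeManaged) { return; }

    id<MTLCommandBuffer> mtlCmdBuff = [mtlQueue commandBuffer];
    id<MTLBlitCommandEncoder> mtlBlitEnc = [mtlCmdBuff blitCommandEncoder];
    [mtlBlitEnc synchronizeResource: mtlTex];   // or synchronizeTexture:slice:level: per subresource
    [mtlBlitEnc endEncoding];
    [mtlCmdBuff commit];
    [mtlCmdBuff waitUntilCompleted];            // content is now visible to getBytes:bytesPerRow:fromRegion:mipmapLevel:
}

The reverse direction needs no explicit sync step; as the commit message notes, Metal handles that
automatically when the CPU writes content into a managed texture.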

@ -256,11 +256,14 @@ public:
/** Flush underlying buffer memory into the image if necessary */
void flushToDevice(VkDeviceSize offset, VkDeviceSize size);
/** Host-copy the content of this image to or from memory using the CPU. */
template<typename CopyInfo> VkResult copyContent(const CopyInfo* pCopyInfo);
/** Host-copy the content of an image to another using the CPU. */
static VkResult copyImageToImage(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo);
/** Host-copy the content of one image to another using the CPU. */
static VkResult copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo);
/** Host-copy the content of an image to memory using the CPU. */
VkResult copyImageToMemory(const VkCopyImageToMemoryInfoEXT* pCopyImageToMemoryInfo);
/** Host-copy the content of an image from memory using the CPU. */
VkResult copyMemoryToImage(const VkCopyMemoryToImageInfoEXT* pCopyMemoryToImageInfo);
#pragma mark Metal
@ -359,6 +362,7 @@ protected:
uint8_t getMemoryBindingCount() const { return (uint8_t)_memoryBindings.size(); }
uint8_t getMemoryBindingIndex(uint8_t planeIndex) const;
MVKImageMemoryBinding* getMemoryBinding(uint8_t planeIndex);
template<typename CopyInfo> VkResult copyContent(const CopyInfo* pCopyInfo);
VkResult copyContent(id<MTLTexture> mtlTex,
VkMemoryToImageCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice,
void* pImgBytes, size_t rowPitch, size_t depthPitch);
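
For reference, these per-direction methods back the VK_EXT_host_image_copy entry points. A minimal
application-side sketch of a call that reaches copyImageToMemory(), assuming the extension is enabled,
the image was created with VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT, and device, image, hostBuffer,
width, and height are hypothetical handles/values created elsewhere:

VkImageToMemoryCopyEXT region = {};
region.sType            = VK_STRUCTURE_TYPE_IMAGE_TO_MEMORY_COPY_EXT;
region.pHostPointer     = hostBuffer;          // tightly packed when memoryRowLength/memoryImageHeight are 0
region.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
region.imageOffset      = { 0, 0, 0 };
region.imageExtent      = { width, height, 1 };

VkCopyImageToMemoryInfoEXT copyInfo = {};
copyInfo.sType          = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO_EXT;
copyInfo.srcImage       = image;
copyInfo.srcImageLayout = VK_IMAGE_LAYOUT_GENERAL;
copyInfo.regionCount    = 1;
copyInfo.pRegions       = &region;
VkResult rslt = vkCopyImageToMemoryEXT(device, &copyInfo);   // dispatches to MVKImage::copyImageToMemory()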

@ -566,7 +566,7 @@ static MTLRegion getMTLRegion(const ImgRgn& imgRgn) {
return { mvkMTLOriginFromVkOffset3D(imgRgn.imageOffset), mvkMTLSizeFromVkExtent3D(imgRgn.imageExtent) };
}
// Host-copy from memory to a MTLTexture.
// Host-copy from a MTLTexture to memory.
VkResult MVKImage::copyContent(id<MTLTexture> mtlTex,
VkImageToMemoryCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice,
void* pImgBytes, size_t rowPitch, size_t depthPitch) {
@ -579,7 +579,7 @@ VkResult MVKImage::copyContent(id<MTLTexture> mtlTex,
return VK_SUCCESS;
}
// Host-copy from a MTLTexture to memory.
// Host-copy from memory to a MTLTexture.
VkResult MVKImage::copyContent(id<MTLTexture> mtlTex,
VkMemoryToImageCopyEXT imgRgn, uint32_t mipLevel, uint32_t slice,
void* pImgBytes, size_t rowPitch, size_t depthPitch) {
@ -646,14 +646,9 @@ VkResult MVKImage::copyContent(const CopyInfo* pCopyInfo) {
return VK_SUCCESS;
}
// Create concrete implementations of the variations of the copyContent() template function.
// This is required since the template is called from outside this file (compilation unit).
template VkResult MVKImage::copyContent(const VkCopyMemoryToImageInfoEXT* pCopyInfo);
template VkResult MVKImage::copyContent(const VkCopyImageToMemoryInfoEXT* pCopyInfo);
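
These explicit instantiations follow the standard C++ pattern for a template whose body is defined
in a .cpp/.mm file rather than in the header: without them, a caller in another compilation unit
would reference symbols the compiler never emitted. A generic sketch of the pattern (names are
illustrative, not MoltenVK code):

// helper.h -- declaration only; the body is not visible to other compilation units.
template<typename Info> VkResult processRegions(const Info* pInfo);

// helper.cpp -- the body lives here...
template<typename Info> VkResult processRegions(const Info* pInfo) {
    return pInfo ? VK_SUCCESS : VK_ERROR_UNKNOWN;
}

// ...so each concrete variation used elsewhere must be instantiated explicitly.
template VkResult processRegions(const VkCopyMemoryToImageInfoEXT* pInfo);
template VkResult processRegions(const VkCopyImageToMemoryInfoEXT* pInfo);
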
// Host-copy content between images by allocating a temporary memory buffer, copying into it from the
// source image, and then copying from the memory buffer into the destination image, all using the CPU.
VkResult MVKImage::copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) {
VkResult MVKImage::copyImageToImage(const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) {
for (uint32_t imgRgnIdx = 0; imgRgnIdx < pCopyImageToImageInfo->regionCount; imgRgnIdx++) {
auto& imgRgn = pCopyImageToImageInfo->pRegions[imgRgnIdx];
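
Using the imgRgn region from the loop above, the buffer-staged approach described in the comment can
be pictured per region roughly as follows (a schematic sketch, not the actual implementation; buffer
sizing, plane selection, and error handling are omitted, and regionByteCount is an assumed value
derived from the region extent and texel size):

std::vector<uint8_t> tempBuff(regionByteCount);

// 1) Stage: host-copy the source subresource region into the temporary CPU buffer.
VkImageToMemoryCopyEXT srcCopy = { VK_STRUCTURE_TYPE_IMAGE_TO_MEMORY_COPY_EXT };
srcCopy.pHostPointer     = tempBuff.data();
srcCopy.imageSubresource = imgRgn.srcSubresource;
srcCopy.imageOffset      = imgRgn.srcOffset;
srcCopy.imageExtent      = imgRgn.extent;

// 2) Unstage: host-copy from the temporary buffer into the destination subresource region.
VkMemoryToImageCopyEXT dstCopy = { VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT };
dstCopy.pHostPointer     = tempBuff.data();
dstCopy.imageSubresource = imgRgn.dstSubresource;
dstCopy.imageOffset      = imgRgn.dstOffset;
dstCopy.imageExtent      = imgRgn.extent;
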
@ -716,6 +711,40 @@ VkResult MVKImage::copyContent(const VkCopyImageToImageInfoEXT* pCopyImageToImag
return VK_SUCCESS;
}
VkResult MVKImage::copyImageToMemory(const VkCopyImageToMemoryInfoEXT* pCopyImageToMemoryInfo) {
#if MVK_MACOS
// On macOS, if the device doesn't have unified memory, and the texture is using managed memory, we need
// to sync the managed memory from the GPU, so the texture content is accessible to be copied by the CPU.
if ( !getPhysicalDevice()->getHasUnifiedMemory() && getMTLStorageMode() == MTLStorageModeManaged ) {
@autoreleasepool {
id<MTLCommandBuffer> mtlCmdBuff = getDevice()->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseCopyImageToMemory);
id<MTLBlitCommandEncoder> mtlBlitEnc = [mtlCmdBuff blitCommandEncoder];
for (uint32_t imgRgnIdx = 0; imgRgnIdx < pCopyImageToMemoryInfo->regionCount; imgRgnIdx++) {
auto& imgRgn = pCopyImageToMemoryInfo->pRegions[imgRgnIdx];
auto& imgSubRez = imgRgn.imageSubresource;
id<MTLTexture> mtlTex = getMTLTexture(getPlaneFromVkImageAspectFlags(imgSubRez.aspectMask));
for (uint32_t imgLyrIdx = 0; imgLyrIdx < imgSubRez.layerCount; imgLyrIdx++) {
[mtlBlitEnc synchronizeTexture: mtlTex
slice: imgSubRez.baseArrayLayer + imgLyrIdx
level: imgSubRez.mipLevel];
}
}
[mtlBlitEnc endEncoding];
[mtlCmdBuff commit];
[mtlCmdBuff waitUntilCompleted];
}
}
#endif
return copyContent(pCopyImageToMemoryInfo);
}
VkResult MVKImage::copyMemoryToImage(const VkCopyMemoryToImageInfoEXT* pCopyMemoryToImageInfo) {
return copyContent(pCopyMemoryToImageInfo);
}
VkImageType MVKImage::getImageType() { return mvkVkImageTypeFromMTLTextureType(_mtlTextureType); }
bool MVKImage::getIsDepthStencil() { return getPixelFormats()->getFormatType(_vkFormat) == kMVKFormatDepthStencil; }
@ -823,21 +852,22 @@ void MVKImage::applyImageMemoryBarrier(MVKPipelineBarrier& barrier,
}
VkResult MVKImage::getMemoryRequirements(VkMemoryRequirements* pMemoryRequirements, uint8_t planeIndex) {
MVKPhysicalDevice* mvkPD = getPhysicalDevice();
VkImageUsageFlags combinedUsage = getCombinedUsage();
pMemoryRequirements->memoryTypeBits = (_isDepthStencilAttachment)
? getPhysicalDevice()->getPrivateMemoryTypes()
: getPhysicalDevice()->getAllMemoryTypes();
? mvkPD->getPrivateMemoryTypes()
: mvkPD->getAllMemoryTypes();
#if MVK_MACOS
// Metal on macOS does not provide native support for host-coherent memory, but Vulkan requires it for Linear images
if ( !_isLinear ) {
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getHostCoherentMemoryTypes());
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getHostCoherentMemoryTypes());
}
#endif
VkImageUsageFlags combinedUsage = getCombinedUsage();
// If the image can be used in a host-copy transfer, the memory cannot be private.
if (mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)) {
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getPrivateMemoryTypes());
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getPrivateMemoryTypes());
}
// Only transient attachments may use memoryless storage.
@ -845,7 +875,7 @@ VkResult MVKImage::getMemoryRequirements(VkMemoryRequirements* pMemoryRequiremen
// TODO: support framebuffer fetch so VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT uses color(m) in shader instead of setFragmentTexture:, which crashes Metal
if (!mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) ||
mvkIsAnyFlagEnabled(combinedUsage, VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) ) {
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, getPhysicalDevice()->getLazilyAllocatedMemoryTypes());
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, mvkPD->getLazilyAllocatedMemoryTypes());
}
return getMemoryBinding(planeIndex)->getMemoryRequirements(pMemoryRequirements);
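
From the application's point of view, the restriction above means a host-copyable image will not be
limited to private (GPU-only) memory types, so a host-visible type remains selectable. A hedged usage
sketch, assuming device, image, and memProps (from vkGetPhysicalDeviceMemoryProperties) exist elsewhere:

VkMemoryRequirements memReqs;
vkGetImageMemoryRequirements(device, image, &memReqs);

// Private memory types were stripped from memoryTypeBits for host-copyable images,
// so a host-visible memory type can be chosen here.
uint32_t typeIndex = UINT32_MAX;
for (uint32_t i = 0; i < memProps.memoryTypeCount; i++) {
    if ((memReqs.memoryTypeBits & (1u << i)) &&
        (memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
        typeIndex = i;
        break;
    }
}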

@ -155,6 +155,7 @@ protected:
NSString* _mtlCmdBuffLabelQueueWaitIdle = nil;
NSString* _mtlCmdBuffLabelAcquireNextImage = nil;
NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil;
NSString* _mtlCmdBuffLabelCopyImageToMemory = nil;
MVKGPUCaptureScope* _submissionCaptureScope = nil;
float _priority;
uint32_t _index;

@ -195,6 +195,7 @@ NSString* MVKQueue::getMTLCommandBufferLabel(MVKCommandUse cmdUse) {
CASE_GET_LABEL(DeviceWaitIdle);
CASE_GET_LABEL(AcquireNextImage);
CASE_GET_LABEL(InvalidateMappedMemoryRanges);
CASE_GET_LABEL(CopyImageToMemory);
default:
MVKAssert(false, "Uncached MTLCommandBuffer label for command use %s.", mvkVkCommandName(cmdUse));
return [NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(cmdUse), _queueFamily->getIndex(), _index];

@ -40,6 +40,7 @@ const char* mvkVkCommandName(MVKCommandUse cmdUse) {
case kMVKCommandUseResolveImage: return "vkCmdResolveImage (resolve stage)";
case kMVKCommandUseResolveExpandImage: return "vkCmdResolveImage (expand stage)";
case kMVKCommandUseResolveCopyImage: return "vkCmdResolveImage (copy stage)";
case kMVKCommandUseCopyImageToMemory: return "vkCopyImageToMemory host sync";
case kMVKCommandUseCopyBuffer: return "vkCmdCopyBuffer";
case kMVKCommandUseCopyBufferToImage: return "vkCmdCopyBufferToImage";
case kMVKCommandUseCopyImageToBuffer: return "vkCmdCopyImageToBuffer";

@ -83,6 +83,7 @@ typedef enum : uint8_t {
kMVKCommandUseResolveImage, /**< vkCmdResolveImage - resolve stage. */
kMVKCommandUseResolveExpandImage, /**< vkCmdResolveImage - expand stage. */
kMVKCommandUseResolveCopyImage, /**< vkCmdResolveImage - copy stage. */
kMVKCommandUseCopyImageToMemory, /**< vkCopyImageToMemoryEXT host sync. */
kMVKCommandUseCopyBuffer, /**< vkCmdCopyBuffer. */
kMVKCommandUseCopyBufferToImage, /**< vkCmdCopyBufferToImage. */
kMVKCommandUseCopyImageToBuffer, /**< vkCmdCopyImageToBuffer. */

@ -3909,7 +3909,7 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyImageToImageEXT(
const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo) {
MVKTraceVulkanCallStart();
VkResult rslt = MVKImage::copyContent(pCopyImageToImageInfo);
VkResult rslt = MVKImage::copyImageToImage(pCopyImageToImageInfo);
MVKTraceVulkanCallEnd();
return rslt;
}
@ -3920,7 +3920,7 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyImageToMemoryEXT(
MVKTraceVulkanCallStart();
MVKImage* srcImg = (MVKImage*)pCopyImageToMemoryInfo->srcImage;
VkResult rslt = srcImg->copyContent(pCopyImageToMemoryInfo);
VkResult rslt = srcImg->copyImageToMemory(pCopyImageToMemoryInfo);
MVKTraceVulkanCallEnd();
return rslt;
}
@ -3931,7 +3931,7 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCopyMemoryToImageEXT(
MVKTraceVulkanCallStart();
MVKImage* dstImg = (MVKImage*)pCopyMemoryToImageInfo->dstImage;
VkResult rslt = dstImg->copyContent(pCopyMemoryToImageInfo);
VkResult rslt = dstImg->copyMemoryToImage(pCopyMemoryToImageInfo);
MVKTraceVulkanCallEnd();
return rslt;
}