Merge pull request #806 from billhollings/master

Fix Metal validation error on push constant sizing differences between C and MSL structs.
This commit is contained in:
Bill Hollings 2019-12-28 22:20:32 -05:00 committed by GitHub
commit bf3ffff4bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 30 additions and 20 deletions

View File

@ -21,6 +21,8 @@ Released TBD
- Fix crash when app does not use queue family zero.
- Fix buffer offset in `vkCmdPushDescriptorSet()` for non-dedicated buffer memory.
- Fix Metal validation error on push constant sizing differences between C and MSL structs.
- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to `24`.

View File

@ -55,7 +55,7 @@ typedef unsigned long MTLLanguageVersion;
#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch))
#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
#define VK_MVK_MOLTENVK_SPEC_VERSION 23
#define VK_MVK_MOLTENVK_SPEC_VERSION 24
#define VK_MVK_MOLTENVK_EXTENSION_NAME "VK_MVK_moltenvk"
/**
@ -564,6 +564,7 @@ typedef struct {
VkBool32 native3DCompressedTextures; /**< If true, 3D compressed images are supported natively, without manual decompression. */
VkBool32 nativeTextureSwizzle; /**< If true, component swizzle is supported natively, without manual swizzling in shaders. */
VkBool32 placementHeaps; /**< If true, MTLHeap objects support placement of resources. */
VkDeviceSize pushConstantSizeAlignment; /**< The alignment used internally when allocating memory for push constants. Must be PoT. */
} MVKPhysicalDeviceMetalFeatures;
/**

View File

@ -157,10 +157,15 @@ void MVKScissorCommandEncoderState::resetImpl() {
#pragma mark MVKPushConstantsCommandEncoderState
void MVKPushConstantsCommandEncoderState:: setPushConstants(uint32_t offset, MVKVector<char>& pushConstants) {
uint32_t pcCnt = (uint32_t)pushConstants.size();
mvkEnsureSize(_pushConstants, offset + pcCnt);
// MSL structs can have a larger size than the equivalent C struct due to MSL alignment needs.
// Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size.
// Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader.
size_t pcSizeAlign = _cmdEncoder->getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
size_t pcSize = pushConstants.size();
size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign);
mvkEnsureSize(_pushConstants, pcBuffSize);
copy(pushConstants.begin(), pushConstants.end(), _pushConstants.begin() + offset);
if (pcCnt > 0) { markDirty(); }
if (pcBuffSize > 0) { markDirty(); }
}
void MVKPushConstantsCommandEncoderState::setMTLBufferIndex(uint32_t mtlBufferIndex) {

View File

@ -230,7 +230,7 @@ MVKBufferView::MVKBufferView(MVKDevice* device, const VkBufferViewCreateInfo* pC
// Multiple rows will automatically align with PoT max texture dimension, but need to align upwards if less than full single row.
size_t maxBlocksPerRow = _device->_pMetalFeatures->maxTextureDimension / fmtBlockSize.width;
size_t blocksPerRow = min(blockCount, maxBlocksPerRow);
_mtlBytesPerRow = mvkAlignByteOffset(blocksPerRow * bytesPerBlock, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this));
_mtlBytesPerRow = mvkAlignByteCount(blocksPerRow * bytesPerBlock, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this));
size_t rowCount = blockCount / blocksPerRow;
if (blockCount % blocksPerRow) { rowCount++; }

View File

@ -785,7 +785,7 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
logGPUInfo();
}
/** Initializes the Metal-specific physical device features of this instance. */
// Initializes the Metal-specific physical device features of this instance.
void MVKPhysicalDevice::initMetalFeatures() {
mvkClear(&_metalFeatures); // Start with everything cleared
@ -796,6 +796,8 @@ void MVKPhysicalDevice::initMetalFeatures() {
_metalFeatures.maxPerStageSamplerCount = 16;
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
_metalFeatures.pushConstantSizeAlignment = 16; // Min float4 alignment for typical uniform structs.
_metalFeatures.ioSurfaces = MVK_SUPPORT_IOSURFACE_BOOL;
// Metal supports 2 or 3 concurrent CAMetalLayer drawables.
@ -2565,7 +2567,7 @@ uint32_t MVKDevice::expandVisibilityResultMTLBuffer(uint32_t queryCount) {
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): A maximum of %d total queries are available on this device in its current configuration. See the API notes for the MVKConfiguration.supportLargeQueryPools configuration parameter for more info.", _globalVisibilityQueryCount);
}
NSUInteger mtlBuffLen = mvkAlignByteOffset(newBuffLen, _pMetalFeatures->mtlBufferAlignment);
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _pMetalFeatures->mtlBufferAlignment);
MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
[_globalVisibilityResultMTLBuffer release];
_globalVisibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained

View File

@ -214,7 +214,7 @@ bool MVKDeviceMemory::ensureMTLBuffer() {
if (_mtlBuffer) { return true; }
NSUInteger memLen = mvkAlignByteOffset(_allocationSize, _device->_pMetalFeatures->mtlBufferAlignment);
NSUInteger memLen = mvkAlignByteCount(_allocationSize, _device->_pMetalFeatures->mtlBufferAlignment);
if (memLen > _device->_pMetalFeatures->maxMTLBufferSize) { return false; }
@ -247,7 +247,7 @@ bool MVKDeviceMemory::ensureHostMemory() {
if ( !_pHostMemory) {
size_t memAlign = _device->_pMetalFeatures->mtlBufferAlignment;
NSUInteger memLen = mvkAlignByteOffset(_allocationSize, memAlign);
NSUInteger memLen = mvkAlignByteCount(_allocationSize, memAlign);
int err = posix_memalign(&_pHostMemory, memAlign, memLen);
if (err) { return false; }
}

View File

@ -66,7 +66,7 @@ VkExtent3D MVKImage::getExtent3D(uint32_t mipLevel) {
VkDeviceSize MVKImage::getBytesPerRow(uint32_t mipLevel) {
size_t bytesPerRow = mvkMTLPixelFormatBytesPerRow(_mtlPixelFormat, getExtent3D(mipLevel).width);
return mvkAlignByteOffset(bytesPerRow, _rowByteAlignment);
return mvkAlignByteCount(bytesPerRow, _rowByteAlignment);
}
VkDeviceSize MVKImage::getBytesPerLayer(uint32_t mipLevel) {

View File

@ -625,7 +625,7 @@ MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLTessControlStageDescrip
uint32_t offset = 0;
for (const SPIRVShaderOutput& output : vtxOutputs) {
if (output.builtin == spv::BuiltInPointSize && !reflectData.pointMode) { continue; }
offset = (uint32_t)mvkAlignByteOffset(offset, sizeOfOutput(output));
offset = (uint32_t)mvkAlignByteCount(offset, sizeOfOutput(output));
if (shaderContext.isVertexAttributeLocationUsed(output.location)) {
plDesc.stageInputDescriptor.attributes[output.location].bufferIndex = kMVKTessCtlInputBufferIndex;
plDesc.stageInputDescriptor.attributes[output.location].format = (MTLAttributeFormat)mvkMTLVertexFormatFromVkFormat(mvkFormatFromOutput(output));
@ -635,7 +635,7 @@ MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLTessControlStageDescrip
}
if (vtxOutputs.size() > 0) {
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stepFunction = MTLStepFunctionThreadPositionInGridX;
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stride = mvkAlignByteOffset(offset, sizeOfOutput(vtxOutputs[0]));
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stride = mvkAlignByteCount(offset, sizeOfOutput(vtxOutputs[0]));
}
plDesc.stageInputDescriptor.indexBufferIndex = kMVKTessCtlIndexBufferIndex;
@ -718,7 +718,7 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessRasterStageDescripto
plDesc.vertexDescriptor.attributes[location].format = MTLVertexFormatHalf2; // FIXME Should use Float2
}
} else if (output.perPatch) {
patchOffset = (uint32_t)mvkAlignByteOffset(patchOffset, sizeOfOutput(output));
patchOffset = (uint32_t)mvkAlignByteCount(patchOffset, sizeOfOutput(output));
plDesc.vertexDescriptor.attributes[output.location].bufferIndex = kMVKTessEvalPatchInputBufferIndex;
plDesc.vertexDescriptor.attributes[output.location].format = mvkMTLVertexFormatFromVkFormat(mvkFormatFromOutput(output));
plDesc.vertexDescriptor.attributes[output.location].offset = patchOffset;
@ -726,7 +726,7 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessRasterStageDescripto
if (!firstPatch) { firstPatch = &output; }
usedPerPatch = true;
} else {
offset = (uint32_t)mvkAlignByteOffset(offset, sizeOfOutput(output));
offset = (uint32_t)mvkAlignByteCount(offset, sizeOfOutput(output));
plDesc.vertexDescriptor.attributes[output.location].bufferIndex = kMVKTessEvalInputBufferIndex;
plDesc.vertexDescriptor.attributes[output.location].format = mvkMTLVertexFormatFromVkFormat(mvkFormatFromOutput(output));
plDesc.vertexDescriptor.attributes[output.location].offset = offset;
@ -737,11 +737,11 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessRasterStageDescripto
}
if (usedPerVertex) {
plDesc.vertexDescriptor.layouts[kMVKTessEvalInputBufferIndex].stepFunction = MTLVertexStepFunctionPerPatchControlPoint;
plDesc.vertexDescriptor.layouts[kMVKTessEvalInputBufferIndex].stride = mvkAlignByteOffset(offset, sizeOfOutput(*firstVertex));
plDesc.vertexDescriptor.layouts[kMVKTessEvalInputBufferIndex].stride = mvkAlignByteCount(offset, sizeOfOutput(*firstVertex));
}
if (usedPerPatch) {
plDesc.vertexDescriptor.layouts[kMVKTessEvalPatchInputBufferIndex].stepFunction = MTLVertexStepFunctionPerPatch;
plDesc.vertexDescriptor.layouts[kMVKTessEvalPatchInputBufferIndex].stride = mvkAlignByteOffset(patchOffset, sizeOfOutput(*firstPatch));
plDesc.vertexDescriptor.layouts[kMVKTessEvalPatchInputBufferIndex].stride = mvkAlignByteCount(patchOffset, sizeOfOutput(*firstPatch));
}
if (outerLoc != (uint32_t)(-1) || innerLoc != (uint32_t)(-1)) {
plDesc.vertexDescriptor.layouts[kMVKTessEvalLevelBufferIndex].stepFunction = MTLVertexStepFunctionPerPatch;

View File

@ -317,7 +317,7 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device,
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): Each query pool can support a maximum of %d queries.", queryCount);
}
NSUInteger mtlBuffLen = mvkAlignByteOffset(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
_visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained

View File

@ -185,7 +185,7 @@ static inline uint32_t mvkPowerOfTwoExponent(uintptr_t value) {
* than or equal to the reference if alignDown is true.
*
* This is a low level utility method. Usually you will use the convenience functions
* mvkAlignAddress() and mvkAlignByteOffset() to align addresses and offsets respectively.
* mvkAlignAddress() and mvkAlignByteCount() to align addresses and byte counts respectively.
*/
static inline uintptr_t mvkAlignByteRef(uintptr_t byteRef, uintptr_t byteAlignment, bool alignDown = false) {
if (byteAlignment == 0) { return byteRef; }
@ -216,8 +216,8 @@ static inline void* mvkAlignAddress(void* address, uintptr_t byteAlignment, bool
* which will be greater than or equal to the original offset if alignDown is false, or less
* than or equal to the original offset if alignDown is true.
*/
static inline uintptr_t mvkAlignByteOffset(uintptr_t byteOffset, uintptr_t byteAlignment, bool alignDown = false) {
return mvkAlignByteRef(byteOffset, byteAlignment, alignDown);
static inline uintptr_t mvkAlignByteCount(uintptr_t byteCount, uintptr_t byteAlignment, bool alignDown = false) {
return mvkAlignByteRef(byteCount, byteAlignment, alignDown);
}
/**