Merge pull request #806 from billhollings/master
Fix Metal validation error on push constant sizing differences between C and MSL structs.
This commit is contained in:
commit
bf3ffff4bb
@ -21,6 +21,8 @@ Released TBD
|
||||
|
||||
- Fix crash when app does not use queue family zero.
|
||||
- Fix buffer offset in `vkCmdPushDescriptorSet()` for non-dedicated buffer memory.
|
||||
- Fix Metal validation error on push constant sizing differences between C and MSL structs.
|
||||
- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to `24`.
|
||||
|
||||
|
||||
|
||||
|
@ -55,7 +55,7 @@ typedef unsigned long MTLLanguageVersion;
|
||||
#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch))
|
||||
#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
|
||||
|
||||
#define VK_MVK_MOLTENVK_SPEC_VERSION 23
|
||||
#define VK_MVK_MOLTENVK_SPEC_VERSION 24
|
||||
#define VK_MVK_MOLTENVK_EXTENSION_NAME "VK_MVK_moltenvk"
|
||||
|
||||
/**
|
||||
@ -564,6 +564,7 @@ typedef struct {
|
||||
VkBool32 native3DCompressedTextures; /**< If true, 3D compressed images are supported natively, without manual decompression. */
|
||||
VkBool32 nativeTextureSwizzle; /**< If true, component swizzle is supported natively, without manual swizzling in shaders. */
|
||||
VkBool32 placementHeaps; /**< If true, MTLHeap objects support placement of resources. */
|
||||
VkDeviceSize pushConstantSizeAlignment; /**< The alignment used internally when allocating memory for push constants. Must be PoT. */
|
||||
} MVKPhysicalDeviceMetalFeatures;
|
||||
|
||||
/**
|
||||
|
@ -157,10 +157,15 @@ void MVKScissorCommandEncoderState::resetImpl() {
|
||||
#pragma mark MVKPushConstantsCommandEncoderState
|
||||
|
||||
void MVKPushConstantsCommandEncoderState:: setPushConstants(uint32_t offset, MVKVector<char>& pushConstants) {
|
||||
uint32_t pcCnt = (uint32_t)pushConstants.size();
|
||||
mvkEnsureSize(_pushConstants, offset + pcCnt);
|
||||
// MSL structs can have a larger size than the equivalent C struct due to MSL alignment needs.
|
||||
// Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size.
|
||||
// Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader.
|
||||
size_t pcSizeAlign = _cmdEncoder->getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
|
||||
size_t pcSize = pushConstants.size();
|
||||
size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign);
|
||||
mvkEnsureSize(_pushConstants, pcBuffSize);
|
||||
copy(pushConstants.begin(), pushConstants.end(), _pushConstants.begin() + offset);
|
||||
if (pcCnt > 0) { markDirty(); }
|
||||
if (pcBuffSize > 0) { markDirty(); }
|
||||
}
|
||||
|
||||
void MVKPushConstantsCommandEncoderState::setMTLBufferIndex(uint32_t mtlBufferIndex) {
|
||||
|
@ -230,7 +230,7 @@ MVKBufferView::MVKBufferView(MVKDevice* device, const VkBufferViewCreateInfo* pC
|
||||
// Multiple rows will automatically align with PoT max texture dimension, but need to align upwards if less than full single row.
|
||||
size_t maxBlocksPerRow = _device->_pMetalFeatures->maxTextureDimension / fmtBlockSize.width;
|
||||
size_t blocksPerRow = min(blockCount, maxBlocksPerRow);
|
||||
_mtlBytesPerRow = mvkAlignByteOffset(blocksPerRow * bytesPerBlock, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this));
|
||||
_mtlBytesPerRow = mvkAlignByteCount(blocksPerRow * bytesPerBlock, _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this));
|
||||
|
||||
size_t rowCount = blockCount / blocksPerRow;
|
||||
if (blockCount % blocksPerRow) { rowCount++; }
|
||||
|
@ -785,7 +785,7 @@ MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtl
|
||||
logGPUInfo();
|
||||
}
|
||||
|
||||
/** Initializes the Metal-specific physical device features of this instance. */
|
||||
// Initializes the Metal-specific physical device features of this instance.
|
||||
void MVKPhysicalDevice::initMetalFeatures() {
|
||||
mvkClear(&_metalFeatures); // Start with everything cleared
|
||||
|
||||
@ -796,6 +796,8 @@ void MVKPhysicalDevice::initMetalFeatures() {
|
||||
_metalFeatures.maxPerStageSamplerCount = 16;
|
||||
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
|
||||
|
||||
_metalFeatures.pushConstantSizeAlignment = 16; // Min float4 alignment for typical uniform structs.
|
||||
|
||||
_metalFeatures.ioSurfaces = MVK_SUPPORT_IOSURFACE_BOOL;
|
||||
|
||||
// Metal supports 2 or 3 concurrent CAMetalLayer drawables.
|
||||
@ -2565,7 +2567,7 @@ uint32_t MVKDevice::expandVisibilityResultMTLBuffer(uint32_t queryCount) {
|
||||
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): A maximum of %d total queries are available on this device in its current configuration. See the API notes for the MVKConfiguration.supportLargeQueryPools configuration parameter for more info.", _globalVisibilityQueryCount);
|
||||
}
|
||||
|
||||
NSUInteger mtlBuffLen = mvkAlignByteOffset(newBuffLen, _pMetalFeatures->mtlBufferAlignment);
|
||||
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _pMetalFeatures->mtlBufferAlignment);
|
||||
MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
|
||||
[_globalVisibilityResultMTLBuffer release];
|
||||
_globalVisibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained
|
||||
|
@ -214,7 +214,7 @@ bool MVKDeviceMemory::ensureMTLBuffer() {
|
||||
|
||||
if (_mtlBuffer) { return true; }
|
||||
|
||||
NSUInteger memLen = mvkAlignByteOffset(_allocationSize, _device->_pMetalFeatures->mtlBufferAlignment);
|
||||
NSUInteger memLen = mvkAlignByteCount(_allocationSize, _device->_pMetalFeatures->mtlBufferAlignment);
|
||||
|
||||
if (memLen > _device->_pMetalFeatures->maxMTLBufferSize) { return false; }
|
||||
|
||||
@ -247,7 +247,7 @@ bool MVKDeviceMemory::ensureHostMemory() {
|
||||
|
||||
if ( !_pHostMemory) {
|
||||
size_t memAlign = _device->_pMetalFeatures->mtlBufferAlignment;
|
||||
NSUInteger memLen = mvkAlignByteOffset(_allocationSize, memAlign);
|
||||
NSUInteger memLen = mvkAlignByteCount(_allocationSize, memAlign);
|
||||
int err = posix_memalign(&_pHostMemory, memAlign, memLen);
|
||||
if (err) { return false; }
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ VkExtent3D MVKImage::getExtent3D(uint32_t mipLevel) {
|
||||
|
||||
VkDeviceSize MVKImage::getBytesPerRow(uint32_t mipLevel) {
|
||||
size_t bytesPerRow = mvkMTLPixelFormatBytesPerRow(_mtlPixelFormat, getExtent3D(mipLevel).width);
|
||||
return mvkAlignByteOffset(bytesPerRow, _rowByteAlignment);
|
||||
return mvkAlignByteCount(bytesPerRow, _rowByteAlignment);
|
||||
}
|
||||
|
||||
VkDeviceSize MVKImage::getBytesPerLayer(uint32_t mipLevel) {
|
||||
|
@ -625,7 +625,7 @@ MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLTessControlStageDescrip
|
||||
uint32_t offset = 0;
|
||||
for (const SPIRVShaderOutput& output : vtxOutputs) {
|
||||
if (output.builtin == spv::BuiltInPointSize && !reflectData.pointMode) { continue; }
|
||||
offset = (uint32_t)mvkAlignByteOffset(offset, sizeOfOutput(output));
|
||||
offset = (uint32_t)mvkAlignByteCount(offset, sizeOfOutput(output));
|
||||
if (shaderContext.isVertexAttributeLocationUsed(output.location)) {
|
||||
plDesc.stageInputDescriptor.attributes[output.location].bufferIndex = kMVKTessCtlInputBufferIndex;
|
||||
plDesc.stageInputDescriptor.attributes[output.location].format = (MTLAttributeFormat)mvkMTLVertexFormatFromVkFormat(mvkFormatFromOutput(output));
|
||||
@ -635,7 +635,7 @@ MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLTessControlStageDescrip
|
||||
}
|
||||
if (vtxOutputs.size() > 0) {
|
||||
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stepFunction = MTLStepFunctionThreadPositionInGridX;
|
||||
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stride = mvkAlignByteOffset(offset, sizeOfOutput(vtxOutputs[0]));
|
||||
plDesc.stageInputDescriptor.layouts[kMVKTessCtlInputBufferIndex].stride = mvkAlignByteCount(offset, sizeOfOutput(vtxOutputs[0]));
|
||||
}
|
||||
plDesc.stageInputDescriptor.indexBufferIndex = kMVKTessCtlIndexBufferIndex;
|
||||
|
||||
@ -718,7 +718,7 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessRasterStageDescripto
|
||||
plDesc.vertexDescriptor.attributes[location].format = MTLVertexFormatHalf2; // FIXME Should use Float2
|
||||
}
|
||||
} else if (output.perPatch) {
|
||||
patchOffset = (uint32_t)mvkAlignByteOffset(patchOffset, sizeOfOutput(output));
|
||||
patchOffset = (uint32_t)mvkAlignByteCount(patchOffset, sizeOfOutput(output));
|
||||
plDesc.vertexDescriptor.attributes[output.location].bufferIndex = kMVKTessEvalPatchInputBufferIndex;
|
||||
plDesc.vertexDescriptor.attributes[output.location].format = mvkMTLVertexFormatFromVkFormat(mvkFormatFromOutput(output));
|
||||
plDesc.vertexDescriptor.attributes[output.location].offset = patchOffset;
|
||||
@ -726,7 +726,7 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessRasterStageDescripto
|
||||
if (!firstPatch) { firstPatch = &output; }
|
||||
usedPerPatch = true;
|
||||
} else {
|
||||
offset = (uint32_t)mvkAlignByteOffset(offset, sizeOfOutput(output));
|
||||
offset = (uint32_t)mvkAlignByteCount(offset, sizeOfOutput(output));
|
||||
plDesc.vertexDescriptor.attributes[output.location].bufferIndex = kMVKTessEvalInputBufferIndex;
|
||||
plDesc.vertexDescriptor.attributes[output.location].format = mvkMTLVertexFormatFromVkFormat(mvkFormatFromOutput(output));
|
||||
plDesc.vertexDescriptor.attributes[output.location].offset = offset;
|
||||
@ -737,11 +737,11 @@ MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLTessRasterStageDescripto
|
||||
}
|
||||
if (usedPerVertex) {
|
||||
plDesc.vertexDescriptor.layouts[kMVKTessEvalInputBufferIndex].stepFunction = MTLVertexStepFunctionPerPatchControlPoint;
|
||||
plDesc.vertexDescriptor.layouts[kMVKTessEvalInputBufferIndex].stride = mvkAlignByteOffset(offset, sizeOfOutput(*firstVertex));
|
||||
plDesc.vertexDescriptor.layouts[kMVKTessEvalInputBufferIndex].stride = mvkAlignByteCount(offset, sizeOfOutput(*firstVertex));
|
||||
}
|
||||
if (usedPerPatch) {
|
||||
plDesc.vertexDescriptor.layouts[kMVKTessEvalPatchInputBufferIndex].stepFunction = MTLVertexStepFunctionPerPatch;
|
||||
plDesc.vertexDescriptor.layouts[kMVKTessEvalPatchInputBufferIndex].stride = mvkAlignByteOffset(patchOffset, sizeOfOutput(*firstPatch));
|
||||
plDesc.vertexDescriptor.layouts[kMVKTessEvalPatchInputBufferIndex].stride = mvkAlignByteCount(patchOffset, sizeOfOutput(*firstPatch));
|
||||
}
|
||||
if (outerLoc != (uint32_t)(-1) || innerLoc != (uint32_t)(-1)) {
|
||||
plDesc.vertexDescriptor.layouts[kMVKTessEvalLevelBufferIndex].stepFunction = MTLVertexStepFunctionPerPatch;
|
||||
|
@ -317,7 +317,7 @@ MVKOcclusionQueryPool::MVKOcclusionQueryPool(MVKDevice* device,
|
||||
reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCreateQueryPool(): Each query pool can support a maximum of %d queries.", queryCount);
|
||||
}
|
||||
|
||||
NSUInteger mtlBuffLen = mvkAlignByteOffset(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
|
||||
NSUInteger mtlBuffLen = mvkAlignByteCount(newBuffLen, _device->_pMetalFeatures->mtlBufferAlignment);
|
||||
MTLResourceOptions mtlBuffOpts = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
|
||||
_visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: mtlBuffLen options: mtlBuffOpts]; // retained
|
||||
|
||||
|
@ -185,7 +185,7 @@ static inline uint32_t mvkPowerOfTwoExponent(uintptr_t value) {
|
||||
* than or equal to the reference if alignDown is true.
|
||||
*
|
||||
* This is a low level utility method. Usually you will use the convenience functions
|
||||
* mvkAlignAddress() and mvkAlignByteOffset() to align addresses and offsets respectively.
|
||||
* mvkAlignAddress() and mvkAlignByteCount() to align addresses and byte counts respectively.
|
||||
*/
|
||||
static inline uintptr_t mvkAlignByteRef(uintptr_t byteRef, uintptr_t byteAlignment, bool alignDown = false) {
|
||||
if (byteAlignment == 0) { return byteRef; }
|
||||
@ -216,8 +216,8 @@ static inline void* mvkAlignAddress(void* address, uintptr_t byteAlignment, bool
|
||||
* which will be greater than or equal to the original offset if alignDown is false, or less
|
||||
* than or equal to the original offset if alignDown is true.
|
||||
*/
|
||||
static inline uintptr_t mvkAlignByteOffset(uintptr_t byteOffset, uintptr_t byteAlignment, bool alignDown = false) {
|
||||
return mvkAlignByteRef(byteOffset, byteAlignment, alignDown);
|
||||
static inline uintptr_t mvkAlignByteCount(uintptr_t byteCount, uintptr_t byteAlignment, bool alignDown = false) {
|
||||
return mvkAlignByteRef(byteCount, byteAlignment, alignDown);
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user