Make mvkCeilingDivide() a template function.

This commit is contained in:
Bill Hollings 2019-11-21 16:51:41 -05:00
parent c5b91b423d
commit 16afd5e37f
3 changed files with 17 additions and 16 deletions

View File

@ -110,7 +110,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
if (pipeline->isTessellationPipeline()) {
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
patchCount = (uint32_t)mvkCeilingDivide(_vertexCount, inControlPointCount);
patchCount = mvkCeilingDivide(_vertexCount, inControlPointCount);
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
@ -308,7 +308,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
if (pipeline->isTessellationPipeline()) {
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
patchCount = (uint32_t)mvkCeilingDivide(_indexCount, inControlPointCount);
patchCount = mvkCeilingDivide(_indexCount, inControlPointCount);
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
@ -544,7 +544,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
@ -614,7 +614,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
&_drawCount,
sizeof(_drawCount),
5);
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
@ -783,7 +783,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
@ -842,7 +842,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
&_drawCount,
sizeof(_drawCount),
5);
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
// We actually need to make a copy of the index buffer, regardless of whether

View File

@ -825,15 +825,15 @@ void MVKCmdBufferImageCopy::encode(MVKCommandEncoder* cmdEncoder) {
// One thread is run per block. Each block decompresses to an m x n array of texels.
// So the size of the grid is (ceil(width/m), ceil(height/n), depth).
VkExtent2D blockExtent = mvkMTLPixelFormatBlockTexelSize(mtlPixFmt);
MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide(mtlTxtSize.width, blockExtent.width),
mvkCeilingDivide(mtlTxtSize.height, blockExtent.height),
MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide<NSUInteger>(mtlTxtSize.width, blockExtent.width),
mvkCeilingDivide<NSUInteger>(mtlTxtSize.height, blockExtent.height),
mtlTxtSize.depth);
// Use four times the thread execution width as the threadgroup size.
MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth);
// Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)),
// where 't' is the thread execution width.
mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, 2);
mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, 2);
mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, mtlTgrpSize.width);
mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, mtlTgrpSize.height);
mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth);
// There may be extra threads, but that's OK; the shader does bounds checking to
// ensure it doesn't try to write out of bounds.

View File

@ -141,12 +141,6 @@ static inline std::string mvkGetMoltenVKVersionString(uint32_t mvkVersion) {
#pragma mark -
#pragma mark Alignment functions
/**
 * Divides one unsigned integer by another and rounds any fractional result
 * up to the next whole number.
 */
static inline size_t mvkCeilingDivide(size_t numerator, size_t denominator) {
	// Any other denominator goes through the usual add-then-divide rounding.
	if (denominator != 1) {
		size_t biasedNumerator = numerator + denominator - 1;
		return biasedNumerator / denominator;
	}
	// Dividing by one is a very common use case, so it is answered without any arithmetic.
	return numerator;
}
/** Returns whether the specified value is a power-of-two. */
static inline bool mvkIsPowerOfTwo(uintptr_t value) {
// Test POT: (x != 0) && ((x & (x - 1)) == 0)
@ -348,6 +342,13 @@ const T& mvkClamp(const T& val, const T& lower, const T& upper) {
return std::min(std::max(val, lower), upper);
}
/**
 * Returns the result of dividing the numerator by the denominator, rounded up
 * to the next whole value whenever the division leaves a remainder.
 *
 * T must be an integer type, because the remainder operator is used.
 * The denominator must not be zero.
 *
 * The result is built from the quotient and remainder instead of from
 * (numerator + denominator - 1), so the intermediate value cannot wrap around
 * when the numerator is close to the largest value that T can hold.
 */
template<typename T>
T mvkCeilingDivide(T numerator, T denominator) {
	// Short circuit very common use case of dividing by one.
	if (denominator == 1) { return numerator; }

	T quotient = numerator / denominator;
	T remainder = numerator % denominator;

	// Integer division truncates toward zero, which is already the ceiling of a
	// negative result. Only an inexact positive result needs to be bumped up, and
	// the result is positive when the remainder (which takes the sign of the
	// numerator) has the same sign as the denominator.
	bool roundUp = (remainder != 0) && ((remainder > 0) == (denominator > 0));
	return roundUp ? static_cast<T>(quotient + 1) : quotient;
}
/**
* Returns a hash value calculated from the specified array of numeric elements,
* using the DJB2a algorithm: hash = (hash * 33) ^ value.