Make mvkCeilingDivide() a template function.
This commit is contained in:
parent
c5b91b423d
commit
16afd5e37f
@ -110,7 +110,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
if (pipeline->isTessellationPipeline()) {
|
||||
inControlPointCount = pipeline->getInputControlPointCount();
|
||||
outControlPointCount = pipeline->getOutputControlPointCount();
|
||||
patchCount = (uint32_t)mvkCeilingDivide(_vertexCount, inControlPointCount);
|
||||
patchCount = mvkCeilingDivide(_vertexCount, inControlPointCount);
|
||||
}
|
||||
for (uint32_t s : stages) {
|
||||
auto stage = MVKGraphicsStage(s);
|
||||
@ -308,7 +308,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
if (pipeline->isTessellationPipeline()) {
|
||||
inControlPointCount = pipeline->getInputControlPointCount();
|
||||
outControlPointCount = pipeline->getOutputControlPointCount();
|
||||
patchCount = (uint32_t)mvkCeilingDivide(_indexCount, inControlPointCount);
|
||||
patchCount = mvkCeilingDivide(_indexCount, inControlPointCount);
|
||||
}
|
||||
for (uint32_t s : stages) {
|
||||
auto stage = MVKGraphicsStage(s);
|
||||
@ -544,7 +544,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
inControlPointCount = pipeline->getInputControlPointCount();
|
||||
outControlPointCount = pipeline->getOutputControlPointCount();
|
||||
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
|
||||
patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
|
||||
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
|
||||
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
|
||||
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
|
||||
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
|
||||
@ -614,7 +614,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
&_drawCount,
|
||||
sizeof(_drawCount),
|
||||
5);
|
||||
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
|
||||
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
|
||||
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
|
||||
}
|
||||
|
||||
@ -783,7 +783,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
inControlPointCount = pipeline->getInputControlPointCount();
|
||||
outControlPointCount = pipeline->getOutputControlPointCount();
|
||||
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
|
||||
patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
|
||||
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
|
||||
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
|
||||
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
|
||||
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
|
||||
@ -842,7 +842,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
&_drawCount,
|
||||
sizeof(_drawCount),
|
||||
5);
|
||||
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
|
||||
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
|
||||
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
|
||||
}
|
||||
// We actually need to make a copy of the index buffer, regardless of whether
|
||||
|
@ -825,15 +825,15 @@ void MVKCmdBufferImageCopy::encode(MVKCommandEncoder* cmdEncoder) {
|
||||
// One thread is run per block. Each block decompresses to an m x n array of texels.
|
||||
// So the size of the grid is (ceil(width/m), ceil(height/n), depth).
|
||||
VkExtent2D blockExtent = mvkMTLPixelFormatBlockTexelSize(mtlPixFmt);
|
||||
MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide(mtlTxtSize.width, blockExtent.width),
|
||||
mvkCeilingDivide(mtlTxtSize.height, blockExtent.height),
|
||||
MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide<NSUInteger>(mtlTxtSize.width, blockExtent.width),
|
||||
mvkCeilingDivide<NSUInteger>(mtlTxtSize.height, blockExtent.height),
|
||||
mtlTxtSize.depth);
|
||||
// Use four times the thread execution width as the threadgroup size.
|
||||
MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth);
|
||||
// Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)),
|
||||
// where 't' is the thread execution width.
|
||||
mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, 2);
|
||||
mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, 2);
|
||||
mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, mtlTgrpSize.width);
|
||||
mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, mtlTgrpSize.height);
|
||||
mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth);
|
||||
// There may be extra threads, but that's OK; the shader does bounds checking to
|
||||
// ensure it doesn't try to write out of bounds.
|
||||
|
@ -141,12 +141,6 @@ static inline std::string mvkGetMoltenVKVersionString(uint32_t mvkVersion) {
|
||||
#pragma mark -
|
||||
#pragma mark Alignment functions
|
||||
|
||||
/**
 * Returns the result of an unsigned integer division, rounded up.
 *
 * The result is derived from the quotient and remainder, rather than from
 * (numerator + denominator - 1) / denominator, because that sum wraps around
 * when the numerator is close to the maximum value of size_t.
 *
 * The denominator must not be zero.
 */
static inline size_t mvkCeilingDivide(size_t numerator, size_t denominator) {
	if (denominator == 1) { return numerator; }		// Short circuit for this very common use case.
	size_t quotient = numerator / denominator;
	// Any non-zero remainder means the exact result lies above the truncated quotient.
	return (numerator % denominator) ? (quotient + 1) : quotient;
}
|
||||
|
||||
/** Returns whether the specified value is a power-of-two. */
|
||||
static inline bool mvkIsPowerOfTwo(uintptr_t value) {
|
||||
// Test POT: (x != 0) && ((x & (x - 1)) == 0)
|
||||
@ -348,6 +342,13 @@ const T& mvkClamp(const T& val, const T& lower, const T& upper) {
|
||||
return std::min(std::max(val, lower), upper);
|
||||
}
|
||||
|
||||
/**
 * Returns the result of an integer division, rounded up (towards positive infinity).
 *
 * T must be an integer type. The result is derived from the truncated quotient
 * and the remainder, rather than from (numerator + denominator - 1) / denominator,
 * for two reasons: that sum can wrap around for large unsigned values, and it
 * rounds incorrectly when a signed operand is negative.
 *
 * The denominator must not be zero.
 */
template<typename T>
T mvkCeilingDivide(T numerator, T denominator) {
	// Short circuit very common use case of dividing by one.
	if (denominator == 1) { return numerator; }

	T quotient = numerator / denominator;
	T remainder = numerator % denominator;

	// Integer division truncates towards zero, which is already the ceiling when the
	// exact result is negative. Only an inexact positive result needs to be bumped up,
	// and that is the case exactly when the remainder and denominator share a sign.
	bool needsRoundUp = (remainder != 0) && ((remainder > 0) == (denominator > 0));
	return needsRoundUp ? (quotient + 1) : quotient;
}
|
||||
|
||||
/**
|
||||
* Returns a hash value calculated from the specified array of numeric elements,
|
||||
* using the DJB2a algorithm: hash = (hash * 33) ^ value.
|
||||
|
Loading…
x
Reference in New Issue
Block a user