Make mvkCeilingDivide() a template function.

2019-11-21 16:51:41 -05:00 · 2019-11-21 16:51:41 -05:00 · 16afd5e37f
commit 16afd5e37f
parent c5b91b423d
3 changed files with 17 additions and 16 deletions
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
@ -110,7 +110,7 @@ void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
    if (pipeline->isTessellationPipeline()) {
        inControlPointCount = pipeline->getInputControlPointCount();
        outControlPointCount = pipeline->getOutputControlPointCount();
-        patchCount = (uint32_t)mvkCeilingDivide(_vertexCount, inControlPointCount);
+        patchCount = mvkCeilingDivide(_vertexCount, inControlPointCount);
    }
    for (uint32_t s : stages) {
        auto stage = MVKGraphicsStage(s);
@ -308,7 +308,7 @@ void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
    if (pipeline->isTessellationPipeline()) {
        inControlPointCount = pipeline->getInputControlPointCount();
        outControlPointCount = pipeline->getOutputControlPointCount();
-        patchCount = (uint32_t)mvkCeilingDivide(_indexCount, inControlPointCount);
+        patchCount = mvkCeilingDivide(_indexCount, inControlPointCount);
    }
    for (uint32_t s : stages) {
        auto stage = MVKGraphicsStage(s);
@ -544,7 +544,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
        inControlPointCount = pipeline->getInputControlPointCount();
        outControlPointCount = pipeline->getOutputControlPointCount();
        vertexCount = kMVKDrawIndirectVertexCountUpperBound;
-        patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
+        patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
        VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
        if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
            indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
@ -614,7 +614,7 @@ void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
                                            &_drawCount,
                                            sizeof(_drawCount),
                                            5);
-                [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
+                [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
                                  threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
            }

@ -783,7 +783,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
        inControlPointCount = pipeline->getInputControlPointCount();
        outControlPointCount = pipeline->getOutputControlPointCount();
        vertexCount = kMVKDrawIndirectVertexCountUpperBound;
-        patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
+        patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
        VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
        if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
            indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
@ -842,7 +842,7 @@ void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
                                                &_drawCount,
                                                sizeof(_drawCount),
                                                5);
-                    [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
+                    [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
                                      threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
                }
                // We actually need to make a copy of the index buffer, regardless of whether
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@ -825,15 +825,15 @@ void MVKCmdBufferImageCopy::encode(MVKCommandEncoder* cmdEncoder) {
            // One thread is run per block. Each block decompresses to an m x n array of texels.
            // So the size of the grid is (ceil(width/m), ceil(height/n), depth).
            VkExtent2D blockExtent = mvkMTLPixelFormatBlockTexelSize(mtlPixFmt);
-            MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide(mtlTxtSize.width, blockExtent.width),
-                                              mvkCeilingDivide(mtlTxtSize.height, blockExtent.height),
+            MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide<NSUInteger>(mtlTxtSize.width, blockExtent.width),
+                                              mvkCeilingDivide<NSUInteger>(mtlTxtSize.height, blockExtent.height),
                                              mtlTxtSize.depth);
            // Use four times the thread execution width as the threadgroup size.
            MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth);
            // Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)),
            // where 't' is the thread execution width.
-            mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, 2);
-            mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, 2);
+            mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, mtlTgrpSize.width);
+            mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, mtlTgrpSize.height);
            mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth);
            // There may be extra threads, but that's OK; the shader does bounds checking to
            // ensure it doesn't try to write out of bounds.
--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@ -141,12 +141,6 @@ static inline std::string mvkGetMoltenVKVersionString(uint32_t mvkVersion) {
 #pragma mark -
 #pragma mark Alignment functions

-/** Returns the result of an unsigned integer division, rounded up. */
-static inline size_t mvkCeilingDivide(size_t numerator, size_t denominator) {
-	if (denominator == 1) { return numerator; }		// Short circuit for this very common usecase.
-	return (numerator + denominator - 1) / denominator;
-}
-
 /** Returns whether the specified value is a power-of-two. */
 static inline bool mvkIsPowerOfTwo(uintptr_t value) {
 	// Test POT:  (x != 0) && ((x & (x - 1)) == 0)
@ -348,6 +342,13 @@ const T& mvkClamp(const T& val, const T& lower, const T& upper) {
    return std::min(std::max(val, lower), upper);
 }

+/** Returns the result of a division, rounded up. */
+template<typename T>
+T mvkCeilingDivide(T numerator, T denominator) {
+	// Short circuit very common usecase of dividing by one.
+	return (denominator == 1) ? numerator : (numerator + denominator - 1) / denominator;
+}
+
 /**
 * Returns a hash value calculated from the specified array of numeric elements,
 * using the DJB2a algorithm:  hash = (hash * 33) ^ value.