Merge pull request #1169 from cdavis5e/device-loss

Handle device loss.
This commit is contained in:
Bill Hollings 2020-12-02 16:07:02 -05:00 committed by GitHub
commit 796ca4514e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 174 additions and 21 deletions

View File

@ -50,7 +50,9 @@ class MVKSwapchain;
class MVKDeviceMemory;
class MVKFence;
class MVKSemaphore;
class MVKTimelineSemaphore;
class MVKEvent;
class MVKSemaphoreImpl;
class MVKQueryPool;
class MVKShaderModule;
class MVKPipelineCache;
@ -435,6 +437,9 @@ public:
/**
 * Block the current thread until all queues in this device are idle.
 *
 * Returns the first result other than VK_SUCCESS reported while waiting on a queue,
 * or VK_SUCCESS if every queue wait succeeds.
 */
VkResult waitIdle();
/**
 * Mark this device as lost. Releases all waits for this device.
 *
 * Sets the configuration result of this device to VK_ERROR_DEVICE_LOST, releases the
 * host semaphores and signals the timeline semaphores currently registered with this
 * device, and returns VK_ERROR_DEVICE_LOST.
 */
VkResult markLost();
/** Returns whether or not the given descriptor set layout is supported. */
void getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
@ -740,9 +745,19 @@ public:
}
protected:
friend class MVKSemaphoreEmulated;
friend class MVKTimelineSemaphoreMTLEvent;
friend class MVKTimelineSemaphoreEmulated;
friend class MVKFence;
friend class MVKEventEmulated;
void propagateDebugName() override {}
MVKResource* addResource(MVKResource* rez);
MVKResource* removeResource(MVKResource* rez);
// Registers a host semaphore to be released if this device is marked lost.
void addSemaphore(MVKSemaphoreImpl* sem4);
// Removes a host semaphore previously registered through addSemaphore().
void removeSemaphore(MVKSemaphoreImpl* sem4);
// Registers a timeline semaphore to be signaled at the given value if this device is marked lost.
void addTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value);
// Removes a timeline semaphore and value previously registered through addTimelineSemaphore().
void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value);
void initPerformanceTracking();
void initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo);
void initQueues(const VkDeviceCreateInfo* pCreateInfo);
@ -765,7 +780,10 @@ protected:
MVKSmallVector<MVKResource*, 256> _resources;
MVKSmallVector<MVKPrivateDataSlot*> _privateDataSlots;
MVKSmallVector<bool> _privateDataSlotsAvailability;
// Host semaphores that markLost() releases when the device is lost. Accessed under _sem4Lock.
MVKSmallVector<MVKSemaphoreImpl*> _awaitingSemaphores;
// Timeline semaphores, each paired with the value markLost() signals it at. Accessed under _sem4Lock.
MVKSmallVector<std::pair<MVKTimelineSemaphore*, uint64_t>> _awaitingTimelineSem4s;
std::mutex _rezLock;
// Serializes access to _awaitingSemaphores and _awaitingTimelineSem4s.
std::mutex _sem4Lock;
std::mutex _perfLock;
id<MTLBuffer> _globalVisibilityResultMTLBuffer;
id<MTLSamplerState> _defaultMTLSamplerState;

View File

@ -28,6 +28,7 @@
#include "MVKPipeline.h"
#include "MVKFramebuffer.h"
#include "MVKRenderPass.h"
#include "MVKSync.h"
#include "MVKCommandPool.h"
#include "MVKFoundation.h"
#include "MVKCodec.h"
@ -2778,14 +2779,31 @@ MVKQueue* MVKDevice::getAnyQueue() {
}
// Blocks the current thread until every queue of this device is idle.
//
// Each queue is waited on exactly once. The first result other than VK_SUCCESS
// (for example, the device having been lost) stops the iteration and is returned
// to the caller; otherwise VK_SUCCESS is returned.
VkResult MVKDevice::waitIdle() {
	for (auto& queues : _queuesByQueueFamilyIndex) {
		for (MVKQueue* q : queues) {
			VkResult rslt = q->waitIdle();
			if (rslt != VK_SUCCESS) { return rslt; }
		}
	}
	return VK_SUCCESS;
}
// Marks this device as lost and wakes the host waits registered with it.
//
// Sets the configuration result of this device to VK_ERROR_DEVICE_LOST, releases
// each registered host semaphore, signals each registered timeline semaphore at the
// value it was registered with, and then empties both registration lists.
// Always returns VK_ERROR_DEVICE_LOST.
//
// NOTE(review): _sem4Lock is held here while signal() is called, whereas
// MVKTimelineSemaphoreEmulated::encodeWait() calls addTimelineSemaphore() and
// removeTimelineSemaphore() (both of which take _sem4Lock) while holding the
// semaphore's own lock. If signal() acquires that same lock, the two paths take
// the locks in opposite order -- confirm this cannot deadlock.
VkResult MVKDevice::markLost() {
	lock_guard<mutex> lock(_sem4Lock);
	setConfigurationResult(VK_ERROR_DEVICE_LOST);
	for (auto* sem4 : _awaitingSemaphores) {
		sem4->release();
	}
	for (auto& sem4AndValue : _awaitingTimelineSem4s) {
		// Value-initialize so that no indeterminate fields are handed to signal();
		// only the value member is populated here.
		VkSemaphoreSignalInfo signalInfo = {};
		signalInfo.value = sem4AndValue.second;
		sem4AndValue.first->signal(&signalInfo);
	}
	_awaitingSemaphores.clear();
	_awaitingTimelineSem4s.clear();
	return VK_ERROR_DEVICE_LOST;
}
void MVKDevice::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
VkDescriptorSetLayoutSupport* pSupport) {
// According to the Vulkan spec:
@ -3396,6 +3414,30 @@ MVKResource* MVKDevice::removeResource(MVKResource* rez) {
return rez;
}
// Registers the given host semaphore so that markLost() can release it on device loss.
void MVKDevice::addSemaphore(MVKSemaphoreImpl* sem4) {
	lock_guard<mutex> sem4Guard(_sem4Lock);
	_awaitingSemaphores.emplace_back(sem4);
}
// Stops tracking the given host semaphore for wake-up on device loss.
void MVKDevice::removeSemaphore(MVKSemaphoreImpl* sem4) {
	lock_guard<mutex> sem4Guard(_sem4Lock);
	mvkRemoveFirstOccurance(_awaitingSemaphores, sem4);
}
// Registers the given timeline semaphore, together with the value it is waiting for,
// so that markLost() can signal it at that value on device loss.
void MVKDevice::addTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value) {
	lock_guard<mutex> sem4Guard(_sem4Lock);
	_awaitingTimelineSem4s.push_back(make_pair(sem4, value));
}
// Stops tracking the given timeline semaphore and value for wake-up on device loss.
void MVKDevice::removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value) {
	lock_guard<mutex> sem4Guard(_sem4Lock);
	auto sem4AndValue = make_pair(sem4, value);
	mvkRemoveFirstOccurance(_awaitingTimelineSem4s, sem4AndValue);
}
void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
MVKPipelineBarrier& barrier,
@ -3603,6 +3645,11 @@ MVKDevice::MVKDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo
_enabledPortabilityFeatures(),
_enabledExtensions(this)
{
// If the physical device is lost, bail.
if (physicalDevice->getConfigurationResult() != VK_SUCCESS) {
setConfigurationResult(physicalDevice->getConfigurationResult());
return;
}
initPerformanceTracking();
initPhysicalDevice(physicalDevice, pCreateInfo);

View File

@ -71,6 +71,8 @@ VkResult MVKQueryPool::getResults(uint32_t firstQuery,
void* pData,
VkDeviceSize stride,
VkQueryResultFlags flags) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
unique_lock<mutex> lock(_availabilityLock);
uint32_t endQuery = firstQuery + queryCount;
@ -99,6 +101,8 @@ bool MVKQueryPool::areQueriesDeviceAvailable(uint32_t firstQuery, uint32_t endQu
// Returns whether all the queries between the start (inclusive) and end (exclusive) queries are available.
bool MVKQueryPool::areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuery) {
// If we lost the device, stop waiting immediately.
if (_device->getConfigurationResult() != VK_SUCCESS) { return true; }
for (uint32_t query = firstQuery; query < endQuery; query++) {
if ( _availability[query] < Available ) { return false; }
}
@ -107,6 +111,8 @@ bool MVKQueryPool::areQueriesHostAvailable(uint32_t firstQuery, uint32_t endQuer
VkResult MVKQueryPool::getResult(uint32_t query, void* pQryData, VkQueryResultFlags flags) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
bool isAvailable = _availability[query] == Available;
bool shouldOutput = (isAvailable || mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_PARTIAL_BIT));
bool shouldOutput64Bit = mvkAreAllFlagsEnabled(flags, VK_QUERY_RESULT_64_BIT);

View File

@ -76,6 +76,8 @@ static inline void execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { qSu
// Relying on the dispatch queue to find time to drain the autoreleasepool can
// result in significant memory creep under heavy workloads.
VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils
VkResult rslt = qSubmit->getConfigurationResult(); // Extract result before submission to avoid race condition with early destruction
@ -130,6 +132,8 @@ VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) {
// Create an empty submit struct and fence, submit to queue and wait on fence.
VkResult MVKQueue::waitIdle() {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
VkFenceCreateInfo vkFenceInfo = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = nullptr,
@ -292,8 +296,27 @@ void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCo
}];
}
// Use temp var because callback may destroy this instance before this function ends.
// Use temp vars because callback may destroy this instance before this function ends.
MVKDevice* device = _queue->getDevice();
id<MTLCommandBuffer> mtlCmdBuff = _activeMTLCommandBuffer;
// If command buffer execution fails, log it, and mark the device lost.
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mtlCmdBuff) {
if (mtlCmdBuff.status == MTLCommandBufferStatusError) {
device->reportError(device->markLost(), "Command buffer %p \"%s\" execution failed (code %li): %s", mtlCmdBuff, mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : "", mtlCmdBuff.error.code, mtlCmdBuff.error.localizedDescription.UTF8String);
// Some errors indicate we lost the physical device as well.
switch (mtlCmdBuff.error.code) {
case MTLCommandBufferErrorBlacklisted:
// XXX This may also be used for command buffers executed in the background without the right entitlement.
case MTLCommandBufferErrorNotPermitted:
#if MVK_MACOS
case MTLCommandBufferErrorDeviceRemoved:
#endif
device->getPhysicalDevice()->setConfigurationResult(VK_ERROR_DEVICE_LOST);
break;
}
}
}];
_activeMTLCommandBuffer = nil;
[mtlCmdBuff commit];
[mtlCmdBuff release]; // retained

View File

@ -82,6 +82,7 @@ public:
/** Returns the status of the surface. Surface loss takes precedence over out-of-date errors. */
inline VkResult getSurfaceStatus() {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; }
if (getHasSurfaceSizeChanged()) { return VK_ERROR_OUT_OF_DATE_KHR; }
return VK_SUCCESS;

View File

@ -82,6 +82,7 @@ VkResult MVKSwapchain::acquireNextImageKHR(uint64_t timeout,
uint32_t deviceMask,
uint32_t* pImageIndex) {
if ( _device->getConfigurationResult() != VK_SUCCESS ) { return _device->getConfigurationResult(); }
if ( getIsSurfaceLost() ) { return VK_ERROR_SURFACE_LOST_KHR; }
// Find the image that has the shortest wait by finding the smallest availability measure.
@ -353,6 +354,7 @@ void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo,
// The CAMetalLayer should already be initialized when this is called.
void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt) {
if ( _device->getConfigurationResult() != VK_SUCCESS ) { return; }
if ( getIsSurfaceLost() ) { return; }
VkImageFormatListCreateInfo fmtListInfo;
@ -401,6 +403,8 @@ void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo
}
VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
NSInteger framesPerSecond = 60;
#if MVK_IOS_OR_TVOS
UIScreen* screen = [UIScreen mainScreen];
@ -417,6 +421,8 @@ VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRe
}
VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) {
if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); }
std::lock_guard<std::mutex> lock(_presentHistoryLock);
if (pCount && pPresentationTimings == nullptr) {
*pCount = _presentHistoryCount;

View File

@ -146,7 +146,11 @@ MVKSemaphoreMTLEvent::~MVKSemaphoreMTLEvent() {
#pragma mark MVKSemaphoreEmulated
// When no Metal command buffer is supplied, waits on this semaphore's blocker from the
// calling thread. The blocker is registered with the device for the duration of the
// wait so that MVKDevice::markLost() can release it, and is unregistered afterwards.
// Does nothing when a command buffer is supplied. The value argument is unused.
void MVKSemaphoreEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
	if ( !mtlCmdBuff ) {
		_device->addSemaphore(&_blocker);
		_blocker.wait(UINT64_MAX, true);
		_device->removeSemaphore(&_blocker);
	}
}
void MVKSemaphoreEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
@ -190,6 +194,7 @@ bool MVKTimelineSemaphoreMTLEvent::registerWait(MVKFenceSitter* sitter, const Vk
auto addRslt = _sitters.insert(sitter);
if (addRslt.second) {
retain();
_device->addSemaphore(&sitter->_blocker);
[_mtlEvent notifyListener: sitter->getMTLSharedEventListener()
atValue: pWaitInfo->pValues[index]
block: ^(id<MTLSharedEvent>, uint64_t) {
@ -203,6 +208,7 @@ bool MVKTimelineSemaphoreMTLEvent::registerWait(MVKFenceSitter* sitter, const Vk
// Detaches the given sitter from this semaphore: its blocker is no longer tracked by the
// device for wake-up on device loss, and the sitter is dropped from this semaphore's set.
void MVKTimelineSemaphoreMTLEvent::unregisterWait(MVKFenceSitter* sitter) {
	std::lock_guard<std::mutex> sitterGuard(_lock);
	auto* blocker = &sitter->_blocker;
	_device->removeSemaphore(blocker);
	_sitters.erase(sitter);
}
@ -219,11 +225,15 @@ MVKTimelineSemaphoreMTLEvent::~MVKTimelineSemaphoreMTLEvent() {
#pragma mark -
#pragma mark MVKSemaphoreEmulated
#pragma mark MVKTimelineSemaphoreEmulated
// When no Metal command buffer is supplied, blocks the calling thread until this
// semaphore's counter reaches at least the given value. The semaphore and value are
// registered with the device for the duration of the wait so that
// MVKDevice::markLost() can signal the semaphore, and are unregistered afterwards.
// This semaphore's lock is acquired on entry in either case.
void MVKTimelineSemaphoreEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
	unique_lock<mutex> lock(_lock);
	if ( !mtlCmdBuff ) {
		_device->addTimelineSemaphore(this, value);
		_blocker.wait(lock, [=]() { return _value >= value; });
		_device->removeTimelineSemaphore(this, value);
	}
}
void MVKTimelineSemaphoreEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t value) {
@ -255,13 +265,17 @@ bool MVKTimelineSemaphoreEmulated::registerWait(MVKFenceSitter* sitter, const Vk
uint64_t value = pWaitInfo->pValues[index];
if (!_sitters.count(value)) { _sitters.emplace(make_pair(value, unordered_set<MVKFenceSitter*>())); }
auto addRslt = _sitters[value].insert(sitter);
if (addRslt.second) { sitter->await(); }
if (addRslt.second) {
_device->addSemaphore(&sitter->_blocker);
sitter->await();
}
return false;
}
void MVKTimelineSemaphoreEmulated::unregisterWait(MVKFenceSitter* sitter) {
MVKSmallVector<uint64_t> emptySets;
for (auto& sittersForValue : _sitters) {
_device->removeSemaphore(&sitter->_blocker);
sittersForValue.second.erase(sitter);
// Can't destroy while iterating...
if (sittersForValue.second.empty()) {
@ -288,12 +302,16 @@ void MVKFence::addSitter(MVKFenceSitter* fenceSitter) {
// Ensure each fence only added once to each fence sitter
auto addRslt = _fenceSitters.insert(fenceSitter); // pair with second element true if was added
if (addRslt.second) { fenceSitter->await(); }
if (addRslt.second) {
_device->addSemaphore(&fenceSitter->_blocker);
fenceSitter->await();
}
}
// Detaches the given sitter from this fence: its blocker is no longer tracked by the
// device for wake-up on device loss, and the sitter is dropped from this fence's set.
void MVKFence::removeSitter(MVKFenceSitter* fenceSitter) {
	std::lock_guard<std::mutex> sitterGuard(_lock);
	auto* blocker = &fenceSitter->_blocker;
	_device->removeSemaphore(blocker);
	_fenceSitters.erase(fenceSitter);
}
@ -397,7 +415,11 @@ void MVKEventEmulated::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, bool status
}
// Unless _inlineSignalStatus is set, waits on this event's blocker from the calling
// thread. The blocker is registered with the device for the duration of the wait so
// that MVKDevice::markLost() can release it, and is unregistered afterwards.
// The command buffer argument is not used here.
void MVKEventEmulated::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) {
	if ( !_inlineSignalStatus ) {
		_device->addSemaphore(&_blocker);
		_blocker.wait();
		_device->removeSemaphore(&_blocker);
	}
}
MVKEventEmulated::MVKEventEmulated(MVKDevice* device, const VkEventCreateInfo* pCreateInfo) :
@ -421,6 +443,10 @@ VkResult mvkWaitForFences(MVKDevice* device,
VkBool32 waitAll,
uint64_t timeout) {
if (device->getConfigurationResult() != VK_SUCCESS) {
return device->getConfigurationResult();
}
VkResult rslt = VK_SUCCESS;
MVKFenceSitter fenceSitter(waitAll);
@ -428,7 +454,12 @@ VkResult mvkWaitForFences(MVKDevice* device,
((MVKFence*)pFences[i])->addSitter(&fenceSitter);
}
if ( !fenceSitter.wait(timeout) ) { rslt = VK_TIMEOUT; }
bool finished = fenceSitter.wait(timeout);
if (device->getConfigurationResult() != VK_SUCCESS) {
rslt = device->getConfigurationResult();
} else if ( !finished ) {
rslt = VK_TIMEOUT;
}
for (uint32_t i = 0; i < fenceCount; i++) {
((MVKFence*)pFences[i])->removeSitter(&fenceSitter);
@ -442,6 +473,10 @@ VkResult mvkWaitSemaphores(MVKDevice* device,
const VkSemaphoreWaitInfo* pWaitInfo,
uint64_t timeout) {
if (device->getConfigurationResult() != VK_SUCCESS) {
return device->getConfigurationResult();
}
VkResult rslt = VK_SUCCESS;
bool waitAny = mvkIsAnyFlagEnabled(pWaitInfo->flags, VK_SEMAPHORE_WAIT_ANY_BIT);
bool alreadySignaled = false;
@ -455,7 +490,12 @@ VkResult mvkWaitSemaphores(MVKDevice* device,
}
}
if ( !alreadySignaled && !fenceSitter.wait(timeout) ) { rslt = VK_TIMEOUT; }
bool finished = alreadySignaled || fenceSitter.wait(timeout);
if (device->getConfigurationResult() != VK_SUCCESS) {
rslt = device->getConfigurationResult();
} else if ( !finished ) {
rslt = VK_TIMEOUT;
}
for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
((MVKTimelineSemaphore*)pWaitInfo->pSemaphores[i])->unregisterWait(&fenceSitter);

View File

@ -653,8 +653,11 @@ MVK_PUBLIC_SYMBOL VkResult vkGetFenceStatus(
VkFence fence) {
MVKTraceVulkanCallStart();
MVKFence* mvkFence = (MVKFence*)fence;
VkResult rslt = mvkFence->getIsSignaled() ? VK_SUCCESS : VK_NOT_READY;
VkResult rslt = MVKDevice::getMVKDevice(device)->getConfigurationResult();
if (rslt == VK_SUCCESS) {
MVKFence* mvkFence = (MVKFence*)fence;
rslt = mvkFence->getIsSignaled() ? VK_SUCCESS : VK_NOT_READY;
}
MVKTraceVulkanCallEnd();
return rslt;
}
@ -732,8 +735,11 @@ MVK_PUBLIC_SYMBOL VkResult vkGetEventStatus(
VkEvent event) {
MVKTraceVulkanCallStart();
MVKEvent* mvkEvent = (MVKEvent*)event;
VkResult rslt = mvkEvent->isSet() ? VK_EVENT_SET : VK_EVENT_RESET;
VkResult rslt = MVKDevice::getMVKDevice(device)->getConfigurationResult();
if (rslt == VK_SUCCESS) {
MVKEvent* mvkEvent = (MVKEvent*)event;
rslt = mvkEvent->isSet() ? VK_EVENT_SET : VK_EVENT_RESET;
}
MVKTraceVulkanCallEnd();
return rslt;
}
@ -2447,10 +2453,13 @@ MVK_PUBLIC_SYMBOL VkResult vkCreateSwapchainKHR(
MVKTraceVulkanCallStart();
MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
MVKSwapchain* mvkSwpChn = mvkDev->createSwapchain(pCreateInfo, pAllocator);
*pSwapchain = (VkSwapchainKHR)(mvkSwpChn);
VkResult rslt = mvkSwpChn->getConfigurationResult();
if (rslt < 0) { *pSwapchain = VK_NULL_HANDLE; mvkDev->destroySwapchain(mvkSwpChn, pAllocator); }
VkResult rslt = mvkDev->getConfigurationResult();
if (rslt == VK_SUCCESS) {
MVKSwapchain* mvkSwpChn = mvkDev->createSwapchain(pCreateInfo, pAllocator);
*pSwapchain = (VkSwapchainKHR)(mvkSwpChn);
rslt = mvkSwpChn->getConfigurationResult();
if (rslt < 0) { *pSwapchain = VK_NULL_HANDLE; mvkDev->destroySwapchain(mvkSwpChn, pAllocator); }
}
MVKTraceVulkanCallEnd();
return rslt;
}
@ -2670,10 +2679,13 @@ MVK_PUBLIC_SYMBOL VkResult vkGetSemaphoreCounterValueKHR(
uint64_t* pValue) {
MVKTraceVulkanCallStart();
auto* mvkSem4 = (MVKTimelineSemaphore*)semaphore;
*pValue = mvkSem4->getCounterValue();
VkResult rslt = MVKDevice::getMVKDevice(device)->getConfigurationResult();
if (rslt == VK_SUCCESS) {
auto* mvkSem4 = (MVKTimelineSemaphore*)semaphore;
*pValue = mvkSem4->getCounterValue();
}
MVKTraceVulkanCallEnd();
return VK_SUCCESS;
return rslt;
}
MVK_PUBLIC_SYMBOL VkResult vkSignalSemaphoreKHR(