Skip to content

Commit d8c87cd

Browse files
cdl: Control timeline semaphore crash detection
1 parent 533c9cc commit d8c87cd

File tree

4 files changed

+78
-32
lines changed

4 files changed

+78
-32
lines changed

src/cdl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ static const std::unordered_map<std::string, DumpShaders> kDumpShadersValues{
8282
{"all", DumpShaders::kAll},
8383
};
8484

85+
const char* kTriggerTimelineSemaphore = "trigger_timeline_semaphore";
86+
const char* kTriggerWatchdogTimeout = "trigger_watchdog_timeout";
8587
const char* kWatchdogTimeout = "watchdog_timeout_ms";
8688
const char* kDumpAllCommandBuffers = "dump_all_command_buffers";
8789
const char* kTrackSemaphores = "track_semaphores";
@@ -129,6 +131,8 @@ Settings::Settings(VkuLayerSettingSet layer_settings, Logger& log) {
129131
GetEnumVal<DumpCommands>(log, layer_settings, settings::kDumpCommands, dump_commands,
130132
settings::kDumpCommandsValues);
131133
GetEnumVal<DumpShaders>(log, layer_settings, settings::kDumpShaders, dump_shaders, settings::kDumpShadersValues);
134+
GetEnvVal<bool>(layer_settings, settings::kTriggerTimelineSemaphore, trigger_timeline_semaphore);
135+
GetEnvVal<bool>(layer_settings, settings::kTriggerWatchdogTimeout, trigger_watchdog_timeout);
132136
GetEnvVal<uint64_t>(layer_settings, settings::kWatchdogTimeout, watchdog_timer_ms);
133137
GetEnvVal<bool>(layer_settings, settings::kTrackSemaphores, track_semaphores);
134138
GetEnvVal<bool>(layer_settings, settings::kTraceAllSemaphores, trace_all_semaphores);
@@ -575,7 +579,7 @@ const VkDeviceCreateInfo* Context::GetModifiedDeviceCreateInfo(VkPhysicalDevice
575579
}
576580
} else {
577581
Log().Warning(
578-
"No VK_EXT_device_address_binding_report extension, DeviceAddress information will not be available.");
582+
"No VK_KHR_timeline_semaphore extension, No tracking of queue progress semaphore possible preventing early device lost detection.");
579583
}
580584

581585
// save the raw ptr before std::move of the std::unique_ptr

src/cdl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ struct Settings {
108108
bool trace_all_semaphores{false};
109109
bool trace_all{false};
110110
bool sync_after_commands{false};
111+
bool trigger_timeline_semaphore{true};
112+
bool trigger_watchdog_timeout{true};
111113
uint64_t watchdog_timer_ms{0};
112114
};
113115

src/crash_diagnostic_layer.json.in

Lines changed: 66 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,79 @@
5050
],
5151
"features": {
5252
"settings": [
53+
{
54+
"key": "triggers",
55+
"label": "Crash Dump Triggers",
56+
"description": "Additional crash detection methods beyond a VK_ERROR_DEVICE_LOST error",
57+
"type": "GROUP",
58+
"expanded": true,
59+
"settings": [
60+
{
61+
"key": "trigger_timeline_semaphore",
62+
"env": "CDL_TRIGGER_TIMELINE_SEMAPHORE",
63+
"label": "Timeline Semaphore",
64+
"description": "Use a timeline semaphore to detect a lost device before entering a host wait that might never finish.",
65+
"detailed": "This feature requires Vulkan timeline semaphore support, either through Vulkan 1.2 or VK_KHR_timeline_semaphore. Due to the VK_EXT_device_fault specification, using this feature will generate VUID-vkGetDeviceFaultInfoEXT-device-07336 errors on a crash.",
66+
"type": "BOOL",
67+
"default": true
68+
},
69+
{
70+
"key": "trigger_watchdog_timeout",
71+
"env": "CDL_TRIGGER_WATCHDOG_TIMEOUT",
72+
"label": "Watchdog Timeout",
73+
"description": "This will trigger if the application fails to submit new commands within a set time (in milliseconds) and a log will be created as if a lost device error was encountered.",
74+
"type": "BOOL",
75+
"default": true,
76+
"settings": [
77+
{
78+
"key": "watchdog_timeout_ms",
79+
"env": "CDL_WATCHDOG_TIMEOUT_MS",
80+
"label": "Watchdog Timeout (ms)",
81+
"description": "If set to a non-zero number, a watchdog thread will be created.",
82+
"type": "INT",
83+
"default": 30000,
84+
"range": {
85+
"min": 1
86+
},
87+
"dependence": {
88+
"mode": "ALL",
89+
"settings": [
90+
{
91+
"key": "trigger_watchdog_timeout",
92+
"value": true
93+
}
94+
]
95+
}
96+
}
97+
]
98+
}
99+
]
100+
},
53101
{
54102
"key": "state",
55103
"label": "State Tracking",
56104
"description": "Control of state tracking.",
57105
"type": "GROUP",
58106
"expanded": true,
59107
"settings": [
108+
{
109+
"key": "instrument_all_commands",
110+
"env": "CDL_INSTRUMENT_ALL_COMMANDS",
111+
"label": "Add markers on each Vulkan command",
112+
"description": "Include completion markers around every vulkan command",
113+
"detailed": "This may allow more accurate fault locations at the expense of larger command buffers and reduced performance",
114+
"type": "BOOL",
115+
"default": false
116+
},
117+
{
118+
"key": "track_semaphores",
119+
"env": "CDL_TRACK_SEMAPHORES",
120+
"label": "Track semaphores",
121+
"description": "Include semaphore state reporting in runtime logging and dump files",
122+
"detailed": "VK_AMD_buffer_marker is required for this feature",
123+
"type": "BOOL",
124+
"default": true
125+
},
60126
{
61127
"key": "sync_after_commands",
62128
"env": "CDL_SYNC_AFTER_COMMANDS",
@@ -109,36 +175,6 @@
109175
}
110176
}
111177
]
112-
},
113-
{
114-
"key": "instrument_all_commands",
115-
"env": "CDL_INSTRUMENT_ALL_COMMANDS",
116-
"label": "Add markers on each Vulkan command",
117-
"description": "Include completion markers around every vulkan command",
118-
"detailed": "This may allow more accuratute fault locations at the expense of larger command buffers and reduced performance",
119-
"type": "BOOL",
120-
"default": false
121-
},
122-
{
123-
"key": "track_semaphores",
124-
"env": "CDL_TRACK_SEMAPHORES",
125-
"label": "Track semaphores",
126-
"description": "Include semaphore state reporting in runtime logging and dump files",
127-
"detailed": "VK_AMD_buffer_marker is required for this feature",
128-
"type": "BOOL",
129-
"default": true
130-
},
131-
{
132-
"key": "watchdog_timeout_ms",
133-
"env": "CDL_WATCHDOG_TIMEOUT_MS",
134-
"label": "Watchdog timeout (ms)",
135-
"description": "If set to a non-zero number, a watchdog thread will be created.",
136-
"detailed": "This will trigger if the application fails to submit new commands within a set time (in milliseconds) and a log will be created as if the a lost device error was encountered.",
137-
"type": "INT",
138-
"default": 30000,
139-
"range": {
140-
"min": 1
141-
}
142178
}
143179
]
144180
},

src/device.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,10 @@ std::string Device::GetObjectName(uint64_t handle, HandleDebugNamePreference han
475475
std::string Device::GetObjectInfo(uint64_t handle) const { return object_info_db_.GetObjectInfo(handle); }
476476

477477
bool Device::UpdateIdleState() {
478+
if (extensions_present_.khr_timeline_semaphore) {
479+
return true;
480+
}
481+
478482
bool result = true;
479483
auto queues = GetAllQueues();
480484
for (auto& q : queues) {
@@ -586,7 +590,7 @@ void Device::DumpDeviceFaultInfo(YAML::Emitter& os) const {
586590
return;
587591
}
588592
auto fault_counts = vku::InitStruct<VkDeviceFaultCountsEXT>();
589-
VkResult result = Dispatch().GetDeviceFaultInfoEXT(vk_device_, &fault_counts, nullptr);
593+
VkResult result = Dispatch().GetDeviceFaultInfoEXT(vk_device_, &fault_counts, nullptr);
590594
if (result != VK_SUCCESS) {
591595
// TODO: log
592596
return;

0 commit comments

Comments
 (0)