Skip to content

Commit 38c4b52

Browse files
Debugger: Clear exceptions before resuming interrupted threads (#80)
Signed-off-by: Jitendra Sharma <[email protected]>
1 parent c5ebca2 commit 38c4b52

File tree

6 files changed

+214
-25
lines changed

6 files changed

+214
-25
lines changed

conformance_tests/tools/debug/src/child_debugger.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,11 @@ int main(int argc, char **argv) {
166166
LOG_INFO << "[Child Debugger] Sending interrupt";
167167
lzt::debug_interrupt(debugSession, device_thread);
168168

169-
lzt::debug_read_event(debugSession, debug_event, eventsTimeoutMS, false);
170-
LOG_INFO << "[Child Debugger] received event: "
171-
<< lzt::debuggerEventTypeString[debug_event.type];
169+
std::vector<ze_device_thread_t> stopped_threads;
170+
if (!find_stopped_threads(debugSession, device, device_thread, true,
171+
stopped_threads)) {
172172

173-
if (debug_event.type != ZET_DEBUG_EVENT_TYPE_THREAD_STOPPED) {
173+
LOG_INFO << "[Child Debugger] Did not find stopped threads";
174174
exit(1);
175175
}
176176

conformance_tests/tools/debug/src/test_debug.cpp

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -990,11 +990,14 @@ TEST_F(
990990
std::mutex module_load_mutex;
991991
std::condition_variable module_load_cv;
992992
bool module_loaded = false;
993+
std::mutex interrupt_sent_mutex;
994+
std::condition_variable interrupt_sent_cv;
995+
bool interrupt_sent = false;
993996
void read_and_verify_events_debugger_thread(
997+
const zet_device_handle_t &device,
994998
const zet_debug_session_handle_t &debug_session, uint64_t *gpu_buffer_va) {
995999

9961000
LOG_INFO << "[Debugger] Event Read Thread starting...";
997-
zet_debug_event_t debug_event;
9981001

9991002
std::vector<zet_debug_event_type_t> expectedEvents = {
10001003
ZET_DEBUG_EVENT_TYPE_PROCESS_ENTRY, ZET_DEBUG_EVENT_TYPE_MODULE_LOAD};
@@ -1010,10 +1013,22 @@ void read_and_verify_events_debugger_thread(
10101013
module_load_cv.notify_one();
10111014
}
10121015

1013-
lzt::debug_read_event(debug_session, debug_event, eventsTimeoutMS, false);
1014-
LOG_INFO << "[Debugger] received event: "
1015-
<< lzt::debuggerEventTypeString[debug_event.type];
1016-
ASSERT_EQ(debug_event.type, ZET_DEBUG_EVENT_TYPE_THREAD_STOPPED);
1016+
LOG_INFO << "[Debugger] Event read thread waiting for interrupt to be sent";
1017+
std::unique_lock<std::mutex> lk(interrupt_sent_mutex);
1018+
interrupt_sent_cv.wait(lk, [] { return interrupt_sent; });
1019+
lk.unlock();
1020+
1021+
ze_device_thread_t device_thread;
1022+
device_thread.slice = UINT32_MAX;
1023+
device_thread.subslice = UINT32_MAX;
1024+
device_thread.eu = UINT32_MAX;
1025+
device_thread.thread = UINT32_MAX;
1026+
std::vector<ze_device_thread_t> stopped_threads;
1027+
1028+
if (!find_stopped_threads(debug_session, device, device_thread, true,
1029+
stopped_threads)) {
1030+
LOG_INFO << "[Debugger] Did not find stopped threads";
1031+
}
10171032

10181033
// write to kernel buffer to signal to application to end
10191034
zet_debug_memory_space_desc_t memory_space_desc = {};
@@ -1024,12 +1039,17 @@ void read_and_verify_events_debugger_thread(
10241039

10251040
uint8_t *buffer = new uint8_t[1];
10261041
buffer[0] = 0;
1027-
auto thread = debug_event.info.thread.thread;
1028-
LOG_INFO << "[Debugger] Writing to address: " << std::hex << *gpu_buffer_va;
1029-
lzt::debug_write_memory(debug_session, thread, memory_space_desc, 1, buffer);
1030-
delete[] buffer;
1031-
print_thread("Resuming device thread ", thread, DEBUG);
1032-
lzt::debug_resume(debug_session, thread);
1042+
1043+
for (auto &stopped_thread : stopped_threads) {
1044+
uint8_t *buffer = new uint8_t[1];
1045+
buffer[0] = 0;
1046+
LOG_INFO << "[Debugger] Writing to address: " << std::hex << *gpu_buffer_va;
1047+
lzt::debug_write_memory(debug_session, stopped_thread, memory_space_desc, 1,
1048+
buffer);
1049+
print_thread("Resuming device thread ", stopped_thread, DEBUG);
1050+
lzt::debug_resume(debug_session, stopped_thread);
1051+
delete[] buffer;
1052+
}
10331053

10341054
LOG_INFO << "[Debugger] Waiting for module unload and process exit events";
10351055

@@ -1063,7 +1083,7 @@ void zetDebugEventReadTest::run_read_events_in_separate_thread_test(
10631083

10641084
uint64_t gpu_buffer_va = 0;
10651085
std::thread event_read_thread(read_and_verify_events_debugger_thread,
1066-
debugSession, &gpu_buffer_va);
1086+
device, debugSession, &gpu_buffer_va);
10671087
synchro->wait_for_application_signal();
10681088
if (!synchro->get_app_gpu_buffer_address(gpu_buffer_va)) {
10691089
FAIL() << "[Debugger] Could not get a valid GPU buffer VA";
@@ -1081,11 +1101,18 @@ void zetDebugEventReadTest::run_read_events_in_separate_thread_test(
10811101
module_load_cv.wait(lk, [] { return module_loaded; });
10821102
lk.unlock();
10831103
LOG_INFO << "[Debugger] Main thread sleeping to wait for device threads";
1084-
std::this_thread::sleep_for(std::chrono::seconds(6));
1104+
std::this_thread::sleep_for(std::chrono::seconds(60));
10851105

10861106
LOG_INFO << "[Debugger] Sending interrupt from main thread";
10871107
lzt::debug_interrupt(debugSession, device_thread);
10881108

1109+
{
1110+
// we have sent the interrupt
1111+
std::lock_guard<std::mutex> lk(interrupt_sent_mutex);
1112+
interrupt_sent = true;
1113+
interrupt_sent_cv.notify_one();
1114+
}
1115+
10891116
LOG_INFO << "[Debugger] Waiting for application to finish";
10901117
debugHelper.wait();
10911118

@@ -1250,7 +1277,7 @@ void zetDebugReadWriteRegistersTest::run_read_write_registers_test(
12501277

12511278
LOG_INFO << "[Debugger] Stopping all device threads";
12521279
// give time to app to launch the kernel
1253-
std::this_thread::sleep_for(std::chrono::seconds(6));
1280+
std::this_thread::sleep_for(std::chrono::seconds(60));
12541281

12551282
lzt::debug_interrupt(debugSession, device_threads);
12561283

@@ -1399,7 +1426,7 @@ void zetDebugThreadControlTest::SetUpThreadControl(ze_device_handle_t &device,
13991426

14001427
LOG_INFO << "[Debugger] Interrupting all threads";
14011428
// give time to app to launch the kernel
1402-
std::this_thread::sleep_for(std::chrono::seconds(6));
1429+
std::this_thread::sleep_for(std::chrono::seconds(60));
14031430

14041431
lzt::debug_interrupt(debugSession, thread);
14051432
stopped_threads = {};

conformance_tests/tools/debug/src/test_debug_utils.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,7 @@ get_stopped_threads(const zet_debug_session_handle_t &debug_session,
522522

523523
if (read_register(debug_session, device_thread, regset_properties[2],
524524
false)) {
525+
lzt::clear_exceptions(device, debug_session, device_thread);
525526
threads.push_back(device_thread);
526527
}
527528
}
@@ -533,7 +534,7 @@ get_stopped_threads(const zet_debug_session_handle_t &debug_session,
533534
return threads;
534535
}
535536

536-
// wait for stopped thread event and retunrn stopped threads
537+
// wait for stopped thread event and return stopped threads
537538
bool find_stopped_threads(const zet_debug_session_handle_t &debugSession,
538539
const ze_device_handle_t &device,
539540
ze_device_thread_t thread, bool checkEvent,
@@ -679,10 +680,11 @@ void wait_for_events_interrupt_and_resume(
679680
LOG_INFO << "[Debugger] Sending interrupt";
680681
lzt::debug_interrupt(debugSession, device_thread);
681682

682-
lzt::debug_read_event(debugSession, debug_event, eventsTimeoutMS, false);
683-
LOG_INFO << "[Debugger] received event: "
684-
<< lzt::debuggerEventTypeString[debug_event.type];
685-
ASSERT_EQ(debug_event.type, ZET_DEBUG_EVENT_TYPE_THREAD_STOPPED);
683+
std::vector<ze_device_thread_t> stopped_threads;
684+
if (!find_stopped_threads(debugSession, device, device_thread, true,
685+
stopped_threads)) {
686+
FAIL() << "[Debugger] Did not find stopped threads";
687+
}
686688

687689
// write to kernel buffer to signal to application to end
688690
zet_debug_memory_space_desc_t memory_space_desc = {};
@@ -758,7 +760,7 @@ void zetDebugMemAccessTest::run_read_write_module_and_memory_test(
758760

759761
LOG_INFO << "[Debugger] Interrupting all threads";
760762
// give time to app to launch the kernel
761-
std::this_thread::sleep_for(std::chrono::seconds(6));
763+
std::this_thread::sleep_for(std::chrono::seconds(60));
762764

763765
lzt::debug_interrupt(debugSession, thread);
764766
std::vector<ze_device_thread_t> stopped_threads;
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Copyright (C) 2021-2024 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#ifndef _ZET_INTEL_GPU_DEBUG_H
9+
#define _ZET_INTEL_GPU_DEBUG_H
10+
#if defined(__cplusplus)
11+
#pragma once
12+
#endif
13+
14+
#if defined(__cplusplus)
15+
extern "C" {
16+
#endif
17+
18+
#ifndef ZET_INTEL_GPU_DEBUG_MAJOR
19+
#define ZET_INTEL_GPU_DEBUG_MAJOR 1
20+
#endif // !ZET_INTEL_GPU_DEBUG_MAJOR
21+
22+
#ifndef ZET_INTEL_GPU_DEBUG_MINOR
23+
#define ZET_INTEL_GPU_DEBUG_MINOR 1
24+
#endif //! ZET_INTEL_GPU_DEBUG_MINOR
25+
26+
#if ZET_INTEL_GPU_DEBUG_MAJOR == 1
27+
///////////////////////////////////////////////////////////////////////////////
28+
/// @brief Supported device-specific register set types.
29+
typedef enum _zet_debug_regset_type_intel_gpu_t {
30+
#if ZET_INTEL_GPU_DEBUG_MINOR >= 0
31+
ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU = 0, ///< An invalid register set
32+
ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU = 1, ///< The general purpose register set
33+
ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU = 2, ///< The address register set
34+
ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU = 3, ///< The flag register set
35+
ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU = 4, ///< The channel enable register set
36+
ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU = 5, ///< The status register set
37+
ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU = 6, ///< The control register set
38+
ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU = 7, ///< The thread dependency register set
39+
ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU = 8, ///< The accumulator register set
40+
ZET_DEBUG_REGSET_TYPE_MME_INTEL_GPU = 9, ///< The mme register set
41+
ZET_DEBUG_REGSET_TYPE_SP_INTEL_GPU = 10, ///< The stack pointer register set
42+
ZET_DEBUG_REGSET_TYPE_SBA_INTEL_GPU = 11, ///< The state base address register set
43+
ZET_DEBUG_REGSET_TYPE_DBG_INTEL_GPU = 12, ///< The debug register set
44+
ZET_DEBUG_REGSET_TYPE_FC_INTEL_GPU = 13, ///< The flow control register set
45+
#if ZET_INTEL_GPU_DEBUG_MINOR >= 1
46+
ZET_DEBUG_REGSET_TYPE_MSG_INTEL_GPU = 14, ///< The msg register set
47+
ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU = 15, ///< System Routine mode flags register set
48+
ZET_DEBUG_REGSET_TYPE_DEBUG_SCRATCH_INTEL_GPU = 16, ///< Debug Scratch register set
49+
ZET_DEBUG_REGSET_TYPE_THREAD_SCRATCH_INTEL_GPU = 17, ///< Thread Scratch register set
50+
ZET_DEBUG_REGSET_TYPE_SCALAR_INTEL_GPU = 18, ///< The scalar register set
51+
#endif // ZET_INTEL_GPU_DEBUG_MINOR >= 1
52+
ZET_DEBUG_REGSET_TYPE_FORCE_UINT32 = 0x7fffffff
53+
#endif // ZET_INTEL_GPU_DEBUG_MINOR >= 0
54+
} zet_debug_regset_type_intel_gpu_t;
55+
56+
#if ZET_INTEL_GPU_DEBUG_MINOR >= 1
57+
////////////////////////////////////////////////////////////////////////////////
58+
/// @brief ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU flag values
59+
typedef enum _zet_debug_mode_flags_intel_gpu_t {
60+
ZET_DEBUG_MODE_FLAG_HEAPLESS = 1 ///< Bit set indicates heapless mode
61+
} zet_debug_mode_flags_intel_gpu_t;
62+
63+
////////////////////////////////////////////////////////////////////////////////
64+
/// @brief ZET_DEBUG_REGSET_TYPE_DEBUG_SCRATCH_INTEL_GPU register layout
65+
typedef enum _zet_debug_debug_scratch_intel_gpu_t {
66+
ZET_DEBUG_DEBUG_SCRATCH_ADDRESS = 0, ///< VA of scratch area for debugger code injection.
67+
///< VA is in default address space of device
68+
ZET_DEBUG_DEBUG_SCRATCH_SIZE = 1, ///< Size of scratch area in bytes
69+
} zet_debug_debug_scratch_intel_gpu_t;
70+
#endif // ZET_INTEL_GPU_DEBUG_MINOR >= 1
71+
////////////////////////////////////////////////////////////////////////////////
72+
/// @brief SBA register set layout
73+
typedef enum _zet_debug_sba_intel_gpu_t {
74+
ZET_DEBUG_SBA_GENERAL_STATE_INTEL_GPU = 0, ///< GeneralStateBaseAddress
75+
ZET_DEBUG_SBA_SURFACE_STATE_INTEL_GPU = 1, ///< SurfaceStateBaseAddress
76+
ZET_DEBUG_SBA_DYNAMIC_STATE_INTEL_GPU = 2, ///< DynamicStateBaseAddress
77+
ZET_DEBUG_SBA_INDIRECT_OBJECT_INTEL_GPU = 3, ///< IndirectObjectBaseAddress
78+
ZET_DEBUG_SBA_INSTRUCTION_INTEL_GPU = 4, ///< InstructionBaseAddress
79+
ZET_DEBUG_SBA_BINDLESS_SURFACE_INTEL_GPU = 5, ///< BindlessSurfaceStateBaseAddress
80+
ZET_DEBUG_SBA_BINDLESS_SAMPLER_INTEL_GPU = 6, ///< BindlessSamplerStateBaseAddress
81+
ZET_DEBUG_SBA_BINDING_TABLE_INTEL_GPU = 7, ///< BindingTableStateBaseAddress
82+
ZET_DEBUG_SBA_SCRATCH_SPACE_INTEL_GPU = 8, ///< ScratchSpaceBaseAddress
83+
ZET_DEBUG_SBA_COUNT_INTEL_GPU = 9 ///< Number of registers in SBA register set
84+
} zet_debug_sba_intel_gpu_t;
85+
#endif // ZET_INTEL_GPU_DEBUG_MAJOR == 1
86+
87+
#if defined(__cplusplus)
88+
} // extern "C"
89+
#endif
90+
91+
#endif // _ZET_INTEL_GPU_DEBUG_H

utils/test_harness/tools/include/test_harness_debug.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ void debug_interrupt(const zet_debug_session_handle_t &debug_session,
4242
void debug_resume(const zet_debug_session_handle_t &debug_session,
4343
const ze_device_thread_t &device_thread);
4444

45+
void clear_exceptions(const ze_device_handle_t &device,
46+
const zet_debug_session_handle_t &debug_session,
47+
const ze_device_thread_t &device_thread);
48+
4549
void debug_read_memory(const zet_debug_session_handle_t &debug_session,
4650
const ze_device_thread_t &device_thread,
4751
const zet_debug_memory_space_desc_t &desc, size_t size,

utils/test_harness/tools/src/test_harness_debug.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "test_harness/test_harness.hpp"
1212
#include <level_zero/ze_api.h>
1313
#include "utils/utils.hpp"
14+
#include "test_harness/zet_intel_gpu_debug.h"
1415

1516
namespace lzt = level_zero_tests;
1617

@@ -141,6 +142,70 @@ void debug_resume(const zet_debug_session_handle_t &debug_session,
141142
EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugResume(debug_session, device_thread));
142143
}
143144

145+
// Writes every field of a register set properties structure to the debug log,
// one field per log line, each prefixed with "[Debugger] Reg Set".
//
// regSet: the register set properties entry to dump.
void printRegSetProperties(zet_debug_regset_properties_t regSet) {
  // Identification of the register set.
  LOG_DEBUG << "[Debugger] Reg Set Type: " << regSet.type;
  LOG_DEBUG << "[Debugger] Reg Set Version: " << regSet.version;

  // Flag words reported for the register set.
  LOG_DEBUG << "[Debugger] Reg Set GeneralFlags: " << regSet.generalFlags;
  LOG_DEBUG << "[Debugger] Reg Set deviceFlags: " << regSet.deviceFlags;

  // Number of registers and the size of a single register.
  LOG_DEBUG << "[Debugger] Reg Set count: " << regSet.count;
  LOG_DEBUG << "[Debugger] Reg Set bit size: " << regSet.bitSize;
  LOG_DEBUG << "[Debugger] Reg Set byte size: " << regSet.byteSize;
}
156+
157+
// Looks up the properties of the register set of a given type on a device.
//
// The register set properties are enumerated with
// zetDebugGetRegisterSetProperties (first a count query, then the actual
// fetch) and scanned for the first entry whose `type` field equals `type`.
// The matching entry is also dumped through printRegSetProperties.
//
// device: device whose register sets are enumerated.
// type:   register set type to search for.
// reg:    receives a copy of the matching entry; left unmodified when no
//         entry matches or when either driver query fails.
//
// Returns true when a matching register set was found, false otherwise
// (including when the driver reports an error or zero register sets).
bool get_register_set_props(ze_device_handle_t device,
                            zet_debug_regset_type_intel_gpu_t type,
                            zet_debug_regset_properties_t &reg) {
  // Count query: a null output pointer only fills in nRegSets.
  uint32_t nRegSets = 0;
  if (zetDebugGetRegisterSetProperties(device, &nRegSets, nullptr) !=
          ZE_RESULT_SUCCESS ||
      nRegSets == 0) {
    return false;
  }

  zet_debug_regset_properties_t *pRegSets =
      new zet_debug_regset_properties_t[nRegSets];
  // Every entry needs its structure type set before it is handed to the
  // driver; the remaining members are zero-initialized by the aggregate init.
  for (uint32_t i = 0; i < nRegSets; i++) {
    pRegSets[i] = {ZET_STRUCTURE_TYPE_DEBUG_REGSET_PROPERTIES, nullptr};
  }

  bool found = false;
  // Only inspect the entries when the driver actually filled them in;
  // otherwise they hold nothing but the initialization above.
  if (zetDebugGetRegisterSetProperties(device, &nRegSets, pRegSets) ==
      ZE_RESULT_SUCCESS) {
    for (uint32_t i = 0; i < nRegSets; i++) {
      if (pRegSets[i].type == type) {
        printRegSetProperties(pRegSets[i]);
        reg = pRegSets[i];
        found = true;
        break;
      }
    }
  }

  // Single release point shared by the success and failure paths.
  delete[] pRegSets;
  return found;
}
182+
183+
void clear_exceptions(const ze_device_handle_t &device,
184+
const zet_debug_session_handle_t &debug_session,
185+
const ze_device_thread_t &device_thread) {
186+
size_t reg_size_in_bytes = 0;
187+
188+
zet_debug_regset_properties_t cr_reg_prop;
189+
ASSERT_TRUE(get_register_set_props(device, ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU,
190+
cr_reg_prop));
191+
192+
reg_size_in_bytes = cr_reg_prop.count * cr_reg_prop.byteSize;
193+
uint8_t *cr_values = new uint8_t[reg_size_in_bytes];
194+
uint32_t *uint32_values = (uint32_t *)cr_values;
195+
196+
ASSERT_EQ(zetDebugReadRegisters(debug_session, device_thread,
197+
ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, 0,
198+
cr_reg_prop.count, cr_values),
199+
ZE_RESULT_SUCCESS);
200+
201+
uint32_values[1] &=
202+
~((1 << 26) | (1 << 30));
203+
ASSERT_EQ(zetDebugWriteRegisters(debug_session, device_thread,
204+
ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, 0,
205+
cr_reg_prop.count, cr_values),
206+
ZE_RESULT_SUCCESS);
207+
}
208+
144209
void debug_read_memory(const zet_debug_session_handle_t &debug_session,
145210
const ze_device_thread_t &device_thread,
146211
const zet_debug_memory_space_desc_t &desc, size_t size,

0 commit comments

Comments
 (0)