Skip to content

Commit 79e34c0

Browse files
committed
Add CTS for scratch register reading.
1 parent 32527cc commit 79e34c0

File tree

6 files changed

+294
-6
lines changed

6 files changed

+294
-6
lines changed

conformance_tests/tools/debug/src/test_debug.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "test_debug.hpp"
1010
#include "test_debug_utils.hpp"
11+
#include "test_harness/zet_intel_gpu_debug.h"
1112

1213
namespace lzt = level_zero_tests;
1314

@@ -1376,6 +1377,128 @@ void zetDebugReadWriteRegistersTest::run_read_write_registers_test(
13761377
}
13771378
}
13781379

1380+
void zetDebugReadWriteRegistersTest::run_read_registers_test(
1381+
std::vector<ze_device_handle_t> &devices, bool use_sub_devices) {
1382+
for (auto &device : devices) {
1383+
print_device(device);
1384+
if (!is_debug_supported(device))
1385+
continue;
1386+
1387+
synchro->clear_debugger_signal();
1388+
debugHelper = launch_process(LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH,
1389+
device, use_sub_devices);
1390+
1391+
zet_debug_event_t module_event;
1392+
attach_and_get_module_event(debugHelper.id(), synchro, device, debugSession,
1393+
module_event);
1394+
1395+
if (module_event.flags & ZET_DEBUG_EVENT_FLAG_NEED_ACK) {
1396+
LOG_DEBUG << "[Debugger] Acking event: "
1397+
<< lzt::debuggerEventTypeString[module_event.type];
1398+
lzt::debug_ack_event(debugSession, &module_event);
1399+
}
1400+
1401+
uint64_t gpu_buffer_va = 0;
1402+
synchro->wait_for_application_signal();
1403+
if (!synchro->get_app_gpu_buffer_address(gpu_buffer_va)) {
1404+
FAIL() << "[Debugger] Could not get a valid GPU buffer VA";
1405+
}
1406+
synchro->clear_application_signal();
1407+
1408+
zet_debug_memory_space_desc_t memorySpaceDesc;
1409+
memorySpaceDesc.type = ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT;
1410+
int sizeToRead = 512;
1411+
uint8_t *kernel_buffer = new uint8_t[sizeToRead];
1412+
// set buffer[0] to 0 to break the loop. See debug_loop_slm.cl
1413+
kernel_buffer[0] = 0;
1414+
memorySpaceDesc.address = gpu_buffer_va;
1415+
1416+
ze_device_thread_t device_threads = {};
1417+
device_threads.slice = UINT32_MAX;
1418+
device_threads.subslice = UINT32_MAX;
1419+
device_threads.eu = UINT32_MAX;
1420+
device_threads.thread = UINT32_MAX;
1421+
1422+
LOG_INFO << "[Debugger] Stopping all device threads";
1423+
// give time to app to launch the kernel
1424+
std::this_thread::sleep_for(std::chrono::seconds(6));
1425+
lzt::debug_interrupt(debugSession, device_threads);
1426+
1427+
std::vector<ze_device_thread_t> stopped_threads;
1428+
if (!find_stopped_threads(debugSession, device, device_threads, true,
1429+
stopped_threads)) {
1430+
delete[] kernel_buffer;
1431+
FAIL() << "[Debugger] Did not find stopped threads";
1432+
}
1433+
1434+
LOG_INFO << "[Debugger] Reading/Writing Thread Scratch Register on "
1435+
"interrupted threads";
1436+
1437+
for (auto &stopped_thread : stopped_threads) {
1438+
std::vector<zet_debug_regset_properties_t> register_set_properties =
1439+
lzt::get_register_set_properties(device);
1440+
if (lzt::is_heapless_mode(stopped_thread, device, debugSession)) {
1441+
for (auto &register_set : register_set_properties) {
1442+
if ((register_set.type ==
1443+
ZET_DEBUG_REGSET_TYPE_THREAD_SCRATCH_INTEL_GPU) &&
1444+
(register_set.generalFlags & ZET_DEBUG_REGSET_FLAG_READABLE)) {
1445+
LOG_DEBUG << "[Debugger] Register set type " << register_set.type
1446+
<< " is readable";
1447+
size_t reg_size_in_bytes =
1448+
register_set.count * register_set.byteSize;
1449+
1450+
uint64_t *thread_scratch_reg_values =
1451+
new uint64_t[reg_size_in_bytes];
1452+
ASSERT_EQ(zetDebugReadRegisters(
1453+
debugSession, stopped_thread,
1454+
ZET_DEBUG_REGSET_TYPE_DEBUG_SCRATCH_INTEL_GPU, 0,
1455+
register_set.count, thread_scratch_reg_values),
1456+
ZE_RESULT_SUCCESS);
1457+
} else {
1458+
FAIL() << "[Debugger] Register set type " << register_set.type
1459+
<< " is NOT readable";
1460+
}
1461+
if (register_set.generalFlags & ZET_DEBUG_REGSET_FLAG_WRITEABLE) {
1462+
FAIL() << "[Debugger] Register set type " << register_set.type
1463+
<< " should NOT be Writable";
1464+
} else {
1465+
LOG_INFO << "[Debugger] Register set " << register_set.type
1466+
<< " type is NOT writeable";
1467+
}
1468+
}
1469+
} else {
1470+
GTEST_SKIP() << "Test is not supported on this device";
1471+
}
1472+
}
1473+
1474+
lzt::debug_write_memory(debugSession, device_threads, memorySpaceDesc, 1,
1475+
kernel_buffer);
1476+
delete[] kernel_buffer;
1477+
1478+
LOG_INFO << "[Debugger] resuming interrupted threads";
1479+
lzt::debug_resume(debugSession, device_threads);
1480+
debugHelper.wait();
1481+
1482+
std::vector<zet_debug_event_type_t> expectedEvents = {
1483+
ZET_DEBUG_EVENT_TYPE_MODULE_UNLOAD, ZET_DEBUG_EVENT_TYPE_PROCESS_EXIT};
1484+
1485+
if (!check_events(debugSession, expectedEvents)) {
1486+
FAIL() << "[Debugger] Did not receive expected events";
1487+
}
1488+
1489+
lzt::debug_detach(debugSession);
1490+
ASSERT_EQ(debugHelper.exit_code(), 0);
1491+
}
1492+
}
1493+
1494+
// Runs the scratch-register read scenario on the devices of the default
// driver, without requesting sub-devices.
TEST_F(
    zetDebugReadWriteRegistersTest,
    GivenActiveDebugSessionWhenReadingScratchRegistersThenDataReadIsDoneSuccessfully) {
  constexpr bool use_sub_devices = false;
  auto default_driver = lzt::get_default_driver();
  auto all_devices = lzt::get_devices(default_driver);
  run_read_registers_test(all_devices, use_sub_devices);
}
1501+
13791502
TEST_F(
13801503
zetDebugReadWriteRegistersTest,
13811504
GivenActiveDebugSessionWhenReadingAndWritingRegistersThenValidDataReadAndDataWrittenSuccessfully) {

conformance_tests/tools/debug/src/test_debug.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ class zetDebugReadWriteRegistersTest : public zetDebugMemAccessTest {
255255
void TearDown() override { zetDebugMemAccessTest::TearDown(); }
256256
void run_read_write_registers_test(std::vector<ze_device_handle_t> &devices,
257257
bool use_sub_devices);
258+
void run_read_registers_test(std::vector<ze_device_handle_t> &devices,
259+
bool use_sub_devices);
258260
};
259261

260262
class zetDebugThreadControlTest : public zetDebugBaseSetup {

conformance_tests/tools/debug/src/test_debug_common.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ typedef enum {
4848
ATTACH_AFTER_MODULE_DESTROYED,
4949
LONG_RUNNING_KERNEL_INTERRUPTED,
5050
LONG_RUNNING_KERNEL_INTERRUPTED_SLM,
51+
LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH,
5152
PAGE_FAULT,
5253
MULTIPLE_THREADS,
5354
MULTIPLE_CQ,

conformance_tests/tools/debug/src/test_debug_helper.cpp

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,133 @@ void run_long_kernel(ze_context_handle_t context, ze_device_handle_t device,
583583
}
584584
}
585585

586+
// Debuggee workload for the LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH scenario.
//
// Builds the module named by options.module_name_in with debug symbols and
// the "-forcespills" vISA option (presumably so the kernel spills registers
// and therefore uses scratch space -- confirm against the IGC documentation),
// launches the "long_kernel_slm" kernel, publishes the device source buffer
// address to the debugger and waits for the kernel to finish. Afterwards the
// destination buffer is compared against the source buffer and the process
// exits with code 1 if any GoogleTest expectation failed.
//
// @param context  context used for all allocations
// @param device   device the kernel is executed on
// @param synchro  debugger <-> application synchronisation channel
// @param options  provides the SPIR-V module name (module_name_in)
void run_long_kernel_scratch(ze_context_handle_t context,
                             ze_device_handle_t device,
                             process_synchro &synchro, debug_options &options) {

  auto command_list = lzt::create_command_list(device);
  auto command_queue = lzt::create_command_queue(device);
  std::string module_name = options.module_name_in;

  std::string kernel_name = "long_kernel_slm";
  // NOTE: Not all SKUs have same SLM so can go too big.
  size_t slm_buffer_size = 512;

  synchro.wait_for_debugger_signal();
  const char *build_flags = "-g -igc_opts 'VISAOptions=-forcespills'";
  auto module =
      lzt::create_module(device, module_name, ZE_MODULE_FORMAT_IL_SPIRV,
                         build_flags /* include debug symbols*/, nullptr);

  auto kernel = lzt::create_function(module, kernel_name);
  auto size = slm_buffer_size;

  ze_kernel_properties_t kernel_properties = {
      ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES, nullptr};
  EXPECT_EQ(ZE_RESULT_SUCCESS,
            zeKernelGetProperties(kernel, &kernel_properties));

  // Integer ceiling division. The previous std::ceil(size / maxSubgroupSize)
  // truncated before rounding, and divided by zero when the (non-fatal)
  // property query above failed and left maxSubgroupSize at 0.
  const size_t max_subgroup_size = kernel_properties.maxSubgroupSize;
  const size_t threadCount =
      (max_subgroup_size == 0)
          ? 0
          : (size + max_subgroup_size - 1) / max_subgroup_size;

  LOG_INFO << "[Application] Problem size: " << size
           << ". Kernel maxSubGroupSize: " << kernel_properties.maxSubgroupSize
           << ". GPU thread count: ceil (P size/maxSubGroupSize) = "
           << threadCount;

  auto dest_buffer_d =
      lzt::allocate_device_memory(size, size, 0, 0, device, context);
  auto dest_buffer_s =
      lzt::allocate_shared_memory(size, size, 0, 0, device, context);
  auto src_buffer_d =
      lzt::allocate_device_memory(size, size, 0, 0, device, context);
  auto src_buffer_s =
      lzt::allocate_shared_memory(size, size, 0, 0, device, context);

  void *slm_output_s = lzt::allocate_shared_memory(
      slm_buffer_size, slm_buffer_size, 0, 0, device, context);

  unsigned long loop_max = 1000000000;

  auto loop_counter_d = lzt::allocate_device_memory(
      loop_counter_alloc_size, loop_counter_alloc_size, 0, 0, device, context);
  auto loop_counter_s = lzt::allocate_shared_memory(
      loop_counter_alloc_size, loop_counter_alloc_size, 0, 0, device, context);

  LOG_DEBUG << "[Application] Allocated source device memory at: " << std::hex
            << src_buffer_d;
  LOG_DEBUG << "[Application] Allocated destination device memory at: "
            << std::hex << dest_buffer_d;

  std::memset(dest_buffer_s, 1, size);
  std::memset(src_buffer_s, 0, size);
  std::memset(loop_counter_s, 0, loop_counter_alloc_size);
  // Fill the source with the pattern 1, 2, ..., 255, 0, 1, ...
  for (size_t i = 0; i < size; i++) {
    static_cast<uint8_t *>(src_buffer_s)[i] =
        static_cast<uint8_t>((i + 1) & 0xFF);
  }

  lzt::set_argument_value(kernel, 0, sizeof(dest_buffer_d), &dest_buffer_d);
  lzt::set_argument_value(kernel, 1, sizeof(src_buffer_d), &src_buffer_d);
  lzt::set_argument_value(kernel, 2, sizeof(loop_counter_d), &loop_counter_d);
  lzt::set_argument_value(kernel, 3, sizeof(loop_max), &loop_max);
  lzt::set_argument_value(kernel, 4, sizeof(slm_output_s), &slm_output_s);

  uint32_t group_size_x = 1;
  uint32_t group_size_y = 1;
  uint32_t group_size_z = 1;
  lzt::suggest_group_size(kernel, size, 1, 1, group_size_x, group_size_y,
                          group_size_z);
  lzt::set_group_size(kernel, group_size_x, 1, 1);
  ze_group_count_t group_count = {};
  group_count.groupCountX = size / group_size_x;
  group_count.groupCountY = 1;
  group_count.groupCountZ = 1;

  // Upload source -> run kernel -> read back destination and loop counter.
  lzt::append_memory_copy(command_list, src_buffer_d, src_buffer_s, size);
  lzt::append_barrier(command_list);
  lzt::append_launch_function(command_list, kernel, &group_count, nullptr, 0,
                              nullptr);
  lzt::append_barrier(command_list);
  lzt::append_memory_copy(command_list, dest_buffer_s, dest_buffer_d, size);
  lzt::append_memory_copy(command_list, loop_counter_s, loop_counter_d,
                          loop_counter_alloc_size);
  lzt::close_command_list(command_list);

  LOG_DEBUG << "[Application] launching execution of " << kernel_name;

  // Publish the device source buffer address before starting the kernel so
  // the debugger can write to it while the kernel is running.
  synchro.update_gpu_buffer_address(reinterpret_cast<uint64_t>(src_buffer_d));
  synchro.notify_debugger();

  lzt::execute_command_lists(command_queue, 1, &command_list, nullptr);
  lzt::synchronize(command_queue, UINT64_MAX);

  // Element 0 is skipped: presumably because the debugger overwrites the
  // first byte of the published buffer to stop the kernel loop -- confirm
  // against the kernel source.
  for (size_t i = 1; i < size; i++) {
    EXPECT_EQ(static_cast<uint8_t *>(dest_buffer_s)[i],
              static_cast<uint8_t *>(src_buffer_s)[i]);
    if (static_cast<uint8_t *>(dest_buffer_s)[i] !=
        static_cast<uint8_t *>(src_buffer_s)[i]) {
      LOG_ERROR << "[Application] Buffer Sanity check did not pass";
      break;
    }
  }

  // cleanup
  lzt::free_memory(context, dest_buffer_s);
  lzt::free_memory(context, dest_buffer_d);
  lzt::free_memory(context, src_buffer_s);
  lzt::free_memory(context, src_buffer_d);
  lzt::free_memory(context, loop_counter_s);
  lzt::free_memory(context, loop_counter_d);
  lzt::free_memory(context, slm_output_s);

  lzt::destroy_function(kernel);
  lzt::destroy_module(module);
  lzt::destroy_command_list(command_list);
  lzt::destroy_command_queue(command_queue);

  // Report expectation failures to the parent test through the exit code.
  if (::testing::Test::HasFailure()) {
    exit(1);
  }
}
712+
586713
void run_multiple_threads(ze_context_handle_t context,
587714
ze_device_handle_t device, process_synchro &synchro,
588715
debug_options &options) {
@@ -1227,6 +1354,11 @@ int main(int argc, char **argv) {
12271354
options.kernel_name_in = "long_kernel_slm";
12281355
run_long_kernel(context, device, synchro, options);
12291356
break;
1357+
case LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH:
1358+
options.use_custom_module = true;
1359+
options.module_name_in = "debug_loop_slm.spv";
1360+
run_long_kernel_scratch(context, device, synchro, options);
1361+
break;
12301362
case MULTIPLE_THREADS:
12311363
run_multiple_threads(context, device, synchro, options);
12321364
break;

utils/test_harness/tools/include/test_harness_debug.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ void debug_resume(const zet_debug_session_handle_t &debug_session,
4343
const ze_device_thread_t &device_thread);
4444

4545
void clear_exceptions(const ze_device_handle_t &device,
46-
const zet_debug_session_handle_t &debug_session,
47-
const ze_device_thread_t &device_thread);
46+
const zet_debug_session_handle_t &debug_session,
47+
const ze_device_thread_t &device_thread);
4848

4949
void debug_read_memory(const zet_debug_session_handle_t &debug_session,
5050
const ze_device_thread_t &device_thread,
@@ -79,6 +79,10 @@ void debug_write_registers(const zet_debug_session_handle_t &debug_session,
7979

8080
std::vector<uint8_t> get_debug_info(const zet_module_handle_t &module);
8181

82+
bool is_heapless_mode(ze_device_thread_t stopped_thread,
83+
ze_device_handle_t &device_handle,
84+
zet_debug_session_handle_t debug_session);
85+
8286
}; // namespace level_zero_tests
8387

8488
#endif /* TEST_HARNESS_DEBUG_HPP */

utils/test_harness/tools/src/test_harness_debug.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,8 @@ bool get_register_set_props(ze_device_handle_t device,
181181
}
182182

183183
void clear_exceptions(const ze_device_handle_t &device,
184-
const zet_debug_session_handle_t &debug_session,
185-
const ze_device_thread_t &device_thread) {
184+
const zet_debug_session_handle_t &debug_session,
185+
const ze_device_thread_t &device_thread) {
186186
size_t reg_size_in_bytes = 0;
187187

188188
zet_debug_regset_properties_t cr_reg_prop;
@@ -198,8 +198,7 @@ void clear_exceptions(const ze_device_handle_t &device,
198198
cr_reg_prop.count, cr_values),
199199
ZE_RESULT_SUCCESS);
200200

201-
uint32_values[1] &=
202-
~((1 << 26) | (1 << 30));
201+
uint32_values[1] &= ~((1 << 26) | (1 << 30));
203202
ASSERT_EQ(zetDebugWriteRegisters(debug_session, device_thread,
204203
ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, 0,
205204
cr_reg_prop.count, cr_values),
@@ -305,4 +304,31 @@ std::vector<uint8_t> get_debug_info(const zet_module_handle_t &module_handle) {
305304
return debug_info;
306305
}
307306

307+
// Reports whether a stopped thread has the heapless flag set.
//
// Reads the ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU register set of
// `stopped_thread` and tests the ZET_DEBUG_MODE_FLAG_HEAPLESS bit in its
// first 32-bit word.
//
// @param stopped_thread  thread whose mode flags are read
// @param device_handle   device used to query the register set properties
// @param debug_session   active debug session the thread belongs to
// @return true when the heapless flag is set; false when it is clear, when
//         the device reports no (or an empty) mode-flags register set, or
//         when the register read fails (a GoogleTest expectation failure is
//         recorded in that case).
bool is_heapless_mode(ze_device_thread_t stopped_thread,
                      ze_device_handle_t &device_handle,
                      zet_debug_session_handle_t debug_session) {

  bool result = false;
  std::vector<zet_debug_regset_properties_t> regset_properties =
      lzt::get_register_set_properties(device_handle);
  for (auto &register_set : regset_properties) {
    if (register_set.type != ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU) {
      continue;
    }

    const size_t reg_size_in_bytes =
        static_cast<size_t>(register_set.count) * register_set.byteSize;

    // Zero-initialised, self-releasing storage rounded up to whole 32-bit
    // words: the first word can always be read safely, and a failed read
    // yields "not heapless" instead of an uninitialised value.
    std::vector<uint32_t> mode_values(
        (reg_size_in_bytes + sizeof(uint32_t) - 1) / sizeof(uint32_t), 0u);
    if (mode_values.empty()) {
      continue;
    }

    EXPECT_EQ(
        zetDebugReadRegisters(debug_session, stopped_thread,
                              ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU, 0,
                              register_set.count, mode_values.data()),
        ZE_RESULT_SUCCESS);

    LOG_DEBUG << "[Debugger] mode value: " << mode_values[0];
    result = (mode_values[0] & ZET_DEBUG_MODE_FLAG_HEAPLESS) != 0;
  }

  return result;
}
333+
308334
} // namespace level_zero_tests

0 commit comments

Comments
 (0)