Skip to content

Commit 4697059

Browse files
committed
Add CTS for scratch register reading.
1 parent 32527cc commit 4697059

File tree

4 files changed

+273
-0
lines changed

4 files changed

+273
-0
lines changed

conformance_tests/tools/debug/src/test_debug.cpp

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "test_debug.hpp"
1010
#include "test_debug_utils.hpp"
11+
#include "test_harness/zet_intel_gpu_debug.h"
1112

1213
namespace lzt = level_zero_tests;
1314

@@ -1376,6 +1377,143 @@ void zetDebugReadWriteRegistersTest::run_read_write_registers_test(
13761377
}
13771378
}
13781379

1380+
void zetDebugReadWriteRegistersTest::run_read_registers_test(
1381+
std::vector<ze_device_handle_t> &devices, bool use_sub_devices) {
1382+
for (auto &device : devices) {
1383+
print_device(device);
1384+
if (!is_debug_supported(device))
1385+
continue;
1386+
1387+
synchro->clear_debugger_signal();
1388+
debugHelper = launch_process(LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH,
1389+
device, use_sub_devices);
1390+
1391+
zet_debug_event_t module_event;
1392+
attach_and_get_module_event(debugHelper.id(), synchro, device, debugSession,
1393+
module_event);
1394+
1395+
if (module_event.flags & ZET_DEBUG_EVENT_FLAG_NEED_ACK) {
1396+
LOG_DEBUG << "[Debugger] Acking event: "
1397+
<< lzt::debuggerEventTypeString[module_event.type];
1398+
lzt::debug_ack_event(debugSession, &module_event);
1399+
}
1400+
1401+
uint64_t gpu_buffer_va = 0;
1402+
synchro->wait_for_application_signal();
1403+
if (!synchro->get_app_gpu_buffer_address(gpu_buffer_va)) {
1404+
FAIL() << "[Debugger] Could not get a valid GPU buffer VA";
1405+
}
1406+
synchro->clear_application_signal();
1407+
1408+
zet_debug_memory_space_desc_t memorySpaceDesc;
1409+
memorySpaceDesc.type = ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT;
1410+
int sizeToRead = 512;
1411+
uint8_t *kernel_buffer = new uint8_t[sizeToRead];
1412+
// set buffer[0] to 0 to break the loop. See debug_loop_slm.cl
1413+
kernel_buffer[0] = 0;
1414+
memorySpaceDesc.address = gpu_buffer_va;
1415+
1416+
ze_device_thread_t device_threads = {};
1417+
device_threads.slice = UINT32_MAX;
1418+
device_threads.subslice = UINT32_MAX;
1419+
device_threads.eu = UINT32_MAX;
1420+
device_threads.thread = UINT32_MAX;
1421+
1422+
LOG_INFO << "[Debugger] Stopping all device threads";
1423+
// give time to app to launch the kernel
1424+
std::this_thread::sleep_for(std::chrono::seconds(6));
1425+
lzt::debug_interrupt(debugSession, device_threads);
1426+
1427+
std::vector<ze_device_thread_t> stopped_threads;
1428+
if (!find_stopped_threads(debugSession, device, device_threads, true,
1429+
stopped_threads)) {
1430+
delete[] kernel_buffer;
1431+
FAIL() << "[Debugger] Did not find stopped threads";
1432+
}
1433+
1434+
bool is_heapless_mode = false;
1435+
LOG_INFO << "[Debugger] Reading/Writing Thread Scratch Register on "
1436+
"interrupted threads";
1437+
1438+
for (auto &stopped_thread : stopped_threads) {
1439+
uint8_t *mode_values = nullptr;
1440+
auto register_set_properties = lzt::get_register_set_properties(device);
1441+
for (auto &register_set : register_set_properties){
1442+
1443+
if (register_set.type == ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU){
1444+
auto reg_size_in_bytes = register_set.count * register_set.byteSize;
1445+
mode_values = new uint8_t[reg_size_in_bytes];
1446+
ASSERT_EQ(
1447+
zetDebugReadRegisters(debugSession, stopped_thread,
1448+
ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU,
1449+
0, register_set.count, mode_values),
1450+
ZE_RESULT_SUCCESS);
1451+
1452+
uint32_t *uint32_t_values = (uint32_t *)mode_values;
1453+
LOG_DEBUG << "[Debugger] mode value: %u " << uint32_t_values[0];
1454+
is_heapless_mode =
1455+
(uint32_t_values[0] & ZET_DEBUG_MODE_FLAG_HEAPLESS);
1456+
}
1457+
}
1458+
1459+
if (is_heapless_mode) {
1460+
for (auto &register_set : register_set_properties){
1461+
if ((register_set.type ==
1462+
ZET_DEBUG_REGSET_TYPE_THREAD_SCRATCH_INTEL_GPU) &&
1463+
(register_set.generalFlags & ZET_DEBUG_REGSET_FLAG_READABLE)) {
1464+
LOG_DEBUG << "[Debugger] Register set type " << register_set.type
1465+
<< " is readable";
1466+
size_t reg_size_in_bytes =
1467+
register_set.count * register_set.byteSize;
1468+
1469+
uint64_t *thread_scratch_reg_values =
1470+
new uint64_t[reg_size_in_bytes];
1471+
ASSERT_EQ(zetDebugReadRegisters(
1472+
debugSession, stopped_thread,
1473+
ZET_DEBUG_REGSET_TYPE_DEBUG_SCRATCH_INTEL_GPU, 0,
1474+
register_set.count, thread_scratch_reg_values),
1475+
ZE_RESULT_SUCCESS);
1476+
} else {
1477+
LOG_INFO << "[Debugger] Register set type " << register_set.type
1478+
<< " is NOT readable";
1479+
}
1480+
if (register_set.generalFlags & ZET_DEBUG_REGSET_FLAG_WRITEABLE) {
1481+
} else {
1482+
LOG_INFO << "[Debugger] Register set " << register_set.type
1483+
<< " type is NOT writeable";
1484+
}
1485+
}
1486+
}
1487+
}
1488+
1489+
lzt::debug_write_memory(debugSession, device_threads, memorySpaceDesc, 1,
1490+
kernel_buffer);
1491+
delete[] kernel_buffer;
1492+
1493+
LOG_INFO << "[Debugger] resuming interrupted threads";
1494+
lzt::debug_resume(debugSession, device_threads);
1495+
debugHelper.wait();
1496+
1497+
std::vector<zet_debug_event_type_t> expectedEvents = {
1498+
ZET_DEBUG_EVENT_TYPE_MODULE_UNLOAD, ZET_DEBUG_EVENT_TYPE_PROCESS_EXIT};
1499+
1500+
if (!check_events(debugSession, expectedEvents)) {
1501+
FAIL() << "[Debugger] Did not receive expected events";
1502+
}
1503+
1504+
lzt::debug_detach(debugSession);
1505+
ASSERT_EQ(debugHelper.exit_code(), 0);
1506+
}
1507+
}
1508+
1509+
// Runs the scratch-register read scenario on the root devices of the default
// driver (sub-devices are not used).
TEST_F(
    zetDebugReadWriteRegistersTest,
    GivenActiveDebugSessionWhenReadingScratchRegistersThenDataReadIsDoneSuccessfully) {
  // Kept as a named local: run_read_registers_test takes a non-const
  // reference.
  auto root_devices = lzt::get_devices(lzt::get_default_driver());
  run_read_registers_test(root_devices, false);
}
1516+
13791517
TEST_F(
13801518
zetDebugReadWriteRegistersTest,
13811519
GivenActiveDebugSessionWhenReadingAndWritingRegistersThenValidDataReadAndDataWrittenSuccessfully) {

conformance_tests/tools/debug/src/test_debug.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ class zetDebugReadWriteRegistersTest : public zetDebugMemAccessTest {
255255
void TearDown() override { zetDebugMemAccessTest::TearDown(); }
256256
void run_read_write_registers_test(std::vector<ze_device_handle_t> &devices,
257257
bool use_sub_devices);
258+
void run_read_registers_test(std::vector<ze_device_handle_t> &devices,
259+
bool use_sub_devices);
258260
};
259261

260262
class zetDebugThreadControlTest : public zetDebugBaseSetup {

conformance_tests/tools/debug/src/test_debug_common.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ typedef enum {
4848
ATTACH_AFTER_MODULE_DESTROYED,
4949
LONG_RUNNING_KERNEL_INTERRUPTED,
5050
LONG_RUNNING_KERNEL_INTERRUPTED_SLM,
51+
LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH,
5152
PAGE_FAULT,
5253
MULTIPLE_THREADS,
5354
MULTIPLE_CQ,

conformance_tests/tools/debug/src/test_debug_helper.cpp

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,133 @@ void run_long_kernel(ze_context_handle_t context, ze_device_handle_t device,
583583
}
584584
}
585585

586+
// Application-side body for the LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH
// scenario.
//
// Builds the module named by `options.module_name_in` with debug symbols and
// the "-forcespills" vISA option, launches the "long_kernel_slm" kernel, and
// publishes the device address of the source buffer to the debugger through
// `synchro` before submitting the work. After the queue drains, the
// destination buffer is compared with the source (skipping element 0) and the
// process exits with status 1 if any gtest expectation failed.
//
// context / device : Level Zero handles used for every allocation and launch.
// synchro          : cross-process signalling with the debugger.
// options          : only `module_name_in` is read here.
void run_long_kernel_scratch(ze_context_handle_t context,
                             ze_device_handle_t device,
                             process_synchro &synchro, debug_options &options) {

  auto command_list = lzt::create_command_list(device);
  auto command_queue = lzt::create_command_queue(device);
  std::string module_name = options.module_name_in;

  std::string kernel_name = "long_kernel_slm";
  // Also used as the problem size for all data buffers below.
  // NOTE: Not all SKUs have same SLM so can go too big.
  const size_t slm_buffer_size = 512;
  const size_t size = slm_buffer_size;

  synchro.wait_for_debugger_signal();
  const char *build_flags = "-g -igc_opts 'VISAOptions=-forcespills'";
  auto module =
      lzt::create_module(device, module_name, ZE_MODULE_FORMAT_IL_SPIRV,
                         build_flags /* include debug symbols*/, nullptr);

  auto kernel = lzt::create_function(module, kernel_name);

  ze_kernel_properties_t kernel_properties = {
      ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES, nullptr};
  EXPECT_EQ(ZE_RESULT_SUCCESS,
            zeKernelGetProperties(kernel, &kernel_properties));

  // Integer ceiling division. EXPECT_EQ above does not abort on failure, so
  // maxSubgroupSize may still be zero here; guard the division.
  const size_t max_subgroup_size = kernel_properties.maxSubgroupSize;
  const size_t threadCount =
      (max_subgroup_size == 0)
          ? 0
          : (size + max_subgroup_size - 1) / max_subgroup_size;

  LOG_INFO << "[Application] Problem size: " << size
           << ". Kernel maxSubGroupSize: " << kernel_properties.maxSubgroupSize
           << ". GPU thread count: ceil (P size/maxSubGroupSize) = "
           << threadCount;

  auto dest_buffer_d =
      lzt::allocate_device_memory(size, size, 0, 0, device, context);
  auto dest_buffer_s =
      lzt::allocate_shared_memory(size, size, 0, 0, device, context);
  auto src_buffer_d =
      lzt::allocate_device_memory(size, size, 0, 0, device, context);
  auto src_buffer_s =
      lzt::allocate_shared_memory(size, size, 0, 0, device, context);

  void *slm_output_s = lzt::allocate_shared_memory(
      slm_buffer_size, slm_buffer_size, 0, 0, device, context);

  // NOTE(review): passed as a kernel argument with sizeof(unsigned long),
  // which differs between LP64 and LLP64 platforms - confirm against the
  // kernel's parameter type.
  unsigned long loop_max = 1000000000;

  auto loop_counter_d = lzt::allocate_device_memory(
      loop_counter_alloc_size, loop_counter_alloc_size, 0, 0, device, context);
  auto loop_counter_s = lzt::allocate_shared_memory(
      loop_counter_alloc_size, loop_counter_alloc_size, 0, 0, device, context);

  LOG_DEBUG << "[Application] Allocated source device memory at: " << std::hex
            << src_buffer_d;
  LOG_DEBUG << "[Application] Allocated destination device memory at: "
            << std::hex << dest_buffer_d;

  std::memset(dest_buffer_s, 1, size);
  std::memset(src_buffer_s, 0, size);
  std::memset(loop_counter_s, 0, loop_counter_alloc_size);
  // Fill the source with the repeating pattern 1, 2, ..., 255, 0, 1, ...
  auto *src_bytes = static_cast<uint8_t *>(src_buffer_s);
  for (size_t i = 0; i < size; i++) {
    src_bytes[i] = static_cast<uint8_t>((i + 1) & 0xFF);
  }

  lzt::set_argument_value(kernel, 0, sizeof(dest_buffer_d), &dest_buffer_d);
  lzt::set_argument_value(kernel, 1, sizeof(src_buffer_d), &src_buffer_d);
  lzt::set_argument_value(kernel, 2, sizeof(loop_counter_d), &loop_counter_d);
  lzt::set_argument_value(kernel, 3, sizeof(loop_max), &loop_max);
  lzt::set_argument_value(kernel, 4, sizeof(slm_output_s), &slm_output_s);

  uint32_t group_size_x = 1;
  uint32_t group_size_y = 1;
  uint32_t group_size_z = 1;
  lzt::suggest_group_size(kernel, size, 1, 1, group_size_x, group_size_y,
                          group_size_z);
  // Only the suggested X dimension is used; Y and Z are forced to 1.
  lzt::set_group_size(kernel, group_size_x, 1, 1);
  ze_group_count_t group_count = {};
  group_count.groupCountX = static_cast<uint32_t>(size / group_size_x);
  group_count.groupCountY = 1;
  group_count.groupCountZ = 1;

  lzt::append_memory_copy(command_list, src_buffer_d, src_buffer_s, size);
  lzt::append_barrier(command_list);
  lzt::append_launch_function(command_list, kernel, &group_count, nullptr, 0,
                              nullptr);
  lzt::append_barrier(command_list);
  lzt::append_memory_copy(command_list, dest_buffer_s, dest_buffer_d, size);
  lzt::append_memory_copy(command_list, loop_counter_s, loop_counter_d,
                          loop_counter_alloc_size);
  lzt::close_command_list(command_list);

  LOG_DEBUG << "[Application] launching execution of " << kernel_name;

  // Publish the device address of the source buffer, then wake the debugger
  // before the kernel is submitted.
  synchro.update_gpu_buffer_address(reinterpret_cast<uint64_t>(src_buffer_d));
  synchro.notify_debugger();

  lzt::execute_command_lists(command_queue, 1, &command_list, nullptr);
  lzt::synchronize(command_queue, UINT64_MAX);

  // Element 0 is excluded from the comparison; stop at the first mismatch so
  // the log is not flooded.
  const auto *dest_bytes = static_cast<const uint8_t *>(dest_buffer_s);
  for (size_t i = 1; i < size; i++) {
    EXPECT_EQ(dest_bytes[i], src_bytes[i]);
    if (dest_bytes[i] != src_bytes[i]) {
      LOG_ERROR << "[Application] Buffer Sanity check did not pass";
      break;
    }
  }

  // cleanup
  lzt::free_memory(context, dest_buffer_s);
  lzt::free_memory(context, dest_buffer_d);
  lzt::free_memory(context, src_buffer_s);
  lzt::free_memory(context, src_buffer_d);
  lzt::free_memory(context, loop_counter_s);
  lzt::free_memory(context, loop_counter_d);
  lzt::free_memory(context, slm_output_s);

  lzt::destroy_function(kernel);
  lzt::destroy_module(module);
  lzt::destroy_command_list(command_list);
  lzt::destroy_command_queue(command_queue);

  // Propagate any gtest expectation failure to the parent test process.
  if (::testing::Test::HasFailure()) {
    exit(1);
  }
}
712+
586713
void run_multiple_threads(ze_context_handle_t context,
587714
ze_device_handle_t device, process_synchro &synchro,
588715
debug_options &options) {
@@ -1227,6 +1354,11 @@ int main(int argc, char **argv) {
12271354
options.kernel_name_in = "long_kernel_slm";
12281355
run_long_kernel(context, device, synchro, options);
12291356
break;
1357+
case LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH:
1358+
options.use_custom_module = true;
1359+
options.module_name_in = "debug_loop_slm.spv";
1360+
run_long_kernel_scratch(context, device, synchro, options);
1361+
break;
12301362
case MULTIPLE_THREADS:
12311363
run_multiple_threads(context, device, synchro, options);
12321364
break;

0 commit comments

Comments
 (0)