diff --git a/stress_tests/common/src/stress_common_func.cpp b/stress_tests/common/src/stress_common_func.cpp index d028cffd7..c940d7891 100644 --- a/stress_tests/common/src/stress_common_func.cpp +++ b/stress_tests/common/src/stress_common_func.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ uint64_t total_available_host_memory() { stat.dwLength = sizeof(stat); GlobalMemoryStatusEx(&stat); - return stat.ullAvailVirtual; + return stat.ullAvailPhys; } uint64_t get_page_size() { SYSTEM_INFO si; diff --git a/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl b/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl index 5cc7d8f9e..0779b40e4 100644 --- a/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl +++ b/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.cl @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -86,3 +86,78 @@ kernel void test_device_memory10_unit_size4(__global uint *src, __global uint *d size_t tid = get_global_id(0); dst[tid] = src[tid]; } + +struct buffer { + uint *data; +}; + +kernel void test_device_memory1_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory2_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory3_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory4_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory5_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory6_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory7_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory8_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory9_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + +kernel void test_device_memory10_indirect(__global struct buffer *src_ptrs, __global struct buffer *dst_ptrs, uint dispatch_id) { + uint *src = src_ptrs[dispatch_id].data; + uint *dst = dst_ptrs[dispatch_id].data; + size_t tid = get_global_id(0); + dst[tid] = src[tid]; +} + diff --git a/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.spv b/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.spv index fc0cb5370..e21b37208 100644 Binary files a/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.spv and b/stress_tests/test_memory_allocation/kernels/test_multiple_memory_allocations.spv differ diff --git a/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp b/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp index 9cfcb3c37..f1ba65512 100644 --- a/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp +++ b/stress_tests/test_memory_allocation/src/test_memory_allocation.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -25,11 +25,20 @@ using lzt::to_u32; class zeDriverMemoryAllocationStressTest : public ::testing::Test, public ::testing::WithParamInterface< - std::tuple> { + std::tuple> { protected: typedef uint32_t kernel_copy_unit_t; const size_t kernel_copy_unit_size = sizeof(kernel_copy_unit_t); + struct MemoryAllocationTestArguments : public TestArguments_t { + bool immediate; + bool indirect_access; + } test_arguments_; + + struct Buffer { + kernel_copy_unit_t *data; + }; + bool verify_results(kernel_copy_unit_t *allocation, uint64_t test_single_allocation_count) { for (uint64_t i = 0; i < test_single_allocation_count; i++) { @@ -42,6 +51,7 @@ class zeDriverMemoryAllocationStressTest } return false; } + void dispatch_kernels( const ze_device_handle_t device, ze_memory_type_t memory_type, ze_module_handle_t module_handle, @@ -51,10 +61,50 @@ class zeDriverMemoryAllocationStressTest const std::vector &test_kernel_names, uint32_t number_of_dispatch, uint64_t one_case_allocation_count, ze_context_handle_t context) { + auto cmd_bundle = lzt::create_command_bundle( + context, device, 0, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, + ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0, 0, 0, test_arguments_.immediate); std::vector test_functions; - ze_command_list_handle_t command_list = - lzt::create_command_list(context, device, 0); + [[maybe_unused]] std::vector host_src_ptrs(number_of_dispatch); + [[maybe_unused]] std::vector host_dst_ptrs(number_of_dispatch); + [[maybe_unused]] kernel_copy_unit_t *src_allocation_ptrs = nullptr; + allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + [[maybe_unused]] kernel_copy_unit_t *dst_allocation_ptrs = nullptr; + allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + + if (test_arguments_.indirect_access) { + src_allocation_ptrs = allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + dst_allocation_ptrs = allocate_memory( + context, device, test_arguments_.memory_type, + number_of_dispatch * sizeof(void *), false); + for (uint32_t i = 0; i < number_of_dispatch; i++) { + host_src_ptrs[i].data = src_allocations[i]; + host_dst_ptrs[i].data = dst_allocations[i]; + } + + if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) { + lzt::append_memory_copy(cmd_bundle.list, src_allocation_ptrs, + host_src_ptrs.data(), + number_of_dispatch * sizeof(void *), nullptr); + lzt::append_memory_copy(cmd_bundle.list, dst_allocation_ptrs, + host_dst_ptrs.data(), + number_of_dispatch * sizeof(void *), nullptr); + lzt::append_barrier(cmd_bundle.list); + } else { + std::memcpy(src_allocation_ptrs, host_src_ptrs.data(), + number_of_dispatch * sizeof(void *)); + std::memcpy(dst_allocation_ptrs, host_dst_ptrs.data(), + number_of_dispatch * sizeof(void *)); + } + } + for (uint64_t dispatch_id = 0; dispatch_id < number_of_dispatch; dispatch_id++) { @@ -65,51 +115,76 @@ class zeDriverMemoryAllocationStressTest lzt::create_function(module_handle, test_kernel_names[dispatch_id]); lzt::set_group_size(kernel_handle, workgroup_size_x_, 1, 1); - lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation), - &src_allocation); - lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation), - &dst_allocation); + + if (test_arguments_.indirect_access) { + switch (test_arguments_.memory_type) { + case ZE_MEMORY_TYPE_DEVICE: + lzt::kernel_set_indirect_access( + kernel_handle, ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE); + break; + case ZE_MEMORY_TYPE_HOST: + lzt::kernel_set_indirect_access(kernel_handle, + ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST); + break; + case ZE_MEMORY_TYPE_SHARED: + lzt::kernel_set_indirect_access( + kernel_handle, ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED); + break; + default: + break; + } + lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation_ptrs), + &src_allocation_ptrs); + lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation_ptrs), + &dst_allocation_ptrs); + lzt::set_argument_value(kernel_handle, 2, sizeof(uint32_t), + &dispatch_id); + } else { + lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation), + &src_allocation); + lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation), + &dst_allocation); + } uint32_t group_count_x = to_u32(one_case_allocation_count / workgroup_size_x_); ze_group_count_t thread_group_dimensions = {group_count_x, 1, 1}; - lzt::append_memory_fill( - command_list, src_allocation, &init_value_2_, sizeof(init_value_2_), - one_case_allocation_count * kernel_copy_unit_size, nullptr); + lzt::append_memory_fill(cmd_bundle.list, src_allocation, &init_value_2_, + sizeof(init_value_2_), + one_case_allocation_count * kernel_copy_unit_size, + nullptr); - lzt::append_memory_fill( - command_list, dst_allocation, &init_value_3_, sizeof(init_value_3_), - one_case_allocation_count * kernel_copy_unit_size, nullptr); + lzt::append_memory_fill(cmd_bundle.list, dst_allocation, &init_value_3_, + sizeof(init_value_3_), + one_case_allocation_count * kernel_copy_unit_size, + nullptr); - lzt::append_barrier(command_list, nullptr); + lzt::append_barrier(cmd_bundle.list, nullptr); - lzt::append_launch_function(command_list, kernel_handle, + lzt::append_launch_function(cmd_bundle.list, kernel_handle, &thread_group_dimensions, nullptr, 0, nullptr); - lzt::append_barrier(command_list, nullptr); + lzt::append_barrier(cmd_bundle.list, nullptr); if (memory_type == ZE_MEMORY_TYPE_DEVICE) { lzt::append_memory_copy( - command_list, data_out[dispatch_id].data(), dst_allocation, + cmd_bundle.list, data_out[dispatch_id].data(), dst_allocation, one_case_allocation_count * kernel_copy_unit_size, nullptr); } - lzt::append_barrier(command_list, nullptr); + lzt::append_barrier(cmd_bundle.list, nullptr); test_functions.push_back(kernel_handle); } - ze_command_queue_handle_t command_queue = lzt::create_command_queue( - context, device, 0, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, - ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0); + if (!test_arguments_.immediate) { + lzt::close_command_list(cmd_bundle.list); + } - lzt::close_command_list(command_list); - lzt::execute_command_lists(command_queue, 1, &command_list, nullptr); - lzt::synchronize(command_queue, UINT64_MAX); + lzt::execute_and_sync_command_bundle(cmd_bundle, UINT64_MAX); + lzt::destroy_command_bundle(cmd_bundle); - lzt::destroy_command_queue(command_queue); - lzt::destroy_command_list(command_list); for (uint64_t dispatch_id = 0; dispatch_id < test_functions.size(); dispatch_id++) { EXPECT_ZE_RESULT_SUCCESS(zeKernelDestroy(test_functions[dispatch_id])); @@ -121,17 +196,21 @@ class zeDriverMemoryAllocationStressTest kernel_copy_unit_t init_value_1_ = 0; kernel_copy_unit_t init_value_2_ = 0xAAAAAAAA; // 1010 1010 kernel_copy_unit_t init_value_3_ = 0x55555555; // 0101 0101 + bool indirect_access = false; + bool immediate = false; }; LZT_TEST_P( zeDriverMemoryAllocationStressTest, AlocateFullAvailableMemoryNumberOfKernelDispatchesDependsOnUserChunkAllocaitonRequest) { - TestArguments_t test_arguments = { + test_arguments_ = { std::get<0>(GetParam()), // total memory size limit std::get<1>(GetParam()), // one allocation size limit std::get<2>(GetParam()), // dispatch multiplier - std::get<3>(GetParam()) // memory type + std::get<3>(GetParam()), // memory type + std::get<4>(GetParam()), // immediate + std::get<5>(GetParam()) // indirect access }; auto driver = lzt::get_default_driver(); @@ -139,14 +218,14 @@ LZT_TEST_P( auto device = lzt::get_default_device(driver); ze_device_properties_t device_properties = lzt::get_device_properties(device); - test_arguments.print_test_arguments(device_properties); + test_arguments_.print_test_arguments(device_properties); std::vector device_memory_properties = lzt::get_memory_properties(device); const uint32_t used_vectors_in_test = - test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE ? 4 : 3; - uint32_t number_of_dispatches = to_u32(test_arguments.multiplier); + test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE ? 4 : 3; + uint32_t number_of_dispatches = to_u32(test_arguments_.multiplier); uint64_t number_of_all_allocations = used_vectors_in_test * number_of_dispatches; uint64_t test_single_allocation_memory_size = 0; @@ -156,15 +235,16 @@ LZT_TEST_P( adjust_max_memory_allocation( driver, device_properties, device_memory_properties, test_total_memory_size, test_single_allocation_memory_size, - number_of_all_allocations, test_arguments, relax_memory_capability); + number_of_all_allocations, test_arguments_, relax_memory_capability); if (number_of_all_allocations != used_vectors_in_test * number_of_dispatches) { LOG_INFO << "Need to limit dispatches from : " << number_of_dispatches << " to: " << number_of_all_allocations / used_vectors_in_test; - number_of_dispatches = - to_u32(number_of_all_allocations / used_vectors_in_test); // bacause number_of_all_allocations can change; + number_of_dispatches = to_u32( + number_of_all_allocations / + used_vectors_in_test); // bacause number_of_all_allocations can change; } if (test_single_allocation_memory_size < kernel_copy_unit_size) { @@ -197,10 +277,10 @@ LZT_TEST_P( for (uint32_t dispatch_id = 0; dispatch_id < number_of_dispatches; dispatch_id++) { kernel_copy_unit_t *input_allocation = allocate_memory( - context, device, test_arguments.memory_type, + context, device, test_arguments_.memory_type, test_single_allocation_memory_size, relax_memory_capability); kernel_copy_unit_t *output_allocation = allocate_memory( - context, device, test_arguments.memory_type, + context, device, test_arguments_.memory_type, test_single_allocation_memory_size, relax_memory_capability); if (input_allocation == nullptr || output_allocation == nullptr) { LOG_WARNING << "Cannot allocate " @@ -217,18 +297,22 @@ LZT_TEST_P( } input_allocations.push_back(input_allocation); output_allocations.push_back(output_allocation); - if (test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE) { + if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) { std::vector data_out( test_single_allocation_count * kernel_copy_unit_size, init_value_1_); data_out_vector.push_back(data_out); } - std::string kernel_name; - kernel_name = - "test_device_memory" + - std::to_string((dispatch_id % number_of_kernels_in_module_) + 1) + - "_unit_size" + std::to_string(kernel_copy_unit_size); - test_kernel_names.push_back(kernel_name); + std::stringstream kernel_name_ss; + kernel_name_ss << "test_device_memory" + + std::to_string( + (dispatch_id % number_of_kernels_in_module_) + 1); + if (test_arguments_.indirect_access) { + kernel_name_ss << "_indirect"; + } else { + kernel_name_ss << "_unit_size" << std::to_string(kernel_copy_unit_size); + } + test_kernel_names.push_back(kernel_name_ss.str()); } LOG_INFO << "call create module"; @@ -238,7 +322,7 @@ LZT_TEST_P( nullptr); LOG_INFO << "call dispatch_kernels"; - dispatch_kernels(device, test_arguments.memory_type, module_handle, + dispatch_kernels(device, test_arguments_.memory_type, module_handle, input_allocations, output_allocations, data_out_vector, test_kernel_names, number_of_dispatches, test_single_allocation_count, context); @@ -247,7 +331,7 @@ LZT_TEST_P( bool memory_test_failure = false; uint32_t counter = 0; - if (test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE) { + if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) { for (auto output : data_out_vector) { counter++; memory_test_failure |= @@ -290,30 +374,34 @@ struct CombinationsTestNameSuffix { std::stringstream ss; ss << "dispatches_" << std::get<2>(info.param); ss << "_memoryType_" << print_allocation_type(std::get<3>(info.param)); + ss << (std::get<4>(info.param) ? "_immediate" : ""); + ss << (std::get<5>(info.param) ? "_indirectAccess" : ""); return ss.str(); } }; std::vector multiple_dispatches = {1, 10, 1000, 5000, 10000}; -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( TestAllocationMemoryMatrixMaxMemory, zeDriverMemoryAllocationStressTest, ::testing::Combine(::testing::Values(hundred_percent), ::testing::Values(hundred_percent), ::testing::ValuesIn(multiple_dispatches), ::testing::Values(ZE_MEMORY_TYPE_HOST, ZE_MEMORY_TYPE_SHARED, - ZE_MEMORY_TYPE_DEVICE)), + ZE_MEMORY_TYPE_DEVICE), + ::testing::Bool(), ::testing::Bool()), CombinationsTestNameSuffix()); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( TestAllocationMemoryMatrixMinMemory, zeDriverMemoryAllocationStressTest, ::testing::Combine(::testing::Values(hundred_percent), ::testing::Values(ten_percent), ::testing::ValuesIn(multiple_dispatches), ::testing::Values(ZE_MEMORY_TYPE_HOST, ZE_MEMORY_TYPE_SHARED, - ZE_MEMORY_TYPE_DEVICE)), + ZE_MEMORY_TYPE_DEVICE), + ::testing::Bool(), ::testing::Bool()), CombinationsTestNameSuffix()); } // namespace