Skip to content

Commit e50663a

Browse files
committed
Add indirect access memory stress test
Signed-off-by: Oskar Hubert Weber <[email protected]>
1 parent f6ab42a commit e50663a

File tree

3 files changed

+142
-54
lines changed

3 files changed

+142
-54
lines changed

stress_tests/common/include/stress_common_func.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,20 @@ typedef struct TestArguments {
4949
double one_allocation_size_limit;
5050
uint64_t multiplier;
5151
ze_memory_type_t memory_type;
52+
bool immediate;
53+
bool indirect_access;
5254

5355
void print_test_arguments(const ze_device_properties_t &device_properties) {
5456
LOG_INFO << "TESTED device: " << device_properties.name;
5557
LOG_INFO << "TESTING user ARGS: "
56-
<< " Limit total memory to = " << 100 * total_memory_size_limit
58+
<< "immediate = " << (immediate ? "true" : "false")
59+
<< " | indirect_access = " << (indirect_access ? "true" : "false")
60+
<< " | limit total memory to = " << 100 * total_memory_size_limit
5761
<< "% "
58-
<< " | Limit one allocation to = "
62+
<< " | limit one allocation to = "
5963
<< 100 * one_allocation_size_limit << " %"
60-
<< " | Multiplier set = " << multiplier
61-
<< " | User memory allocation type: "
64+
<< " | multiplier set = " << multiplier
65+
<< " | user memory allocation type: "
6266
<< print_allocation_type(memory_type);
6367
};
6468
} TestArguments_t;

stress_tests/common/src/stress_common_func.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ uint64_t total_available_host_memory() {
4343
stat.dwLength = sizeof(stat);
4444
GlobalMemoryStatusEx(&stat);
4545

46-
return stat.ullAvailVirtual;
46+
return stat.ullAvailPhys;
4747
}
4848
uint64_t get_page_size() {
4949
SYSTEM_INFO si;

stress_tests/test_memory_allocation/src/test_memory_allocation.cpp

Lines changed: 133 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,15 @@ using lzt::to_u32;
2525
class zeDriverMemoryAllocationStressTest
2626
: public ::testing::Test,
2727
public ::testing::WithParamInterface<
28-
std::tuple<double, double, uint32_t, ze_memory_type_t>> {
28+
std::tuple<double, double, uint32_t, ze_memory_type_t, bool, bool>> {
2929
protected:
3030
typedef uint32_t kernel_copy_unit_t;
3131
const size_t kernel_copy_unit_size = sizeof(kernel_copy_unit_t);
3232

33+
struct Buffer {
34+
kernel_copy_unit_t *data;
35+
};
36+
3337
bool verify_results(kernel_copy_unit_t *allocation,
3438
uint64_t test_single_allocation_count) {
3539
for (uint64_t i = 0; i < test_single_allocation_count; i++) {
@@ -42,6 +46,7 @@ class zeDriverMemoryAllocationStressTest
4246
}
4347
return false;
4448
}
49+
4550
void dispatch_kernels(
4651
const ze_device_handle_t device, ze_memory_type_t memory_type,
4752
ze_module_handle_t module_handle,
@@ -51,10 +56,50 @@ class zeDriverMemoryAllocationStressTest
5156
const std::vector<std::string> &test_kernel_names,
5257
uint32_t number_of_dispatch, uint64_t one_case_allocation_count,
5358
ze_context_handle_t context) {
59+
auto cmd_bundle = lzt::create_command_bundle(
60+
context, device, 0, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
61+
ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0, 0, 0, test_arguments_.immediate);
5462

5563
std::vector<ze_kernel_handle_t> test_functions;
56-
ze_command_list_handle_t command_list =
57-
lzt::create_command_list(context, device, 0);
64+
[[maybe_unused]] std::vector<Buffer> host_src_ptrs(number_of_dispatch);
65+
[[maybe_unused]] std::vector<Buffer> host_dst_ptrs(number_of_dispatch);
66+
[[maybe_unused]] kernel_copy_unit_t *src_allocation_ptrs = nullptr;
67+
allocate_memory<kernel_copy_unit_t>(
68+
context, device, test_arguments_.memory_type,
69+
number_of_dispatch * sizeof(void *), false);
70+
[[maybe_unused]] kernel_copy_unit_t *dst_allocation_ptrs = nullptr;
71+
allocate_memory<kernel_copy_unit_t>(
72+
context, device, test_arguments_.memory_type,
73+
number_of_dispatch * sizeof(void *), false);
74+
75+
if (test_arguments_.indirect_access) {
76+
src_allocation_ptrs = allocate_memory<kernel_copy_unit_t>(
77+
context, device, test_arguments_.memory_type,
78+
number_of_dispatch * sizeof(void *), false);
79+
dst_allocation_ptrs = allocate_memory<kernel_copy_unit_t>(
80+
context, device, test_arguments_.memory_type,
81+
number_of_dispatch * sizeof(void *), false);
82+
for (uint32_t i = 0; i < number_of_dispatch; i++) {
83+
host_src_ptrs[i].data = src_allocations[i];
84+
host_dst_ptrs[i].data = dst_allocations[i];
85+
}
86+
87+
if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) {
88+
lzt::append_memory_copy(cmd_bundle.list, src_allocation_ptrs,
89+
host_src_ptrs.data(),
90+
number_of_dispatch * sizeof(void *), nullptr);
91+
lzt::append_memory_copy(cmd_bundle.list, dst_allocation_ptrs,
92+
host_dst_ptrs.data(),
93+
number_of_dispatch * sizeof(void *), nullptr);
94+
lzt::append_barrier(cmd_bundle.list);
95+
} else {
96+
std::memcpy(src_allocation_ptrs, host_src_ptrs.data(),
97+
number_of_dispatch * sizeof(void *));
98+
std::memcpy(dst_allocation_ptrs, host_dst_ptrs.data(),
99+
number_of_dispatch * sizeof(void *));
100+
}
101+
}
102+
58103
for (uint64_t dispatch_id = 0; dispatch_id < number_of_dispatch;
59104
dispatch_id++) {
60105

@@ -65,51 +110,76 @@ class zeDriverMemoryAllocationStressTest
65110
lzt::create_function(module_handle, test_kernel_names[dispatch_id]);
66111

67112
lzt::set_group_size(kernel_handle, workgroup_size_x_, 1, 1);
68-
lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation),
69-
&src_allocation);
70-
lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation),
71-
&dst_allocation);
113+
114+
if (test_arguments_.indirect_access) {
115+
switch (test_arguments_.memory_type) {
116+
case ZE_MEMORY_TYPE_DEVICE:
117+
lzt::kernel_set_indirect_access(
118+
kernel_handle, ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
119+
break;
120+
case ZE_MEMORY_TYPE_HOST:
121+
lzt::kernel_set_indirect_access(kernel_handle,
122+
ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
123+
break;
124+
case ZE_MEMORY_TYPE_SHARED:
125+
lzt::kernel_set_indirect_access(
126+
kernel_handle, ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
127+
break;
128+
default:
129+
break;
130+
}
131+
lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation_ptrs),
132+
&src_allocation_ptrs);
133+
lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation_ptrs),
134+
&dst_allocation_ptrs);
135+
lzt::set_argument_value(kernel_handle, 2, sizeof(uint32_t),
136+
&dispatch_id);
137+
} else {
138+
lzt::set_argument_value(kernel_handle, 0, sizeof(src_allocation),
139+
&src_allocation);
140+
lzt::set_argument_value(kernel_handle, 1, sizeof(dst_allocation),
141+
&dst_allocation);
142+
}
72143

73144
uint32_t group_count_x =
74145
to_u32(one_case_allocation_count / workgroup_size_x_);
75146
ze_group_count_t thread_group_dimensions = {group_count_x, 1, 1};
76147

77-
lzt::append_memory_fill(
78-
command_list, src_allocation, &init_value_2_, sizeof(init_value_2_),
79-
one_case_allocation_count * kernel_copy_unit_size, nullptr);
148+
lzt::append_memory_fill(cmd_bundle.list, src_allocation, &init_value_2_,
149+
sizeof(init_value_2_),
150+
one_case_allocation_count * kernel_copy_unit_size,
151+
nullptr);
80152

81-
lzt::append_memory_fill(
82-
command_list, dst_allocation, &init_value_3_, sizeof(init_value_3_),
83-
one_case_allocation_count * kernel_copy_unit_size, nullptr);
153+
lzt::append_memory_fill(cmd_bundle.list, dst_allocation, &init_value_3_,
154+
sizeof(init_value_3_),
155+
one_case_allocation_count * kernel_copy_unit_size,
156+
nullptr);
84157

85-
lzt::append_barrier(command_list, nullptr);
158+
lzt::append_barrier(cmd_bundle.list, nullptr);
86159

87-
lzt::append_launch_function(command_list, kernel_handle,
160+
lzt::append_launch_function(cmd_bundle.list, kernel_handle,
88161
&thread_group_dimensions, nullptr, 0,
89162
nullptr);
90163

91-
lzt::append_barrier(command_list, nullptr);
164+
lzt::append_barrier(cmd_bundle.list, nullptr);
92165

93166
if (memory_type == ZE_MEMORY_TYPE_DEVICE) {
94167
lzt::append_memory_copy(
95-
command_list, data_out[dispatch_id].data(), dst_allocation,
168+
cmd_bundle.list, data_out[dispatch_id].data(), dst_allocation,
96169
one_case_allocation_count * kernel_copy_unit_size, nullptr);
97170
}
98-
lzt::append_barrier(command_list, nullptr);
171+
lzt::append_barrier(cmd_bundle.list, nullptr);
99172

100173
test_functions.push_back(kernel_handle);
101174
}
102175

103-
ze_command_queue_handle_t command_queue = lzt::create_command_queue(
104-
context, device, 0, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
105-
ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0);
176+
if (!test_arguments_.immediate) {
177+
lzt::close_command_list(cmd_bundle.list);
178+
}
106179

107-
lzt::close_command_list(command_list);
108-
lzt::execute_command_lists(command_queue, 1, &command_list, nullptr);
109-
lzt::synchronize(command_queue, UINT64_MAX);
180+
lzt::execute_and_sync_command_bundle(cmd_bundle, UINT64_MAX);
181+
lzt::destroy_command_bundle(cmd_bundle);
110182

111-
lzt::destroy_command_queue(command_queue);
112-
lzt::destroy_command_list(command_list);
113183
for (uint64_t dispatch_id = 0; dispatch_id < test_functions.size();
114184
dispatch_id++) {
115185
EXPECT_ZE_RESULT_SUCCESS(zeKernelDestroy(test_functions[dispatch_id]));
@@ -121,32 +191,37 @@ class zeDriverMemoryAllocationStressTest
121191
kernel_copy_unit_t init_value_1_ = 0;
122192
kernel_copy_unit_t init_value_2_ = 0xAAAAAAAA; // 1010 1010
123193
kernel_copy_unit_t init_value_3_ = 0x55555555; // 0101 0101
194+
TestArguments_t test_arguments_;
195+
bool indirect_access = false;
196+
bool immediate = false;
124197
};
125198

126199
LZT_TEST_P(
127200
zeDriverMemoryAllocationStressTest,
128201
AlocateFullAvailableMemoryNumberOfKernelDispatchesDependsOnUserChunkAllocaitonRequest) {
129202

130-
TestArguments_t test_arguments = {
203+
test_arguments_ = {
131204
std::get<0>(GetParam()), // total memory size limit
132205
std::get<1>(GetParam()), // one allocation size limit
133206
std::get<2>(GetParam()), // dispatch multiplier
134-
std::get<3>(GetParam()) // memory type
207+
std::get<3>(GetParam()), // memory type
208+
std::get<4>(GetParam()), // immediate
209+
std::get<5>(GetParam()) // indirect access
135210
};
136211

137212
auto driver = lzt::get_default_driver();
138213
auto context = lzt::create_context(driver);
139214
auto device = lzt::get_default_device(driver);
140215

141216
ze_device_properties_t device_properties = lzt::get_device_properties(device);
142-
test_arguments.print_test_arguments(device_properties);
217+
test_arguments_.print_test_arguments(device_properties);
143218

144219
std::vector<ze_device_memory_properties_t> device_memory_properties =
145220
lzt::get_memory_properties(device);
146221

147222
const uint32_t used_vectors_in_test =
148-
test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE ? 4 : 3;
149-
uint32_t number_of_dispatches = to_u32(test_arguments.multiplier);
223+
test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE ? 4 : 3;
224+
uint32_t number_of_dispatches = to_u32(test_arguments_.multiplier);
150225
uint64_t number_of_all_allocations =
151226
used_vectors_in_test * number_of_dispatches;
152227
uint64_t test_single_allocation_memory_size = 0;
@@ -156,15 +231,16 @@ LZT_TEST_P(
156231
adjust_max_memory_allocation(
157232
driver, device_properties, device_memory_properties,
158233
test_total_memory_size, test_single_allocation_memory_size,
159-
number_of_all_allocations, test_arguments, relax_memory_capability);
234+
number_of_all_allocations, test_arguments_, relax_memory_capability);
160235

161236
if (number_of_all_allocations !=
162237
used_vectors_in_test * number_of_dispatches) {
163238

164239
LOG_INFO << "Need to limit dispatches from : " << number_of_dispatches
165240
<< " to: " << number_of_all_allocations / used_vectors_in_test;
166-
number_of_dispatches =
167-
to_u32(number_of_all_allocations / used_vectors_in_test); // bacause number_of_all_allocations can change;
241+
number_of_dispatches = to_u32(
242+
number_of_all_allocations /
243+
used_vectors_in_test); // bacause number_of_all_allocations can change;
168244
}
169245

170246
if (test_single_allocation_memory_size < kernel_copy_unit_size) {
@@ -197,10 +273,10 @@ LZT_TEST_P(
197273
for (uint32_t dispatch_id = 0; dispatch_id < number_of_dispatches;
198274
dispatch_id++) {
199275
kernel_copy_unit_t *input_allocation = allocate_memory<kernel_copy_unit_t>(
200-
context, device, test_arguments.memory_type,
276+
context, device, test_arguments_.memory_type,
201277
test_single_allocation_memory_size, relax_memory_capability);
202278
kernel_copy_unit_t *output_allocation = allocate_memory<kernel_copy_unit_t>(
203-
context, device, test_arguments.memory_type,
279+
context, device, test_arguments_.memory_type,
204280
test_single_allocation_memory_size, relax_memory_capability);
205281
if (input_allocation == nullptr || output_allocation == nullptr) {
206282
LOG_WARNING << "Cannot allocate "
@@ -217,18 +293,22 @@ LZT_TEST_P(
217293
}
218294
input_allocations.push_back(input_allocation);
219295
output_allocations.push_back(output_allocation);
220-
if (test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE) {
296+
if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) {
221297
std::vector<kernel_copy_unit_t> data_out(
222298
test_single_allocation_count * kernel_copy_unit_size, init_value_1_);
223299
data_out_vector.push_back(data_out);
224300
}
225301

226-
std::string kernel_name;
227-
kernel_name =
228-
"test_device_memory" +
229-
std::to_string((dispatch_id % number_of_kernels_in_module_) + 1) +
230-
"_unit_size" + std::to_string(kernel_copy_unit_size);
231-
test_kernel_names.push_back(kernel_name);
302+
std::stringstream kernel_name_ss;
303+
kernel_name_ss << "test_device_memory" +
304+
std::to_string(
305+
(dispatch_id % number_of_kernels_in_module_) + 1);
306+
if (test_arguments_.indirect_access) {
307+
kernel_name_ss << "_indirect";
308+
} else {
309+
kernel_name_ss << "_unit_size" << std::to_string(kernel_copy_unit_size);
310+
}
311+
test_kernel_names.push_back(kernel_name_ss.str());
232312
}
233313

234314
LOG_INFO << "call create module";
@@ -238,7 +318,7 @@ LZT_TEST_P(
238318
nullptr);
239319

240320
LOG_INFO << "call dispatch_kernels";
241-
dispatch_kernels(device, test_arguments.memory_type, module_handle,
321+
dispatch_kernels(device, test_arguments_.memory_type, module_handle,
242322
input_allocations, output_allocations, data_out_vector,
243323
test_kernel_names, number_of_dispatches,
244324
test_single_allocation_count, context);
@@ -247,7 +327,7 @@ LZT_TEST_P(
247327
bool memory_test_failure = false;
248328

249329
uint32_t counter = 0;
250-
if (test_arguments.memory_type == ZE_MEMORY_TYPE_DEVICE) {
330+
if (test_arguments_.memory_type == ZE_MEMORY_TYPE_DEVICE) {
251331
for (auto output : data_out_vector) {
252332
counter++;
253333
memory_test_failure |=
@@ -290,30 +370,34 @@ struct CombinationsTestNameSuffix {
290370
std::stringstream ss;
291371
ss << "dispatches_" << std::get<2>(info.param);
292372
ss << "_memoryType_" << print_allocation_type(std::get<3>(info.param));
373+
ss << (std::get<4>(info.param) ? "_immediate" : "");
374+
ss << (std::get<5>(info.param) ? "_indirectAccess" : "");
293375
return ss.str();
294376
}
295377
};
296378

297379
std::vector<uint32_t> multiple_dispatches = {1, 10, 1000, 5000, 10000};
298380

299-
INSTANTIATE_TEST_CASE_P(
381+
INSTANTIATE_TEST_SUITE_P(
300382
TestAllocationMemoryMatrixMaxMemory, zeDriverMemoryAllocationStressTest,
301383
::testing::Combine(::testing::Values(hundred_percent),
302384
::testing::Values(hundred_percent),
303385
::testing::ValuesIn(multiple_dispatches),
304386
::testing::Values(ZE_MEMORY_TYPE_HOST,
305387
ZE_MEMORY_TYPE_SHARED,
306-
ZE_MEMORY_TYPE_DEVICE)),
388+
ZE_MEMORY_TYPE_DEVICE),
389+
::testing::Bool(), ::testing::Bool()),
307390
CombinationsTestNameSuffix());
308391

309-
INSTANTIATE_TEST_CASE_P(
392+
INSTANTIATE_TEST_SUITE_P(
310393
TestAllocationMemoryMatrixMinMemory, zeDriverMemoryAllocationStressTest,
311394
::testing::Combine(::testing::Values(hundred_percent),
312395
::testing::Values(ten_percent),
313396
::testing::ValuesIn(multiple_dispatches),
314397
::testing::Values(ZE_MEMORY_TYPE_HOST,
315398
ZE_MEMORY_TYPE_SHARED,
316-
ZE_MEMORY_TYPE_DEVICE)),
399+
ZE_MEMORY_TYPE_DEVICE),
400+
::testing::Bool(), ::testing::Bool()),
317401
CombinationsTestNameSuffix());
318402

319403
} // namespace

0 commit comments

Comments
 (0)