Skip to content

Commit 5c77ac8

Browse files
author
Hugh Delaney
committed
Add test for mem migration in multi dev ctx
Add tests which makes sure memory is implictly migrated across devices in a multi device context. Also add new device code for an inc kernel on a buffer.
1 parent ecbff5d commit 5c77ac8

File tree

4 files changed

+270
-0
lines changed

4 files changed

+270
-0
lines changed

test/conformance/device_code/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_3d.cpp)
135135
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm.cpp)
136136
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp)
137137
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp)
138+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/inc.cpp)
138139
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp)
139140
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/cpy_and_mult.cpp)
140141
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/cpy_and_mult_usm.cpp)

test/conformance/device_code/inc.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <sycl/sycl.hpp>
7+
8+
class inc;
9+
10+
int main() {
11+
uint32_t *ptr;
12+
sycl::buffer<uint32_t> buf{ptr, 1};
13+
sycl::queue{}.submit([&](sycl::handler &cgh) {
14+
sycl::accessor acc{buf, cgh};
15+
auto kernel = [acc](sycl::item<1> it) { acc[it]++; };
16+
cgh.parallel_for<inc>(sycl::range<1>{1}, kernel);
17+
});
18+
}

test/conformance/memory/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,8 @@ add_conformance_test_with_devices_environment(memory
1515
urMemImageGetInfo.cpp
1616
urMemRelease.cpp
1717
urMemRetain.cpp)
18+
19+
if (UR_DPCXX)
20+
add_conformance_test_with_kernels_environment(memory-migrate
21+
urMemBufferMigrateAcrossDevices.cpp)
22+
endif()
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
// Copyright (C) 2023 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
// Some tests to ensure implicit memory migration of buffers across devices
7+
// in the same context.
8+
9+
#include "uur/fixtures.h"
10+
11+
using T = uint32_t;
12+
13+
struct urMultiDeviceContextTest : uur::urPlatformTest {
14+
void SetUp() {
15+
uur::urPlatformTest::SetUp();
16+
ASSERT_SUCCESS(urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, nullptr,
17+
&num_devices));
18+
ASSERT_NE(num_devices, 0);
19+
devices = std::vector<ur_device_handle_t>(num_devices);
20+
ASSERT_SUCCESS(urDeviceGet(platform, UR_DEVICE_TYPE_ALL, num_devices,
21+
devices.data(), nullptr));
22+
ASSERT_SUCCESS(
23+
urContextCreate(num_devices, devices.data(), nullptr, &context));
24+
25+
queues = std::vector<ur_queue_handle_t>(num_devices);
26+
for (auto i = 0u; i < num_devices; ++i) {
27+
ASSERT_SUCCESS(
28+
urQueueCreate(context, devices[i], nullptr, &queues[i]));
29+
}
30+
}
31+
32+
void TearDown() {
33+
uur::urPlatformTest::TearDown();
34+
urContextRelease(context);
35+
for (auto i = 0u; i < num_devices; ++i) {
36+
urQueueRelease(queues[i]);
37+
urDeviceRelease(devices[i]);
38+
}
39+
}
40+
41+
uint32_t num_devices = 0;
42+
ur_context_handle_t context;
43+
std::vector<ur_device_handle_t> devices;
44+
std::vector<ur_queue_handle_t> queues;
45+
};
46+
47+
struct urMultiDeviceContextMemBufferTest : urMultiDeviceContextTest {
48+
void SetUp() {
49+
urMultiDeviceContextTest::SetUp();
50+
ASSERT_SUCCESS(urMemBufferCreate(context, 0 /*flags=*/,
51+
buffer_size_bytes,
52+
nullptr /*pProperties=*/, &buffer));
53+
54+
UUR_RETURN_ON_FATAL_FAILURE(
55+
uur::KernelsEnvironment::instance->LoadSource(program_name,
56+
il_binary));
57+
58+
programs = std::vector<ur_program_handle_t>(num_devices);
59+
kernels = std::vector<ur_kernel_handle_t>(num_devices);
60+
61+
const ur_program_properties_t properties = {
62+
UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr,
63+
static_cast<uint32_t>(metadatas.size()),
64+
metadatas.empty() ? nullptr : metadatas.data()};
65+
for (auto i = 0u; i < num_devices; ++i) {
66+
ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram(
67+
platform, context, devices[i], *il_binary, &properties,
68+
&programs[i]));
69+
ASSERT_SUCCESS(urProgramBuild(context, programs[i], nullptr));
70+
auto kernel_names =
71+
uur::KernelsEnvironment::instance->GetEntryPointNames(
72+
program_name);
73+
kernel_name = kernel_names[0];
74+
ASSERT_FALSE(kernel_name.empty());
75+
ASSERT_SUCCESS(
76+
urKernelCreate(programs[i], kernel_name.data(), &kernels[i]));
77+
}
78+
}
79+
80+
// Adds a kernel arg representing a sycl buffer constructed with a 1D range.
81+
void AddBuffer1DArg(ur_kernel_handle_t kernel, size_t current_arg_index,
82+
ur_mem_handle_t buffer) {
83+
ASSERT_SUCCESS(
84+
urKernelSetArgMemObj(kernel, current_arg_index, nullptr, buffer));
85+
86+
// SYCL device kernels have different interfaces depending on the
87+
// backend being used. Typically a kernel which takes a buffer argument
88+
// will take a pointer to the start of the buffer and a sycl::id param
89+
// which is a struct that encodes the accessor to the buffer. However
90+
// the AMD backend handles this differently and uses three separate
91+
// arguments for each of the three dimensions of the accessor.
92+
93+
ur_platform_backend_t backend;
94+
ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND,
95+
sizeof(backend), &backend, nullptr));
96+
if (backend == UR_PLATFORM_BACKEND_HIP) {
97+
// this emulates the three offset params for buffer accessor on AMD.
98+
size_t val = 0;
99+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,
100+
sizeof(size_t), nullptr, &val));
101+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 2,
102+
sizeof(size_t), nullptr, &val));
103+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 3,
104+
sizeof(size_t), nullptr, &val));
105+
current_arg_index += 4;
106+
} else {
107+
// This emulates the offset struct sycl adds for a 1D buffer accessor.
108+
struct {
109+
size_t offsets[1] = {0};
110+
} accessor;
111+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,
112+
sizeof(accessor), nullptr,
113+
&accessor));
114+
current_arg_index += 2;
115+
}
116+
}
117+
118+
void TearDown() {
119+
for (auto i = 0u; i < num_devices; ++i) {
120+
ASSERT_SUCCESS(urProgramRelease(programs[i]));
121+
}
122+
urMemRelease(buffer);
123+
urMultiDeviceContextTest::TearDown();
124+
}
125+
126+
size_t buffer_size = 4096;
127+
size_t buffer_size_bytes = 4096 * sizeof(T);
128+
ur_mem_handle_t buffer;
129+
130+
// Program stuff so we can launch kernels
131+
std::shared_ptr<std::vector<char>> il_binary;
132+
std::string program_name = "inc";
133+
std::string kernel_name;
134+
std::vector<ur_program_handle_t> programs;
135+
std::vector<ur_kernel_handle_t> kernels;
136+
std::vector<ur_program_metadata_t> metadatas{};
137+
};
138+
139+
TEST_F(urMultiDeviceContextMemBufferTest, WriteRead) {
140+
if (num_devices == 1) {
141+
return;
142+
}
143+
T fill_val = 42;
144+
std::vector<T> in_vec(buffer_size, fill_val);
145+
std::vector<T> out_vec(buffer_size, 0);
146+
147+
ASSERT_SUCCESS(urEnqueueMemBufferWrite(queues[0], buffer, false, 0,
148+
buffer_size_bytes, in_vec.data(), 0,
149+
nullptr, nullptr));
150+
151+
ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[1], buffer, false, 0,
152+
buffer_size_bytes, out_vec.data(), 0,
153+
nullptr, nullptr));
154+
for (auto &a : out_vec) {
155+
ASSERT_EQ(a, fill_val);
156+
}
157+
}
158+
159+
TEST_F(urMultiDeviceContextMemBufferTest, FillRead) {
160+
if (num_devices == 1) {
161+
return;
162+
}
163+
T fill_val = 42;
164+
std::vector<T> in_vec(buffer_size, fill_val);
165+
std::vector<T> out_vec(buffer_size);
166+
167+
ASSERT_SUCCESS(
168+
urEnqueueMemBufferFill(queues[0], buffer, &fill_val, sizeof(fill_val),
169+
0, buffer_size_bytes, 0, nullptr, nullptr));
170+
171+
ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[1], buffer, false, 0,
172+
buffer_size_bytes, out_vec.data(), 0,
173+
nullptr, nullptr));
174+
for (auto &a : out_vec) {
175+
ASSERT_EQ(a, fill_val);
176+
}
177+
}
178+
179+
TEST_F(urMultiDeviceContextMemBufferTest, WriteKernelRead) {
180+
if (num_devices == 1) {
181+
return;
182+
}
183+
184+
// Kernel to run on queues[1]
185+
AddBuffer1DArg(kernels[1], 0, buffer);
186+
187+
T fill_val = 42;
188+
std::vector<T> in_vec(buffer_size, fill_val);
189+
std::vector<T> out_vec(buffer_size);
190+
191+
ASSERT_SUCCESS(urEnqueueMemBufferWrite(queues[0], buffer, false, 0,
192+
buffer_size_bytes, in_vec.data(), 0,
193+
nullptr, nullptr));
194+
195+
size_t work_dims[3] = {buffer_size, 1, 1};
196+
size_t offset[3] = {0, 0, 0};
197+
198+
// Kernel increments the fill val by 1
199+
ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[1], kernels[1], 1 /*workDim=*/,
200+
offset, work_dims, nullptr, 0, nullptr,
201+
nullptr));
202+
203+
ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[0], buffer, false, 0,
204+
buffer_size_bytes, out_vec.data(), 0,
205+
nullptr, nullptr));
206+
for (auto &a : out_vec) {
207+
ASSERT_EQ(a, fill_val + 1);
208+
}
209+
}
210+
211+
TEST_F(urMultiDeviceContextMemBufferTest, WriteKernelKernelRead) {
212+
if (num_devices == 1) {
213+
return;
214+
}
215+
216+
AddBuffer1DArg(kernels[0], 0, buffer);
217+
AddBuffer1DArg(kernels[1], 0, buffer);
218+
219+
T fill_val = 42;
220+
std::vector<T> in_vec(buffer_size, fill_val);
221+
std::vector<T> out_vec(buffer_size);
222+
223+
ASSERT_SUCCESS(urEnqueueMemBufferWrite(queues[0], buffer, false, 0,
224+
buffer_size_bytes, in_vec.data(), 0,
225+
nullptr, nullptr));
226+
227+
size_t work_dims[3] = {buffer_size, 1, 1};
228+
size_t offset[3] = {0, 0, 0};
229+
230+
// Kernel increments the fill val by 1
231+
ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[1], kernels[1], 1 /*workDim=*/,
232+
offset, work_dims, nullptr, 0, nullptr,
233+
nullptr));
234+
235+
// Kernel increments the fill val by 1
236+
ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernels[0], 1 /*workDim=*/,
237+
offset, work_dims, nullptr, 0, nullptr,
238+
nullptr));
239+
240+
ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[1], buffer, false, 0,
241+
buffer_size_bytes, out_vec.data(), 0,
242+
nullptr, nullptr));
243+
for (auto &a : out_vec) {
244+
ASSERT_EQ(a, fill_val + 2);
245+
}
246+
}

0 commit comments

Comments
 (0)