Skip to content

Commit 7a252ef

Browse files
committed
tests: Add gdrcopy wrapper library unit test
This test is only enabled (at compile time) for CUDA builds. It will be skipped (at runtime) if GDRCopy or CUDA initialization fails. Signed-off-by: Eric Raut <eraut@amazon.com>
1 parent e500a85 commit 7a252ef

File tree

3 files changed

+187
-1
lines changed

3 files changed

+187
-1
lines changed

tests/unit/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ histogram_binner
1313
dlopen_c_test
1414
param
1515
platform_manager
16+
gdrcopy

tests/unit/Makefile.am

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@ noinst_PROGRAMS += aws_platform_mapper
3333
endif
3434

3535
if !ENABLE_NEURON
36-
if WANT_PLATFORM_AWS
3736
AM_LDFLAGS = $(CUDA_LDFLAGS)
3837
AM_CPPFLAGS += $(CUDA_CPPFLAGS)
3938
LDADD += $(CUDA_LIBS)
39+
noinst_PROGRAMS += gdrcopy
40+
gdrcopy_SOURCES = gdrcopy.cpp
41+
if WANT_PLATFORM_AWS
4042
noinst_PROGRAMS += region_based_tuner
4143
region_based_tuner_SOURCES = region_based_tuner.cpp
4244
endif

tests/unit/gdrcopy.cpp

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/*
2+
* Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All rights reserved.
3+
*/
4+
5+
#include "config.h"
6+
7+
#include <cuda_runtime_api.h>
8+
#include <stdexcept>
9+
10+
#include "nccl_ofi.h"
11+
#include "test-logger.h"
12+
#include "nccl_ofi_cuda.h"
13+
#include "nccl_ofi_gdrcopy.h"
14+
15+
/*
 * Round-trip test for a "large" (2 MiB) device buffer.
 *
 * Allocates GPU memory, registers it with the gdrcopy context, writes a
 * known pattern from a host buffer, reads it back into a second host buffer
 * and compares the two.
 *
 * Returns 0 on success, 1 if an allocation, registration, copy or cleanup
 * call fails, and 2 if the data read back differs from what was written.
 */
static int test_large_buffer(nccl_ofi_gdrcopy_ctx &gdr)
{
	/* "Large" buffer */
	constexpr size_t buff_size = 2*1024*1024UL; /* 2M */

	/* Allocate a buffer */
	void *ptr = nullptr;
	int ret = nccl_net_ofi_gpu_mem_alloc(&ptr, buff_size);
	if (ret != 0) {
		fprintf(stderr, "nccl_net_ofi_gpu_mem_alloc() failed: %d\n", ret);
		return 1;
	}

	/* Registration */
	nccl_ofi_device_copy::RegHandle *handle = nullptr;
	ret = gdr.register_region(ptr, buff_size, handle);
	if (ret != 0) {
		fprintf(stderr, "register_region() failed: %d\n", ret);
		/* Best effort: do not leak the device allocation */
		(void)nccl_net_ofi_gpu_mem_free(ptr);
		return 1;
	}

	/*
	 * Copy and verification are grouped so that every outcome falls
	 * through to the single cleanup sequence below.
	 */
	auto copy_and_verify = [&]() -> int {
		/*
		 * Staging buffers live on the heap: two 2 MiB arrays as
		 * automatic variables would consume 4 MiB of stack.
		 */
		std::unique_ptr<char[]> host_buff = std::make_unique<char[]>(buff_size);
		std::unique_ptr<char[]> dst_host_buff = std::make_unique<char[]>(buff_size);

		/* Host buffer for comparison */
		memset(host_buff.get(), 'A', buff_size);

		int rc = gdr.copy_to_device(host_buff.get(), *handle, 0, buff_size);
		if (rc != 0) {
			fprintf(stderr, "copy_to_device() failed: %d\n", rc);
			return 1;
		}

		rc = gdr.copy_from_device(*handle, 0, dst_host_buff.get(), buff_size);
		if (rc != 0) {
			fprintf(stderr, "copy_from_device() failed: %d\n", rc);
			return 1;
		}

		/* Verify match */
		if (memcmp(host_buff.get(), dst_host_buff.get(), buff_size) != 0) {
			fprintf(stderr, "Buffers were not equal!\n");
			return 2;
		}

		return 0;
	};

	int status = copy_and_verify();

	/* Cleanup always runs; an earlier failure code takes precedence. */
	ret = gdr.deregister_region(handle);
	if (ret != 0) {
		fprintf(stderr, "deregister_region() failed: %d\n", ret);
		if (status == 0) {
			status = 1;
		}
	}

	ret = nccl_net_ofi_gpu_mem_free(ptr);
	if (ret != 0) {
		fprintf(stderr, "nccl_net_ofi_gpu_mem_free() failed: %d\n", ret);
		if (status == 0) {
			status = 1;
		}
	}

	return status;
}
74+
75+
/*
 * Byte-granularity test for a "small" (10 byte) device buffer.
 *
 * Allocates GPU memory, registers it with the gdrcopy context, writes each
 * byte individually (value == index), then reads each byte back one at a
 * time and checks it.
 *
 * Returns 0 on success, 1 if an allocation, registration, copy or cleanup
 * call fails, and 2 if any byte read back differs from the value written.
 */
static int test_small_buffer(nccl_ofi_gdrcopy_ctx &gdr)
{
	/* "Small" buffer */
	constexpr size_t buff_size = 10;

	/* Allocate a buffer */
	void *ptr = nullptr;
	int ret = nccl_net_ofi_gpu_mem_alloc(&ptr, buff_size);
	if (ret != 0) {
		fprintf(stderr, "nccl_net_ofi_gpu_mem_alloc() failed: %d\n", ret);
		return 1;
	}

	/* Registration */
	nccl_ofi_device_copy::RegHandle *handle = nullptr;
	ret = gdr.register_region(ptr, buff_size, handle);
	if (ret != 0) {
		fprintf(stderr, "register_region() failed: %d\n", ret);
		/* Best effort: do not leak the device allocation */
		(void)nccl_net_ofi_gpu_mem_free(ptr);
		return 1;
	}

	/*
	 * Copy and verification are grouped so that every outcome falls
	 * through to the single cleanup sequence below.
	 */
	auto copy_and_verify = [&]() -> int {
		/* Write one byte per call, at increasing offsets */
		for (size_t i = 0; i < buff_size; ++i) {
			uint8_t v = static_cast<uint8_t>(i);
			int rc = gdr.copy_to_device(&v, *handle, i, 1);
			if (rc != 0) {
				fprintf(stderr, "copy_to_device() failed: %d\n", rc);
				return 1;
			}
		}

		/* Verification */
		bool mismatch = false;
		for (size_t i = 0; i < buff_size; ++i) {
			uint8_t v = 0;
			int rc = gdr.copy_from_device(*handle, i, &v, 1);
			if (rc != 0) {
				fprintf(stderr, "copy_from_device() failed: %d\n", rc);
				return 1;
			}

			if (v != i) {
				fprintf(stderr, "Value mismatch (idx %zu); expected %zu, got %hhu\n",
					i, i, v);
				/* Keep going so every bad index is reported */
				mismatch = true;
			}
		}

		/* A data mismatch must fail the test, not just log */
		return mismatch ? 2 : 0;
	};

	int status = copy_and_verify();

	/* Cleanup always runs; an earlier failure code takes precedence. */
	ret = gdr.deregister_region(handle);
	if (ret != 0) {
		fprintf(stderr, "deregister_region() failed: %d\n", ret);
		if (status == 0) {
			status = 1;
		}
	}

	ret = nccl_net_ofi_gpu_mem_free(ptr);
	if (ret != 0) {
		fprintf(stderr, "nccl_net_ofi_gpu_mem_free() failed: %d\n", ret);
		if (status == 0) {
			status = 1;
		}
	}

	return status;
}
134+
135+
/* Value to return to Autotools to skip the test */
136+
#define SKIP_TEST 77
137+
138+
int main(int argc, char *argv[])
139+
{
140+
ofi_log_function = logger;
141+
142+
/* Initialize CUDA support */
143+
int ret = nccl_net_ofi_gpu_init();
144+
if (ret != 0) {
145+
printf("nccl_net_ofi_gpu_init() failed: %d. Skipping test.\n", ret);
146+
return SKIP_TEST;
147+
}
148+
149+
/* Using GPU 0 for simplicity. This also serves to initialize the context. */
150+
cudaError_t cudaErr = cudaSetDevice(0);
151+
if (cudaErr != cudaSuccess) {
152+
fprintf(stderr, "cudaSetDevice() failed: %s\n", cudaGetErrorString(cudaErr));
153+
return 1;
154+
}
155+
156+
std::unique_ptr<nccl_ofi_gdrcopy_ctx> gdr;
157+
158+
try {
159+
gdr = std::make_unique<nccl_ofi_gdrcopy_ctx>();
160+
} catch (std::runtime_error &e) {
161+
printf("Creating gdrcopy context failed: %s. Skipping test.\n",
162+
e.what());
163+
return SKIP_TEST;
164+
}
165+
166+
printf("Supports forced PCIe copy: %d\n", gdr->forced_pcie_copy());
167+
168+
printf("Testing large buffer\n");
169+
ret = test_large_buffer(*gdr);
170+
if (ret != 0) {
171+
return ret;
172+
}
173+
174+
printf("Testing small buffer\n");
175+
ret = test_small_buffer(*gdr);
176+
if (ret != 0) {
177+
return ret;
178+
}
179+
180+
printf("Test completed successfully\n");
181+
182+
return 0;
183+
}

0 commit comments

Comments
 (0)