2424
2525#include " free_list_gtest.hpp"
2626
27- #include < thrust/sort.h>
28-
2927#include " ../src/util.hpp"
3028
3129using namespace rocshmem ;
@@ -76,71 +74,95 @@ TYPED_TEST(FreeListTestFixture, pop_empty_device) {
7674 using T = typename TestFixture::T;
7775
7876 auto & h_input = this ->h_input ;
79- auto & d_input = this ->d_input ;
8077 auto & free_list = this ->free_list ;
78+ auto & hip_allocator_ = this ->hip_allocator_ ;
79+
80+ bool *is_empty {nullptr };
81+ hip_allocator_.allocate (reinterpret_cast <void **>(&is_empty),
82+ sizeof (bool ));
8183
84+ CHECK_HIP (hipMemset (is_empty, 0 , sizeof (bool )));
8285 FreeListProxy<Allocator, T> empty_list_proxy{};
8386 FreeList<T, Allocator>* empty_free_list{empty_list_proxy.get ()};
8487
85- thrust::device_vector<bool > is_empty (1 );
86- rocshmem::pop_empty<<<1 , 1 >>>(empty_free_list, is_empty.data ().get ());
88+ rocshmem::pop_empty<<<1 , 1 >>>(empty_free_list, is_empty);
8789 CHECK_HIP (hipDeviceSynchronize ());
8890 EXPECT_TRUE (is_empty[0 ]);
91+
92+ hip_allocator_.deallocate (is_empty);
8993}
9094
9195TYPED_TEST (FreeListTestFixture, push_host_pop_device) {
9296 using Allocator = typename TestFixture::Allocator;
9397 using T = typename TestFixture::T;
9498
9599 auto & h_input = this ->h_input ;
96- auto & d_input = this ->d_input ;
97100 auto & free_list = this ->free_list ;
101+ auto & hip_allocator_ = this ->hip_allocator_ ;
102+
103+ T *results {nullptr };
104+ bool *is_empty {nullptr };
105+ size_t size_bytes = sizeof (T) * h_input.size () + sizeof (bool );
106+ hip_allocator_.allocate (reinterpret_cast <void **>(&results),
107+ size_bytes);
98108
99- thrust::device_vector<T> results (h_input.size ());
109+ CHECK_HIP (hipMemset (results, 0 , size_bytes));
110+ is_empty = reinterpret_cast <bool *>(results + h_input.size ());
100111 const auto block_size = WF_SIZE;
101- rocshmem::pop_all<<<1 , block_size>>>(free_list, results.data ().get (),
102- results.size ());
112+ rocshmem::pop_all<<<1 , block_size>>>(free_list, results, h_input.size ());
103113 CHECK_HIP (hipDeviceSynchronize ());
104114
105- for (std::size_t i = 0 ; i < results .size (); i++) {
115+ for (std::size_t i = 0 ; i < h_input .size (); i++) {
106116 EXPECT_EQ (results[i], h_input[i]);
107117 }
108118
109- thrust::device_vector<bool > is_empty (1 );
110- rocshmem::pop_empty<<<1 , 1 >>>(free_list, is_empty.data ().get ());
119+ rocshmem::pop_empty<<<1 , 1 >>>(free_list, is_empty);
111120 CHECK_HIP (hipDeviceSynchronize ());
112121
113122 EXPECT_TRUE (is_empty[0 ]);
123+ hip_allocator_.deallocate (results);
114124}
115125
116126TYPED_TEST (FreeListTestFixture, push_host_concurrent_pop_device) {
117127 using Allocator = typename TestFixture::Allocator;
118128 using T = typename TestFixture::T;
119129
120130 auto & h_input = this ->h_input ;
121- auto & d_input = this ->d_input ;
122131 auto & free_list = this ->free_list ;
132+ auto & hip_allocator_ = this ->hip_allocator_ ;
123133
124- thrust::device_vector<T> results (h_input.size ());
134+ T *results {nullptr };
135+ bool *is_empty {nullptr };
136+ size_t size_bytes = sizeof (T) * h_input.size () + sizeof (bool );
137+ hip_allocator_.allocate (reinterpret_cast <void **>(&results),
138+ size_bytes);
139+
140+ CHECK_HIP (hipMemset (results, 0 , size_bytes));
141+ is_empty = reinterpret_cast <bool *>(results + h_input.size ());
125142 const auto num_blocks = h_input.size ();
126143 const auto block_size = WF_SIZE;
127- rocshmem::pop_all<<<num_blocks, block_size>>>(free_list, results. data (). get (),
128- results.size ());
144+ rocshmem::pop_all<<<num_blocks, block_size>>>(
145+ free_list, results, h_input .size ());
129146 CHECK_HIP (hipDeviceSynchronize ());
130147
148+ std::vector<T> h_results (h_input.size ());
149+ CHECK_HIP (hipMemcpy (h_results.data (), results, sizeof (T) * h_input.size (),
150+ hipMemcpyDeviceToHost));
151+
131152 // sort to guarantee that the ordering is correct
132- thrust ::sort (results .begin (), results .end ());
133- thrust ::sort (h_input .begin (), h_input .end ());
153+ std ::sort (h_input .begin (), h_input .end ());
154+ std ::sort (h_results .begin (), h_results .end ());
134155
135- for (std::size_t i = 0 ; i < results.size (); i++) {
136- EXPECT_EQ (results[i], h_input[i]);
156+
157+ for (std::size_t i = 0 ; i < h_results.size (); i++) {
158+ EXPECT_EQ (h_results[i], h_input[i]);
137159 }
138160
139- thrust::device_vector<bool > is_empty (1 );
140- rocshmem::pop_empty<<<1 , 1 >>>(free_list, is_empty.data ().get ());
161+ rocshmem::pop_empty<<<1 , 1 >>>(free_list, is_empty);
141162 CHECK_HIP (hipDeviceSynchronize ());
142163
143164 EXPECT_TRUE (is_empty[0 ]);
165+ hip_allocator_.deallocate (results);
144166}
145167
146168TYPED_TEST (FreeListTestFixture, push_host_pop_push_device) {
@@ -149,24 +171,39 @@ TYPED_TEST(FreeListTestFixture, push_host_pop_push_device) {
149171 using FreeListType = FreeList<T, Allocator>;
150172
151173 auto & h_input = this ->h_input ;
152- auto & d_input = this ->d_input ;
153174 auto & free_list = this ->free_list ;
154-
175+ auto & hip_allocator_ = this ->hip_allocator_ ;
176+
177+ T *results {nullptr };
178+ T *d_input {nullptr };
179+ bool *is_empty {nullptr };
180+ size_t size_bytes = 2 * sizeof (T) * h_input.size () + sizeof (bool );
181+ hip_allocator_.allocate (reinterpret_cast <void **>(&results),
182+ size_bytes);
183+
184+ CHECK_HIP (hipMemset (results, 0 , size_bytes));
185+ d_input = reinterpret_cast <T*>(results + h_input.size ());
186+ is_empty = reinterpret_cast <bool *>(d_input + h_input.size ());
155187 const auto block_size = WF_SIZE;
156188
157- rocshmem::pop_all<FreeListType, T><<<1 , block_size>>>(free_list, nullptr , 0 );
189+ CHECK_HIP (hipMemcpy (d_input, h_input.data (), sizeof (T) * h_input.size (),
190+ hipMemcpyHostToDevice));
191+
192+ rocshmem::pop_all<FreeListType, T><<<1 , block_size>>>(
193+ free_list, nullptr , h_input.size ());
158194 CHECK_HIP (hipDeviceSynchronize ());
159195
160- rocshmem::push_all<<<1 , 1 >>>(free_list, d_input. data (). get (), d_input .size ());
196+ rocshmem::push_all<<<1 , block_size >>>(free_list, d_input, h_input .size ());
161197 CHECK_HIP (hipDeviceSynchronize ());
162198
163- thrust::device_vector<T> results (d_input.size ());
164- rocshmem::pop_all<<<1 , block_size>>>(free_list, results.data ().get (),
165- results.size ());
199+ rocshmem::pop_all<<<1 , block_size>>>(free_list, results, h_input.size ());
200+ CHECK_HIP (hipDeviceSynchronize ());
166201
167- for (std::size_t i = 0 ; i < results .size (); i++) {
202+ for (std::size_t i = 0 ; i < h_input .size (); i++) {
168203 EXPECT_EQ (results[i], h_input[i]);
169204 }
205+
206+ hip_allocator_.deallocate (results);
170207}
171208
172209TYPED_TEST (FreeListTestFixture, push_host_pop_concurrent_push_device) {
@@ -175,30 +212,48 @@ TYPED_TEST(FreeListTestFixture, push_host_pop_concurrent_push_device) {
175212 using FreeListType = FreeList<T, Allocator>;
176213
177214 auto & h_input = this ->h_input ;
178- auto & d_input = this ->d_input ;
179215 auto & free_list = this ->free_list ;
216+ auto & hip_allocator_ = this ->hip_allocator_ ;
217+
218+ T *results {nullptr };
219+ T *d_input {nullptr };
220+ size_t size_bytes = 2 * sizeof (T) * h_input.size ();
221+ hip_allocator_.allocate (reinterpret_cast <void **>(&results),
222+ size_bytes);
180223
224+ CHECK_HIP (hipMemset (results, 0 , size_bytes));
225+ d_input = reinterpret_cast <T*>(results + h_input.size ());
181226 const auto block_size = WF_SIZE;
182- rocshmem::pop_all<FreeListType, T><<<1 , block_size>>>(free_list, nullptr , 0 );
227+
228+ CHECK_HIP (hipMemcpy (d_input, h_input.data (), sizeof (T) * h_input.size (),
229+ hipMemcpyHostToDevice));
230+
231+ rocshmem::pop_all<FreeListType, T><<<1 , block_size>>>(
232+ free_list, nullptr ,h_input.size ());
183233 CHECK_HIP (hipDeviceSynchronize ());
184234
185235 // Concurrently push all values
186236 const auto num_blocks = h_input.size ();
187237 rocshmem::push_all<<<num_blocks, block_size>>>(
188- free_list, d_input. data (). get (), d_input .size ());
238+ free_list, d_input, h_input .size ());
189239 CHECK_HIP (hipDeviceSynchronize ());
190240
191- thrust::device_vector<T> results (d_input.size ());
192- rocshmem::pop_all<<<1 , block_size>>>(free_list, results.data ().get (),
193- results.size ());
241+ rocshmem::pop_all<<<1 , block_size>>>(free_list, results, h_input.size ());
242+ CHECK_HIP (hipDeviceSynchronize ());
194243
195- // Sort to guarantee that the ordering is correct
196- thrust::sort (results. begin (), results. end ());
197- thrust::sort (h_input. begin (), h_input. end ( ));
244+ std::vector<T> h_results (h_input. size ());
245+ CHECK_HIP ( hipMemcpy (h_results. data (), results, sizeof (T) * h_input. size (),
246+ hipMemcpyDeviceToHost ));
198247
199- for (std::size_t i = 0 ; i < results.size (); i++) {
200- EXPECT_EQ (results[i], h_input[i]);
248+ // sort to guarantee that the ordering is correct
249+ std::sort (h_input.begin (), h_input.end ());
250+ std::sort (h_results.begin (), h_results.end ());
251+
252+ for (std::size_t i = 0 ; i < h_results.size (); i++) {
253+ EXPECT_EQ (h_results[i], h_input[i]);
201254 }
255+
256+ hip_allocator_.deallocate (results);
202257}
203258
204259TYPED_TEST (FreeListTestFixture, push_host_concurrent_pop_push_device) {
@@ -207,29 +262,48 @@ TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_push_device) {
207262 using FreeListType = FreeList<T, Allocator>;
208263
209264 auto & h_input = this ->h_input ;
210- auto & d_input = this ->d_input ;
211265 auto & free_list = this ->free_list ;
266+ auto & hip_allocator_ = this ->hip_allocator_ ;
267+
268+ T *results {nullptr };
269+ T *d_input {nullptr };
270+ size_t size_bytes = 2 * sizeof (T) * h_input.size ();
271+ hip_allocator_.allocate (reinterpret_cast <void **>(&results),
272+ size_bytes);
273+
274+ CHECK_HIP (hipMemset (results, 0 , size_bytes));
275+ d_input = reinterpret_cast <T*>(results + h_input.size ());
276+
277+ CHECK_HIP (hipMemcpy (d_input, h_input.data (), sizeof (T) * h_input.size (),
278+ hipMemcpyHostToDevice));
212279
213280 const auto block_size = WF_SIZE;
214- rocshmem::pop_all<FreeListType, T><<<1 , block_size>>>(free_list, nullptr , 0 );
281+ rocshmem::pop_all<FreeListType, T><<<1 , block_size>>>(
282+ free_list, nullptr , h_input.size ());
215283 CHECK_HIP (hipDeviceSynchronize ());
216284
217285 // Concurrently push all values
218286 const auto num_blocks = h_input.size ();
219287 rocshmem::push_all<<<num_blocks, block_size>>>(
220- free_list, d_input. data (). get (), d_input .size ());
288+ free_list, d_input, h_input .size ());
221289 CHECK_HIP (hipDeviceSynchronize ());
222290
223291 // Concurrently pop all values
224- thrust::device_vector<T> results (d_input. size ());
225- rocshmem::pop_all<<<num_blocks, block_size>>>( free_list, results. data (). get (),
226- results. size ());
292+ rocshmem::pop_all<<<num_blocks, block_size>>>(
293+ free_list, results, h_input. size ());
294+ CHECK_HIP ( hipDeviceSynchronize ());
227295
228- // Sort to guarantee that the ordering is correct
229- thrust::sort (results. begin (), results. end ());
230- thrust::sort (h_input. begin (), h_input. end ( ));
296+ std::vector<T> h_results (h_input. size ());
297+ CHECK_HIP ( hipMemcpy (h_results. data (), results, sizeof (T) * h_input. size (),
298+ hipMemcpyDeviceToHost ));
231299
232- for (std::size_t i = 0 ; i < results.size (); i++) {
233- EXPECT_EQ (results[i], h_input[i]);
300+ // sort to guarantee that the ordering is correct
301+ std::sort (h_input.begin (), h_input.end ());
302+ std::sort (h_results.begin (), h_results.end ());
303+
304+ for (std::size_t i = 0 ; i < h_results.size (); i++) {
305+ EXPECT_EQ (h_results[i], h_input[i]);
234306 }
307+
308+ hip_allocator_.deallocate (results);
235309}
0 commit comments