Skip to content

Commit 3989409

Browse files
tests: remove rocthrust and rocprim dependencies from free_list unit tests (#231)
[ROCm/rocshmem commit: 0b973bc]
1 parent 1aa9679 commit 3989409

File tree

3 files changed

+127
-61
lines changed

3 files changed

+127
-61
lines changed

projects/rocshmem/tests/unit_tests/CMakeLists.txt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,6 @@ target_link_libraries(
101101
###############################################################################
102102
# GTEST DEPENDENCY
103103
###############################################################################
104-
# These packages are required for the unit tests
105-
find_package(rocprim REQUIRED PATHS /opt/rocm)
106-
find_package(rocthrust REQUIRED PATHS /opt/rocm)
107-
108104
include(FetchContent)
109105

110106
FetchContent_Declare(
@@ -126,5 +122,4 @@ target_link_libraries(
126122
PRIVATE
127123
gtest
128124
gtest_main
129-
roc::rocthrust
130125
)

projects/rocshmem/tests/unit_tests/free_list_gtest.cpp

Lines changed: 126 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424

2525
#include "free_list_gtest.hpp"
2626

27-
#include <thrust/sort.h>
28-
2927
#include "../src/util.hpp"
3028

3129
using namespace rocshmem;
@@ -76,71 +74,95 @@ TYPED_TEST(FreeListTestFixture, pop_empty_device) {
7674
using T = typename TestFixture::T;
7775

7876
auto& h_input = this->h_input;
79-
auto& d_input = this->d_input;
8077
auto& free_list = this->free_list;
78+
auto& hip_allocator_ = this->hip_allocator_;
79+
80+
bool *is_empty {nullptr};
81+
hip_allocator_.allocate(reinterpret_cast<void**>(&is_empty),
82+
sizeof(bool));
8183

84+
CHECK_HIP(hipMemset(is_empty, 0, sizeof(bool)));
8285
FreeListProxy<Allocator, T> empty_list_proxy{};
8386
FreeList<T, Allocator>* empty_free_list{empty_list_proxy.get()};
8487

85-
thrust::device_vector<bool> is_empty(1);
86-
rocshmem::pop_empty<<<1, 1>>>(empty_free_list, is_empty.data().get());
88+
rocshmem::pop_empty<<<1, 1>>>(empty_free_list, is_empty);
8789
CHECK_HIP(hipDeviceSynchronize());
8890
EXPECT_TRUE(is_empty[0]);
91+
92+
hip_allocator_.deallocate(is_empty);
8993
}
9094

9195
TYPED_TEST(FreeListTestFixture, push_host_pop_device) {
9296
using Allocator = typename TestFixture::Allocator;
9397
using T = typename TestFixture::T;
9498

9599
auto& h_input = this->h_input;
96-
auto& d_input = this->d_input;
97100
auto& free_list = this->free_list;
101+
auto& hip_allocator_ = this->hip_allocator_;
102+
103+
T *results {nullptr};
104+
bool *is_empty {nullptr};
105+
size_t size_bytes = sizeof(T) * h_input.size() + sizeof(bool);
106+
hip_allocator_.allocate(reinterpret_cast<void**>(&results),
107+
size_bytes);
98108

99-
thrust::device_vector<T> results(h_input.size());
109+
CHECK_HIP(hipMemset(results, 0, size_bytes));
110+
is_empty = reinterpret_cast<bool*>(results + h_input.size());
100111
const auto block_size = WF_SIZE;
101-
rocshmem::pop_all<<<1, block_size>>>(free_list, results.data().get(),
102-
results.size());
112+
rocshmem::pop_all<<<1, block_size>>>(free_list, results, h_input.size());
103113
CHECK_HIP(hipDeviceSynchronize());
104114

105-
for (std::size_t i = 0; i < results.size(); i++) {
115+
for (std::size_t i = 0; i < h_input.size(); i++) {
106116
EXPECT_EQ(results[i], h_input[i]);
107117
}
108118

109-
thrust::device_vector<bool> is_empty(1);
110-
rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty.data().get());
119+
rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty);
111120
CHECK_HIP(hipDeviceSynchronize());
112121

113122
EXPECT_TRUE(is_empty[0]);
123+
hip_allocator_.deallocate(results);
114124
}
115125

116126
TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_device) {
117127
using Allocator = typename TestFixture::Allocator;
118128
using T = typename TestFixture::T;
119129

120130
auto& h_input = this->h_input;
121-
auto& d_input = this->d_input;
122131
auto& free_list = this->free_list;
132+
auto& hip_allocator_ = this->hip_allocator_;
123133

124-
thrust::device_vector<T> results(h_input.size());
134+
T *results {nullptr};
135+
bool *is_empty {nullptr};
136+
size_t size_bytes = sizeof(T) * h_input.size() + sizeof(bool);
137+
hip_allocator_.allocate(reinterpret_cast<void**>(&results),
138+
size_bytes);
139+
140+
CHECK_HIP(hipMemset(results, 0, size_bytes));
141+
is_empty = reinterpret_cast<bool*>(results + h_input.size());
125142
const auto num_blocks = h_input.size();
126143
const auto block_size = WF_SIZE;
127-
rocshmem::pop_all<<<num_blocks, block_size>>>(free_list, results.data().get(),
128-
results.size());
144+
rocshmem::pop_all<<<num_blocks, block_size>>>(
145+
free_list, results, h_input.size());
129146
CHECK_HIP(hipDeviceSynchronize());
130147

148+
std::vector<T> h_results(h_input.size());
149+
CHECK_HIP(hipMemcpy(h_results.data(), results, sizeof(T) * h_input.size(),
150+
hipMemcpyDeviceToHost));
151+
131152
// sort to guarantee that the ordering is correct
132-
thrust::sort(results.begin(), results.end());
133-
thrust::sort(h_input.begin(), h_input.end());
153+
std::sort(h_input.begin(), h_input.end());
154+
std::sort(h_results.begin(), h_results.end());
134155

135-
for (std::size_t i = 0; i < results.size(); i++) {
136-
EXPECT_EQ(results[i], h_input[i]);
156+
157+
for (std::size_t i = 0; i < h_results.size(); i++) {
158+
EXPECT_EQ(h_results[i], h_input[i]);
137159
}
138160

139-
thrust::device_vector<bool> is_empty(1);
140-
rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty.data().get());
161+
rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty);
141162
CHECK_HIP(hipDeviceSynchronize());
142163

143164
EXPECT_TRUE(is_empty[0]);
165+
hip_allocator_.deallocate(results);
144166
}
145167

146168
TYPED_TEST(FreeListTestFixture, push_host_pop_push_device) {
@@ -149,24 +171,39 @@ TYPED_TEST(FreeListTestFixture, push_host_pop_push_device) {
149171
using FreeListType = FreeList<T, Allocator>;
150172

151173
auto& h_input = this->h_input;
152-
auto& d_input = this->d_input;
153174
auto& free_list = this->free_list;
154-
175+
auto& hip_allocator_ = this->hip_allocator_;
176+
177+
T *results {nullptr};
178+
T *d_input {nullptr};
179+
bool *is_empty {nullptr};
180+
size_t size_bytes = 2 * sizeof(T) * h_input.size() + sizeof(bool);
181+
hip_allocator_.allocate(reinterpret_cast<void**>(&results),
182+
size_bytes);
183+
184+
CHECK_HIP(hipMemset(results, 0, size_bytes));
185+
d_input = reinterpret_cast<T*>(results + h_input.size());
186+
is_empty = reinterpret_cast<bool*>(d_input + h_input.size());
155187
const auto block_size = WF_SIZE;
156188

157-
rocshmem::pop_all<FreeListType, T><<<1, block_size>>>(free_list, nullptr, 0);
189+
CHECK_HIP(hipMemcpy(d_input, h_input.data(), sizeof(T) * h_input.size(),
190+
hipMemcpyHostToDevice));
191+
192+
rocshmem::pop_all<FreeListType, T><<<1, block_size>>>(
193+
free_list, nullptr, h_input.size());
158194
CHECK_HIP(hipDeviceSynchronize());
159195

160-
rocshmem::push_all<<<1, 1>>>(free_list, d_input.data().get(), d_input.size());
196+
rocshmem::push_all<<<1, block_size>>>(free_list, d_input, h_input.size());
161197
CHECK_HIP(hipDeviceSynchronize());
162198

163-
thrust::device_vector<T> results(d_input.size());
164-
rocshmem::pop_all<<<1, block_size>>>(free_list, results.data().get(),
165-
results.size());
199+
rocshmem::pop_all<<<1, block_size>>>(free_list, results, h_input.size());
200+
CHECK_HIP(hipDeviceSynchronize());
166201

167-
for (std::size_t i = 0; i < results.size(); i++) {
202+
for (std::size_t i = 0; i < h_input.size(); i++) {
168203
EXPECT_EQ(results[i], h_input[i]);
169204
}
205+
206+
hip_allocator_.deallocate(results);
170207
}
171208

172209
TYPED_TEST(FreeListTestFixture, push_host_pop_concurrent_push_device) {
@@ -175,30 +212,48 @@ TYPED_TEST(FreeListTestFixture, push_host_pop_concurrent_push_device) {
175212
using FreeListType = FreeList<T, Allocator>;
176213

177214
auto& h_input = this->h_input;
178-
auto& d_input = this->d_input;
179215
auto& free_list = this->free_list;
216+
auto& hip_allocator_ = this->hip_allocator_;
217+
218+
T *results {nullptr};
219+
T *d_input {nullptr};
220+
size_t size_bytes = 2 * sizeof(T) * h_input.size();
221+
hip_allocator_.allocate(reinterpret_cast<void**>(&results),
222+
size_bytes);
180223

224+
CHECK_HIP(hipMemset(results, 0, size_bytes));
225+
d_input = reinterpret_cast<T*>(results + h_input.size());
181226
const auto block_size = WF_SIZE;
182-
rocshmem::pop_all<FreeListType, T><<<1, block_size>>>(free_list, nullptr, 0);
227+
228+
CHECK_HIP(hipMemcpy(d_input, h_input.data(), sizeof(T) * h_input.size(),
229+
hipMemcpyHostToDevice));
230+
231+
rocshmem::pop_all<FreeListType, T><<<1, block_size>>>(
232+
free_list, nullptr,h_input.size());
183233
CHECK_HIP(hipDeviceSynchronize());
184234

185235
// Concurrently push all values
186236
const auto num_blocks = h_input.size();
187237
rocshmem::push_all<<<num_blocks, block_size>>>(
188-
free_list, d_input.data().get(), d_input.size());
238+
free_list, d_input, h_input.size());
189239
CHECK_HIP(hipDeviceSynchronize());
190240

191-
thrust::device_vector<T> results(d_input.size());
192-
rocshmem::pop_all<<<1, block_size>>>(free_list, results.data().get(),
193-
results.size());
241+
rocshmem::pop_all<<<1, block_size>>>(free_list, results, h_input.size());
242+
CHECK_HIP(hipDeviceSynchronize());
194243

195-
// Sort to guarantee that the ordering is correct
196-
thrust::sort(results.begin(), results.end());
197-
thrust::sort(h_input.begin(), h_input.end());
244+
std::vector<T> h_results(h_input.size());
245+
CHECK_HIP(hipMemcpy(h_results.data(), results, sizeof(T) * h_input.size(),
246+
hipMemcpyDeviceToHost));
198247

199-
for (std::size_t i = 0; i < results.size(); i++) {
200-
EXPECT_EQ(results[i], h_input[i]);
248+
// sort to guarantee that the ordering is correct
249+
std::sort(h_input.begin(), h_input.end());
250+
std::sort(h_results.begin(), h_results.end());
251+
252+
for (std::size_t i = 0; i < h_results.size(); i++) {
253+
EXPECT_EQ(h_results[i], h_input[i]);
201254
}
255+
256+
hip_allocator_.deallocate(results);
202257
}
203258

204259
TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_push_device) {
@@ -207,29 +262,48 @@ TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_push_device) {
207262
using FreeListType = FreeList<T, Allocator>;
208263

209264
auto& h_input = this->h_input;
210-
auto& d_input = this->d_input;
211265
auto& free_list = this->free_list;
266+
auto& hip_allocator_ = this->hip_allocator_;
267+
268+
T *results {nullptr};
269+
T *d_input {nullptr};
270+
size_t size_bytes = 2 * sizeof(T) * h_input.size();
271+
hip_allocator_.allocate(reinterpret_cast<void**>(&results),
272+
size_bytes);
273+
274+
CHECK_HIP(hipMemset(results, 0, size_bytes));
275+
d_input = reinterpret_cast<T*>(results + h_input.size());
276+
277+
CHECK_HIP(hipMemcpy(d_input, h_input.data(), sizeof(T) * h_input.size(),
278+
hipMemcpyHostToDevice));
212279

213280
const auto block_size = WF_SIZE;
214-
rocshmem::pop_all<FreeListType, T><<<1, block_size>>>(free_list, nullptr, 0);
281+
rocshmem::pop_all<FreeListType, T><<<1, block_size>>>(
282+
free_list, nullptr, h_input.size());
215283
CHECK_HIP(hipDeviceSynchronize());
216284

217285
// Concurrently push all values
218286
const auto num_blocks = h_input.size();
219287
rocshmem::push_all<<<num_blocks, block_size>>>(
220-
free_list, d_input.data().get(), d_input.size());
288+
free_list, d_input, h_input.size());
221289
CHECK_HIP(hipDeviceSynchronize());
222290

223291
// Concurrently pop all values
224-
thrust::device_vector<T> results(d_input.size());
225-
rocshmem::pop_all<<<num_blocks, block_size>>>(free_list, results.data().get(),
226-
results.size());
292+
rocshmem::pop_all<<<num_blocks, block_size>>>(
293+
free_list, results, h_input.size());
294+
CHECK_HIP(hipDeviceSynchronize());
227295

228-
// Sort to guarantee that the ordering is correct
229-
thrust::sort(results.begin(), results.end());
230-
thrust::sort(h_input.begin(), h_input.end());
296+
std::vector<T> h_results(h_input.size());
297+
CHECK_HIP(hipMemcpy(h_results.data(), results, sizeof(T) * h_input.size(),
298+
hipMemcpyDeviceToHost));
231299

232-
for (std::size_t i = 0; i < results.size(); i++) {
233-
EXPECT_EQ(results[i], h_input[i]);
300+
// sort to guarantee that the ordering is correct
301+
std::sort(h_input.begin(), h_input.end());
302+
std::sort(h_results.begin(), h_results.end());
303+
304+
for (std::size_t i = 0; i < h_results.size(); i++) {
305+
EXPECT_EQ(h_results[i], h_input[i]);
234306
}
307+
308+
hip_allocator_.deallocate(results);
235309
}

projects/rocshmem/tests/unit_tests/free_list_gtest.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
#ifndef ROCSHMEM_FREE_LIST_GTEST_HPP
2626
#define ROCSHMEM_FREE_LIST_GTEST_HPP
2727

28-
#include <thrust/device_vector.h>
29-
3028
#include <numeric>
3129
#include <vector>
3230

@@ -41,7 +39,6 @@ class FreeListTestFixture : public ::testing::Test {
4139
public:
4240
FreeListTestFixture() : h_input(num_elements) {
4341
std::iota(h_input.begin(), h_input.end(), T{1});
44-
d_input = h_input;
4542
free_list = list_proxy.get();
4643
}
4744

@@ -52,9 +49,9 @@ class FreeListTestFixture : public ::testing::Test {
5249

5350
using T = ValueType;
5451
using Allocator = HIPAllocator;
52+
Allocator hip_allocator_ {};
5553
const std::size_t num_elements{32};
5654
std::vector<T> h_input{};
57-
thrust::device_vector<T> d_input{};
5855

5956
FreeListProxy<Allocator, T> list_proxy{};
6057
FreeList<T, Allocator>* free_list{};

0 commit comments

Comments
 (0)