Skip to content

Commit e594b05

Browse files
authored
Use cuda::std::fill for tests and benchmarks (#7925)
1 parent 578d64b commit e594b05

File tree

22 files changed: 54 additions and 71 deletions.

libcudacxx/benchmarks/bench/all_of/basic.cu

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include <thrust/device_vector.h>
12-
#include <thrust/fill.h>
1312

1413
#include <cuda/memory_pool>
1514
#include <cuda/std/__pstl_algorithm>
@@ -42,8 +41,8 @@ static void basic(nvbench::state& state, nvbench::type_list<T>)
4241
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
4342

4443
thrust::device_vector<T> dinput(elements, thrust::no_init);
45-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
46-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
44+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
45+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
4746

4847
state.add_global_memory_reads<T>(mismatch_point + 1);
4948
state.add_global_memory_writes<size_t>(1);

libcudacxx/benchmarks/bench/any_of/basic.cu

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include <thrust/device_vector.h>
12-
#include <thrust/fill.h>
1312

1413
#include <cuda/memory_pool>
1514
#include <cuda/std/__pstl_algorithm>
@@ -42,8 +41,8 @@ static void basic(nvbench::state& state, nvbench::type_list<T>)
4241
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
4342

4443
thrust::device_vector<T> dinput(elements, thrust::no_init);
45-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
46-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
44+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
45+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
4746

4847
state.add_global_memory_reads<T>(mismatch_point + 1);
4948
state.add_global_memory_writes<size_t>(1);

libcudacxx/benchmarks/bench/equal/basic.cu

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include <thrust/device_vector.h>
12-
#include <thrust/fill.h>
1312

1413
#include <cuda/memory_pool>
1514
#include <cuda/std/__pstl_algorithm>
@@ -27,8 +26,8 @@ static void range_iter(nvbench::state& state, nvbench::type_list<T>)
2726
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
2827

2928
thrust::device_vector<T> dinput(elements, thrust::no_init);
30-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
31-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
29+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
30+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
3231

3332
state.add_global_memory_reads<T>(mismatch_point + 1);
3433
state.add_global_memory_writes<size_t>(1);
@@ -57,8 +56,8 @@ static void range_range(nvbench::state& state, nvbench::type_list<T>)
5756
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
5857

5958
thrust::device_vector<T> dinput(elements, thrust::no_init);
60-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
61-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
59+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
60+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
6261

6362
state.add_global_memory_reads<T>(mismatch_point + 1);
6463
state.add_global_memory_writes<size_t>(1);

libcudacxx/benchmarks/bench/find/basic.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,8 @@ static void basic(nvbench::state& state, nvbench::type_list<T>)
2626
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
2727

2828
thrust::device_vector<T> dinput(elements, thrust::no_init);
29-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
30-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
29+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
30+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
3131

3232
state.add_global_memory_reads<T>(mismatch_point + 1);
3333
state.add_global_memory_writes<size_t>(1);

libcudacxx/benchmarks/bench/find_if/basic.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@ static void basic(nvbench::state& state, nvbench::type_list<T>)
2727
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
2828

2929
thrust::device_vector<T> dinput(elements, thrust::no_init);
30-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
31-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
30+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
31+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
3232

3333
state.add_global_memory_reads<T>(mismatch_point + 1);
3434
state.add_global_memory_writes<size_t>(1);

libcudacxx/benchmarks/bench/find_if_not/basic.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,8 @@ static void basic(nvbench::state& state, nvbench::type_list<T>)
4141
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
4242

4343
thrust::device_vector<T> dinput(elements, thrust::no_init);
44-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
45-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
44+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
45+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
4646

4747
state.add_global_memory_reads<T>(mismatch_point + 1);
4848
state.add_global_memory_writes<size_t>(1);

libcudacxx/benchmarks/bench/mismatch/basic.cu

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include <thrust/device_vector.h>
12-
#include <thrust/fill.h>
1312

1413
#include <cuda/memory_pool>
1514
#include <cuda/std/__pstl_algorithm>
@@ -27,8 +26,8 @@ static void range_iter(nvbench::state& state, nvbench::type_list<T>)
2726
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
2827

2928
thrust::device_vector<T> dinput(elements, thrust::no_init);
30-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
31-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
29+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
30+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
3231

3332
state.add_global_memory_reads<T>(mismatch_point + 1);
3433
state.add_global_memory_writes<size_t>(1);
@@ -57,8 +56,8 @@ static void range_range(nvbench::state& state, nvbench::type_list<T>)
5756
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
5857

5958
thrust::device_vector<T> dinput(elements, thrust::no_init);
60-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
61-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
59+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
60+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
6261

6362
state.add_global_memory_reads<T>(mismatch_point + 1);
6463
state.add_global_memory_writes<size_t>(1);

libcudacxx/benchmarks/bench/none_of/basic.cu

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include <thrust/device_vector.h>
12-
#include <thrust/fill.h>
1312

1413
#include <cuda/memory_pool>
1514
#include <cuda/std/__pstl_algorithm>
@@ -42,8 +41,8 @@ static void basic(nvbench::state& state, nvbench::type_list<T>)
4241
const auto mismatch_point = static_cast<std::size_t>(elements * common_prefix);
4342

4443
thrust::device_vector<T> dinput(elements, thrust::no_init);
45-
thrust::fill(dinput.begin(), dinput.begin() + mismatch_point, T{0});
46-
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
44+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin(), dinput.begin() + mismatch_point, T{0});
45+
cuda::std::fill(cuda::execution::__cub_par_unseq, dinput.begin() + mismatch_point, dinput.end(), val);
4746

4847
state.add_global_memory_reads<T>(mismatch_point + 1);
4948
state.add_global_memory_writes<size_t>(1);

libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/pstl_copy.cu

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
#include <thrust/device_vector.h>
1818
#include <thrust/equal.h>
1919
#include <thrust/execution_policy.h>
20-
#include <thrust/fill.h>
2120
#include <thrust/sequence.h>
2221

2322
#include <cuda/cmath>
@@ -35,13 +34,13 @@ inline constexpr int size = 1000;
3534
template <class Policy>
3635
void test_copy(const Policy& policy, const thrust::device_vector<int>& input, thrust::device_vector<int>& output)
3736
{
38-
thrust::fill(output.begin(), output.end(), -1);
37+
cuda::std::fill(policy, output.begin(), output.end(), -1);
3938
{ // With non-contiguous iterator
4039
cuda::std::copy(policy, cuda::counting_iterator{0}, cuda::counting_iterator{size}, output.begin());
4140
CHECK(thrust::equal(output.begin(), output.end(), cuda::counting_iterator{0}));
4241
}
4342

44-
thrust::fill(output.begin(), output.end(), -1);
43+
cuda::std::fill(policy, output.begin(), output.end(), -1);
4544
{ // With contiguous iterator
4645
cuda::std::copy(policy, input.begin(), input.end(), output.begin());
4746
CHECK(thrust::equal(output.begin(), output.end(), cuda::counting_iterator{0}));

libcudacxx/test/libcudacxx/std/algorithms/alg.modifying/alg.copy/pstl_copy_if.cu

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
#include <thrust/device_vector.h>
1818
#include <thrust/equal.h>
1919
#include <thrust/execution_policy.h>
20-
#include <thrust/fill.h>
2120
#include <thrust/sequence.h>
2221

2322
#include <cuda/cmath>
@@ -60,22 +59,22 @@ void test_copy_if(const Policy& policy, const thrust::device_vector<int>& input,
6059

6160
{ // Same input output type
6261
{ // With random_access iterator
63-
thrust::fill(output.begin(), output.end(), -1);
62+
cuda::std::fill(policy, output.begin(), output.end(), -1);
6463
const auto res = cuda::std::copy_if(
6564
policy, cuda::counting_iterator{0}, cuda::counting_iterator{size}, output.begin(), is_even{});
6665
CHECK(thrust::equal(output.begin(), output.end(), cuda::strided_iterator{cuda::counting_iterator{0}, 2}));
6766
CHECK(res == output.end());
6867
}
6968

7069
{ // With contiguous iterator
71-
thrust::fill(output.begin(), output.end(), -1);
70+
cuda::std::fill(policy, output.begin(), output.end(), -1);
7271
const auto res = cuda::std::copy_if(policy, input.begin(), input.end(), output.begin(), is_even{});
7372
CHECK(thrust::equal(output.begin(), output.end(), cuda::strided_iterator{cuda::counting_iterator{0}, 2}));
7473
CHECK(res == output.end());
7574
}
7675

7776
{ // With pointer
78-
thrust::fill(output.begin(), output.end(), -1);
77+
cuda::std::fill(policy, output.begin(), output.end(), -1);
7978
auto ptr = thrust::raw_pointer_cast(input.data());
8079
const auto res = cuda::std::copy_if(policy, ptr, ptr + size, output.begin(), is_even{});
8180
CHECK(thrust::equal(output.begin(), output.end(), cuda::strided_iterator{cuda::counting_iterator{0}, 2}));
@@ -84,7 +83,7 @@ void test_copy_if(const Policy& policy, const thrust::device_vector<int>& input,
8483
}
8584

8685
{ // Different input type
87-
thrust::fill(output.begin(), output.end(), -1);
86+
cuda::std::fill(policy, output.begin(), output.end(), -1);
8887
const auto res = cuda::std::copy_if(
8988
policy, cuda::counting_iterator{short{0}}, cuda::counting_iterator{short{size}}, output.begin(), is_even{});
9089
CHECK(thrust::equal(output.begin(), output.end(), cuda::strided_iterator{cuda::counting_iterator{0}, 2}));

0 commit comments

Comments
 (0)