@@ -238,6 +238,60 @@ C2H_TEST("Device segmented max uses environment", "[segmented_reduce][device]")
238238 REQUIRE (d_out == expected);
239239}
240240
// Test case: calls device_segmented_reduce_argmin with an environment argument and
// checks the (key, value) pair written for each of the three segments.
241+ C2H_TEST (" Device segmented argmin uses environment" , " [segmented_reduce][device]" )
242+ {
243+ int num_segments = 3 ;
// Four offsets delimit three segments of d_in: [0, 4), [4, 7) and [7, 9).
244+ thrust::device_vector<int > d_offsets = {0 , 4 , 7 , 9 };
245+ auto d_offsets_it = thrust::raw_pointer_cast (d_offsets.data ());
// Segment contents: {8, 6, 7, 5}, {3, 0, 9} and {1, 2}.
246+ thrust::device_vector<int > d_in{8 , 6 , 7 , 5 , 3 , 0 , 9 , 1 , 2 };
// One KeyValuePair output slot per segment.
247+ thrust::device_vector<cub::KeyValuePair<int , int >> d_out (3 );
248+
// Sizing query: ArgMin is called with a null temp-storage pointer so that, per CUB's
// documented convention, it only reports the required temp-storage size in
// expected_bytes_allocated. The offsets pointer is passed as the begin offsets and the
// same pointer advanced by one as the end offsets.
249+ size_t expected_bytes_allocated{};
250+ REQUIRE (
251+ cudaSuccess
252+ == cub::DeviceSegmentedReduce::ArgMin (
253+ nullptr , expected_bytes_allocated, d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 ));
254+
// Build the environment from the queried size. expected_allocation_size is defined
// outside this view; presumably it lets the call below be checked against this byte
// count -- TODO confirm against the helper's definition.
255+ auto env = stdexec::env{expected_allocation_size (expected_bytes_allocated)};
256+
// Call under test (wrapper defined outside this view), same arguments plus env.
257+ device_segmented_reduce_argmin (d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 , env);
258+
// Copy the results into a host vector before inspecting them.
259+ thrust::host_vector<cub::KeyValuePair<int , int >> h_out (d_out);
// Segment 0 {8, 6, 7, 5}: minimum 5 at position 3.
260+ REQUIRE (h_out[0 ].key == 3 );
261+ REQUIRE (h_out[0 ].value == 5 );
// Segment 1 {3, 0, 9}: minimum 0 at position 1. The expected key is the position
// within the segment, not the index into d_in (which would be 5).
262+ REQUIRE (h_out[1 ].key == 1 );
263+ REQUIRE (h_out[1 ].value == 0 );
// Segment 2 {1, 2}: minimum 1 at position 0.
264+ REQUIRE (h_out[2 ].key == 0 );
265+ REQUIRE (h_out[2 ].value == 1 );
266+ }
267+
// Test case: calls device_segmented_reduce_argmax with an environment argument and
// checks the (key, value) pair written for each of the three segments.
268+ C2H_TEST (" Device segmented argmax uses environment" , " [segmented_reduce][device]" )
269+ {
270+ int num_segments = 3 ;
// Four offsets delimit three segments of d_in: [0, 4), [4, 7) and [7, 9).
271+ thrust::device_vector<int > d_offsets = {0 , 4 , 7 , 9 };
272+ auto d_offsets_it = thrust::raw_pointer_cast (d_offsets.data ());
// Segment contents: {8, 6, 7, 5}, {3, 0, 9} and {1, 2}.
273+ thrust::device_vector<int > d_in{8 , 6 , 7 , 5 , 3 , 0 , 9 , 1 , 2 };
// One KeyValuePair output slot per segment.
274+ thrust::device_vector<cub::KeyValuePair<int , int >> d_out (3 );
275+
// Sizing query: ArgMax is called with a null temp-storage pointer so that, per CUB's
// documented convention, it only reports the required temp-storage size in
// expected_bytes_allocated. The offsets pointer is passed as the begin offsets and the
// same pointer advanced by one as the end offsets.
276+ size_t expected_bytes_allocated{};
277+ REQUIRE (
278+ cudaSuccess
279+ == cub::DeviceSegmentedReduce::ArgMax (
280+ nullptr , expected_bytes_allocated, d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 ));
281+
// Build the environment from the queried size. expected_allocation_size is defined
// outside this view; presumably it lets the call below be checked against this byte
// count -- TODO confirm against the helper's definition.
282+ auto env = stdexec::env{expected_allocation_size (expected_bytes_allocated)};
283+
// Call under test (wrapper defined outside this view), same arguments plus env.
284+ device_segmented_reduce_argmax (d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 , env);
285+
// Copy the results into a host vector before inspecting them.
286+ thrust::host_vector<cub::KeyValuePair<int , int >> h_out (d_out);
// Segment 0 {8, 6, 7, 5}: maximum 8 at position 0.
287+ REQUIRE (h_out[0 ].key == 0 );
288+ REQUIRE (h_out[0 ].value == 8 );
// Segment 1 {3, 0, 9}: maximum 9 at position 2. The expected key is the position
// within the segment, not the index into d_in (which would be 6).
289+ REQUIRE (h_out[1 ].key == 2 );
290+ REQUIRE (h_out[1 ].value == 9 );
// Segment 2 {1, 2}: maximum 2 at position 1.
291+ REQUIRE (h_out[2 ].key == 1 );
292+ REQUIRE (h_out[2 ].value == 2 );
293+ }
294+
241295TEST_CASE (" Device segmented reduce uses custom stream" , " [segmented_reduce][device]" )
242296{
243297 int num_segments = 3 ;
@@ -367,60 +421,6 @@ TEST_CASE("Device segmented max uses custom stream", "[segmented_reduce][device]
367421 REQUIRE (cudaSuccess == cudaStreamDestroy (custom_stream));
368422}
369423
// NOTE(review): every line of this block carries the diff's '-' marker, i.e. this is
// the copy of the test at its old position in the file.
// Test case: calls device_segmented_reduce_argmin with an environment argument and
// checks the (key, value) pair written for each of the three segments.
370- C2H_TEST (" Device segmented argmin uses environment" , " [segmented_reduce][device]" )
371- {
372- int num_segments = 3 ;
// Four offsets delimit three segments of d_in: [0, 4), [4, 7) and [7, 9).
373- thrust::device_vector<int > d_offsets = {0 , 4 , 7 , 9 };
374- auto d_offsets_it = thrust::raw_pointer_cast (d_offsets.data ());
// Segment contents: {8, 6, 7, 5}, {3, 0, 9} and {1, 2}.
375- thrust::device_vector<int > d_in{8 , 6 , 7 , 5 , 3 , 0 , 9 , 1 , 2 };
// One KeyValuePair output slot per segment.
376- thrust::device_vector<cub::KeyValuePair<int , int >> d_out (3 );
377-
// Sizing query: ArgMin is called with a null temp-storage pointer so that, per CUB's
// documented convention, it only reports the required temp-storage size in
// expected_bytes_allocated. The offsets pointer is passed as the begin offsets and the
// same pointer advanced by one as the end offsets.
378- size_t expected_bytes_allocated{};
379- REQUIRE (
380- cudaSuccess
381- == cub::DeviceSegmentedReduce::ArgMin (
382- nullptr , expected_bytes_allocated, d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 ));
383-
// Build the environment from the queried size. expected_allocation_size is defined
// outside this view; presumably it lets the call below be checked against this byte
// count -- TODO confirm against the helper's definition.
384- auto env = stdexec::env{expected_allocation_size (expected_bytes_allocated)};
385-
// Call under test (wrapper defined outside this view), same arguments plus env.
386- device_segmented_reduce_argmin (d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 , env);
387-
// Copy the results into a host vector before inspecting them.
388- thrust::host_vector<cub::KeyValuePair<int , int >> h_out (d_out);
// Segment 0 {8, 6, 7, 5}: minimum 5 at position 3.
389- REQUIRE (h_out[0 ].key == 3 );
390- REQUIRE (h_out[0 ].value == 5 );
// Segment 1 {3, 0, 9}: minimum 0 at position 1. The expected key is the position
// within the segment, not the index into d_in (which would be 5).
391- REQUIRE (h_out[1 ].key == 1 );
392- REQUIRE (h_out[1 ].value == 0 );
// Segment 2 {1, 2}: minimum 1 at position 0.
393- REQUIRE (h_out[2 ].key == 0 );
394- REQUIRE (h_out[2 ].value == 1 );
395- }
396-
// NOTE(review): every line of this block carries the diff's '-' marker, i.e. this is
// the copy of the test at its old position in the file.
// Test case: calls device_segmented_reduce_argmax with an environment argument and
// checks the (key, value) pair written for each of the three segments.
397- C2H_TEST (" Device segmented argmax uses environment" , " [segmented_reduce][device]" )
398- {
399- int num_segments = 3 ;
// Four offsets delimit three segments of d_in: [0, 4), [4, 7) and [7, 9).
400- thrust::device_vector<int > d_offsets = {0 , 4 , 7 , 9 };
401- auto d_offsets_it = thrust::raw_pointer_cast (d_offsets.data ());
// Segment contents: {8, 6, 7, 5}, {3, 0, 9} and {1, 2}.
402- thrust::device_vector<int > d_in{8 , 6 , 7 , 5 , 3 , 0 , 9 , 1 , 2 };
// One KeyValuePair output slot per segment.
403- thrust::device_vector<cub::KeyValuePair<int , int >> d_out (3 );
404-
// Sizing query: ArgMax is called with a null temp-storage pointer so that, per CUB's
// documented convention, it only reports the required temp-storage size in
// expected_bytes_allocated. The offsets pointer is passed as the begin offsets and the
// same pointer advanced by one as the end offsets.
405- size_t expected_bytes_allocated{};
406- REQUIRE (
407- cudaSuccess
408- == cub::DeviceSegmentedReduce::ArgMax (
409- nullptr , expected_bytes_allocated, d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 ));
410-
// Build the environment from the queried size. expected_allocation_size is defined
// outside this view; presumably it lets the call below be checked against this byte
// count -- TODO confirm against the helper's definition.
411- auto env = stdexec::env{expected_allocation_size (expected_bytes_allocated)};
412-
// Call under test (wrapper defined outside this view), same arguments plus env.
413- device_segmented_reduce_argmax (d_in.begin (), d_out.begin (), num_segments, d_offsets_it, d_offsets_it + 1 , env);
414-
// Copy the results into a host vector before inspecting them.
415- thrust::host_vector<cub::KeyValuePair<int , int >> h_out (d_out);
// Segment 0 {8, 6, 7, 5}: maximum 8 at position 0.
416- REQUIRE (h_out[0 ].key == 0 );
417- REQUIRE (h_out[0 ].value == 8 );
// Segment 1 {3, 0, 9}: maximum 9 at position 2. The expected key is the position
// within the segment, not the index into d_in (which would be 6).
418- REQUIRE (h_out[1 ].key == 2 );
419- REQUIRE (h_out[1 ].value == 9 );
// Segment 2 {1, 2}: maximum 2 at position 1.
420- REQUIRE (h_out[2 ].key == 1 );
421- REQUIRE (h_out[2 ].value == 2 );
422- }
423-
424424TEST_CASE (" Device segmented argmin uses custom stream" , " [segmented_reduce][device]" )
425425{
426426 int num_segments = 3 ;
0 commit comments