11/*
2- * Copyright (c) 2019-2024 , NVIDIA CORPORATION.
2+ * Copyright (c) 2019-2025 , NVIDIA CORPORATION.
33 *
44 * Licensed under the Apache License, Version 2.0 (the "License");
55 * you may not use this file except in compliance with the License.
3333#include < rmm/cuda_stream_view.hpp>
3434#include < rmm/device_uvector.hpp>
3535
36+ #include < cooperative_groups.h>
37+ #include < cooperative_groups/reduce.h>
38+ #include < cooperative_groups/scan.h>
3639#include < cub/cub.cuh>
3740
3841namespace cudf {
@@ -202,11 +205,14 @@ CUDF_KERNEL void url_decode_char_counter(column_device_view const in_strings,
202205 __shared__ char temporary_buffer[num_warps_per_threadblock][char_block_size + halo_size];
203206 __shared__ typename cub::WarpReduce<int8_t >::TempStorage cub_storage[num_warps_per_threadblock];
204207
205- auto const global_thread_id =
206- cudf::detail::grid_1d::global_thread_id<num_warps_per_threadblock * cudf::detail::warp_size>();
207- auto const global_warp_id = static_cast <size_type>(global_thread_id / cudf::detail::warp_size);
208- auto const local_warp_id = static_cast <size_type>(threadIdx .x / cudf::detail::warp_size);
209- auto const warp_lane = static_cast <size_type>(threadIdx .x % cudf::detail::warp_size);
208+ namespace cg = cooperative_groups;
209+ auto const block = cg::this_thread_block ();
210+ auto const warp = cg::tiled_partition<cudf::detail::warp_size>(block);
211+
212+ auto const global_thread_id = cudf::detail::grid_1d::global_thread_id ();
213+ auto const global_warp_id = static_cast <size_type>(global_thread_id / cudf::detail::warp_size);
214+ auto const local_warp_id = static_cast <size_type>(warp.meta_group_rank ());
215+ auto const warp_lane = static_cast <size_type>(warp.thread_rank ());
210216 auto const nwarps =
211217 static_cast <size_type>(cudf::detail::grid_1d::grid_stride () / cudf::detail::warp_size);
212218 char * in_chars_shared = temporary_buffer[local_warp_id];
@@ -241,7 +247,7 @@ CUDF_KERNEL void url_decode_char_counter(column_device_view const in_strings,
241247 in_chars_shared[char_idx] = in_idx < string_length ? in_chars[in_idx] : 0 ;
242248 }
243249
244- __syncwarp ();
250+ warp. sync ();
245251
246252 // `char_idx_start` represents the start character index of the current warp.
247253 for (size_type char_idx_start = 0 ; char_idx_start < string_length_block;
@@ -258,7 +264,7 @@ CUDF_KERNEL void url_decode_char_counter(column_device_view const in_strings,
258264
259265 if (warp_lane == 0 ) { escape_char_count += total_escape_char; }
260266
261- __syncwarp ();
267+ warp. sync ();
262268 }
263269 }
264270 // URL decoding replaces 3 bytes with 1 for each escape character.
@@ -289,11 +295,14 @@ CUDF_KERNEL void url_decode_char_replacer(column_device_view const in_strings,
289295 __shared__ typename cub::WarpScan<int8_t >::TempStorage cub_storage[num_warps_per_threadblock];
290296 __shared__ size_type out_idx[num_warps_per_threadblock];
291297
292- auto const global_thread_id =
293- cudf::detail::grid_1d::global_thread_id<num_warps_per_threadblock * cudf::detail::warp_size>();
294- auto const global_warp_id = static_cast <size_type>(global_thread_id / cudf::detail::warp_size);
295- auto const local_warp_id = static_cast <size_type>(threadIdx .x / cudf::detail::warp_size);
296- auto const warp_lane = static_cast <size_type>(threadIdx .x % cudf::detail::warp_size);
298+ namespace cg = cooperative_groups;
299+ auto const block = cg::this_thread_block ();
300+ auto const warp = cg::tiled_partition<cudf::detail::warp_size>(block);
301+
302+ auto const global_thread_id = cudf::detail::grid_1d::global_thread_id ();
303+ auto const global_warp_id = static_cast <size_type>(global_thread_id / cudf::detail::warp_size);
304+ auto const local_warp_id = static_cast <size_type>(warp.meta_group_rank ());
305+ auto const warp_lane = static_cast <size_type>(warp.thread_rank ());
297306 auto const nwarps =
298307 static_cast <size_type>(cudf::detail::grid_1d::grid_stride () / cudf::detail::warp_size);
299308 char * in_chars_shared = temporary_buffer[local_warp_id];
@@ -326,7 +335,7 @@ CUDF_KERNEL void url_decode_char_replacer(column_device_view const in_strings,
326335 in_chars_shared[char_idx] = in_idx >= 0 && in_idx < string_length ? in_chars[in_idx] : 0 ;
327336 }
328337
329- __syncwarp ();
338+ warp. sync ();
330339
331340 // `char_idx_start` represents the start character index of the current warp.
332341 for (size_type char_idx_start = 0 ; char_idx_start < string_length_block;
@@ -364,7 +373,7 @@ CUDF_KERNEL void url_decode_char_replacer(column_device_view const in_strings,
364373 out_idx[local_warp_id] += (out_offset + out_size);
365374 }
366375
367- __syncwarp ();
376+ warp. sync ();
368377 }
369378 }
370379 }
0 commit comments