NVIDIA
diff --git a/‎.devcontainer/cuda13.0-gcc13/devcontainer.json‎
Lines changed: 42 additions & 0 deletions b/‎.devcontainer/cuda13.0-gcc13/devcontainer.json‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎.devcontainer/cuda13.0-llvm20/devcontainer.json‎
Lines changed: 42 additions & 0 deletions b/‎.devcontainer/cuda13.0-llvm20/devcontainer.json‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎.devcontainer/devcontainer.json‎
Lines changed: 4 additions & 4 deletions b/‎.devcontainer/devcontainer.json‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎.devcontainer/make_devcontainers.sh‎
Lines changed: 6 additions & 1 deletion b/‎.devcontainer/make_devcontainers.sh‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 8 additions & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎benchmarks/benchmark_utils.hpp‎
Lines changed: 2 additions & 2 deletions b/‎benchmarks/benchmark_utils.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎ci/matrix.yml‎
Lines changed: 11 additions & 7 deletions b/‎ci/matrix.yml‎
Lines changed: 11 additions & 7 deletions
diff --git a/‎cmake/header_testing.cmake‎
Lines changed: 3 additions & 10 deletions b/‎cmake/header_testing.cmake‎
Lines changed: 3 additions & 10 deletions
diff --git a/‎doxygen/Doxyfile‎
Lines changed: 1 addition & 1 deletion b/‎doxygen/Doxyfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/cuco/bloom_filter.cuh‎
Lines changed: 92 additions & 7 deletions b/‎include/cuco/bloom_filter.cuh‎
Lines changed: 92 additions & 7 deletions
@@ -0,0 +1,42 @@
+{
+  "shutdownAction": "stopContainer",
+  "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda13.0-ubuntu24.04",
+  "hostRequirements": {
+    "gpu": true
+  },
+  "initializeCommand": [
+    "/bin/bash",
+    "-c",
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+  ],
+  "containerEnv": {
+    "SCCACHE_REGION": "us-east-2",
+    "SCCACHE_BUCKET": "rapids-sccache-devs",
+    "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
+    "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+    "DEVCONTAINER_NAME": "cuda13.0-gcc13",
+    "CUCO_CUDA_VERSION": "13.0",
+    "CUCO_HOST_COMPILER": "gcc",
+    "CUCO_HOST_COMPILER_VERSION": "13"
+  },
+  "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+  "mounts": [
+    "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+  ],
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "llvm-vs-code-extensions.vscode-clangd"
+      ],
+      "settings": {
+        "clangd.arguments": [
+          "--compile-commands-dir=${workspaceFolder}/build/latest"
+        ]
+      }
+    }
+  },
+  "name": "cuda13.0-gcc13"
+}
@@ -0,0 +1,42 @@
+{
+  "shutdownAction": "stopContainer",
+  "image": "rapidsai/devcontainers:25.12-cpp-llvm20-cuda13.0ext-ubuntu24.04",
+  "hostRequirements": {
+    "gpu": true
+  },
+  "initializeCommand": [
+    "/bin/bash",
+    "-c",
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+  ],
+  "containerEnv": {
+    "SCCACHE_REGION": "us-east-2",
+    "SCCACHE_BUCKET": "rapids-sccache-devs",
+    "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
+    "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+    "DEVCONTAINER_NAME": "cuda13.0-llvm20",
+    "CUCO_CUDA_VERSION": "13.0",
+    "CUCO_HOST_COMPILER": "llvm",
+    "CUCO_HOST_COMPILER_VERSION": "20"
+  },
+  "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+  "mounts": [
+    "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+  ],
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "llvm-vs-code-extensions.vscode-clangd"
+      ],
+      "settings": {
+        "clangd.arguments": [
+          "--compile-commands-dir=${workspaceFolder}/build/latest"
+        ]
+      }
+    }
+  },
+  "name": "cuda13.0-llvm20"
+}
@@ -1,6 +1,6 @@
 {
   "shutdownAction": "stopContainer",
-  "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda12.9-ubuntu24.04",
+  "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda13.0-ubuntu24.04",
   "hostRequirements": {
     "gpu": true
   },
@@ -14,8 +14,8 @@
     "SCCACHE_BUCKET": "rapids-sccache-devs",
     "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
     "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
-    "DEVCONTAINER_NAME": "cuda12.9-gcc13",
-    "CUCO_CUDA_VERSION": "12.9",
+    "DEVCONTAINER_NAME": "cuda13.0-gcc13",
+    "CUCO_CUDA_VERSION": "13.0",
     "CUCO_HOST_COMPILER": "gcc",
     "CUCO_HOST_COMPILER_VERSION": "13"
   },
@@ -38,5 +38,5 @@
       }
     }
   },
-  "name": "cuda12.9-gcc13"
+  "name": "cuda13.0-gcc13"
 }
@@ -45,7 +45,12 @@ update_devcontainer() {
     local devcontainer_version="$8"
 
     local IMAGE_ROOT="rapidsai/devcontainers:${devcontainer_version}-cpp-"
-    local image="${IMAGE_ROOT}${compiler_name}${compiler_version}-cuda${cuda_version}-${os}"
+    # Add 'ext' suffix only for LLVM compilers with CUDA 13.0
+    local cuda_suffix=""
+    if [[ "$cuda_version" == "13.0" && "$compiler_name" == "llvm" ]]; then
+        cuda_suffix="ext"
+    fi
+    local image="${IMAGE_ROOT}${compiler_name}${compiler_version}-cuda${cuda_version}${cuda_suffix}-${os}"
 
     jq --arg image "$image" --arg name "$name" \
        --arg cuda_version "$cuda_version" --arg compiler_name "$compiler_name" \
 
@@ -18,7 +18,7 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 set(rapids-cmake-version 25.12)
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
     file(DOWNLOAD
-      https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${rapids-cmake-version}/RAPIDS.cmake
+      https://raw.githubusercontent.com/rapidsai/rapids-cmake/release/${rapids-cmake-version}/RAPIDS.cmake
          ${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
 endif()
 include(${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
@@ -116,6 +116,13 @@ function(cuco_set_common_compile_options target_name)
     if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         target_compile_options(${target_name} PRIVATE -Xcompiler -Wno-subobject-linkage)
     endif()
+    
+    # Add Clang-specific warning suppression for deprecated literal operators
+    # (Catch2 and cuco code still use deprecated syntax)
+    # Only for Clang 15+ which introduced this warning
+    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 15.0)
+        target_compile_options(${target_name} PRIVATE -Xcompiler -Wno-deprecated-literal-operator)
+    endif()
 endfunction()
 
 ###################################################################################################
 
@@ -59,8 +59,8 @@ template <class OutputIt>
 struct lazy_discard {
   OutputIt it;
 
-  using index_type = typename thrust::iterator_traits<OutputIt>::difference_type;
-  using value_type = typename thrust::iterator_traits<OutputIt>::value_type;
+  using index_type = typename cuda::std::iterator_traits<OutputIt>::difference_type;
+  using value_type = typename cuda::std::iterator_traits<OutputIt>::value_type;
 
   __device__ void device_dispatch(index_type index, value_type const& value) const
   {
 
@@ -13,8 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-cuda_oldest: &cuda_oldest '12.0'
-cuda_newest: &cuda_newest '12.9'
+cuda_12_0: &cuda_12_0 '12.0'
+cuda_12_9: &cuda_12_9 '12.9'
+cuda_13_0: &cuda_13_0 '13.0'
 
 # The GPUs to test on
 # Note: This assumes that the appropriate gpu_build_archs are set to include building for the GPUs listed here
@@ -42,8 +43,11 @@ devcontainer_version: '25.12'
 # Configurations that will run for every PR
 pull_request:
   nvcc:
-    - {cuda: *cuda_oldest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '11', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']}
-    - {cuda: *cuda_newest, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']}
-    - {cuda: *cuda_newest, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80,90', std: [17], jobs: ['build']}
-    - {cuda: *cuda_oldest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '14', exe: 'clang++'}, gpu_build_archs: '70', std: [17], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '18', exe: 'clang++'}, gpu_build_archs: '90', std: [17], jobs: ['build']}
+    - {cuda: *cuda_12_0, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '11', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']}
+    - {cuda: *cuda_12_9, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '70', std: [17], jobs: ['build', 'test']}
+    - {cuda: *cuda_12_9, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80,90', std: [17], jobs: ['build']}
+    - {cuda: *cuda_12_0, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '14', exe: 'clang++'}, gpu_build_archs: '70', std: [17], jobs: ['build']}
+    - {cuda: *cuda_12_9, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '18', exe: 'clang++'}, gpu_build_archs: '90', std: [17], jobs: ['build']}
+    - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80', std: [17], jobs: ['build']}
+    - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'gcc', version: '13', exe: 'g++'}, gpu_build_archs: '80,90', std: [17], jobs: ['build']}
+    - {cuda: *cuda_13_0, os: 'ubuntu24.04', cpu: 'amd64', compiler: {name: 'llvm', version: '20', exe: 'clang++'}, gpu_build_archs: '90', std: [17], jobs: ['build']}
@@ -63,16 +63,9 @@ function(cuco_add_header_tests)
     # Create executable test for this specific header
     add_executable(${headertest_target} ${header_src})
     target_link_libraries(${headertest_target} PRIVATE cuco::cuco CUDA::cudart)
-
-    target_compile_options(${headertest_target} PRIVATE
-      $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
-      --compiler-options=-Wall --compiler-options=-Wextra
-      --compiler-options=-Werror -Wno-deprecated-gpu-targets
-    )
-
-    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-      target_compile_options(${headertest_target} PRIVATE -Xcompiler -Wno-subobject-linkage)
-    endif()
+    
+    # Use common compile options (includes all compiler-specific warning suppressions)
+    cuco_set_common_compile_options(${headertest_target})
 
     set_target_properties(${headertest_target} PROPERTIES
       RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests/headers"
 
@@ -1508,7 +1508,7 @@ FORMULA_MACROFILE      =
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-USE_MATHJAX            = NO
+USE_MATHJAX            = YES
 
 # When MathJax is enabled you can set the default output format to be used for
 # the MathJax output. See the MathJax site (see:
 
@@ -42,7 +42,7 @@ namespace cuco {
  * - Host-side "bulk" operations
  * - Device-side "singular" operations
  *
- * The host-side bulk operations include `add`, `contains`, etc. These APIs should be used when
+ * The host-side bulk operations include add(), contains(), etc. These APIs should be used when
  * there are a large number of keys to add or lookup. For example, given a range of keys
  * specified by device-accessible iterators, the bulk `add` function will add all keys into
  * the filter.
@@ -124,7 +124,7 @@ class bloom_filter {
    * @brief Erases all information from the filter.
    *
    * @note This function synchronizes the given stream. For asynchronous execution use
-   * `clear_async`.
+   * clear_async().
    *
    * @param stream CUDA stream used for device memory operations and kernel launches
    */
@@ -142,7 +142,7 @@ class bloom_filter {
    * @brief Adds all keys in the range `[first, last)` to the filter.
    *
    * @note This function synchronizes the given stream. For asynchronous execution use
-   * `add_async`.
+   * add_async().
    *
    * @tparam InputIt Device-accessible random access input key iterator
    * @param first Beginning of the sequence of keys
@@ -173,7 +173,7 @@ class bloom_filter {
    *
    * @note The key `*(first + i)` is added if `pred( *(stencil + i) )` returns `true`.
    * @note This function synchronizes the given stream and returns the number of successful
-   * insertions. For asynchronous execution use `add_if_async`.
+   * insertions. For asynchronous execution use add_if_async().
    *
    * @tparam InputIt Device-accessible random access input key iterator
    * @tparam StencilIt Device-accessible random-access iterator whose `value_type` is
@@ -227,7 +227,7 @@ class bloom_filter {
    * filter.
    *
    * @note This function synchronizes the given stream. For asynchronous execution use
-   * `contains_async`.
+   * contains_async().
    *
    * @tparam InputIt Device-accessible random access input key iterator
    * @tparam OutputIt Device-accessible output iterator assignable from `bool`
@@ -269,7 +269,7 @@ class bloom_filter {
    *
    * @note The key `*(first + i)` is queried if `pred( *(stencil + i) )` returns `true`.
    * @note This function synchronizes the given stream. For asynchronous execution use
-   * `contains_if_async`.
+   * contains_if_async().
    *
    * @tparam InputIt Device-accessible random access input key iterator
    * @tparam StencilIt Device-accessible random-access iterator whose `value_type` is
@@ -325,6 +325,91 @@ class bloom_filter {
                                             cuda::stream_ref stream = cuda::stream_ref{
                                               cudaStream_t{nullptr}}) const noexcept;
 
+  /**
+   * @brief Merge another bloom filter into this.
+   *
+   * @note Modifies `this` in place.
+   * @note This function synchronizes the given stream. For asynchronous execution use
+   * merge_async().
+   *
+   * @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
+   * construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
+   * then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$.
+   *
+   * @param other Other filter with matching type to this. The policy object must be equal to that
+   * of this filter, otherwise behavior is undefined.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
+   *
+   * @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
+   */
+  __host__ constexpr void merge(bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
+                                cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
+
+  /**
+   * @brief Asynchronously merge another bloom filter into this.
+   *
+   * @note Modifies `this` in place.
+   *
+   * @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
+   * construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
+   * then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$
+   *
+   * @param other Other filter with matching type to this. The policy object must be equal to that
+   * of this filter, otherwise behavior is undefined.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
+   *
+   * @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
+   */
+  __host__ constexpr void merge_async(
+    bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
+    cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
+
+  /**
+   * @brief Intersect another bloom filter into this.
+   *
+   * @note Modifies `this` in place.
+   * @note This function synchronizes the given stream. For asynchronous execution use
+   * intersect_async().
+   *
+   * @note This performs the set intersection of the two filters. Unlike merge(), this operation
+   * does not distribute over filter construction and therefore only approximates the bloom filter
+   * of the intersection of the input sets. In other words, let \f$f : X \to B\f$ denote the
+   * construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
+   * then \f$f(A \cap B) \ne f(A) \cap f(B)\f$. Despite this, it is guaranteed that for all \f$x \in
+   * (A \cap B)\f$, it holds \f$x \in f(A) \cap f(B)\f$.
+   *
+   * @param other Other filter with matching type to this. The policy object must be equal to that
+   * of this filter, otherwise behavior is undefined.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
+   *
+   * @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
+   */
+  __host__ constexpr void intersect(
+    bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
+    cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
+
+  /**
+   * @brief Asynchronously intersect another bloom filter into this.
+   *
+   * @note Modifies `this` in place.
+   *
+   * @note This performs the set intersection of the two filters. Unlike merge_async(), this
+   * operation does not distribute over filter construction and therefore only approximates the
+   * bloom filter of the intersection of the input sets. In other words, let \f$f : X \to B\f$
+   * denote the construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be
+   * two sets, then \f$f(A \cap B) \ne f(A) \cap f(B)\f$. Despite this, it is guaranteed that for
+   * all \f$x \in (A \cap B)\f$, it holds \f$x \in f(A) \cap f(B)\f$.
+   *
+   * @param other Other filter with matching type to this. The policy object must be equal to that
+   * of this filter, otherwise behavior is undefined.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
+   *
+   * @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
+   */
+  __host__ constexpr void intersect_async(
+    bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
+    cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
+
   /**
    * @brief Gets a pointer to the underlying filter storage.
    *
@@ -369,4 +454,4 @@ class bloom_filter {
 };
 }  // namespace cuco
 
-#include <cuco/detail/bloom_filter/bloom_filter.inl>
+#include <cuco/detail/bloom_filter/bloom_filter.inl>
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"shutdownAction": "stopContainer",`
`3`		`- "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda12.9-ubuntu24.04",`
	`3`	`+ "image": "rapidsai/devcontainers:25.12-cpp-gcc13-cuda13.0-ubuntu24.04",`
`4`	`4`	`"hostRequirements": {`
`5`	`5`	`"gpu": true`
`6`	`6`	`},`
`@@ -14,8 +14,8 @@`
`14`	`14`	`"SCCACHE_BUCKET": "rapids-sccache-devs",`
`15`	`15`	`"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",`
`16`	`16`	`"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",`
`17`		`- "DEVCONTAINER_NAME": "cuda12.9-gcc13",`
`18`		`- "CUCO_CUDA_VERSION": "12.9",`
	`17`	`+ "DEVCONTAINER_NAME": "cuda13.0-gcc13",`
	`18`	`+ "CUCO_CUDA_VERSION": "13.0",`
`19`	`19`	`"CUCO_HOST_COMPILER": "gcc",`
`20`	`20`	`"CUCO_HOST_COMPILER_VERSION": "13"`
`21`	`21`	`},`
`@@ -38,5 +38,5 @@`
`38`	`38`	`}`
`39`	`39`	`}`
`40`	`40`	`},`
`41`		`- "name": "cuda12.9-gcc13"`
	`41`	`+ "name": "cuda13.0-gcc13"`
`42`	`42`	`}`
Original file line number	Diff line number	Diff line change
`@@ -59,8 +59,8 @@ template <class OutputIt>`
`59`	`59`	`struct lazy_discard {`
`60`	`60`	`OutputIt it;`
`61`	`61`
`62`		`- using index_type = typename thrust::iterator_traits<OutputIt>::difference_type;`
`63`		`- using value_type = typename thrust::iterator_traits<OutputIt>::value_type;`
	`62`	`+ using index_type = typename cuda::std::iterator_traits<OutputIt>::difference_type;`
	`63`	`+ using value_type = typename cuda::std::iterator_traits<OutputIt>::value_type;`
`64`	`64`
`65`	`65`	`__device__ void device_dispatch(index_type index, value_type const& value) const`
`66`	`66`	`{`