diff --git a/.devcontainer/cuda13.0-conda/devcontainer.json b/.devcontainer/cuda13.0-conda/devcontainer.json deleted file mode 100644 index 5c0beccf9c..0000000000 --- a/.devcontainer/cuda13.0-conda/devcontainer.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "build": { - "context": "${localWorkspaceFolder}/.devcontainer", - "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", - "args": { - "CUDA": "13.0", - "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:26.02-cpp-mambaforge" - } - }, - "runArgs": [ - "--rm", - "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-26.02-cuda13.0-conda", - "--ulimit", - "nofile=500000" - ], - "hostRequirements": {"gpu": "optional"}, - "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:26.2": {} - }, - "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" - ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.0-envs}"], - "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], - "workspaceFolder": "/home/coder", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cuvs,type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" - ], - "customizations": { - "vscode": { - "extensions": [ - "ms-python.flake8", - "nvidia.nsight-vscode-edition" - ] - } - } -} diff --git a/.devcontainer/cuda13.0-pip/devcontainer.json b/.devcontainer/cuda13.0-pip/devcontainer.json deleted file mode 100644 index 88b6bc9def..0000000000 --- a/.devcontainer/cuda13.0-pip/devcontainer.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "build": { - "context": "${localWorkspaceFolder}/.devcontainer", - "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", - "args": { - "CUDA": "13.0", - "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:26.02-cpp-cuda13.0-ucx1.19.0-openmpi5.0.7" - } - }, - "runArgs": [ - "--rm", - "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-26.02-cuda13.0-pip", - "--ulimit", - "nofile=500000" - ], - "hostRequirements": {"gpu": "optional"}, - "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:26.2": { - "version": "13.0", - "installcuBLAS": true, - "installcuSOLVER": true, - "installcuRAND": true, - "installcuSPARSE": true - }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:26.2": {} - }, - "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/ucx", - "ghcr.io/rapidsai/devcontainers/features/cuda", - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" - ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs}"], - "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], - "workspaceFolder": "/home/coder", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cuvs,type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" - ], - "customizations": { - "vscode": { - "extensions": [ - "ms-python.flake8", - "nvidia.nsight-vscode-edition" - ] - } - } -} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 22407052e3..c5d598c9c4 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -334,7 +334,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-13.1.0 with: arch: '["amd64", "arm64"]' - cuda: '["13.0"]' + cuda: '["13.1"]' node_type: "cpu8" rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN env: | diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c2ca51e749..ad13678d84 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -192,6 +192,8 @@ if(NOT BUILD_CPU_ONLY) rapids_cpm_cccl(BUILD_EXPORT_SET cuvs-exports INSTALL_EXPORT_SET cuvs-exports) include(cmake/thirdparty/get_raft.cmake) include(cmake/thirdparty/get_cutlass.cmake) + include(${rapids-cmake-dir}/cpm/cuco.cmake) + rapids_cpm_cuco() endif() if(BUILD_TESTS OR BUILD_C_TESTS) @@ -315,7 +317,10 @@ if(NOT BUILD_CPU_ONLY) CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON ) - target_link_libraries(cuvs-cagra-search PRIVATE cuvs::cuvs_cpp_headers) + target_link_libraries( + cuvs-cagra-search PRIVATE cuvs::cuvs_cpp_headers + $ + ) target_compile_options( cuvs-cagra-search PRIVATE "$<$:${CUVS_CXX_FLAGS}>" "$<$:${CUVS_CUDA_FLAGS}>" @@ -566,8 +571,12 @@ if(NOT BUILD_CPU_ONLY) target_link_libraries( cuvs_objs - PRIVATE cuvs::cuvs_cpp_headers ${CUVS_CTK_MATH_DEPENDENCIES} - $ $ + PRIVATE cuvs::cuvs_cpp_headers + cuco::cuco + nvidia::cutlass::cutlass + ${CUVS_CTK_MATH_DEPENDENCIES} + $ + $ ) # Endian detection @@ -637,7 +646,8 @@ if(NOT BUILD_CPU_ONLY) $> $> $<$:CUDA::nvtx3> - PRIVATE nvidia::cutlass::cutlass $ + PRIVATE $ $ + $ ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries @@ -692,8 +702,8 @@ SECTIONS ${CUVS_CTK_MATH_DEPENDENCIES} $ # needs to be public for DT_NEEDED $> # header only - PRIVATE nvidia::cutlass::cutlass $ - $<$:CUDA::nvtx3> + PRIVATE $ $<$:CUDA::nvtx3> + $ $ ) endif() diff --git a/cpp/cmake/patches/cutlass/Support-both-CUDA-12-and-13-cccl-header-locations.patch b/cpp/cmake/patches/cutlass/Support-both-CUDA-12-and-13-cccl-header-locations.patch new file mode 100644 index 0000000000..489ac69f93 --- /dev/null +++ b/cpp/cmake/patches/cutlass/Support-both-CUDA-12-and-13-cccl-header-locations.patch @@ -0,0 +1,31 @@ +From 661c7e679ac72926d619da46834d09f52a727f5e Mon Sep 17 00:00:00 2001 +From: Robert Maynard +Date: Tue, 5 Aug 2025 15:05:57 -0400 +Subject: [PATCH] Support both CUDA 12 and 13 cccl header locations + +--- + CMakeLists.txt | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 38dcca9f..4088b71f 100755 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -704,8 +704,14 @@ target_include_directories( + CUTLASS + SYSTEM INTERFACE + $ +- $ + ) ++if(CUDA_VERSION VERSION_GREATER_EQUAL 13.0) ++ target_include_directories( ++ CUTLASS ++ SYSTEM INTERFACE ++ $ ++ ) ++endif() + + install( + DIRECTORY +-- +2.39.5 (Apple Git-154) diff --git a/cpp/cmake/patches/cutlass/build-export.patch b/cpp/cmake/patches/cutlass/build-export.patch deleted file mode 100644 index 31bbd25102..0000000000 --- a/cpp/cmake/patches/cutlass/build-export.patch +++ /dev/null @@ -1,26 +0,0 @@ -From e0a9597946257a01ae8444200f836ee51d5597ba Mon Sep 17 00:00:00 2001 -From: Kyle Edwards -Date: Wed, 20 Nov 2024 16:37:38 -0500 -Subject: [PATCH] Remove erroneous include directories - -These directories are left over from when CuTe was a separate -CMake project. Remove them. ---- - CMakeLists.txt | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 7419bdf5e..545384d82 100755 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -665,8 +665,6 @@ target_include_directories( - $ - $ - $ -- $ -- $ - ) - - # Mark CTK headers as system to supress warnings from them --- -2.34.1 diff --git a/cpp/cmake/patches/cutlass_override.json b/cpp/cmake/patches/cutlass_override.json index 7bf818987f..bf9791ebd1 100644 --- a/cpp/cmake/patches/cutlass_override.json +++ b/cpp/cmake/patches/cutlass_override.json @@ -1,13 +1,13 @@ { "packages" : { "cutlass" : { - "version": "3.5.1", + "version": "4.1.0", "git_url": "https://github.com/NVIDIA/cutlass.git", "git_tag": "v${version}", "patches" : [ { - "file" : "${current_json_dir}/cutlass/build-export.patch", - "issue" : "Fix build directory export", + "file" : "${current_json_dir}/cutlass/Support-both-CUDA-12-and-13-cccl-header-locations.patch", + "issue" : "Support CUDA 12 CTK layout[https://github.com/NVIDIA/cutlass/pull/2543]", "fixed_in" : "" } ] diff --git a/cpp/cmake/thirdparty/get_cutlass.cmake b/cpp/cmake/thirdparty/get_cutlass.cmake index 1c12b7ae5f..eb005ecf6b 100644 --- a/cpp/cmake/thirdparty/get_cutlass.cmake +++ b/cpp/cmake/thirdparty/get_cutlass.cmake @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -24,9 +24,7 @@ function(find_and_configure_cutlass) CACHE BOOL "Disable CUTLASS to build with cuBLAS library." ) - if (CUDA_STATIC_RUNTIME) - set(CUDART_LIBRARY "${CUDA_cudart_static_LIBRARY}" CACHE FILEPATH "fixing cutlass cmake code" FORCE) - endif() + set(CUDART_LIBRARY "${CUDA_cudart_static_LIBRARY}" CACHE FILEPATH "fixing cutlass cmake code" FORCE) include("${rapids-cmake-dir}/cpm/package_override.cmake") rapids_cpm_package_override("${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches/cutlass_override.json") @@ -42,6 +40,7 @@ function(find_and_configure_cutlass) NvidiaCutlass ${version} ${find_args} GLOBAL_TARGETS nvidia::cutlass::cutlass CPM_ARGS ${cpm_args} + EXCLUDE_FROM_ALL ON OPTIONS "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" ) diff --git a/cpp/include/cuvs/util/cutlass_utils.hpp b/cpp/include/cuvs/util/cutlass_utils.hpp new file mode 100644 index 0000000000..e934eb56eb --- /dev/null +++ b/cpp/include/cuvs/util/cutlass_utils.hpp @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include + +namespace cuvs { + +/** + * @brief Exception thrown when a CUTLASS error is encountered. + */ +struct cutlass_error : public raft::exception { + explicit cutlass_error(char const* const message) : raft::exception(message) {} + explicit cutlass_error(std::string const& message) : raft::exception(message) {} +}; + +} // namespace cuvs diff --git a/cpp/src/distance/detail/fused_distance_nn/cutlass_base.cuh b/cpp/src/distance/detail/fused_distance_nn/cutlass_base.cuh index 6573762875..93b2b80daa 100644 --- a/cpp/src/distance/detail/fused_distance_nn/cutlass_base.cuh +++ b/cpp/src/distance/detail/fused_distance_nn/cutlass_base.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -15,10 +15,10 @@ #define cutlass cuvs_cutlass #endif -#include "epilogue_elementwise.cuh" // FusedDistanceNNEpilogueElementwise -#include "gemm.h" // FusedDistanceNNGemm -#include // getMultiProcessorCount -#include // RAFT_CUTLASS_TRY +#include "../../../util/cutlass_utils.hpp" // CUVS_CUTLASS_TRY +#include "epilogue_elementwise.cuh" // FusedDistanceNNEpilogueElementwise +#include "gemm.h" // FusedDistanceNNGemm +#include // getMultiProcessorCount #include @@ -152,11 +152,11 @@ void cutlassFusedDistanceNN(const DataT* x, // Instantiate CUTLASS kernel depending on templates fusedDistanceNN fusedDistanceNN_op; // Check the problem size is supported or not - RAFT_CUTLASS_TRY(fusedDistanceNN_op.can_implement(arguments)); + CUVS_CUTLASS_TRY(fusedDistanceNN_op.can_implement(arguments)); // Initialize CUTLASS kernel with arguments and workspace pointer - RAFT_CUTLASS_TRY(fusedDistanceNN_op.initialize(arguments, workspace.data(), stream)); + CUVS_CUTLASS_TRY(fusedDistanceNN_op.initialize(arguments, workspace.data(), stream)); // Launch initialized CUTLASS kernel - RAFT_CUTLASS_TRY(fusedDistanceNN_op.run(stream)); + CUVS_CUTLASS_TRY(fusedDistanceNN_op.run(stream)); } }; // namespace detail diff --git a/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh b/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh index 55728cfbd3..d01445c682 100644 --- a/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh +++ b/cpp/src/distance/detail/pairwise_distance_cutlass_base.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -18,8 +18,8 @@ #include "pairwise_distance_epilogue_elementwise.h" #include "pairwise_distance_gemm.h" +#include "../../util/cutlass_utils.hpp" #include "distance_ops/cutlass.cuh" -#include #include @@ -157,13 +157,13 @@ std::enable_if_t::value> cutlassDistanceKernel(const Da // Instantiate CUTLASS kernel depending on templates cutlassDist cutlassDist_op; // Check the problem size is supported or not - RAFT_CUTLASS_TRY(cutlassDist_op.can_implement(arguments)); + CUVS_CUTLASS_TRY(cutlassDist_op.can_implement(arguments)); // Initialize CUTLASS kernel with arguments and workspace pointer - RAFT_CUTLASS_TRY(cutlassDist_op.initialize(arguments, workspace.data(), stream)); + CUVS_CUTLASS_TRY(cutlassDist_op.initialize(arguments, workspace.data(), stream)); // Launch initialized CUTLASS kernel - RAFT_CUTLASS_TRY(cutlassDist_op(stream)); + CUVS_CUTLASS_TRY(cutlassDist_op(stream)); } } diff --git a/cpp/src/util/cutlass_utils.hpp b/cpp/src/util/cutlass_utils.hpp new file mode 100644 index 0000000000..470473bd09 --- /dev/null +++ b/cpp/src/util/cutlass_utils.hpp @@ -0,0 +1,33 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +#include + +/** + * @brief Error checking macro for CUTLASS functions. + * + * Invokes a CUTLASS function call, if the call does not return cutlass::Status::kSuccess, + * throws an exception detailing the CUTLASS error that occurred. This macro + * is only available internally to cuvs and as such the file differs from + * the one found in cuvs/include/util + * + */ +#define CUVS_CUTLASS_TRY(call) \ + do { \ + cutlass::Status const status = call; \ + if (status != cutlass::Status::kSuccess) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "CUTLASS error encountered at: ", \ + "call='%s', Reason=%s", \ + #call, \ + cutlassGetStatusString(status)); \ + throw cuvs::cutlass_error(msg); \ + } \ + } while (0) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index fed07246cb..3c47db4a16 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -49,6 +49,7 @@ function(ConfigureTest) $ $ ${_CUVS_TEST_ADDITIONAL_DEP} + $ ) set_target_properties( ${TEST_NAME}