File tree Expand file tree Collapse file tree 10 files changed +72
-28
lines changed
python/rapidsmpf/rapidsmpf/communicator Expand file tree Collapse file tree 10 files changed +72
-28
lines changed Original file line number Diff line number Diff line change 1717 - conda-cpp-build
1818 - conda-cpp-linters
1919 - conda-cpp-tests
20+ - conda-cpp-memcheck
2021 - conda-python-build
2122 - conda-python-tests
2223 - docs-build
@@ -121,6 +122,14 @@ jobs:
121122 build_type : pull-request
122123 container-options : " --cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000"
123124 script : ci/test_cpp.sh
125+ conda-cpp-memcheck :
126+ secrets : inherit
127+ needs : conda-cpp-build
128+ uses : rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
129+ with :
130+ build_type : pull-request
131+ script : " ci/test_cpp_memcheck.sh"
132+ node_type : " gpu-l4-latest-1"
124133 conda-python-build :
125134 needs : conda-cpp-build
126135 secrets : inherit
Original file line number Diff line number Diff line change 3333 date : ${{ inputs.date }}
3434 script : ci/test_cpp.sh
3535 sha : ${{ inputs.sha }}
36+ conda-cpp-memcheck :
37+ secrets : inherit
38+ needs : checks
39+ uses : rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
40+ with :
41+ build_type : pull-request
42+ script : " ci/test_cpp_memcheck.sh"
43+ node_type : " gpu-l4-latest-1"
3644 conda-python-tests :
3745 secrets : inherit
3846 uses : rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10
Original file line number Diff line number Diff line change 1+ #! /bin/bash
2+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
3+ # SPDX-License-Identifier: Apache-2.0
4+
5+ set -xeuo pipefail
6+
7+ . /opt/conda/etc/profile.d/conda.sh
8+
9+ CPP_CHANNEL=$( rapids-download-conda-from-github cpp)
10+
11+ rapids-logger " Generate C++ testing dependencies"
12+ rapids-dependency-file-generator \
13+ --output conda \
14+ --file-key test_cpp \
15+ --matrix " cuda=${RAPIDS_CUDA_VERSION% .* } ;arch=$( arch) " \
16+ --prepend-channel " ${CPP_CHANNEL} " \
17+ | tee env.yaml
18+
19+ rapids-mamba-retry env create --yes -f env.yaml -n test
20+
21+ # Temporarily allow unbound variables for conda activation.
22+ set +u
23+ conda activate test
24+ set -u
25+
26+ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:- " ${PWD} /test-results" } /
27+ mkdir -p " ${RAPIDS_TESTS_DIR} "
28+
29+ rapids-print-env
30+
31+ rapids-logger " Check GPU usage"
32+ nvidia-smi
33+
34+ # Trap ERR so that `EXITCODE=1` is set when a command fails
35+ EXITCODE=0
36+ trap " EXITCODE=1" ERR
37+ set +e
38+
39+ # Support customizing the ctests' install location
40+ cd " ${INSTALL_PREFIX:- ${CONDA_PREFIX:-/ usr} } /bin/tests/librapidsmpf/"
41+
42+ rapids-logger " Run librapidsmpf gtests with compute-sanitizer (Single Node)"
43+ compute-sanitizer --tool memcheck --track-stream-ordered-races=all gtests/single_tests --gtest_filter=-CuptiMonitorTest.*
44+
45+ rapids-logger " Test script exiting with value: $EXITCODE "
46+ exit ${EXITCODE}
Original file line number Diff line number Diff line change @@ -114,11 +114,6 @@ include(../cmake/thirdparty/get_nvtx.cmake)
114114include (../cmake/thirdparty/get_rmm.cmake)
115115include (../cmake/thirdparty/get_cudf.cmake)
116116if (RAPIDSMPF_HAVE_UCXX)
117- rapids_find_package(
118- UCX REQUIRED
119- BUILD_EXPORT_SET rapidsmpf-exports
120- INSTALL_EXPORT_SET rapidsmpf-exports
121- )
122117 rapids_find_package(
123118 ucxx REQUIRED
124119 BUILD_EXPORT_SET rapidsmpf-exports
@@ -292,17 +287,7 @@ endif()
292287# ##################################################################################################
293288# * add tests -------------------------------------------------------------------------------------
294289if (RAPIDSMPF_BUILD_TESTS)
295- # include CTest module -- automatically calls enable_testing()
296290 include (CTest)
297-
298- # ctest cuda memcheck
299- find_program (CUDA_SANITIZER compute-sanitizer)
300- set (MEMORYCHECK_COMMAND ${CUDA_SANITIZER} )
301- set (MEMORYCHECK_TYPE CudaSanitizer)
302- set (CUDA_SANITIZER_COMMAND_OPTIONS "--tool memcheck" )
303-
304- # Always print verbose output when tests fail if run using `make test`.
305- list (APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure" )
306291 add_subdirectory (tests)
307292endif ()
308293
Original file line number Diff line number Diff line change @@ -22,9 +22,8 @@ target_compile_options(
2222 "$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS} >"
2323)
2424target_link_libraries (
25- bench_shuffle
26- PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
27- $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
25+ bench_shuffle PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
26+ $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
2827)
2928install (
3029 TARGETS bench_shuffle
@@ -50,7 +49,7 @@ target_compile_options(
5049 "$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS} >"
5150)
5251target_link_libraries (
53- bench_comm PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
52+ bench_comm PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
5453 $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
5554)
5655install (
@@ -84,7 +83,6 @@ target_link_libraries(
8483 bench_partition
8584 PRIVATE rapidsmpf::rapidsmpf
8685 ucxx::ucxx
87- ucx::ucp
8886 benchmark::benchmark
8987 benchmark::benchmark_main
9088 $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
Original file line number Diff line number Diff line change @@ -19,7 +19,7 @@ target_compile_options(
1919)
2020target_link_libraries (
2121 bench_streaming_shuffle
22- PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
22+ PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
2323 $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
2424)
2525install (
Original file line number Diff line number Diff line change @@ -20,9 +20,8 @@ target_compile_options(
2020 "$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS} >"
2121)
2222target_link_libraries (
23- example_shuffle
24- PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
25- $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan
23+ example_shuffle PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
24+ $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan
2625)
2726install (
2827 TARGETS example_shuffle
Original file line number Diff line number Diff line change @@ -121,7 +121,6 @@ if(RAPIDSMPF_HAVE_MPI)
121121 GTest::gmock
122122 GTest::gtest
123123 ucxx::ucxx
124- ucx::ucp
125124 $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
126125 $<TARGET_NAME_IF_EXISTS:conda_env>
127126 maybe_asan
Original file line number Diff line number Diff line change @@ -37,8 +37,8 @@ struct throw_at_limit_resource final : public rmm::mr::device_memory_resource {
3737 return ptr;
3838 }
3939
40- void do_deallocate (void * ptr, std::size_t , rmm::cuda_stream_view) override {
41- RAPIDSMPF_CUDA_TRY (cudaFree (ptr));
40+ void do_deallocate (void * ptr, std::size_t , rmm::cuda_stream_view stream ) override {
41+ RAPIDSMPF_CUDA_TRY (cudaFreeAsync (ptr, stream. value () ));
4242 allocs.erase (ptr);
4343 }
4444
Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ if(RAPIDSMPF_HAVE_UCXX)
1717 rapids_cython_create_modules(
1818 CXX
1919 SOURCE_FILES "${modules_need_ucxx} "
20- LINKED_LIBRARIES rapidsmpf::rapidsmpf PRIVATE ucxx::ucxx ucxx::python ucx::ucp maybe_asan
20+ LINKED_LIBRARIES rapidsmpf::rapidsmpf PRIVATE ucxx::ucxx ucxx::python maybe_asan
2121 )
2222endif ()
2323
You can’t perform that action at this time.
0 commit comments