Skip to content

Commit 28b810f

Browse files
Merge branch 'branch-25.10' into shuffler-callbacks
2 parents 74ad736 + 4dc6bbb commit 28b810f

File tree

10 files changed

+72
-28
lines changed

10 files changed

+72
-28
lines changed

.github/workflows/pr.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ jobs:
1717
- conda-cpp-build
1818
- conda-cpp-linters
1919
- conda-cpp-tests
20+
- conda-cpp-memcheck
2021
- conda-python-build
2122
- conda-python-tests
2223
- docs-build
@@ -121,6 +122,14 @@ jobs:
121122
build_type: pull-request
122123
container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000"
123124
script: ci/test_cpp.sh
125+
conda-cpp-memcheck:
126+
secrets: inherit
127+
needs: conda-cpp-build
128+
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
129+
with:
130+
build_type: pull-request
131+
script: "ci/test_cpp_memcheck.sh"
132+
node_type: "gpu-l4-latest-1"
124133
conda-python-build:
125134
needs: conda-cpp-build
126135
secrets: inherit

.github/workflows/test.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ jobs:
3333
date: ${{ inputs.date }}
3434
script: ci/test_cpp.sh
3535
sha: ${{ inputs.sha }}
36+
conda-cpp-memcheck:
37+
secrets: inherit
38+
needs: checks
39+
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
40+
with:
41+
build_type: pull-request
42+
script: "ci/test_cpp_memcheck.sh"
43+
node_type: "gpu-l4-latest-1"
3644
conda-python-tests:
3745
secrets: inherit
3846
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10

ci/test_cpp_memcheck.sh

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/bin/bash
2+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
set -xeuo pipefail
6+
7+
. /opt/conda/etc/profile.d/conda.sh
8+
9+
CPP_CHANNEL=$(rapids-download-conda-from-github cpp)
10+
11+
rapids-logger "Generate C++ testing dependencies"
12+
rapids-dependency-file-generator \
13+
--output conda \
14+
--file-key test_cpp \
15+
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" \
16+
--prepend-channel "${CPP_CHANNEL}" \
17+
| tee env.yaml
18+
19+
rapids-mamba-retry env create --yes -f env.yaml -n test
20+
21+
# Temporarily allow unbound variables for conda activation.
22+
set +u
23+
conda activate test
24+
set -u
25+
26+
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
27+
mkdir -p "${RAPIDS_TESTS_DIR}"
28+
29+
rapids-print-env
30+
31+
rapids-logger "Check GPU usage"
32+
nvidia-smi
33+
34+
# Trap ERR so that `EXITCODE=1` is set when a command fails
35+
EXITCODE=0
36+
trap "EXITCODE=1" ERR
37+
set +e
38+
39+
# Support customizing the ctests' install location
40+
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/tests/librapidsmpf/"
41+
42+
rapids-logger "Run librapidsmpf gtests with compute-sanitizer (Single Node)"
43+
compute-sanitizer --tool memcheck --track-stream-ordered-races=all gtests/single_tests --gtest_filter=-CuptiMonitorTest.*
44+
45+
rapids-logger "Test script exiting with value: $EXITCODE"
46+
exit ${EXITCODE}

cpp/CMakeLists.txt

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,6 @@ include(../cmake/thirdparty/get_nvtx.cmake)
114114
include(../cmake/thirdparty/get_rmm.cmake)
115115
include(../cmake/thirdparty/get_cudf.cmake)
116116
if(RAPIDSMPF_HAVE_UCXX)
117-
rapids_find_package(
118-
UCX REQUIRED
119-
BUILD_EXPORT_SET rapidsmpf-exports
120-
INSTALL_EXPORT_SET rapidsmpf-exports
121-
)
122117
rapids_find_package(
123118
ucxx REQUIRED
124119
BUILD_EXPORT_SET rapidsmpf-exports
@@ -292,17 +287,7 @@ endif()
292287
# ##################################################################################################
293288
# * add tests -------------------------------------------------------------------------------------
294289
if(RAPIDSMPF_BUILD_TESTS)
295-
# include CTest module -- automatically calls enable_testing()
296290
include(CTest)
297-
298-
# ctest cuda memcheck
299-
find_program(CUDA_SANITIZER compute-sanitizer)
300-
set(MEMORYCHECK_COMMAND ${CUDA_SANITIZER})
301-
set(MEMORYCHECK_TYPE CudaSanitizer)
302-
set(CUDA_SANITIZER_COMMAND_OPTIONS "--tool memcheck")
303-
304-
# Always print verbose output when tests fail if run using `make test`.
305-
list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure")
306291
add_subdirectory(tests)
307292
endif()
308293

cpp/benchmarks/CMakeLists.txt

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@ target_compile_options(
2222
"$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS}>"
2323
)
2424
target_link_libraries(
25-
bench_shuffle
26-
PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
27-
$<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
25+
bench_shuffle PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
26+
$<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
2827
)
2928
install(
3029
TARGETS bench_shuffle
@@ -50,7 +49,7 @@ target_compile_options(
5049
"$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS}>"
5150
)
5251
target_link_libraries(
53-
bench_comm PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
52+
bench_comm PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
5453
$<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
5554
)
5655
install(
@@ -84,7 +83,6 @@ target_link_libraries(
8483
bench_partition
8584
PRIVATE rapidsmpf::rapidsmpf
8685
ucxx::ucxx
87-
ucx::ucp
8886
benchmark::benchmark
8987
benchmark::benchmark_main
9088
$<TARGET_NAME_IF_EXISTS:MPI::MPI_C>

cpp/benchmarks/streaming/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ target_compile_options(
1919
)
2020
target_link_libraries(
2121
bench_streaming_shuffle
22-
PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
22+
PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
2323
$<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan bench_utils
2424
)
2525
install(

cpp/examples/CMakeLists.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@ target_compile_options(
2020
"$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS}>"
2121
)
2222
target_link_libraries(
23-
example_shuffle
24-
PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
25-
$<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan
23+
example_shuffle PRIVATE rapidsmpf::rapidsmpf ucxx::ucxx $<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
24+
$<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan
2625
)
2726
install(
2827
TARGETS example_shuffle

cpp/tests/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ if(RAPIDSMPF_HAVE_MPI)
121121
GTest::gmock
122122
GTest::gtest
123123
ucxx::ucxx
124-
ucx::ucp
125124
$<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
126125
$<TARGET_NAME_IF_EXISTS:conda_env>
127126
maybe_asan

cpp/tests/test_rmm_resource_adaptor.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ struct throw_at_limit_resource final : public rmm::mr::device_memory_resource {
3737
return ptr;
3838
}
3939

40-
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override {
41-
RAPIDSMPF_CUDA_TRY(cudaFree(ptr));
40+
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override {
41+
RAPIDSMPF_CUDA_TRY(cudaFreeAsync(ptr, stream.value()));
4242
allocs.erase(ptr);
4343
}
4444

python/rapidsmpf/rapidsmpf/communicator/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ if(RAPIDSMPF_HAVE_UCXX)
1717
rapids_cython_create_modules(
1818
CXX
1919
SOURCE_FILES "${modules_need_ucxx}"
20-
LINKED_LIBRARIES rapidsmpf::rapidsmpf PRIVATE ucxx::ucxx ucxx::python ucx::ucp maybe_asan
20+
LINKED_LIBRARIES rapidsmpf::rapidsmpf PRIVATE ucxx::ucxx ucxx::python maybe_asan
2121
)
2222
endif()
2323

0 commit comments

Comments
 (0)