Skip to content

Commit 4dc6bbb

Browse files
authored
CI: run with compute sanitizer (rapidsai#498)
Run compute-sanitizer in CI. Because CI resources are limited, the tests are only run with the single communicator, which takes only about 4 minutes. Also fixes a memory-free bug in a test (found by compute-sanitizer). Authors: - Mads R. B. Kristensen (https://github.com/madsbk) - Niranda Perera (https://github.com/nirandaperera) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Niranda Perera (https://github.com/nirandaperera) - Peter Andreas Entschev (https://github.com/pentschev) URL: rapidsai#498
1 parent 6b8881c commit 4dc6bbb

File tree

5 files changed

+65
-12
lines changed

5 files changed

+65
-12
lines changed

.github/workflows/pr.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ jobs:
1717
- conda-cpp-build
1818
- conda-cpp-linters
1919
- conda-cpp-tests
20+
- conda-cpp-memcheck
2021
- conda-python-build
2122
- conda-python-tests
2223
- docs-build
@@ -121,6 +122,14 @@ jobs:
121122
build_type: pull-request
122123
container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000"
123124
script: ci/test_cpp.sh
125+
conda-cpp-memcheck:
126+
secrets: inherit
127+
needs: conda-cpp-build
128+
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
129+
with:
130+
build_type: pull-request
131+
script: "ci/test_cpp_memcheck.sh"
132+
node_type: "gpu-l4-latest-1"
124133
conda-python-build:
125134
needs: conda-cpp-build
126135
secrets: inherit

.github/workflows/test.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ jobs:
3333
date: ${{ inputs.date }}
3434
script: ci/test_cpp.sh
3535
sha: ${{ inputs.sha }}
36+
conda-cpp-memcheck:
37+
secrets: inherit
38+
needs: checks
39+
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
40+
with:
41+
build_type: pull-request
42+
script: "ci/test_cpp_memcheck.sh"
43+
node_type: "gpu-l4-latest-1"
3644
conda-python-tests:
3745
secrets: inherit
3846
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10

ci/test_cpp_memcheck.sh

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/bin/bash
2+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
set -xeuo pipefail
6+
7+
. /opt/conda/etc/profile.d/conda.sh
8+
9+
CPP_CHANNEL=$(rapids-download-conda-from-github cpp)
10+
11+
rapids-logger "Generate C++ testing dependencies"
12+
rapids-dependency-file-generator \
13+
--output conda \
14+
--file-key test_cpp \
15+
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" \
16+
--prepend-channel "${CPP_CHANNEL}" \
17+
| tee env.yaml
18+
19+
rapids-mamba-retry env create --yes -f env.yaml -n test
20+
21+
# Temporarily allow unbound variables for conda activation.
22+
set +u
23+
conda activate test
24+
set -u
25+
26+
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
27+
mkdir -p "${RAPIDS_TESTS_DIR}"
28+
29+
rapids-print-env
30+
31+
rapids-logger "Check GPU usage"
32+
nvidia-smi
33+
34+
# Trap ERR so that `EXITCODE=1` is set when a command fails
35+
EXITCODE=0
36+
trap "EXITCODE=1" ERR
37+
set +e
38+
39+
# Support customizing the ctests' install location
40+
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/tests/librapidsmpf/"
41+
42+
rapids-logger "Run librapidsmpf gtests with compute-sanitizer (Single Node)"
43+
compute-sanitizer --tool memcheck --track-stream-ordered-races=all gtests/single_tests --gtest_filter=-CuptiMonitorTest.*
44+
45+
rapids-logger "Test script exiting with value: $EXITCODE"
46+
exit ${EXITCODE}

cpp/CMakeLists.txt

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -287,17 +287,7 @@ endif()
287287
# ##################################################################################################
288288
# * add tests -------------------------------------------------------------------------------------
289289
if(RAPIDSMPF_BUILD_TESTS)
290-
# include CTest module -- automatically calls enable_testing()
291290
include(CTest)
292-
293-
# ctest cuda memcheck
294-
find_program(CUDA_SANITIZER compute-sanitizer)
295-
set(MEMORYCHECK_COMMAND ${CUDA_SANITIZER})
296-
set(MEMORYCHECK_TYPE CudaSanitizer)
297-
set(CUDA_SANITIZER_COMMAND_OPTIONS "--tool memcheck")
298-
299-
# Always print verbose output when tests fail if run using `make test`.
300-
list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure")
301291
add_subdirectory(tests)
302292
endif()
303293

cpp/tests/test_rmm_resource_adaptor.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ struct throw_at_limit_resource final : public rmm::mr::device_memory_resource {
3737
return ptr;
3838
}
3939

40-
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override {
41-
RAPIDSMPF_CUDA_TRY(cudaFree(ptr));
40+
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override {
41+
RAPIDSMPF_CUDA_TRY(cudaFreeAsync(ptr, stream.value()));
4242
allocs.erase(ptr);
4343
}
4444

0 commit comments

Comments
 (0)