Skip to content

Commit 1fe0125

Browse files
authored
feat: compute-sanitizer in build (#1735)
* Add compute-sanitizer.
* Fix a CUDA memory error.
1 parent eaec977 commit 1fe0125

16 files changed

+41
-26
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ jobs:
106106
clang-runtime: '19'
107107
coverage: true
108108
cuda: true
109-
extra_cmake_options: '-DCLAD_ENABLE_ENZYME_BACKEND=On'
109+
extra_cmake_options: '-DCLAD_ENABLE_ENZYME_BACKEND=On -DCLAD_CUDA_TEST_USE_SANITIZER=On'
110110
extra_packages: ' libzstd-dev '
111111
#clang-format: true
112112

@@ -127,6 +127,7 @@ jobs:
127127
os: [self-hosted, cuda, heavy]
128128
runs-on: cuda
129129
compiler: clang-16
130+
extra_cmake_options: '-DCLAD_CUDA_TEST_USE_SANITIZER=On'
130131
clang-runtime: '18'
131132
cuda: true
132133

test/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,15 @@ if(NOT LLVM_MAIN_SRC_DIR)
8484
set(LLVM_MAIN_SRC_DIR ${LLVM_BUILD_MAIN_SRC_DIR})
8585
endif()
8686

87+
option(CLAD_CUDA_TEST_USE_SANITIZER "Run Clang cuda tests with compute-sanitizer" OFF)
8788
option(CLAD_TEST_USE_VG "Run Clang tests under Valgrind" OFF)
8889
set(CLAD_TEST_EXTRA_ARGS --verbose --show-skipped --show-unsupported)
8990
if(CLAD_TEST_USE_VG)
9091
set(CLAD_TEST_EXTRA_ARGS ${CLAD_TEST_EXTRA_ARGS} "--vg --vg-arg=-q")
9192
endif ()
93+
if(CLAD_CUDA_TEST_USE_SANITIZER)
94+
set(CLAD_TEST_EXTRA_ARGS ${CLAD_TEST_EXTRA_ARGS} "--param" "cuda_sanitizer=1")
95+
endif ()
9296

9397
add_lit_testsuite(check-clad "Running the Clad regression tests"
9498
${CMAKE_CURRENT_BINARY_DIR}

test/CUDA/ForwardMode.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: --cuda-gpu-arch=%cudaarch %cudaldflags -oForwardMode.out \
33
// RUN: -Xclang -verify %s 2>&1 | %filecheck %s
44
//
5-
// RUN: ./ForwardMode.out | %filecheck_exec %s
5+
// RUN: %cudarun ./ForwardMode.out | %filecheck_exec %s
66
//
77
// REQUIRES: cuda-runtime
88
//

test/CUDA/GradientCuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// RUN: %cladclang_cuda -I%S/../../include --cuda-gpu-arch=%cudaarch \
99
// RUN: --cuda-path=%cudapath %cudaldflags -oGradientCuda.out %s
1010
//
11-
// RUN: ./GradientCuda.out | %filecheck_exec %s
11+
// RUN: %cudarun ./GradientCuda.out | %filecheck_exec %s
1212
//
1313
// REQUIRES: cuda-runtime
1414
//

test/CUDA/GradientKernels.cu

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// RUN: %cladclang_cuda -Xclang -plugin-arg-clad -Xclang -disable-tbr -I%S/../../include --cuda-path=%cudapath \
66
// RUN: --cuda-gpu-arch=%cudaarch %cudaldflags -oGradientKernels.out %s
77
//
8-
// RUN: ./GradientKernels.out | %filecheck_exec %s
8+
// RUN: %cudarun ./GradientKernels.out | %filecheck_exec %s
99
//
1010
// REQUIRES: cuda-runtime
1111

@@ -1019,11 +1019,11 @@ __global__ void injective_reassignment_loop(int *a) {
10191019

10201020
int main(void) {
10211021
int *a, *d_a;
1022-
cudaMalloc(&a, sizeof(int));
1023-
cudaMalloc(&d_a, sizeof(int));
1022+
cudaMalloc(&a, 2 * sizeof(int));
1023+
cudaMalloc(&d_a, 2 * sizeof(int));
10241024

1025-
TEST(kernel, dim3(1), dim3(1), 0, false, a, d_a, 1); // CHECK-EXEC: 10
1026-
TEST(kernel, dim3(1), dim3(1), 0, true, a, d_a, 1); // CHECK-EXEC: 10
1025+
TEST(kernel, dim3(1), dim3(1), 0, false, a, d_a, 2); // CHECK-EXEC: 10
1026+
TEST(kernel, dim3(1), dim3(1), 0, true, a, d_a, 2); // CHECK-EXEC: 10
10271027

10281028
auto error = clad::gradient(fake_kernel);
10291029
error.execute_kernel(dim3(1), dim3(1), a, d_a); // CHECK-EXEC: Use execute() for non-global CUDA kernels
@@ -1146,12 +1146,12 @@ int main(void) {
11461146
TEST_2(indices_lin_comb, dim3(1), dim3(5, 1, 1), 0, false, "out, in", dummy_out, dummy_in, d_out, d_in, 5); // CHECK-EXEC: 20, 25, 45, 15, 15
11471147

11481148
int *n, *d_n;
1149-
cudaMalloc(&n, sizeof(int));
1150-
cudaMalloc(&d_n, sizeof(int));
1149+
cudaMalloc(&n, 2 * sizeof(int));
1150+
cudaMalloc(&d_n, 2 * sizeof(int));
11511151

1152-
TEST(kernel_device_injective, dim3(1), dim3(1), 0, false, n, d_n, 1); // CHECK-EXEC: 4
1153-
TEST(injective_reassignment, dim3(1), dim3(1), 0, false, n, d_n, 1); // CHECK-EXEC: 1
1154-
TEST(injective_reassignment_loop, dim3(1), dim3(1), 0, false, n, d_n, 1); // CHECK-EXEC: 1
1152+
TEST(kernel_device_injective, dim3(1), dim3(1), 0, false, n, d_n, 2); // CHECK-EXEC: 4
1153+
TEST(injective_reassignment, dim3(1), dim3(1), 0, false, n, d_n, 2); // CHECK-EXEC: 1
1154+
TEST(injective_reassignment_loop, dim3(1), dim3(1), 0, false, n, d_n,2); // CHECK-EXEC: 1
11551155

11561156
cudaFree(dummy_in);
11571157
cudaFree(dummy_out);

test/CUDA/RunCudaDemos.C

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// CHECK_VECTOR_ADDITION: clad::custom_derivatives::thrust::reduce_pullback
66
// CHECK_VECTOR_ADDITION: clad::custom_derivatives::thrust::transform_pullback
77

8-
// RUN: ./VectorAddition.out | FileCheck -check-prefix CHECK_VECTOR_ADDITION_EXEC %s
8+
// RUN: %cudarun ./VectorAddition.out | FileCheck -check-prefix CHECK_VECTOR_ADDITION_EXEC %s
99
// CHECK_VECTOR_ADDITION_EXEC: Running vector addition demo.
1010
// CHECK_VECTOR_ADDITION_EXEC: Gradients of sum wrt initial x: 1 1 1 1 1 1 1 1 1 1
1111

@@ -17,7 +17,7 @@
1717
// CHECK_PARTICLE_SIMULATION: clad::custom_derivatives::thrust::copy_pullback
1818
// CHECK_PARTICLE_SIMULATION: clad::custom_derivatives::thrust::transform_pullback
1919

20-
// RUN: ./ParticleSimulation.out | FileCheck -check-prefix CHECK_PARTICLE_SIMULATION_EXEC %s
20+
// RUN: %cudarun ./ParticleSimulation.out | FileCheck -check-prefix CHECK_PARTICLE_SIMULATION_EXEC %s
2121
// CHECK_PARTICLE_SIMULATION_EXEC: Running particle simulation demo.
2222
// CHECK_PARTICLE_SIMULATION_EXEC: Gradients of final x-pos sum wrt initial vx: 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
2323

@@ -44,7 +44,7 @@
4444
// CHECK_LINEAR_REGRESSION-NEXT: }
4545
// CHECK_LINEAR_REGRESSION-NEXT: }
4646

47-
// RUN: ./LinearRegression.out | FileCheck -check-prefix CHECK_LINEAR_REGRESSION_EXEC %s
47+
// RUN: %cudarun ./LinearRegression.out | FileCheck -check-prefix CHECK_LINEAR_REGRESSION_EXEC %s
4848
// CHECK_LINEAR_REGRESSION_EXEC: Running linear regression demo.
4949
// CHECK_LINEAR_REGRESSION_EXEC: Gradients of loss wrt weights (w): -9 -18 -27 -36 -45 -54 -63 -72 -81 -90
5050

@@ -53,7 +53,7 @@
5353
// CHECK_BOW_LOGREG: void logistic_loss_batch2_prepared_l2_grad(
5454
// CHECK_BOW_LOGREG: clad::custom_derivatives::thrust::inner_product_pullback
5555

56-
// RUN: ./BoWLogisticRegression.out | FileCheck -check-prefix CHECK_BOW_LOGREG_EXEC %s
56+
// RUN: %cudarun ./BoWLogisticRegression.out | FileCheck -check-prefix CHECK_BOW_LOGREG_EXEC %s
5757
// CHECK_BOW_LOGREG_EXEC: Running minimal logistic regression demo.
5858
// CHECK_BOW_LOGREG_EXEC: Loss:
5959
// CHECK_BOW_LOGREG_EXEC: Gradient wrt w:

test/CUDA/ThrustAdjacentDifference.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: --cuda-gpu-arch=%cudaarch %cudaldflags -oThrustAdjacentDifference.out \
33
// RUN: -Xclang -verify %s 2>&1 | %filecheck %s
44
//
5-
// RUN: ./ThrustAdjacentDifference.out | %filecheck_exec %s
5+
// RUN: %cudarun ./ThrustAdjacentDifference.out | %filecheck_exec %s
66
//
77
// REQUIRES: cuda-runtime
88
//

test/CUDA/ThrustCopy.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: --cuda-gpu-arch=%cudaarch %cudaldflags -oThrustCopy.out \
33
// RUN: -Xclang -verify %s 2>&1 | %filecheck %s
44
//
5-
// RUN: ./ThrustCopy.out | %filecheck_exec %s
5+
// RUN: %cudarun ./ThrustCopy.out | %filecheck_exec %s
66
//
77
// REQUIRES: cuda-runtime
88
//

test/CUDA/ThrustReduce.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: --cuda-gpu-arch=%cudaarch %cudaldflags -oThrustReduce.out \
33
// RUN: -Xclang -verify %s 2>&1 | %filecheck %s
44
//
5-
// RUN: ./ThrustReduce.out | %filecheck_exec %s
5+
// RUN: %cudarun ./ThrustReduce.out | %filecheck_exec %s
66
//
77
// REQUIRES: cuda-runtime
88
//

test/CUDA/ThrustReduceByKey.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: --cuda-gpu-arch=%cudaarch %cudaldflags -oThrustReduceByKey.out \
33
// RUN: -Xclang -verify %s 2>&1 | %filecheck %s
44
//
5-
// RUN: ./ThrustReduceByKey.out | %filecheck_exec %s
5+
// RUN: %cudarun ./ThrustReduceByKey.out | %filecheck_exec %s
66
//
77
// REQUIRES: cuda-runtime
88
//

0 commit comments

Comments
 (0)