Skip to content

Commit 9c1d30b

Browse files
committed
[test-suite,CUDA] Add a test case to test the edge cases for the implementation of llvm.round intrinsic in the PTX backend.
Reviewers: tra Subscribers: sanjoy, mgorny, jlebar, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59950 llvm-svn: 357430
1 parent 1db9040 commit 9c1d30b

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

External/CUDA/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ macro(create_local_cuda_tests VariantSuffix)
110110
# buildbot a lot.
111111
create_one_local_test_f(simd simd.cu
112112
"cuda-(8[.]0|9[.]2)-c[+][+]11-libc[+][+]")
113+
create_one_local_test(test_round test_round.cu)
113114
endmacro()
114115

115116
macro(thrust_make_test_name TestName TestSourcePath)

External/CUDA/test_round.cu

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <iostream>
10+
#include <cassert>
11+
12+
// Test the implementation of the llvm.round intrinsic. In particular, when the
13+
// source is equidistant between two integers, it rounds away from zero.
14+
//
15+
// In CUDA libdevice, the implementation of round separates the values into
16+
// three regions and uses a region specific rounding method to calculate
17+
// the result:
18+
// abs(x) <= 0.5
19+
// 2 ^ 23 > abs(x) > 0.5 (float)
20+
// abs(x) >= 2 ^ 23 (float)
21+
// For double, 2 ^ 23 above is replaced with 2 ^ 52
22+
//
23+
// The PTX backend implements round in a similar way. We chose the test values
24+
// based on this.
25+
26+
// Device-side checks for __builtin_roundf (float) and __builtin_round (double).
//
// Every operand is formed as `literal + v`, and the rounded result is compared
// against the expected value with a device-side assert(). The host launches
// this kernel with v == 0, so each operand equals its literal.
//
// Parameters:
//   v - offset added to every test operand.
__global__ void test_round(float v) {
  // Single precision: a halfway case that must round away from zero in each
  // direction, then integral values near 2^23 that must come back unchanged.
  assert(__builtin_roundf(-0.5f + v) == -1.0f);
  assert(__builtin_roundf(8.5f + v) == 9.0f);
  assert(__builtin_roundf(-8.38861e+06f + v) == -8.38861e+06f);
  assert(__builtin_roundf(8.38861e+06f + v) == 8.38861e+06f);

  // Double precision: the same pattern. The expected values are written as
  // double literals so both sides of each comparison have the same type.
  assert(__builtin_round(0.5 + v) == 1.0);
  assert(__builtin_round(-8.5 + v) == -9.0);
  // Integral values near 2^52 must come back unchanged.
  assert(__builtin_round(4.5035996e+15 + v) == 4.5035996e+15);
  assert(__builtin_round(-4.5035996e+15 + v) == -4.5035996e+15);
  // Test values beyond +/- max(float).
  assert(__builtin_round(3.4e39 + v) == 3.4e39);
  assert(__builtin_round(-3.4e39 + v) == -3.4e39);
}
40+
41+
// Host driver: launches test_round on a single thread and reports the result.
//
// Returns 0 and prints "Success!" when the kernel launches and completes
// without a CUDA error; otherwise prints the numeric CUDA error code and
// returns 1.
int main(int argc, char* argv[]) {
  // Offset handed to the kernel. The kernel adds it to every test operand, so
  // zero leaves the operands unchanged.
  float host_value = 0;

  // Launch the kernel with one block of one thread.
  test_round<<<1, 1>>>(host_value);

  // A kernel launch does not return a status itself; check for a launch
  // failure before waiting for the kernel to finish.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    printf("CUDA error %d\n", (int)err);
    return 1;
  }

  // Errors raised while the kernel runs are reported by the synchronizing
  // call.
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    printf("CUDA error %d\n", (int)err);
    return 1;
  }

  printf("Success!\n");
  return 0;
}

0 commit comments

Comments
 (0)