|
1 | | -; Verify that when passing in command-line options to NVVMReflect, that reflect calls are replaced with |
2 | | -; the appropriate command line values. |
| 1 | +; Test the NVVM reflect pass: verify that __nvvm_reflect calls are replaced with |
| 2 | +; the values supplied by command-line options. Also verify that custom reflect arguments |
| 3 | +; other than __CUDA_ARCH and __CUDA_FTZ are handled: if such an argument is given a value on the command line, the reflect call is replaced with that value; |
| 4 | +; otherwise, the reflect call is replaced with 0. |
| 5 | + |
| 6 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=1 -nvvm-reflect-add __CUDA_ARCH=350 %s -S | FileCheck %s --check-prefix=CHECK-FTZ1 --check-prefix=CHECK-ARCH350 --check-prefix=CHECK-CUSTOM-ABSENT |
| 7 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %s -S | FileCheck %s --check-prefix=CHECK-FTZ0 --check-prefix=CHECK-ARCH520 --check-prefix=CHECK-CUSTOM-ABSENT |
| 8 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 -nvvm-reflect-add __CUSTOM_VALUE=42 %s -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PRESENT |
| 9 | + |
| 10 | +; To ensure that command-line options override module flags, create a copy of this test file with an "nvvm-reflect-ftz" module flag set to 1 appended, and rerun the __CUDA_FTZ=0 / __CUDA_ARCH=520 checks against that copy. |
| 11 | +; |
| 12 | +; RUN: cat %s > %t.options |
| 13 | +; RUN: echo '!llvm.module.flags = !{!0}' >> %t.options |
| 14 | +; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.options |
| 15 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %t.options -S | FileCheck %s --check-prefix=CHECK-FTZ0 --check-prefix=CHECK-ARCH520 |
3 | 16 |
|
4 | 17 | declare i32 @__nvvm_reflect(ptr) |
5 | 18 | @ftz = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00" |
6 | 19 | @arch = private unnamed_addr addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00" |
| 20 | +@custom = private unnamed_addr addrspace(1) constant [15 x i8] c"__CUSTOM_VALUE\00" |
7 | 21 |
|
8 | | -; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=1 -nvvm-reflect-add __CUDA_ARCH=350 %s -S | FileCheck %s --check-prefix=CHECK-FTZ1-ARCH350 |
9 | | -; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %s -S | FileCheck %s --check-prefix=CHECK-FTZ0-ARCH520 |
| 22 | +; Test handling of the __CUDA_FTZ reflect value: call __nvvm_reflect on the @ftz string ("__CUDA_FTZ", addrspacecast from addrspace(1) to a generic ptr) and return the result directly, so the CHECK-FTZ* lines can match the constant that ends up in the ret. |
| 23 | +define i32 @test_ftz() { |
| 24 | + %1 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @ftz to ptr)) |
| 25 | + ret i32 %1 |
| 26 | +} |
10 | 27 |
|
11 | | -; Verify that if we have module metadata that sets __CUDA_FTZ=1, that gets overridden by the command line arguments |
| 28 | +; CHECK-FTZ1: define i32 @test_ftz() |
| 29 | +; CHECK-FTZ1: ret i32 1 |
| 30 | +; CHECK-FTZ0: define i32 @test_ftz() |
| 31 | +; CHECK-FTZ0: ret i32 0 |
12 | 32 |
|
13 | | -; RUN: cat %s > %t.options |
14 | | -; RUN: echo '!llvm.module.flags = !{!0}' >> %t.options |
15 | | -; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.options |
16 | | -; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %t.options -S | FileCheck %s --check-prefix=CHECK-FTZ0-ARCH520 |
| 33 | +; Test handling of the __CUDA_ARCH reflect value: call __nvvm_reflect on the @arch string ("__CUDA_ARCH", addrspacecast from addrspace(1) to a generic ptr) and return the result directly, so the CHECK-ARCH* lines can match the constant that ends up in the ret. |
| 34 | +define i32 @test_arch() { |
| 35 | + %1 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @arch to ptr)) |
| 36 | + ret i32 %1 |
| 37 | +} |
17 | 38 |
|
18 | | -define i32 @options() { |
19 | | - %1 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @ftz to ptr)) |
20 | | - %2 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @arch to ptr)) |
21 | | - %3 = add i32 %1, %2 |
22 | | - ret i32 %3 |
| 39 | +; CHECK-ARCH350: define i32 @test_arch() |
| 40 | +; CHECK-ARCH350: ret i32 350 |
| 41 | +; CHECK-ARCH520: define i32 @test_arch() |
| 42 | +; CHECK-ARCH520: ret i32 520 |
| 43 | + |
| 44 | +; Test handling of a custom reflect value that's not built into the pass: call __nvvm_reflect on the @custom string ("__CUSTOM_VALUE") and return the result directly. The CHECK-CUSTOM-ABSENT lines expect 0 when no value is given on the command line; the CHECK-CUSTOM-PRESENT lines expect 42 when __CUSTOM_VALUE=42 is passed. |
| 45 | +define i32 @test_custom() { |
| 46 | + %1 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @custom to ptr)) |
| 47 | + ret i32 %1 |
23 | 48 | } |
24 | 49 |
|
25 | | -; CHECK-FTZ1-ARCH350: ret i32 351 |
26 | | -; CHECK-FTZ0-ARCH520: ret i32 520 |
| 50 | +; CHECK-CUSTOM-ABSENT: define i32 @test_custom() |
| 51 | +; CHECK-CUSTOM-ABSENT: ret i32 0 |
| 52 | +; CHECK-CUSTOM-PRESENT: define i32 @test_custom() |
| 53 | +; CHECK-CUSTOM-PRESENT: ret i32 42 |
0 commit comments