|
1 | | -; RUN: llc < %s | FileCheck %s |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc < %s -march=nvptx64 | FileCheck %s |
| 3 | +; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} |
| 4 | + |
| 5 | +target triple = "nvptx64-nvidia-cuda" |
2 | 6 |
|
3 | 7 | define i32 @trunc(i64 %a, i64 %b) { |
4 | | -; CHECK-LABEL: trunc |
5 | | -; CHECK: or.b32 |
6 | | -; CHECK-NOT: or.b64 |
7 | | -entry: |
| 8 | +; CHECK-LABEL: trunc( |
| 9 | +; CHECK: { |
| 10 | +; CHECK-NEXT: .reg .b32 %r<4>; |
| 11 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 12 | +; CHECK-EMPTY: |
| 13 | +; CHECK-NEXT: // %bb.0: |
| 14 | +; CHECK-NEXT: ld.param.u64 %rd1, [trunc_param_0]; |
| 15 | +; CHECK-NEXT: ld.param.u64 %rd2, [trunc_param_1]; |
| 16 | +; CHECK-NEXT: cvt.u32.u64 %r1, %rd2; |
| 17 | +; CHECK-NEXT: cvt.u32.u64 %r2, %rd1; |
| 18 | +; CHECK-NEXT: or.b32 %r3, %r2, %r1; |
| 19 | +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; |
| 20 | +; CHECK-NEXT: ret; |
8 | 21 | %or = or i64 %a, %b |
9 | 22 | %trunc = trunc i64 %or to i32 |
10 | 23 | ret i32 %trunc |
11 | 24 | } |
12 | 25 |
|
13 | | -define i32 @trunc_not(i64 %a, i64 %b, ptr %p) { |
14 | | -; CHECK-LABEL: trunc_not |
15 | | -; CHECK: or.b64 |
16 | | -; CHECK-NOT: or.b32 |
17 | | -entry: |
| 26 | +define i32 @trunc_not(i64 %a, i64 %b) { |
| 27 | +; CHECK-LABEL: trunc_not( |
| 28 | +; CHECK: { |
| 29 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 30 | +; CHECK-NEXT: .reg .b64 %rd<5>; |
| 31 | +; CHECK-EMPTY: |
| 32 | +; CHECK-NEXT: // %bb.0: |
| 33 | +; CHECK-NEXT: ld.param.u64 %rd1, [trunc_not_param_0]; |
| 34 | +; CHECK-NEXT: ld.param.u64 %rd2, [trunc_not_param_1]; |
| 35 | +; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2; |
| 36 | +; CHECK-NEXT: cvt.u32.u64 %r1, %rd3; |
| 37 | +; CHECK-NEXT: mov.u64 %rd4, 0; |
| 38 | +; CHECK-NEXT: st.u64 [%rd4], %rd3; |
| 39 | +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; |
| 40 | +; CHECK-NEXT: ret; |
18 | 41 | %or = or i64 %a, %b |
19 | 42 | %trunc = trunc i64 %or to i32 |
20 | | - store i64 %or, ptr %p |
| 43 | + store i64 %or, ptr null |
| 44 | + ret i32 %trunc |
| 45 | +} |
| 46 | + |
| 47 | +define i32 @trunc_cvt(i64 %a, i64 %b) { |
| 48 | +; CHECK-LABEL: trunc_cvt( |
| 49 | +; CHECK: { |
| 50 | +; CHECK-NEXT: .reg .b32 %r<5>; |
| 51 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 52 | +; CHECK-EMPTY: |
| 53 | +; CHECK-NEXT: // %bb.0: |
| 54 | +; CHECK-NEXT: ld.param.u64 %rd1, [trunc_cvt_param_0]; |
| 55 | +; CHECK-NEXT: ld.param.u64 %rd2, [trunc_cvt_param_1]; |
| 56 | +; CHECK-NEXT: cvt.u32.u64 %r1, %rd2; |
| 57 | +; CHECK-NEXT: cvt.u32.u64 %r2, %rd1; |
| 58 | +; CHECK-NEXT: add.s32 %r3, %r2, %r1; |
| 59 | +; CHECK-NEXT: or.b32 %r4, %r3, %r2; |
| 60 | +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; |
| 61 | +; CHECK-NEXT: ret; |
| 62 | + %add = add i64 %a, %b |
| 63 | + %or = or i64 %add, %a |
| 64 | + %trunc = trunc i64 %or to i32 |
| 65 | + ret i32 %trunc |
| 66 | +} |
| 67 | + |
| 68 | +define i32 @trunc_cvt_not(i64 %a, i64 %b) { |
| 69 | +; CHECK-LABEL: trunc_cvt_not( |
| 70 | +; CHECK: { |
| 71 | +; CHECK-NEXT: .reg .b32 %r<5>; |
| 72 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 73 | +; CHECK-EMPTY: |
| 74 | +; CHECK-NEXT: // %bb.0: |
| 75 | +; CHECK-NEXT: ld.param.u64 %rd1, [trunc_cvt_not_param_0]; |
| 76 | +; CHECK-NEXT: ld.param.u64 %rd2, [trunc_cvt_not_param_1]; |
| 77 | +; CHECK-NEXT: mov.u64 %rd3, 0; |
| 78 | +; CHECK-NEXT: st.u64 [%rd3], %rd2; |
| 79 | +; CHECK-NEXT: cvt.u32.u64 %r1, %rd2; |
| 80 | +; CHECK-NEXT: cvt.u32.u64 %r2, %rd1; |
| 81 | +; CHECK-NEXT: add.s32 %r3, %r2, %r1; |
| 82 | +; CHECK-NEXT: or.b32 %r4, %r3, %r2; |
| 83 | +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; |
| 84 | +; CHECK-NEXT: ret; |
| 85 | + %add = add i64 %a, %b |
| 86 | + store i64 %b, ptr null |
| 87 | + %or = or i64 %add, %a |
| 88 | + %trunc = trunc i64 %or to i32 |
21 | 89 | ret i32 %trunc |
22 | 90 | } |
0 commit comments