|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
1 | 2 | ; Check that various LLVM idioms get lowered to NVPTX as expected. |
2 | 3 |
|
3 | 4 | ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s |
|
8 | 9 | %struct.S16 = type { i16, i16 } |
9 | 10 | %struct.S32 = type { i32, i32 } |
10 | 11 |
|
11 | | -; CHECK-LABEL: abs_i16( |
12 | 12 | define i16 @abs_i16(i16 %a) { |
13 | | -; CHECK: abs.s16 |
| 13 | +; CHECK-LABEL: abs_i16( |
| 14 | +; CHECK: { |
| 15 | +; CHECK-NEXT: .reg .b16 %rs<3>; |
| 16 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 17 | +; CHECK-EMPTY: |
| 18 | +; CHECK-NEXT: // %bb.0: |
| 19 | +; CHECK-NEXT: ld.param.b16 %rs1, [abs_i16_param_0]; |
| 20 | +; CHECK-NEXT: abs.s16 %rs2, %rs1; |
| 21 | +; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; |
| 22 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; |
| 23 | +; CHECK-NEXT: ret; |
14 | 24 | %neg = sub i16 0, %a |
15 | 25 | %abs.cond = icmp sge i16 %a, 0 |
16 | 26 | %abs = select i1 %abs.cond, i16 %a, i16 %neg |
17 | 27 | ret i16 %abs |
18 | 28 | } |
19 | 29 |
|
20 | | -; CHECK-LABEL: abs_i32( |
21 | 30 | define i32 @abs_i32(i32 %a) { |
22 | | -; CHECK: abs.s32 |
| 31 | +; CHECK-LABEL: abs_i32( |
| 32 | +; CHECK: { |
| 33 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 34 | +; CHECK-EMPTY: |
| 35 | +; CHECK-NEXT: // %bb.0: |
| 36 | +; CHECK-NEXT: ld.param.b32 %r1, [abs_i32_param_0]; |
| 37 | +; CHECK-NEXT: abs.s32 %r2, %r1; |
| 38 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 39 | +; CHECK-NEXT: ret; |
23 | 40 | %neg = sub i32 0, %a |
24 | 41 | %abs.cond = icmp sge i32 %a, 0 |
25 | 42 | %abs = select i1 %abs.cond, i32 %a, i32 %neg |
26 | 43 | ret i32 %abs |
27 | 44 | } |
28 | 45 |
|
29 | | -; CHECK-LABEL: abs_i64( |
30 | 46 | define i64 @abs_i64(i64 %a) { |
31 | | -; CHECK: abs.s64 |
| 47 | +; CHECK-LABEL: abs_i64( |
| 48 | +; CHECK: { |
| 49 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 50 | +; CHECK-EMPTY: |
| 51 | +; CHECK-NEXT: // %bb.0: |
| 52 | +; CHECK-NEXT: ld.param.b64 %rd1, [abs_i64_param_0]; |
| 53 | +; CHECK-NEXT: abs.s64 %rd2, %rd1; |
| 54 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| 55 | +; CHECK-NEXT: ret; |
32 | 56 | %neg = sub i64 0, %a |
33 | 57 | %abs.cond = icmp sge i64 %a, 0 |
34 | 58 | %abs = select i1 %abs.cond, i64 %a, i64 %neg |
35 | 59 | ret i64 %abs |
36 | 60 | } |
37 | 61 |
|
38 | | -; CHECK-LABEL: i32_to_2xi16( |
39 | 62 | define %struct.S16 @i32_to_2xi16(i32 noundef %in) { |
| 63 | +; CHECK-LABEL: i32_to_2xi16( |
| 64 | +; CHECK: { |
| 65 | +; CHECK-NEXT: .reg .b16 %rs<3>; |
| 66 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 67 | +; CHECK-EMPTY: |
| 68 | +; CHECK-NEXT: // %bb.0: |
| 69 | +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_param_0]; |
| 70 | +; CHECK-NEXT: cvt.u16.u32 %rs1, %r1; |
| 71 | +; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r1; } |
| 72 | +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; |
| 73 | +; CHECK-NEXT: st.param.b16 [func_retval0+2], %rs2; |
| 74 | +; CHECK-NEXT: ret; |
40 | 75 | %low = trunc i32 %in to i16 |
41 | 76 | %high32 = lshr i32 %in, 16 |
42 | 77 | %high = trunc i32 %high32 to i16 |
43 | | -; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_param_0]; |
44 | | -; CHECK-DAG: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32]]; |
45 | | -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]]; |
46 | 78 | %s1 = insertvalue %struct.S16 poison, i16 %low, 0 |
47 | 79 | %s = insertvalue %struct.S16 %s1, i16 %high, 1 |
48 | 80 | ret %struct.S16 %s |
49 | 81 | } |
50 | 82 |
|
51 | | -; CHECK-LABEL: i32_to_2xi16_lh( |
52 | 83 | ; Same as above, but with rearranged order of low/high parts. |
53 | 84 | define %struct.S16 @i32_to_2xi16_lh(i32 noundef %in) { |
| 85 | +; CHECK-LABEL: i32_to_2xi16_lh( |
| 86 | +; CHECK: { |
| 87 | +; CHECK-NEXT: .reg .b16 %rs<3>; |
| 88 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 89 | +; CHECK-EMPTY: |
| 90 | +; CHECK-NEXT: // %bb.0: |
| 91 | +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_lh_param_0]; |
| 92 | +; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; } |
| 93 | +; CHECK-NEXT: cvt.u16.u32 %rs2, %r1; |
| 94 | +; CHECK-NEXT: st.param.b16 [func_retval0], %rs2; |
| 95 | +; CHECK-NEXT: st.param.b16 [func_retval0+2], %rs1; |
| 96 | +; CHECK-NEXT: ret; |
54 | 97 | %high32 = lshr i32 %in, 16 |
55 | 98 | %high = trunc i32 %high32 to i16 |
56 | 99 | %low = trunc i32 %in to i16 |
57 | | -; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_lh_param_0]; |
58 | | -; CHECK-DAG: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32]]; |
59 | | -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]]; |
60 | 100 | %s1 = insertvalue %struct.S16 poison, i16 %low, 0 |
61 | 101 | %s = insertvalue %struct.S16 %s1, i16 %high, 1 |
62 | 102 | ret %struct.S16 %s |
63 | 103 | } |
64 | 104 |
|
65 | 105 |
|
66 | | -; CHECK-LABEL: i32_to_2xi16_not( |
67 | 106 | define %struct.S16 @i32_to_2xi16_not(i32 noundef %in) { |
| 107 | +; CHECK-LABEL: i32_to_2xi16_not( |
| 108 | +; CHECK: { |
| 109 | +; CHECK-NEXT: .reg .b16 %rs<3>; |
| 110 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 111 | +; CHECK-EMPTY: |
| 112 | +; CHECK-NEXT: // %bb.0: |
| 113 | +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_not_param_0]; |
| 114 | +; CHECK-NEXT: cvt.u16.u32 %rs1, %r1; |
| 115 | +; CHECK-NEXT: shr.u32 %r2, %r1, 15; |
| 116 | +; CHECK-NEXT: cvt.u16.u32 %rs2, %r2; |
| 117 | +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; |
| 118 | +; CHECK-NEXT: st.param.b16 [func_retval0+2], %rs2; |
| 119 | +; CHECK-NEXT: ret; |
68 | 120 | %low = trunc i32 %in to i16 |
69 | 121 | ; Shift by any value other than 16 blocks the conversion to mov. |
70 | 122 | %high32 = lshr i32 %in, 15 |
71 | 123 | %high = trunc i32 %high32 to i16 |
72 | | -; CHECK: cvt.u16.u32 |
73 | | -; CHECK: shr.u32 |
74 | | -; CHECK: cvt.u16.u32 |
75 | 124 | %s1 = insertvalue %struct.S16 poison, i16 %low, 0 |
76 | 125 | %s = insertvalue %struct.S16 %s1, i16 %high, 1 |
77 | 126 | ret %struct.S16 %s |
78 | 127 | } |
79 | 128 |
|
80 | | -; CHECK-LABEL: i64_to_2xi32( |
81 | 129 | define %struct.S32 @i64_to_2xi32(i64 noundef %in) { |
| 130 | +; CHECK-LABEL: i64_to_2xi32( |
| 131 | +; CHECK: { |
| 132 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 133 | +; CHECK-NEXT: .reg .b64 %rd<2>; |
| 134 | +; CHECK-EMPTY: |
| 135 | +; CHECK-NEXT: // %bb.0: |
| 136 | +; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_param_0]; |
| 137 | +; CHECK-NEXT: cvt.u32.u64 %r1, %rd1; |
| 138 | +; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r2}, %rd1; } |
| 139 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; |
| 140 | +; CHECK-NEXT: st.param.b32 [func_retval0+4], %r2; |
| 141 | +; CHECK-NEXT: ret; |
82 | 142 | %low = trunc i64 %in to i32 |
83 | 143 | %high64 = lshr i64 %in, 32 |
84 | 144 | %high = trunc i64 %high64 to i32 |
85 | | -; CHECK: ld.param.b64 %[[R64:rd[0-9]+]], [i64_to_2xi32_param_0]; |
86 | | -; CHECK-DAG: cvt.u32.u64 %r{{[0-9+]}}, %[[R64]]; |
87 | | -; CHECK-DAG mov.b64 {tmp, %r{{[0-9+]}}}, %[[R64]]; |
88 | 145 | %s1 = insertvalue %struct.S32 poison, i32 %low, 0 |
89 | 146 | %s = insertvalue %struct.S32 %s1, i32 %high, 1 |
90 | 147 | ret %struct.S32 %s |
91 | 148 | } |
92 | 149 |
|
93 | | -; CHECK-LABEL: i64_to_2xi32_not( |
94 | 150 | define %struct.S32 @i64_to_2xi32_not(i64 noundef %in) { |
| 151 | +; CHECK-LABEL: i64_to_2xi32_not( |
| 152 | +; CHECK: { |
| 153 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 154 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 155 | +; CHECK-EMPTY: |
| 156 | +; CHECK-NEXT: // %bb.0: |
| 157 | +; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_not_param_0]; |
| 158 | +; CHECK-NEXT: cvt.u32.u64 %r1, %rd1; |
| 159 | +; CHECK-NEXT: shr.u64 %rd2, %rd1, 31; |
| 160 | +; CHECK-NEXT: cvt.u32.u64 %r2, %rd2; |
| 161 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; |
| 162 | +; CHECK-NEXT: st.param.b32 [func_retval0+4], %r2; |
| 163 | +; CHECK-NEXT: ret; |
95 | 164 | %low = trunc i64 %in to i32 |
96 | 165 | ; Shift by any value other than 32 blocks the conversion to mov. |
97 | 166 | %high64 = lshr i64 %in, 31 |
98 | 167 | %high = trunc i64 %high64 to i32 |
99 | | -; CHECK: cvt.u32.u64 |
100 | | -; CHECK: shr.u64 |
101 | | -; CHECK: cvt.u32.u64 |
102 | 168 | %s1 = insertvalue %struct.S32 poison, i32 %low, 0 |
103 | 169 | %s = insertvalue %struct.S32 %s1, i32 %high, 1 |
104 | 170 | ret %struct.S32 %s |
105 | 171 | } |
106 | 172 |
|
107 | | -; CHECK-LABEL: i32_to_2xi16_shr( |
108 | 173 | ; Make sure we do not get confused when our input itself is [al]shr. |
109 | 174 | define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){ |
| 175 | +; CHECK-LABEL: i32_to_2xi16_shr( |
| 176 | +; CHECK: { |
| 177 | +; CHECK-NEXT: .reg .b16 %rs<3>; |
| 178 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 179 | +; CHECK-EMPTY: |
| 180 | +; CHECK-NEXT: // %bb.0: |
| 181 | +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_shr_param_0]; |
| 182 | +; CHECK-NEXT: { // callseq 0, 0 |
| 183 | +; CHECK-NEXT: .param .b32 param0; |
| 184 | +; CHECK-NEXT: st.param.b32 [param0], %r1; |
| 185 | +; CHECK-NEXT: call.uni |
| 186 | +; CHECK-NEXT: escape_int, |
| 187 | +; CHECK-NEXT: ( |
| 188 | +; CHECK-NEXT: param0 |
| 189 | +; CHECK-NEXT: ); |
| 190 | +; CHECK-NEXT: } // callseq 0 |
| 191 | +; CHECK-NEXT: shr.s32 %r2, %r1, 16; |
| 192 | +; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; } |
| 193 | +; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r2; } |
| 194 | +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; |
| 195 | +; CHECK-NEXT: st.param.b16 [func_retval0+2], %rs2; |
| 196 | +; CHECK-NEXT: ret; |
110 | 197 | call void @escape_int(i32 %i); // Force %i to be loaded completely. |
111 | 198 | %i1 = ashr i32 %i, 16 |
112 | 199 | %l = trunc i32 %i1 to i16 |
113 | 200 | %h32 = ashr i32 %i1, 16 |
114 | 201 | %h = trunc i32 %h32 to i16 |
115 | | -; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_shr_param_0]; |
116 | | -; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 16; |
117 | | -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]]; |
118 | | -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32H]]; |
119 | 202 | %s0 = insertvalue %struct.S16 poison, i16 %l, 0 |
120 | 203 | %s1 = insertvalue %struct.S16 %s0, i16 %h, 1 |
121 | 204 | ret %struct.S16 %s1 |
|
0 commit comments