|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
1 | 2 | ; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s |
2 | 3 | ; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} |
3 | 4 |
|
| 5 | +; REQUIRES: asserts |
| 6 | +; An assertions-enabled build is required for --debug-counter=dagcombine=0 |
| 7 | +; to have its intended effect of disabling DAG combines. Disabling the |
| 8 | +; combines exposes the bug; with combines enabled the bug does not occur. |
| 9 | + |
4 | 10 | %struct.8float = type <{ [8 x float] }> |
5 | 11 |
|
6 | 12 | declare i32 @callee(%struct.8float %a) |
7 | 13 |
|
8 | 14 | define i32 @test(%struct.8float alignstack(32) %data) { |
9 | | - ;CHECK-NOT: add. |
10 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0]; |
11 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+1]; |
12 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+2]; |
13 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+3]; |
14 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+4]; |
15 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+5]; |
16 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+6]; |
17 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+7]; |
18 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+8]; |
19 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+9]; |
20 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+10]; |
21 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+11]; |
22 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+12]; |
23 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+13]; |
24 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+14]; |
25 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+15]; |
26 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+16]; |
27 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+17]; |
28 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+18]; |
29 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+19]; |
30 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+20]; |
31 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+21]; |
32 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+22]; |
33 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+23]; |
34 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+24]; |
35 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+26]; |
36 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+27]; |
37 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+28]; |
38 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+29]; |
39 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+30]; |
40 | | - ;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+31]; |
| 15 | +; CHECK-LABEL: test( |
| 16 | +; CHECK: { |
| 17 | +; CHECK-NEXT: .reg .b32 %r<123>; |
| 18 | +; CHECK-NEXT: .reg .f32 %f<9>; |
| 19 | +; CHECK-EMPTY: |
| 20 | +; CHECK-NEXT: // %bb.0: |
| 21 | +; CHECK-NEXT: ld.param.u8 %r1, [test_param_0+29]; |
| 22 | +; CHECK-NEXT: shl.b32 %r2, %r1, 8; |
| 23 | +; CHECK-NEXT: ld.param.u8 %r3, [test_param_0+28]; |
| 24 | +; CHECK-NEXT: or.b32 %r4, %r2, %r3; |
| 25 | +; CHECK-NEXT: ld.param.u8 %r5, [test_param_0+31]; |
| 26 | +; CHECK-NEXT: shl.b32 %r6, %r5, 8; |
| 27 | +; CHECK-NEXT: ld.param.u8 %r7, [test_param_0+30]; |
| 28 | +; CHECK-NEXT: or.b32 %r8, %r6, %r7; |
| 29 | +; CHECK-NEXT: shl.b32 %r9, %r8, 16; |
| 30 | +; CHECK-NEXT: or.b32 %r122, %r9, %r4; |
| 31 | +; CHECK-NEXT: mov.b32 %f1, %r122; |
| 32 | +; CHECK-NEXT: ld.param.u8 %r11, [test_param_0+25]; |
| 33 | +; CHECK-NEXT: shl.b32 %r12, %r11, 8; |
| 34 | +; CHECK-NEXT: ld.param.u8 %r13, [test_param_0+24]; |
| 35 | +; CHECK-NEXT: or.b32 %r14, %r12, %r13; |
| 36 | +; CHECK-NEXT: ld.param.u8 %r15, [test_param_0+27]; |
| 37 | +; CHECK-NEXT: shl.b32 %r16, %r15, 8; |
| 38 | +; CHECK-NEXT: ld.param.u8 %r17, [test_param_0+26]; |
| 39 | +; CHECK-NEXT: or.b32 %r18, %r16, %r17; |
| 40 | +; CHECK-NEXT: shl.b32 %r19, %r18, 16; |
| 41 | +; CHECK-NEXT: or.b32 %r121, %r19, %r14; |
| 42 | +; CHECK-NEXT: mov.b32 %f2, %r121; |
| 43 | +; CHECK-NEXT: ld.param.u8 %r21, [test_param_0+21]; |
| 44 | +; CHECK-NEXT: shl.b32 %r22, %r21, 8; |
| 45 | +; CHECK-NEXT: ld.param.u8 %r23, [test_param_0+20]; |
| 46 | +; CHECK-NEXT: or.b32 %r24, %r22, %r23; |
| 47 | +; CHECK-NEXT: ld.param.u8 %r25, [test_param_0+23]; |
| 48 | +; CHECK-NEXT: shl.b32 %r26, %r25, 8; |
| 49 | +; CHECK-NEXT: ld.param.u8 %r27, [test_param_0+22]; |
| 50 | +; CHECK-NEXT: or.b32 %r28, %r26, %r27; |
| 51 | +; CHECK-NEXT: shl.b32 %r29, %r28, 16; |
| 52 | +; CHECK-NEXT: or.b32 %r120, %r29, %r24; |
| 53 | +; CHECK-NEXT: mov.b32 %f3, %r120; |
| 54 | +; CHECK-NEXT: ld.param.u8 %r31, [test_param_0+17]; |
| 55 | +; CHECK-NEXT: shl.b32 %r32, %r31, 8; |
| 56 | +; CHECK-NEXT: ld.param.u8 %r33, [test_param_0+16]; |
| 57 | +; CHECK-NEXT: or.b32 %r34, %r32, %r33; |
| 58 | +; CHECK-NEXT: ld.param.u8 %r35, [test_param_0+19]; |
| 59 | +; CHECK-NEXT: shl.b32 %r36, %r35, 8; |
| 60 | +; CHECK-NEXT: ld.param.u8 %r37, [test_param_0+18]; |
| 61 | +; CHECK-NEXT: or.b32 %r38, %r36, %r37; |
| 62 | +; CHECK-NEXT: shl.b32 %r39, %r38, 16; |
| 63 | +; CHECK-NEXT: or.b32 %r119, %r39, %r34; |
| 64 | +; CHECK-NEXT: mov.b32 %f4, %r119; |
| 65 | +; CHECK-NEXT: ld.param.u8 %r41, [test_param_0+13]; |
| 66 | +; CHECK-NEXT: shl.b32 %r42, %r41, 8; |
| 67 | +; CHECK-NEXT: ld.param.u8 %r43, [test_param_0+12]; |
| 68 | +; CHECK-NEXT: or.b32 %r44, %r42, %r43; |
| 69 | +; CHECK-NEXT: ld.param.u8 %r45, [test_param_0+15]; |
| 70 | +; CHECK-NEXT: shl.b32 %r46, %r45, 8; |
| 71 | +; CHECK-NEXT: ld.param.u8 %r47, [test_param_0+14]; |
| 72 | +; CHECK-NEXT: or.b32 %r48, %r46, %r47; |
| 73 | +; CHECK-NEXT: shl.b32 %r49, %r48, 16; |
| 74 | +; CHECK-NEXT: or.b32 %r118, %r49, %r44; |
| 75 | +; CHECK-NEXT: mov.b32 %f5, %r118; |
| 76 | +; CHECK-NEXT: ld.param.u8 %r51, [test_param_0+9]; |
| 77 | +; CHECK-NEXT: shl.b32 %r52, %r51, 8; |
| 78 | +; CHECK-NEXT: ld.param.u8 %r53, [test_param_0+8]; |
| 79 | +; CHECK-NEXT: or.b32 %r54, %r52, %r53; |
| 80 | +; CHECK-NEXT: ld.param.u8 %r55, [test_param_0+11]; |
| 81 | +; CHECK-NEXT: shl.b32 %r56, %r55, 8; |
| 82 | +; CHECK-NEXT: ld.param.u8 %r57, [test_param_0+10]; |
| 83 | +; CHECK-NEXT: or.b32 %r58, %r56, %r57; |
| 84 | +; CHECK-NEXT: shl.b32 %r59, %r58, 16; |
| 85 | +; CHECK-NEXT: or.b32 %r117, %r59, %r54; |
| 86 | +; CHECK-NEXT: mov.b32 %f6, %r117; |
| 87 | +; CHECK-NEXT: ld.param.u8 %r61, [test_param_0+5]; |
| 88 | +; CHECK-NEXT: shl.b32 %r62, %r61, 8; |
| 89 | +; CHECK-NEXT: ld.param.u8 %r63, [test_param_0+4]; |
| 90 | +; CHECK-NEXT: or.b32 %r64, %r62, %r63; |
| 91 | +; CHECK-NEXT: ld.param.u8 %r65, [test_param_0+7]; |
| 92 | +; CHECK-NEXT: shl.b32 %r66, %r65, 8; |
| 93 | +; CHECK-NEXT: ld.param.u8 %r67, [test_param_0+6]; |
| 94 | +; CHECK-NEXT: or.b32 %r68, %r66, %r67; |
| 95 | +; CHECK-NEXT: shl.b32 %r69, %r68, 16; |
| 96 | +; CHECK-NEXT: or.b32 %r116, %r69, %r64; |
| 97 | +; CHECK-NEXT: mov.b32 %f7, %r116; |
| 98 | +; CHECK-NEXT: ld.param.u8 %r71, [test_param_0+1]; |
| 99 | +; CHECK-NEXT: shl.b32 %r72, %r71, 8; |
| 100 | +; CHECK-NEXT: ld.param.u8 %r73, [test_param_0]; |
| 101 | +; CHECK-NEXT: or.b32 %r74, %r72, %r73; |
| 102 | +; CHECK-NEXT: ld.param.u8 %r75, [test_param_0+3]; |
| 103 | +; CHECK-NEXT: shl.b32 %r76, %r75, 8; |
| 104 | +; CHECK-NEXT: ld.param.u8 %r77, [test_param_0+2]; |
| 105 | +; CHECK-NEXT: or.b32 %r78, %r76, %r77; |
| 106 | +; CHECK-NEXT: shl.b32 %r79, %r78, 16; |
| 107 | +; CHECK-NEXT: or.b32 %r115, %r79, %r74; |
| 108 | +; CHECK-NEXT: mov.b32 %f8, %r115; |
| 109 | +; CHECK-NEXT: shr.u32 %r82, %r115, 8; |
| 110 | +; CHECK-NEXT: shr.u32 %r83, %r115, 16; |
| 111 | +; CHECK-NEXT: shr.u32 %r84, %r115, 24; |
| 112 | +; CHECK-NEXT: shr.u32 %r86, %r116, 8; |
| 113 | +; CHECK-NEXT: shr.u32 %r87, %r116, 16; |
| 114 | +; CHECK-NEXT: shr.u32 %r88, %r116, 24; |
| 115 | +; CHECK-NEXT: shr.u32 %r90, %r117, 8; |
| 116 | +; CHECK-NEXT: shr.u32 %r91, %r117, 16; |
| 117 | +; CHECK-NEXT: shr.u32 %r92, %r117, 24; |
| 118 | +; CHECK-NEXT: shr.u32 %r94, %r118, 8; |
| 119 | +; CHECK-NEXT: shr.u32 %r95, %r118, 16; |
| 120 | +; CHECK-NEXT: shr.u32 %r96, %r118, 24; |
| 121 | +; CHECK-NEXT: shr.u32 %r98, %r119, 8; |
| 122 | +; CHECK-NEXT: shr.u32 %r99, %r119, 16; |
| 123 | +; CHECK-NEXT: shr.u32 %r100, %r119, 24; |
| 124 | +; CHECK-NEXT: shr.u32 %r102, %r120, 8; |
| 125 | +; CHECK-NEXT: shr.u32 %r103, %r120, 16; |
| 126 | +; CHECK-NEXT: shr.u32 %r104, %r120, 24; |
| 127 | +; CHECK-NEXT: shr.u32 %r106, %r121, 8; |
| 128 | +; CHECK-NEXT: shr.u32 %r107, %r121, 16; |
| 129 | +; CHECK-NEXT: shr.u32 %r108, %r121, 24; |
| 130 | +; CHECK-NEXT: shr.u32 %r110, %r122, 8; |
| 131 | +; CHECK-NEXT: shr.u32 %r111, %r122, 16; |
| 132 | +; CHECK-NEXT: shr.u32 %r112, %r122, 24; |
| 133 | +; CHECK-NEXT: { // callseq 0, 0 |
| 134 | +; CHECK-NEXT: .param .align 1 .b8 param0[32]; |
| 135 | +; CHECK-NEXT: st.param.b8 [param0], %r115; |
| 136 | +; CHECK-NEXT: st.param.b8 [param0+1], %r82; |
| 137 | +; CHECK-NEXT: st.param.b8 [param0+2], %r83; |
| 138 | +; CHECK-NEXT: st.param.b8 [param0+3], %r84; |
| 139 | +; CHECK-NEXT: st.param.b8 [param0+4], %r116; |
| 140 | +; CHECK-NEXT: st.param.b8 [param0+5], %r86; |
| 141 | +; CHECK-NEXT: st.param.b8 [param0+6], %r87; |
| 142 | +; CHECK-NEXT: st.param.b8 [param0+7], %r88; |
| 143 | +; CHECK-NEXT: st.param.b8 [param0+8], %r117; |
| 144 | +; CHECK-NEXT: st.param.b8 [param0+9], %r90; |
| 145 | +; CHECK-NEXT: st.param.b8 [param0+10], %r91; |
| 146 | +; CHECK-NEXT: st.param.b8 [param0+11], %r92; |
| 147 | +; CHECK-NEXT: st.param.b8 [param0+12], %r118; |
| 148 | +; CHECK-NEXT: st.param.b8 [param0+13], %r94; |
| 149 | +; CHECK-NEXT: st.param.b8 [param0+14], %r95; |
| 150 | +; CHECK-NEXT: st.param.b8 [param0+15], %r96; |
| 151 | +; CHECK-NEXT: st.param.b8 [param0+16], %r119; |
| 152 | +; CHECK-NEXT: st.param.b8 [param0+17], %r98; |
| 153 | +; CHECK-NEXT: st.param.b8 [param0+18], %r99; |
| 154 | +; CHECK-NEXT: st.param.b8 [param0+19], %r100; |
| 155 | +; CHECK-NEXT: st.param.b8 [param0+20], %r120; |
| 156 | +; CHECK-NEXT: st.param.b8 [param0+21], %r102; |
| 157 | +; CHECK-NEXT: st.param.b8 [param0+22], %r103; |
| 158 | +; CHECK-NEXT: st.param.b8 [param0+23], %r104; |
| 159 | +; CHECK-NEXT: st.param.b8 [param0+24], %r121; |
| 160 | +; CHECK-NEXT: st.param.b8 [param0+25], %r106; |
| 161 | +; CHECK-NEXT: st.param.b8 [param0+26], %r107; |
| 162 | +; CHECK-NEXT: st.param.b8 [param0+27], %r108; |
| 163 | +; CHECK-NEXT: st.param.b8 [param0+28], %r122; |
| 164 | +; CHECK-NEXT: st.param.b8 [param0+29], %r110; |
| 165 | +; CHECK-NEXT: st.param.b8 [param0+30], %r111; |
| 166 | +; CHECK-NEXT: st.param.b8 [param0+31], %r112; |
| 167 | +; CHECK-NEXT: .param .b32 retval0; |
| 168 | +; CHECK-NEXT: call.uni (retval0), |
| 169 | +; CHECK-NEXT: callee, |
| 170 | +; CHECK-NEXT: ( |
| 171 | +; CHECK-NEXT: param0 |
| 172 | +; CHECK-NEXT: ); |
| 173 | +; CHECK-NEXT: ld.param.b32 %r113, [retval0]; |
| 174 | +; CHECK-NEXT: } // callseq 0 |
| 175 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r113; |
| 176 | +; CHECK-NEXT: ret; |
41 | 177 |
|
42 | 178 | %1 = call i32 @callee(%struct.8float %data) |
43 | 179 | ret i32 %1 |
|
0 commit comments