@@ -7,141 +7,138 @@ define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) {
77; CHECK-LABEL: issue63986:
88; CHECK: ; %bb.0: ; %entry
99; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10- ; CHECK-NEXT: v_lshlrev_b64 v[8:9], 6, v[2:3]
11- ; CHECK-NEXT: v_mov_b32_e32 v4, s17
12- ; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s16, v8
13- ; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v4, v9, vcc
14- ; CHECK-NEXT: ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge
15- ; CHECK-NEXT: v_mov_b32_e32 v4, 0
16- ; CHECK-NEXT: v_mov_b32_e32 v5, 0
17- ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
10+ ; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
11+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
12+ ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s16, v4
13+ ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v6, v5, vcc
1814; CHECK-NEXT: s_mov_b64 s[4:5], 0
19- ; CHECK-NEXT: s_waitcnt vmcnt(0)
20- ; CHECK-NEXT: .LBB0_2: ; %loop-memcpy-expansion
15+ ; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
2116; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
22- ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s4, v10
17+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
18+ ; CHECK-NEXT: v_mov_b32_e32 v6, s4
19+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
20+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v8
2321; CHECK-NEXT: s_add_u32 s4, s4, 16
24- ; CHECK-NEXT: v_mov_b32_e32 v13, s5
2522; CHECK-NEXT: s_addc_u32 s5, s5, 0
2623; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
27- ; CHECK-NEXT: v_addc_co_u32_e32 v13 , vcc, v11, v13 , vcc
24+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
2825; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
29- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
30- ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
31- ; CHECK-NEXT: s_cbranch_vccz .LBB0_2
32- ; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header
26+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
27+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
28+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_1
29+ ; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
30+ ; CHECK-NEXT: s_branch .LBB0_4
31+ ; CHECK-NEXT: ; %bb.3:
32+ ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
3333; CHECK-NEXT: s_branch .LBB0_5
34- ; CHECK-NEXT: ; %bb.4:
35- ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
36- ; CHECK-NEXT: s_branch .LBB0_6
37- ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
38- ; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3]
39- ; CHECK-NEXT: s_cbranch_execnz .LBB0_9
40- ; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge
41- ; CHECK-NEXT: v_mov_b32_e32 v2, 0
42- ; CHECK-NEXT: v_mov_b32_e32 v3, 0
43- ; CHECK-NEXT: flat_load_ubyte v2, v[2:3]
44- ; CHECK-NEXT: s_add_u32 s6, s16, 32
45- ; CHECK-NEXT: s_addc_u32 s4, s17, 0
46- ; CHECK-NEXT: v_mov_b32_e32 v4, s4
47- ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s6, v8
34+ ; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
35+ ; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
36+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_8
37+ ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
38+ ; CHECK-NEXT: s_add_u32 s4, s16, 32
39+ ; CHECK-NEXT: s_addc_u32 s5, s17, 0
40+ ; CHECK-NEXT: v_mov_b32_e32 v3, s5
41+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4
42+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
4843; CHECK-NEXT: s_mov_b64 s[4:5], 0
49- ; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v9, vcc
50- ; CHECK-NEXT: s_waitcnt vmcnt(0)
51- ; CHECK-NEXT: ; %bb.7: ; %loop-memcpy-residual
52- ; CHECK-NEXT: v_mov_b32_e32 v6, s5
53- ; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, s4, v3
44+ ; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
45+ ; CHECK-NEXT: s_add_u32 s6, 32, s4
46+ ; CHECK-NEXT: s_addc_u32 s7, 0, s5
47+ ; CHECK-NEXT: v_mov_b32_e32 v6, s6
48+ ; CHECK-NEXT: v_mov_b32_e32 v7, s7
49+ ; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
50+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
51+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v2
52+ ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
5453; CHECK-NEXT: s_add_u32 s4, s4, 1
55- ; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v4, v6, vcc
5654; CHECK-NEXT: s_addc_u32 s5, s5, 0
57- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
58- ; CHECK-NEXT: flat_store_byte v[5:6 ], v2
59- ; CHECK-NEXT: ; %bb.8 :
60- ; CHECK-NEXT: v_mov_b32_e32 v2, v8
61- ; CHECK-NEXT: v_mov_b32_e32 v3, v9
62- ; CHECK-NEXT: .LBB0_9 : ; %post-loop-memcpy-expansion
63- ; CHECK-NEXT: v_and_b32_e32 v6 , 15, v0
55+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
56+ ; CHECK-NEXT: flat_store_byte v[6:7 ], v10
57+ ; CHECK-NEXT: ; %bb.7 :
58+ ; CHECK-NEXT: v_mov_b32_e32 v7, v5
59+ ; CHECK-NEXT: v_mov_b32_e32 v6, v4
60+ ; CHECK-NEXT: .LBB0_8 : ; %post-loop-memcpy-expansion
61+ ; CHECK-NEXT: v_and_b32_e32 v2 , 15, v0
6462; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
65- ; CHECK-NEXT: v_add_co_u32_e32 v2 , vcc, v2 , v0
66- ; CHECK-NEXT: v_mov_b32_e32 v7 , 0
67- ; CHECK-NEXT: v_addc_co_u32_e32 v3 , vcc, v3 , v1, vcc
63+ ; CHECK-NEXT: v_add_co_u32_e32 v4 , vcc, v6 , v0
64+ ; CHECK-NEXT: v_mov_b32_e32 v3 , 0
65+ ; CHECK-NEXT: v_addc_co_u32_e32 v5 , vcc, v7 , v1, vcc
6866; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
69- ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
70- ; CHECK-NEXT: v_mov_b32_e32 v4, s17
71- ; CHECK-NEXT: v_mov_b32_e32 v8, 0
72- ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s16, v2
73- ; CHECK-NEXT: v_mov_b32_e32 v9, 0
74- ; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v4, v3, vcc
75- ; CHECK-NEXT: s_branch .LBB0_12
76- ; CHECK-NEXT: .LBB0_10: ; %Flow14
77- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
67+ ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
68+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
69+ ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, s16, v4
70+ ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
71+ ; CHECK-NEXT: s_branch .LBB0_11
72+ ; CHECK-NEXT: .LBB0_9: ; %Flow14
73+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
7874; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
7975; CHECK-NEXT: s_mov_b64 s[8:9], 0
80- ; CHECK-NEXT: .LBB0_11 : ; %Flow16
81- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
76+ ; CHECK-NEXT: .LBB0_10 : ; %Flow16
77+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
8278; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
83- ; CHECK-NEXT: s_cbranch_vccz .LBB0_20
84- ; CHECK-NEXT: .LBB0_12 : ; %while.cond
79+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_19
80+ ; CHECK-NEXT: .LBB0_11 : ; %while.cond
8581; CHECK-NEXT: ; =>This Loop Header: Depth=1
86- ; CHECK-NEXT: ; Child Loop BB0_14 Depth 2
87- ; CHECK-NEXT: ; Child Loop BB0_18 Depth 2
82+ ; CHECK-NEXT: ; Child Loop BB0_13 Depth 2
83+ ; CHECK-NEXT: ; Child Loop BB0_17 Depth 2
8884; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
89- ; CHECK-NEXT: s_cbranch_execz .LBB0_15
90- ; CHECK-NEXT: ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge
91- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
92- ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[8:9]
85+ ; CHECK-NEXT: s_cbranch_execz .LBB0_14
86+ ; CHECK-NEXT: ; %bb.12: ; %loop-memcpy-expansion2.preheader
87+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
9388; CHECK-NEXT: s_mov_b64 s[10:11], 0
9489; CHECK-NEXT: s_mov_b64 s[12:13], 0
95- ; CHECK-NEXT: s_waitcnt vmcnt(0)
96- ; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2
97- ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
90+ ; CHECK-NEXT: .LBB0_13: ; %loop-memcpy-expansion2
91+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
9892; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
99- ; CHECK-NEXT: v_mov_b32_e32 v15, s13
100- ; CHECK-NEXT: v_add_co_u32_e32 v14, vcc, s12, v10
93+ ; CHECK-NEXT: v_mov_b32_e32 v6, s12
94+ ; CHECK-NEXT: v_mov_b32_e32 v7, s13
95+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
96+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s12, v8
10197; CHECK-NEXT: s_add_u32 s12, s12, 16
102- ; CHECK-NEXT: v_addc_co_u32_e32 v15 , vcc, v11, v15 , vcc
98+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
10399; CHECK-NEXT: s_addc_u32 s13, s13, 0
104100; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
105- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
106- ; CHECK-NEXT: flat_store_dwordx4 v[14:15], v[2:5]
107101; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
102+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
103+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
108104; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
109- ; CHECK-NEXT: s_cbranch_execnz .LBB0_14
110- ; CHECK-NEXT: .LBB0_15 : ; %Flow15
111- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
105+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_13
106+ ; CHECK-NEXT: .LBB0_14 : ; %Flow15
107+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
112108; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
113109; CHECK-NEXT: s_mov_b64 s[8:9], -1
114- ; CHECK-NEXT: s_cbranch_execz .LBB0_11
115- ; CHECK-NEXT: ; %bb.16 : ; %loop-memcpy-residual-header5
116- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
110+ ; CHECK-NEXT: s_cbranch_execz .LBB0_10
111+ ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual-header5
112+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
117113; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
118114; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
119- ; CHECK-NEXT: s_cbranch_execz .LBB0_10
120- ; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge
121- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
122- ; CHECK-NEXT: flat_load_ubyte v2, v[8:9]
115+ ; CHECK-NEXT: s_cbranch_execz .LBB0_9
116+ ; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual4.preheader
117+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
123118; CHECK-NEXT: s_mov_b64 s[12:13], 0
124119; CHECK-NEXT: s_mov_b64 s[14:15], 0
125- ; CHECK-NEXT: s_waitcnt vmcnt(0)
126- ; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4
127- ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
120+ ; CHECK-NEXT: .LBB0_17: ; %loop-memcpy-residual4
121+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
128122; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
129- ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s14, v12
123+ ; CHECK-NEXT: v_mov_b32_e32 v10, s15
124+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v0
125+ ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v10, vcc
126+ ; CHECK-NEXT: flat_load_ubyte v11, v[6:7]
127+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v4
130128; CHECK-NEXT: s_add_u32 s14, s14, 1
131- ; CHECK-NEXT: v_mov_b32_e32 v4, s15
132129; CHECK-NEXT: s_addc_u32 s15, s15, 0
133- ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7 ]
134- ; CHECK-NEXT: v_addc_co_u32_e32 v4 , vcc, v13, v4 , vcc
130+ ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3 ]
131+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v5, v10 , vcc
135132; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
136- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
137- ; CHECK-NEXT: flat_store_byte v[3:4 ], v2
133+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
134+ ; CHECK-NEXT: flat_store_byte v[6:7 ], v11
138135; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
139- ; CHECK-NEXT: s_cbranch_execnz .LBB0_18
140- ; CHECK-NEXT: ; %bb.19 : ; %Flow
141- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
136+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_17
137+ ; CHECK-NEXT: ; %bb.18 : ; %Flow
138+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
142139; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
143- ; CHECK-NEXT: s_branch .LBB0_10
144- ; CHECK-NEXT: .LBB0_20 : ; %DummyReturnBlock
140+ ; CHECK-NEXT: s_branch .LBB0_9
141+ ; CHECK-NEXT: .LBB0_19 : ; %DummyReturnBlock
145142; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
146143; CHECK-NEXT: s_setpc_b64 s[30:31]
147144entry:
0 commit comments