@@ -7,138 +7,141 @@ define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) {
77; CHECK-LABEL: issue63986:
88; CHECK: ; %bb.0: ; %entry
99; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10- ; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
11- ; CHECK-NEXT: v_mov_b32_e32 v6, s17
12- ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s16, v4
13- ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v6, v5, vcc
10+ ; CHECK-NEXT: v_lshlrev_b64 v[8:9], 6, v[2:3]
11+ ; CHECK-NEXT: v_mov_b32_e32 v4, s17
12+ ; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s16, v8
13+ ; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v4, v9, vcc
14+ ; CHECK-NEXT: ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge
15+ ; CHECK-NEXT: v_mov_b32_e32 v4, 0
16+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
17+ ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
1418; CHECK-NEXT: s_mov_b64 s[4:5], 0
15- ; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
19+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
20+ ; CHECK-NEXT: .LBB0_2: ; %loop-memcpy-expansion
1621; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
17- ; CHECK-NEXT: v_mov_b32_e32 v7, s5
18- ; CHECK-NEXT: v_mov_b32_e32 v6, s4
19- ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
20- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v8
22+ ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s4, v10
2123; CHECK-NEXT: s_add_u32 s4, s4, 16
24+ ; CHECK-NEXT: v_mov_b32_e32 v13, s5
2225; CHECK-NEXT: s_addc_u32 s5, s5, 0
2326; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
24- ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
27+ ; CHECK-NEXT: v_addc_co_u32_e32 v13 , vcc, v11, v13 , vcc
2528; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
26- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
27- ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
28- ; CHECK-NEXT: s_cbranch_vccz .LBB0_1
29- ; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
30- ; CHECK-NEXT: s_branch .LBB0_4
31- ; CHECK-NEXT: ; %bb.3:
32- ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
29+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
30+ ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
31+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_2
32+ ; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header
3333; CHECK-NEXT: s_branch .LBB0_5
34- ; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
35- ; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
36- ; CHECK-NEXT: s_cbranch_execnz .LBB0_8
37- ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
38- ; CHECK-NEXT: s_add_u32 s4, s16, 32
39- ; CHECK-NEXT: s_addc_u32 s5, s17, 0
40- ; CHECK-NEXT: v_mov_b32_e32 v3, s5
41- ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4
42- ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
34+ ; CHECK-NEXT: ; %bb.4:
35+ ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
36+ ; CHECK-NEXT: s_branch .LBB0_6
37+ ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
38+ ; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3]
39+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_9
40+ ; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge
41+ ; CHECK-NEXT: v_mov_b32_e32 v2, 0
42+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
43+ ; CHECK-NEXT: flat_load_ubyte v2, v[2:3]
44+ ; CHECK-NEXT: s_add_u32 s6, s16, 32
45+ ; CHECK-NEXT: s_addc_u32 s4, s17, 0
46+ ; CHECK-NEXT: v_mov_b32_e32 v4, s4
47+ ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s6, v8
4348; CHECK-NEXT: s_mov_b64 s[4:5], 0
44- ; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
45- ; CHECK-NEXT: s_add_u32 s6, 32, s4
46- ; CHECK-NEXT: s_addc_u32 s7, 0, s5
47- ; CHECK-NEXT: v_mov_b32_e32 v6, s6
48- ; CHECK-NEXT: v_mov_b32_e32 v7, s7
49- ; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
50- ; CHECK-NEXT: v_mov_b32_e32 v7, s5
51- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v2
52- ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
49+ ; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v9, vcc
50+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
51+ ; CHECK-NEXT: ; %bb.7: ; %loop-memcpy-residual
52+ ; CHECK-NEXT: v_mov_b32_e32 v6, s5
53+ ; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, s4, v3
5354; CHECK-NEXT: s_add_u32 s4, s4, 1
55+ ; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v4, v6, vcc
5456; CHECK-NEXT: s_addc_u32 s5, s5, 0
55- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
56- ; CHECK-NEXT: flat_store_byte v[6:7 ], v10
57- ; CHECK-NEXT: ; %bb.7 :
58- ; CHECK-NEXT: v_mov_b32_e32 v7, v5
59- ; CHECK-NEXT: v_mov_b32_e32 v6, v4
60- ; CHECK-NEXT: .LBB0_8 : ; %post-loop-memcpy-expansion
61- ; CHECK-NEXT: v_and_b32_e32 v2 , 15, v0
57+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
58+ ; CHECK-NEXT: flat_store_byte v[5:6 ], v2
59+ ; CHECK-NEXT: ; %bb.8 :
60+ ; CHECK-NEXT: v_mov_b32_e32 v2, v8
61+ ; CHECK-NEXT: v_mov_b32_e32 v3, v9
62+ ; CHECK-NEXT: .LBB0_9 : ; %post-loop-memcpy-expansion
63+ ; CHECK-NEXT: v_and_b32_e32 v6 , 15, v0
6264; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
63- ; CHECK-NEXT: v_add_co_u32_e32 v4 , vcc, v6 , v0
64- ; CHECK-NEXT: v_mov_b32_e32 v3 , 0
65- ; CHECK-NEXT: v_addc_co_u32_e32 v5 , vcc, v7 , v1, vcc
65+ ; CHECK-NEXT: v_add_co_u32_e32 v2 , vcc, v2 , v0
66+ ; CHECK-NEXT: v_mov_b32_e32 v7 , 0
67+ ; CHECK-NEXT: v_addc_co_u32_e32 v3 , vcc, v3 , v1, vcc
6668; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
67- ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
68- ; CHECK-NEXT: v_mov_b32_e32 v6, s17
69- ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, s16, v4
70- ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
71- ; CHECK-NEXT: s_branch .LBB0_11
72- ; CHECK-NEXT: .LBB0_9: ; %Flow14
73- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
69+ ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
70+ ; CHECK-NEXT: v_mov_b32_e32 v4, s17
71+ ; CHECK-NEXT: v_mov_b32_e32 v8, 0
72+ ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s16, v2
73+ ; CHECK-NEXT: v_mov_b32_e32 v9, 0
74+ ; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v4, v3, vcc
75+ ; CHECK-NEXT: s_branch .LBB0_12
76+ ; CHECK-NEXT: .LBB0_10: ; %Flow14
77+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
7478; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
7579; CHECK-NEXT: s_mov_b64 s[8:9], 0
76- ; CHECK-NEXT: .LBB0_10 : ; %Flow16
77- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
80+ ; CHECK-NEXT: .LBB0_11 : ; %Flow16
81+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
7882; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
79- ; CHECK-NEXT: s_cbranch_vccz .LBB0_19
80- ; CHECK-NEXT: .LBB0_11 : ; %while.cond
83+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_20
84+ ; CHECK-NEXT: .LBB0_12 : ; %while.cond
8185; CHECK-NEXT: ; =>This Loop Header: Depth=1
82- ; CHECK-NEXT: ; Child Loop BB0_13 Depth 2
83- ; CHECK-NEXT: ; Child Loop BB0_17 Depth 2
86+ ; CHECK-NEXT: ; Child Loop BB0_14 Depth 2
87+ ; CHECK-NEXT: ; Child Loop BB0_18 Depth 2
8488; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
85- ; CHECK-NEXT: s_cbranch_execz .LBB0_14
86- ; CHECK-NEXT: ; %bb.12: ; %loop-memcpy-expansion2.preheader
87- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
89+ ; CHECK-NEXT: s_cbranch_execz .LBB0_15
90+ ; CHECK-NEXT: ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge
91+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
92+ ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[8:9]
8893; CHECK-NEXT: s_mov_b64 s[10:11], 0
8994; CHECK-NEXT: s_mov_b64 s[12:13], 0
90- ; CHECK-NEXT: .LBB0_13: ; %loop-memcpy-expansion2
91- ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
95+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
96+ ; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2
97+ ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
9298; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
93- ; CHECK-NEXT: v_mov_b32_e32 v6, s12
94- ; CHECK-NEXT: v_mov_b32_e32 v7, s13
95- ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
96- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s12, v8
99+ ; CHECK-NEXT: v_mov_b32_e32 v15, s13
100+ ; CHECK-NEXT: v_add_co_u32_e32 v14, vcc, s12, v10
97101; CHECK-NEXT: s_add_u32 s12, s12, 16
98- ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
102+ ; CHECK-NEXT: v_addc_co_u32_e32 v15 , vcc, v11, v15 , vcc
99103; CHECK-NEXT: s_addc_u32 s13, s13, 0
100104; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
105+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
106+ ; CHECK-NEXT: flat_store_dwordx4 v[14:15], v[2:5]
101107; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
102- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
103- ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
104108; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
105- ; CHECK-NEXT: s_cbranch_execnz .LBB0_13
106- ; CHECK-NEXT: .LBB0_14 : ; %Flow15
107- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
109+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_14
110+ ; CHECK-NEXT: .LBB0_15 : ; %Flow15
111+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
108112; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
109113; CHECK-NEXT: s_mov_b64 s[8:9], -1
110- ; CHECK-NEXT: s_cbranch_execz .LBB0_10
111- ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual-header5
112- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
114+ ; CHECK-NEXT: s_cbranch_execz .LBB0_11
115+ ; CHECK-NEXT: ; %bb.16 : ; %loop-memcpy-residual-header5
116+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
113117; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
114118; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
115- ; CHECK-NEXT: s_cbranch_execz .LBB0_9
116- ; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual4.preheader
117- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
119+ ; CHECK-NEXT: s_cbranch_execz .LBB0_10
120+ ; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge
121+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
122+ ; CHECK-NEXT: flat_load_ubyte v2, v[8:9]
118123; CHECK-NEXT: s_mov_b64 s[12:13], 0
119124; CHECK-NEXT: s_mov_b64 s[14:15], 0
120- ; CHECK-NEXT: .LBB0_17: ; %loop-memcpy-residual4
121- ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
125+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
126+ ; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4
127+ ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
122128; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
123- ; CHECK-NEXT: v_mov_b32_e32 v10, s15
124- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v0
125- ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v10, vcc
126- ; CHECK-NEXT: flat_load_ubyte v11, v[6:7]
127- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v4
129+ ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s14, v12
128130; CHECK-NEXT: s_add_u32 s14, s14, 1
131+ ; CHECK-NEXT: v_mov_b32_e32 v4, s15
129132; CHECK-NEXT: s_addc_u32 s15, s15, 0
130- ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3 ]
131- ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v5, v10 , vcc
133+ ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7 ]
134+ ; CHECK-NEXT: v_addc_co_u32_e32 v4 , vcc, v13, v4 , vcc
132135; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
133- ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
134- ; CHECK-NEXT: flat_store_byte v[6:7 ], v11
136+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
137+ ; CHECK-NEXT: flat_store_byte v[3:4 ], v2
135138; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
136- ; CHECK-NEXT: s_cbranch_execnz .LBB0_17
137- ; CHECK-NEXT: ; %bb.18 : ; %Flow
138- ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
139+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_18
140+ ; CHECK-NEXT: ; %bb.19 : ; %Flow
141+ ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
139142; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
140- ; CHECK-NEXT: s_branch .LBB0_9
141- ; CHECK-NEXT: .LBB0_19 : ; %DummyReturnBlock
143+ ; CHECK-NEXT: s_branch .LBB0_10
144+ ; CHECK-NEXT: .LBB0_20 : ; %DummyReturnBlock
142145; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
143146; CHECK-NEXT: s_setpc_b64 s[30:31]
144147entry:
0 commit comments