33
; Element type of the [32 x Buffer] array indexed by %idxprom in @issue63986:
; a struct holding a single array of eight i64 values.
44%"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64 ] }
55
6- define void @issue63986 (i64 %0 , i64 %idxprom ) {
6+ define void @issue63986 (i64 %0 , i64 %idxprom , ptr inreg %ptr ) {
77; CHECK-LABEL: issue63986:
88; CHECK: ; %bb.0: ; %entry
99; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1010; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
11+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
12+ ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s16, v4
13+ ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v6, v5, vcc
1114; CHECK-NEXT: s_mov_b64 s[4:5], 0
1215; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
1316; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
1417; CHECK-NEXT: v_mov_b32_e32 v7, s5
1518; CHECK-NEXT: v_mov_b32_e32 v6, s4
16- ; CHECK-NEXT: flat_load_dwordx4 v[6:9 ], v[6:7]
17- ; CHECK-NEXT: v_add_co_u32_e32 v10 , vcc, s4, v4
19+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13 ], v[6:7]
20+ ; CHECK-NEXT: v_add_co_u32_e32 v6 , vcc, s4, v8
1821; CHECK-NEXT: s_add_u32 s4, s4, 16
19- ; CHECK-NEXT: v_mov_b32_e32 v11, s5
2022; CHECK-NEXT: s_addc_u32 s5, s5, 0
2123; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
22- ; CHECK-NEXT: v_addc_co_u32_e32 v11 , vcc, v5, v11 , vcc
24+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
2325; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
2426; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
25- ; CHECK-NEXT: flat_store_dwordx4 v[10:11 ], v[6:9 ]
27+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7 ], v[10:13 ]
2628; CHECK-NEXT: s_cbranch_vccz .LBB0_1
2729; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
2830; CHECK-NEXT: s_branch .LBB0_4
@@ -31,110 +33,116 @@ define void @issue63986(i64 %0, i64 %idxprom) {
3133; CHECK-NEXT: s_branch .LBB0_5
3234; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
3335; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
34- ; CHECK-NEXT: s_cbranch_execnz .LBB0_7
36+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_8
3537; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
36- ; CHECK-NEXT: v_or_b32_e32 v2, 32, v4
37- ; CHECK-NEXT: v_mov_b32_e32 v3, v5
38+ ; CHECK-NEXT: s_add_u32 s4, s16, 32
39+ ; CHECK-NEXT: s_addc_u32 s5, s17, 0
40+ ; CHECK-NEXT: v_mov_b32_e32 v3, s5
41+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4
42+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
3843; CHECK-NEXT: s_mov_b64 s[4:5], 0
3944; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
4045; CHECK-NEXT: s_add_u32 s6, 32, s4
4146; CHECK-NEXT: s_addc_u32 s7, 0, s5
4247; CHECK-NEXT: v_mov_b32_e32 v6, s6
4348; CHECK-NEXT: v_mov_b32_e32 v7, s7
4449; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
45- ; CHECK-NEXT: v_mov_b32_e32 v9, s5
46- ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s4, v2
47- ; CHECK-NEXT: v_mov_b32_e32 v7, v5
48- ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v9, vcc
50+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
51+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v2
52+ ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
4953; CHECK-NEXT: s_add_u32 s4, s4, 1
50- ; CHECK-NEXT: v_mov_b32_e32 v6, v4
5154; CHECK-NEXT: s_addc_u32 s5, s5, 0
5255; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
53- ; CHECK-NEXT: flat_store_byte v[8:9], v10
54- ; CHECK-NEXT: .LBB0_7: ; %post-loop-memcpy-expansion
56+ ; CHECK-NEXT: flat_store_byte v[6:7], v10
57+ ; CHECK-NEXT: ; %bb.7:
58+ ; CHECK-NEXT: v_mov_b32_e32 v7, v5
59+ ; CHECK-NEXT: v_mov_b32_e32 v6, v4
60+ ; CHECK-NEXT: .LBB0_8: ; %post-loop-memcpy-expansion
5561; CHECK-NEXT: v_and_b32_e32 v2, 15, v0
56- ; CHECK-NEXT: v_mov_b32_e32 v3, 0
5762; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
63+ ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, v6, v0
64+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
65+ ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v7, v1, vcc
5866; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
5967; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
60- ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0
61- ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v1, vcc
62- ; CHECK-NEXT: s_branch .LBB0_10
63- ; CHECK-NEXT: .LBB0_8: ; %Flow14
64- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
68+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
69+ ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, s16, v4
70+ ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
71+ ; CHECK-NEXT: s_branch .LBB0_11
72+ ; CHECK-NEXT: .LBB0_9: ; %Flow14
73+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
6574; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
6675; CHECK-NEXT: s_mov_b64 s[8:9], 0
67- ; CHECK-NEXT: .LBB0_9 : ; %Flow16
68- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
76+ ; CHECK-NEXT: .LBB0_10 : ; %Flow16
77+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
6978; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
70- ; CHECK-NEXT: s_cbranch_vccz .LBB0_18
71- ; CHECK-NEXT: .LBB0_10 : ; %while.cond
79+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_19
80+ ; CHECK-NEXT: .LBB0_11 : ; %while.cond
7281; CHECK-NEXT: ; =>This Loop Header: Depth=1
73- ; CHECK-NEXT: ; Child Loop BB0_12 Depth 2
74- ; CHECK-NEXT: ; Child Loop BB0_16 Depth 2
82+ ; CHECK-NEXT: ; Child Loop BB0_13 Depth 2
83+ ; CHECK-NEXT: ; Child Loop BB0_17 Depth 2
7584; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
76- ; CHECK-NEXT: s_cbranch_execz .LBB0_13
77- ; CHECK-NEXT: ; %bb.11 : ; %loop-memcpy-expansion2.preheader
78- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
85+ ; CHECK-NEXT: s_cbranch_execz .LBB0_14
86+ ; CHECK-NEXT: ; %bb.12 : ; %loop-memcpy-expansion2.preheader
87+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
7988; CHECK-NEXT: s_mov_b64 s[10:11], 0
8089; CHECK-NEXT: s_mov_b64 s[12:13], 0
81- ; CHECK-NEXT: .LBB0_12 : ; %loop-memcpy-expansion2
82- ; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
90+ ; CHECK-NEXT: .LBB0_13 : ; %loop-memcpy-expansion2
91+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
8392; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
84- ; CHECK-NEXT: v_mov_b32_e32 v8, s12
85- ; CHECK-NEXT: v_mov_b32_e32 v9, s13
86- ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
87- ; CHECK-NEXT: v_mov_b32_e32 v13, s13
88- ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s12, v4
93+ ; CHECK-NEXT: v_mov_b32_e32 v6, s12
94+ ; CHECK-NEXT: v_mov_b32_e32 v7, s13
95+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
96+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s12, v8
8997; CHECK-NEXT: s_add_u32 s12, s12, 16
90- ; CHECK-NEXT: v_addc_co_u32_e32 v13 , vcc, v5, v13 , vcc
98+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
9199; CHECK-NEXT: s_addc_u32 s13, s13, 0
92100; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
93101; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
94102; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
95- ; CHECK-NEXT: flat_store_dwordx4 v[12:13 ], v[8:11 ]
103+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7 ], v[10:13 ]
96104; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
97- ; CHECK-NEXT: s_cbranch_execnz .LBB0_12
98- ; CHECK-NEXT: .LBB0_13 : ; %Flow15
99- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
105+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_13
106+ ; CHECK-NEXT: .LBB0_14 : ; %Flow15
107+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
100108; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
101109; CHECK-NEXT: s_mov_b64 s[8:9], -1
102- ; CHECK-NEXT: s_cbranch_execz .LBB0_9
103- ; CHECK-NEXT: ; %bb.14 : ; %loop-memcpy-residual-header5
104- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
110+ ; CHECK-NEXT: s_cbranch_execz .LBB0_10
111+ ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual-header5
112+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
105113; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
106114; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
107- ; CHECK-NEXT: s_cbranch_execz .LBB0_8
108- ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual4.preheader
109- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
115+ ; CHECK-NEXT: s_cbranch_execz .LBB0_9
116+ ; CHECK-NEXT: ; %bb.16 : ; %loop-memcpy-residual4.preheader
117+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
110118; CHECK-NEXT: s_mov_b64 s[12:13], 0
111119; CHECK-NEXT: s_mov_b64 s[14:15], 0
112- ; CHECK-NEXT: .LBB0_16 : ; %loop-memcpy-residual4
113- ; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
120+ ; CHECK-NEXT: .LBB0_17 : ; %loop-memcpy-residual4
121+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
114122; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
115123; CHECK-NEXT: v_mov_b32_e32 v10, s15
116- ; CHECK-NEXT: v_add_co_u32_e32 v8 , vcc, s14, v0
117- ; CHECK-NEXT: v_addc_co_u32_e32 v9 , vcc, v1, v10, vcc
118- ; CHECK-NEXT: flat_load_ubyte v11, v[8:9 ]
119- ; CHECK-NEXT: v_add_co_u32_e32 v8 , vcc, s14, v6
124+ ; CHECK-NEXT: v_add_co_u32_e32 v6 , vcc, s14, v0
125+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v1, v10, vcc
126+ ; CHECK-NEXT: flat_load_ubyte v11, v[6:7 ]
127+ ; CHECK-NEXT: v_add_co_u32_e32 v6 , vcc, s14, v4
120128; CHECK-NEXT: s_add_u32 s14, s14, 1
121129; CHECK-NEXT: s_addc_u32 s15, s15, 0
122130; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3]
123- ; CHECK-NEXT: v_addc_co_u32_e32 v9 , vcc, v7 , v10, vcc
131+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v5 , v10, vcc
124132; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
125133; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
126- ; CHECK-NEXT: flat_store_byte v[8:9 ], v11
134+ ; CHECK-NEXT: flat_store_byte v[6:7 ], v11
127135; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
128- ; CHECK-NEXT: s_cbranch_execnz .LBB0_16
129- ; CHECK-NEXT: ; %bb.17 : ; %Flow
130- ; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
136+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_17
137+ ; CHECK-NEXT: ; %bb.18 : ; %Flow
138+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
131139; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
132- ; CHECK-NEXT: s_branch .LBB0_8
133- ; CHECK-NEXT: .LBB0_18 : ; %DummyReturnBlock
140+ ; CHECK-NEXT: s_branch .LBB0_9
141+ ; CHECK-NEXT: .LBB0_19 : ; %DummyReturnBlock
134142; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
135143; CHECK-NEXT: s_setpc_b64 s[30:31]
136144entry:
137- %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer" ], ptr null , i64 0 , i64 %idxprom
145+ %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer" ], ptr %ptr , i64 0 , i64 %idxprom
138146 %spec.select = tail call i64 @llvm.umin.i64 (i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace (4 ) inttoptr (i64 32 to ptr addrspace (4 )) to ptr ) to i64 ), i64 ptrtoint (ptr addrspacecast (ptr addrspace (4 ) null to ptr ) to i64 )), i64 56 )
139147 tail call void @llvm.memcpy.p0.p0.i64 (ptr %arrayidx , ptr null , i64 %spec.select , i1 false )
140148 br label %while.cond
0 commit comments