@@ -152,4 +152,90 @@ end:
152152 ret float %r
153153}
154154
; Two chains of phi nodes that receive the same incoming value (%v.if) from
; the %if block:
;   chain 1: %v.1    -> %v.2      -> %r    (seeded with %v on loop entry)
;   chain 2: %v.copy -> %v.copy.2 -> %r2   (seeded with 0.0 on loop entry)
;
; Control flow:
;   header -> if    when %ind <  %x, otherwise -> latch
;   if     -> latch when %ind <  %y, otherwise -> end   (break out of the loop)
;   latch  -> end   when %ind <  %z, otherwise -> header (back edge)
;
; Because the loop can be left from both %if and %latch, each phi in %end
; merges %v.if with the matching %latch phi.
;
; NOTE(review): the assembly checks below look like the output of
; utils/update_llc_test_checks.py - presumably they should be regenerated
; rather than edited by hand; confirm against the header of this file.
define amdgpu_ps <2 x float> @while_break_two_chains_of_phi(float %v, i32 %x, i32 %y, i32 %z, ptr addrspace(1) %p) #0 {
; GCN-LABEL: while_break_two_chains_of_phi:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    v_mov_b32_e32 v6, 0
; GCN-NEXT:    s_mov_b32 s2, 0
; GCN-NEXT:    s_mov_b32 s0, 0
; GCN-NEXT:    s_branch .LBB2_2
; GCN-NEXT:  .LBB2_1: ; %Flow1
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GCN-NEXT:    s_and_b32 s1, exec_lo, s4
; GCN-NEXT:    s_or_b32 s2, s1, s2
; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s2
; GCN-NEXT:    s_cbranch_execz .LBB2_6
; GCN-NEXT:  .LBB2_2: ; %header
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_ge_i32_e64 s3, s0, v1
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v1
; GCN-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GCN-NEXT:    s_cbranch_execz .LBB2_4
; GCN-NEXT:  ; %bb.3: ; %if
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_ashr_i32 s1, s0, 31
; GCN-NEXT:    s_lshl_b64 s[6:7], s[0:1], 2
; GCN-NEXT:    s_andn2_b32 s1, s3, exec_lo
; GCN-NEXT:    v_add_co_u32 v6, vcc_lo, v4, s6
; GCN-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, s7, v5, vcc_lo
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v2
; GCN-NEXT:    global_load_dword v0, v[6:7], off
; GCN-NEXT:    s_and_b32 s3, vcc_lo, exec_lo
; GCN-NEXT:    s_or_b32 s3, s1, s3
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v6, 1.0, v0
; GCN-NEXT:    v_mov_b32_e32 v0, v6
; GCN-NEXT:  .LBB2_4: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GCN-NEXT:    v_mov_b32_e32 v7, v6
; GCN-NEXT:    s_mov_b32 s4, -1
; GCN-NEXT:    s_and_saveexec_b32 s1, s3
; GCN-NEXT:    s_cbranch_execz .LBB2_1
; GCN-NEXT:  ; %bb.5: ; %latch
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v3
; GCN-NEXT:    v_mov_b32_e32 v7, v0
; GCN-NEXT:    s_add_i32 s0, s0, 1
; GCN-NEXT:    s_orn2_b32 s4, vcc_lo, exec_lo
; GCN-NEXT:    s_branch .LBB2_1
; GCN-NEXT:  .LBB2_6: ; %end
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s2
; GCN-NEXT:    v_mov_b32_e32 v0, v7
; GCN-NEXT:    v_mov_b32_e32 v1, v6
; GCN-NEXT:    ; return to shader part epilog
entry:
  br label %header

header:
  ; Loop-carried state: both value chains plus the induction variable.
  %v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
  %v.copy = phi float [ 0.0, %entry ], [ %v.copy.2, %latch ]
  %ind = phi i32 [ 0, %entry ], [ %ind.inc, %latch ]
  %cc = icmp slt i32 %ind, %x
  br i1 %cc, label %if, label %latch

if:
  ; %v.if = p[%ind] + 1.0 is the single value fed into both phi chains.
  %v.ptr = getelementptr float, ptr addrspace(1) %p, i32 %ind
  %v.load = load float, ptr addrspace(1) %v.ptr
  %v.if = fadd float %v.load, 1.0
  %cc2 = icmp slt i32 %ind, %y
  ; The false edge leaves the loop directly from %if.
  br i1 %cc2, label %latch, label %end

latch:
  ; Coming from %header each chain keeps its own value; coming from %if both
  ; chains take %v.if.
  %v.2 = phi float [ %v.1, %header ], [ %v.if, %if ]
  %v.copy.2 = phi float [ %v.copy, %header ], [ %v.if, %if ]
  %ind.inc = add i32 %ind, 1
  ; The exit test uses the pre-increment %ind; the true edge leaves the loop.
  %cc3 = icmp slt i32 %ind, %z
  br i1 %cc3, label %end, label %header

end:
  ; Merge the two loop exits (%latch and %if) for each chain, then return the
  ; pair as <2 x float> with %r in lane 0 and %r2 in lane 1.
  %r = phi float [ %v.2, %latch ], [ %v.if, %if ]
  %r2 = phi float [ %v.copy.2, %latch ], [ %v.if, %if ]
  %packed0 = insertelement <2 x float> poison, float %r, i32 0
  %packed1 = insertelement <2 x float> %packed0, float %r2, i32 1
  ret <2 x float> %packed1
}
240+
; Attribute group #0, referenced by the `#0` suffix on function definitions.
attributes #0 = { nounwind }
0 commit comments