@@ -44,6 +44,8 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
4444; FEATURE-NEXT: br label [[BB_2]]
4545; FEATURE: bb.2:
4646; FEATURE-NEXT: [[PHI5_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
47+ ; FEATURE-NEXT: [[PHI5_TC1:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
48+ ; FEATURE-NEXT: [[PHI5_TC2:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
4749; FEATURE-NEXT: [[TMP2:%.*]] = trunc i32 [[PHI5_TC]] to i24
4850; FEATURE-NEXT: [[TMP3:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
4951; FEATURE-NEXT: store <3 x i8> [[TMP3]], ptr addrspace(1) [[DST]], align 4
@@ -67,6 +69,8 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
6769; DEFAULT-NEXT: br label [[BB_2]]
6870; DEFAULT: bb.2:
6971; DEFAULT-NEXT: [[PHI5_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
72+ ; DEFAULT-NEXT: [[PHI5_TC1:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
73+ ; DEFAULT-NEXT: [[PHI5_TC2:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
7074; DEFAULT-NEXT: [[TMP2:%.*]] = trunc i32 [[PHI5_TC]] to i24
7175; DEFAULT-NEXT: [[PHI5:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
7276; DEFAULT-NEXT: store <3 x i8> [[PHI5]], ptr addrspace(1) [[DST]], align 4
@@ -126,6 +130,8 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
126130; FEATURE-NEXT: br label [[BB_2]]
127131; FEATURE: bb.2:
128132; FEATURE-NEXT: [[PHI5_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
133+ ; FEATURE-NEXT: [[PHI5_TC1:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
134+ ; FEATURE-NEXT: [[PHI5_TC2:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
129135; FEATURE-NEXT: [[PHI5_TC_BC:%.*]] = bitcast i32 [[PHI5_TC]] to <4 x i8>
130136; FEATURE-NEXT: store <4 x i8> [[PHI5_TC_BC]], ptr addrspace(1) [[DST]], align 4
131137; FEATURE-NEXT: ret void
@@ -146,6 +152,8 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
146152; DEFAULT-NEXT: br label [[BB_2]]
147153; DEFAULT: bb.2:
148154; DEFAULT-NEXT: [[PHI5_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
155+ ; DEFAULT-NEXT: [[PHI5_TC1:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
156+ ; DEFAULT-NEXT: [[PHI5_TC2:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
149157; DEFAULT-NEXT: [[PHI5:%.*]] = bitcast i32 [[PHI5_TC]] to <4 x i8>
150158; DEFAULT-NEXT: store <4 x i8> [[PHI5]], ptr addrspace(1) [[DST]], align 4
151159; DEFAULT-NEXT: ret void
@@ -209,6 +217,8 @@ define amdgpu_kernel void @v5i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
209217; FEATURE-NEXT: br label [[BB_2]]
210218; FEATURE: bb.2:
211219; FEATURE-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
220+ ; FEATURE-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
221+ ; FEATURE-NEXT: [[PHI5_TC2:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
212222; FEATURE-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
213223; FEATURE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4>
214224; FEATURE-NEXT: store <5 x i8> [[TMP3]], ptr addrspace(1) [[DST]], align 4
@@ -232,6 +242,8 @@ define amdgpu_kernel void @v5i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
232242; DEFAULT-NEXT: br label [[BB_2]]
233243; DEFAULT: bb.2:
234244; DEFAULT-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
245+ ; DEFAULT-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
246+ ; DEFAULT-NEXT: [[PHI5_TC2:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
235247; DEFAULT-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
236248; DEFAULT-NEXT: [[PHI5:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4>
237249; DEFAULT-NEXT: store <5 x i8> [[PHI5]], ptr addrspace(1) [[DST]], align 4
@@ -291,6 +303,8 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
291303; FEATURE-NEXT: br label [[BB_2]]
292304; FEATURE: bb.2:
293305; FEATURE-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
306+ ; FEATURE-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
307+ ; FEATURE-NEXT: [[PHI5_TC2:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
294308; FEATURE-NEXT: [[PHI5_TC_BC:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
295309; FEATURE-NEXT: store <8 x i8> [[PHI5_TC_BC]], ptr addrspace(1) [[DST]], align 4
296310; FEATURE-NEXT: ret void
@@ -311,6 +325,8 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
311325; DEFAULT-NEXT: br label [[BB_2]]
312326; DEFAULT: bb.2:
313327; DEFAULT-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
328+ ; DEFAULT-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
329+ ; DEFAULT-NEXT: [[PHI5_TC2:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
314330; DEFAULT-NEXT: [[PHI5:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
315331; DEFAULT-NEXT: store <8 x i8> [[PHI5]], ptr addrspace(1) [[DST]], align 4
316332; DEFAULT-NEXT: ret void
@@ -377,6 +393,8 @@ define amdgpu_kernel void @repeat_successor(i32 %in, ptr addrspace(1) %src1, ptr
377393; FEATURE-NEXT: br label [[RETURN_SINK_SPLIT]]
378394; FEATURE: return.sink.split:
379395; FEATURE-NEXT: [[PHI5_TC:%.*]] = phi i32 [ [[VEC2_BC]], [[SW_BB5]] ], [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC1_BC]], [[ENTRY]] ]
396+ ; FEATURE-NEXT: [[PHI5_TC1:%.*]] = phi i32 [ [[VEC2_BC]], [[SW_BB5]] ], [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC1_BC]], [[ENTRY]] ]
397+ ; FEATURE-NEXT: [[PHI5_TC2:%.*]] = phi i32 [ [[VEC2_BC]], [[SW_BB5]] ], [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC1_BC]], [[ENTRY]] ]
380398; FEATURE-NEXT: [[PHI5_TC_BC:%.*]] = bitcast i32 [[PHI5_TC]] to <4 x i8>
381399; FEATURE-NEXT: store <4 x i8> [[PHI5_TC_BC]], ptr addrspace(1) [[DST]], align 4
382400; FEATURE-NEXT: ret void
@@ -402,6 +420,8 @@ define amdgpu_kernel void @repeat_successor(i32 %in, ptr addrspace(1) %src1, ptr
402420; DEFAULT-NEXT: br label [[RETURN_SINK_SPLIT]]
403421; DEFAULT: return.sink.split:
404422; DEFAULT-NEXT: [[PHI5_TC:%.*]] = phi i32 [ [[VEC2_BC]], [[SW_BB5]] ], [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC1_BC]], [[ENTRY]] ]
423+ ; DEFAULT-NEXT: [[PHI5_TC1:%.*]] = phi i32 [ [[VEC2_BC]], [[SW_BB5]] ], [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC1_BC]], [[ENTRY]] ]
424+ ; DEFAULT-NEXT: [[PHI5_TC2:%.*]] = phi i32 [ [[VEC2_BC]], [[SW_BB5]] ], [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC1_BC]], [[ENTRY]] ]
405425; DEFAULT-NEXT: [[PHI5:%.*]] = bitcast i32 [[PHI5_TC]] to <4 x i8>
406426; DEFAULT-NEXT: store <4 x i8> [[PHI5]], ptr addrspace(1) [[DST]], align 4
407427; DEFAULT-NEXT: ret void
@@ -476,11 +496,17 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
476496; FEATURE-NEXT: br i1 [[CMP2]], label [[BB_2]], label [[BB_3:%.*]]
477497; FEATURE: bb.2:
478498; FEATURE-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
499+ ; FEATURE-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
500+ ; FEATURE-NEXT: [[PHI5_TC3:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
501+ ; FEATURE-NEXT: [[PHI5_TC5:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
479502; FEATURE-NEXT: [[PHI5_TC_BC:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
480503; FEATURE-NEXT: store <8 x i8> [[PHI5_TC_BC]], ptr addrspace(1) [[DST0]], align 4
481504; FEATURE-NEXT: br label [[BB_3]]
482505; FEATURE: bb.3:
483506; FEATURE-NEXT: [[PHI7_TC:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC]], [[BB_2]] ]
507+ ; FEATURE-NEXT: [[PHI7_TC2:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC1]], [[BB_2]] ]
508+ ; FEATURE-NEXT: [[PHI7_TC4:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC3]], [[BB_2]] ]
509+ ; FEATURE-NEXT: [[PHI7_TC6:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC5]], [[BB_2]] ]
484510; FEATURE-NEXT: [[PHI7_TC_BC:%.*]] = bitcast <2 x i32> [[PHI7_TC]] to <8 x i8>
485511; FEATURE-NEXT: store <8 x i8> [[PHI7_TC_BC]], ptr addrspace(1) [[DST1]], align 4
486512; FEATURE-NEXT: ret void
@@ -502,11 +528,17 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
502528; DEFAULT-NEXT: br i1 [[CMP2]], label [[BB_2]], label [[BB_3:%.*]]
503529; DEFAULT: bb.2:
504530; DEFAULT-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ]
531+ ; DEFAULT-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
532+ ; DEFAULT-NEXT: [[PHI5_TC3:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
533+ ; DEFAULT-NEXT: [[PHI5_TC5:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
505534; DEFAULT-NEXT: [[PHI5:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
506535; DEFAULT-NEXT: store <8 x i8> [[PHI5]], ptr addrspace(1) [[DST0]], align 4
507536; DEFAULT-NEXT: br label [[BB_3]]
508537; DEFAULT: bb.3:
509538; DEFAULT-NEXT: [[PHI7_TC:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC]], [[BB_2]] ]
539+ ; DEFAULT-NEXT: [[PHI7_TC2:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC1]], [[BB_2]] ]
540+ ; DEFAULT-NEXT: [[PHI7_TC4:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC3]], [[BB_2]] ]
541+ ; DEFAULT-NEXT: [[PHI7_TC6:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[PHI5_TC5]], [[BB_2]] ]
510542; DEFAULT-NEXT: [[PHI7:%.*]] = bitcast <2 x i32> [[PHI7_TC]] to <8 x i8>
511543; DEFAULT-NEXT: store <8 x i8> [[PHI7]], ptr addrspace(1) [[DST1]], align 4
512544; DEFAULT-NEXT: ret void
@@ -581,6 +613,8 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
581613; FEATURE-NEXT: br label [[BB_3]]
582614; FEATURE: bb.3:
583615; FEATURE-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ], [ [[VEC2_BC]], [[BB_2]] ]
616+ ; FEATURE-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ], [ [[VEC2_BC]], [[BB_2]] ]
617+ ; FEATURE-NEXT: [[PHI5_TC2:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ], [ [[VEC2_BC]], [[BB_2]] ]
584618; FEATURE-NEXT: [[PHI5_TC_BC:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
585619; FEATURE-NEXT: store <8 x i8> [[PHI5_TC_BC]], ptr addrspace(1) [[DST1]], align 4
586620; FEATURE-NEXT: ret void
@@ -606,6 +640,8 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
606640; DEFAULT-NEXT: br label [[BB_3]]
607641; DEFAULT: bb.3:
608642; DEFAULT-NEXT: [[PHI5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ], [ [[VEC2_BC]], [[BB_2]] ]
643+ ; DEFAULT-NEXT: [[PHI5_TC1:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ], [ [[VEC2_BC]], [[BB_2]] ]
644+ ; DEFAULT-NEXT: [[PHI5_TC2:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ], [ [[VEC2_BC]], [[BB_2]] ]
609645; DEFAULT-NEXT: [[PHI5:%.*]] = bitcast <2 x i32> [[PHI5_TC]] to <8 x i8>
610646; DEFAULT-NEXT: store <8 x i8> [[PHI5]], ptr addrspace(1) [[DST1]], align 4
611647; DEFAULT-NEXT: ret void
@@ -666,6 +702,8 @@ define amdgpu_kernel void @v32i8_loop_carried(ptr addrspace(1) %src1, ptr addrsp
666702; FEATURE-NEXT: br label [[BB_1:%.*]]
667703; FEATURE: bb.1:
668704; FEATURE-NEXT: [[TEMP_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC:%.*]], [[BB_1]] ]
705+ ; FEATURE-NEXT: [[TEMP_TC1:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
706+ ; FEATURE-NEXT: [[TEMP_TC2:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
669707; FEATURE-NEXT: [[TEMP_TC_BC:%.*]] = bitcast i32 [[TEMP_TC]] to <4 x i8>
670708; FEATURE-NEXT: [[VEC1_BC_BC:%.*]] = bitcast i32 [[VEC1_BC]] to <4 x i8>
671709; FEATURE-NEXT: [[VEC2:%.*]] = shufflevector <4 x i8> [[VEC1_BC_BC]], <4 x i8> [[TEMP_TC_BC]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -689,6 +727,8 @@ define amdgpu_kernel void @v32i8_loop_carried(ptr addrspace(1) %src1, ptr addrsp
689727; DEFAULT-NEXT: br label [[BB_1:%.*]]
690728; DEFAULT: bb.1:
691729; DEFAULT-NEXT: [[TEMP_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC:%.*]], [[BB_1]] ]
730+ ; DEFAULT-NEXT: [[TEMP_TC1:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
731+ ; DEFAULT-NEXT: [[TEMP_TC2:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY]] ], [ [[VEC2_BC]], [[BB_1]] ]
692732; DEFAULT-NEXT: [[TEMP_TC_BC:%.*]] = bitcast i32 [[TEMP_TC]] to <4 x i8>
693733; DEFAULT-NEXT: [[VEC1_BC_BC:%.*]] = bitcast i32 [[VEC1_BC]] to <4 x i8>
694734; DEFAULT-NEXT: [[VEC3:%.*]] = shufflevector <4 x i8> [[VEC1_BC_BC]], <4 x i8> [[TEMP_TC_BC]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
0 commit comments