diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 92e47cbc7ae8b..90c18c0b9c01c 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -8,6 +8,7 @@ #include "llvm/Transforms/Scalar/StructurizeCFG.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" @@ -325,6 +326,10 @@ class StructurizeCFG { void findUndefBlocks(BasicBlock *PHIBlock, const SmallSet &Incomings, SmallVector &UndefBlks) const; + + void mergeIfCompatible(EquivalenceClasses &PhiClasses, PHINode *A, + PHINode *B); + void setPhiValues(); void simplifyAffectedPhis(); @@ -755,10 +760,102 @@ void StructurizeCFG::findUndefBlocks( } } +// If two phi nodes have compatible incoming values (for each +// incoming block, either they have the same incoming value or only one phi +// node has an incoming value), let them share the merged incoming values. The +// merge process is guided by the equivalence information from \p PhiClasses. +// The function will possibly update the incoming values of leader phi in +// DeletedPhis. +void StructurizeCFG::mergeIfCompatible( + EquivalenceClasses &PhiClasses, PHINode *A, PHINode *B) { + auto ItA = PhiClasses.findLeader(PhiClasses.insert(A)); + auto ItB = PhiClasses.findLeader(PhiClasses.insert(B)); + // They are already in the same class, no work needed. + if (ItA == ItB) + return; + + PHINode *LeaderA = *ItA; + PHINode *LeaderB = *ItB; + BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA]; + BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB]; + + DenseMap Mergeable(IncomingA.begin(), IncomingA.end()); + for (auto [BB, V] : IncomingB) { + auto BBIt = Mergeable.find(BB); + if (BBIt != Mergeable.end() && BBIt->second != V) + return; + // Either IncomingA does not have this value or IncomingA has the same + // value. + Mergeable.insert({BB, V}); + } + + // Update the incoming value of leaderA. + IncomingA.assign(Mergeable.begin(), Mergeable.end()); + PhiClasses.unionSets(ItA, ItB); +} + /// Add the real PHI value as soon as everything is set up void StructurizeCFG::setPhiValues() { SmallVector InsertedPhis; SSAUpdater Updater(&InsertedPhis); + DenseMap> UndefBlksMap; + + // Find phi nodes that have compatible incoming values (either they have + // the same value for the same block or only one phi node has an incoming + // value, see example below). We only search again the phi's that are + // referenced by another phi, which is the case we care about. + // + // For example (-- means no incoming value): + // phi1 : BB1:phi2 BB2:v BB3:-- + // phi2: BB1:-- BB2:v BB3:w + // + // Then we can merge these incoming values and let phi1, phi2 use the + // same set of incoming values: + // + // phi1&phi2: BB1:phi2 BB2:v BB3:w + // + // By doing this, phi1 and phi2 would share more intermediate phi nodes. + // This would help reduce the number of phi nodes during SSA reconstruction + // and ultimately result in fewer COPY instructions. + // + // This should be correct, because if a phi node does not have incoming + // value from certain block, this means the block is not the predecessor + // of the parent block, so we actually don't care about its incoming value. + EquivalenceClasses PhiClasses; + for (const auto &[To, From] : AddedPhis) { + auto OldPhiIt = DeletedPhis.find(To); + if (OldPhiIt == DeletedPhis.end()) + continue; + + PhiMap &BlkPhis = OldPhiIt->second; + SmallVector &UndefBlks = UndefBlksMap[To]; + SmallSet Incomings; + + // Get the undefined blocks shared by all the phi nodes. + if (!BlkPhis.empty()) { + for (const auto &VI : BlkPhis.front().second) + Incomings.insert(VI.first); + findUndefBlocks(To, Incomings, UndefBlks); + } + + for (const auto &[Phi, Incomings] : OldPhiIt->second) { + SmallVector IncomingPHIs; + for (const auto &[BB, V] : Incomings) { + // First, for each phi, check whether it has incoming value which is + // another phi. + if (PHINode *P = dyn_cast(V)) + IncomingPHIs.push_back(P); + } + + for (auto *OtherPhi : IncomingPHIs) { + // Skip phis that are unrelated to the phi reconstruction for now. + if (!DeletedPhis.contains(OtherPhi->getParent())) + continue; + mergeIfCompatible(PhiClasses, Phi, OtherPhi); + } + } + } + for (const auto &AddedPhi : AddedPhis) { BasicBlock *To = AddedPhi.first; const BBVector &From = AddedPhi.second; @@ -766,28 +863,27 @@ void StructurizeCFG::setPhiValues() { if (!DeletedPhis.count(To)) continue; - SmallVector UndefBlks; - bool CachedUndefs = false; PhiMap &Map = DeletedPhis[To]; - for (const auto &PI : Map) { - PHINode *Phi = PI.first; + SmallVector &UndefBlks = UndefBlksMap[To]; + for (const auto &[Phi, Incoming] : Map) { Value *Undef = UndefValue::get(Phi->getType()); Updater.Initialize(Phi->getType(), ""); Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); Updater.AddAvailableValue(To, Undef); - SmallSet Incomings; - SmallVector ConstantPreds; - for (const auto &VI : PI.second) { - Incomings.insert(VI.first); - Updater.AddAvailableValue(VI.first, VI.second); - if (isa(VI.second)) - ConstantPreds.push_back(VI.first); - } + // Use leader phi's incoming if there is. + auto LeaderIt = PhiClasses.findLeader(Phi); + bool UseIncomingOfLeader = + LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi; + const auto &IncomingMap = + UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt] + : Incoming; - if (!CachedUndefs) { - findUndefBlocks(To, Incomings, UndefBlks); - CachedUndefs = true; + SmallVector ConstantPreds; + for (const auto &[BB, V] : IncomingMap) { + Updater.AddAvailableValue(BB, V); + if (isa(V)) + ConstantPreds.push_back(BB); } for (auto UB : UndefBlks) { @@ -798,6 +894,10 @@ void StructurizeCFG::setPhiValues() { if (any_of(ConstantPreds, [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) continue; + // Maybe already get a value through sharing with other phi nodes. + if (Updater.HasValueForBlock(UB)) + continue; + Updater.AddAvailableValue(UB, Undef); } @@ -805,10 +905,7 @@ void StructurizeCFG::setPhiValues() { Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); AffectedPhis.push_back(Phi); } - - DeletedPhis.erase(To); } - assert(DeletedPhis.empty()); AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index b27d8fdc24ff7..935200d595307 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -298,7 +298,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_mov_b32_e32 v5, s5 +; GFX10-NEXT: v_mov_b32_e32 v4, s5 ; GFX10-NEXT: s_branch .LBB4_2 ; GFX10-NEXT: .LBB4_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 @@ -312,7 +312,6 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_cbranch_execz .LBB4_6 ; GFX10-NEXT: .LBB4_2: ; %cond.block.0 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4 ; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_4 @@ -329,12 +328,11 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4 ; GFX10-NEXT: s_mov_b32 s7, -1 -; GFX10-NEXT: ; implicit-def: $vgpr5 ; GFX10-NEXT: s_and_saveexec_b32 s8, s4 ; GFX10-NEXT: s_cbranch_execz .LBB4_1 ; GFX10-NEXT: ; %bb.5: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 -; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v4 +; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4 ; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo ; GFX10-NEXT: s_and_b32 s7, exec_lo, 0 ; GFX10-NEXT: s_or_b32 s7, s4, s7 diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index 49b450a9af0bc..4d26453e1a0d6 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -576,11 +576,11 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] ; GFX908-NEXT: v_mov_b32_e32 v4, s8 ; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v6 -; GFX908-NEXT: v_mov_b32_e32 v8, s8 ; GFX908-NEXT: v_mov_b32_e32 v6, s8 +; GFX908-NEXT: v_mov_b32_e32 v8, s8 ; GFX908-NEXT: v_mov_b32_e32 v5, s9 -; GFX908-NEXT: v_mov_b32_e32 v9, s9 ; GFX908-NEXT: v_mov_b32_e32 v7, s9 +; GFX908-NEXT: v_mov_b32_e32 v9, s9 ; GFX908-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0 ; GFX908-NEXT: v_mov_b32_e32 v11, v5 ; GFX908-NEXT: s_mov_b64 s[18:19], s[10:11] @@ -641,10 +641,10 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX908-NEXT: v_add_f32_e32 v12, v20, v12 ; GFX908-NEXT: v_add_f32_e32 v5, v5, v25 ; GFX908-NEXT: v_add_f32_e32 v4, v4, v24 -; GFX908-NEXT: v_add_f32_e32 v9, v9, v27 -; GFX908-NEXT: v_add_f32_e32 v8, v8, v26 -; GFX908-NEXT: v_add_f32_e32 v6, v6, v14 -; GFX908-NEXT: v_add_f32_e32 v7, v7, v15 +; GFX908-NEXT: v_add_f32_e32 v7, v7, v27 +; GFX908-NEXT: v_add_f32_e32 v6, v6, v26 +; GFX908-NEXT: v_add_f32_e32 v8, v8, v14 +; GFX908-NEXT: v_add_f32_e32 v9, v9, v15 ; GFX908-NEXT: v_add_f32_e32 v10, v10, v12 ; GFX908-NEXT: v_add_f32_e32 v11, v11, v13 ; GFX908-NEXT: s_mov_b64 s[20:21], -1 @@ -654,10 +654,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21] ; GFX908-NEXT: s_cbranch_vccz .LBB3_4 ; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1 -; GFX908-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX908-NEXT: ; implicit-def: $vgpr6_vgpr7 -; GFX908-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX908-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3 ; GFX908-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard @@ -743,8 +739,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1] ; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[8:9], s[8:9] op_sel:[0,1] ; GFX90A-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v8 -; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[8:9], s[8:9] op_sel:[0,1] ; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[8:9], s[8:9] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[8:9], s[8:9] op_sel:[0,1] ; GFX90A-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0 ; GFX90A-NEXT: s_mov_b64 s[18:19], s[10:11] ; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1] @@ -800,8 +796,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: v_pk_add_f32 v[16:17], v[22:23], v[16:17] ; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[20:21], v[14:15] ; GFX90A-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[24:25] -; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[26:27] -; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[16:17] +; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[26:27] +; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17] ; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15] ; GFX90A-NEXT: s_mov_b64 s[20:21], -1 ; GFX90A-NEXT: s_branch .LBB3_4 @@ -810,10 +806,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21] ; GFX90A-NEXT: s_cbranch_vccz .LBB3_4 ; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1 -; GFX90A-NEXT: ; implicit-def: $vgpr12_vgpr13 -; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 ; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX90A-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard diff --git a/llvm/test/CodeGen/AMDGPU/while-break.ll b/llvm/test/CodeGen/AMDGPU/while-break.ll index 46254994580d2..9bb8a2f9f0282 100644 --- a/llvm/test/CodeGen/AMDGPU/while-break.ll +++ b/llvm/test/CodeGen/AMDGPU/while-break.ll @@ -162,8 +162,8 @@ define amdgpu_ps < 2 x float> @while_break_two_chains_of_phi(float %v, i32 %x, i ; GCN-NEXT: s_branch .LBB2_2 ; GCN-NEXT: .LBB2_1: ; %Flow1 ; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GCN-NEXT: s_and_b32 s1, exec_lo, s4 +; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GCN-NEXT: s_and_b32 s1, exec_lo, s1 ; GCN-NEXT: s_or_b32 s2, s1, s2 ; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GCN-NEXT: s_cbranch_execz .LBB2_6 @@ -190,20 +190,17 @@ define amdgpu_ps < 2 x float> @while_break_two_chains_of_phi(float %v, i32 %x, i ; GCN-NEXT: .LBB2_4: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1 ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GCN-NEXT: v_mov_b32_e32 v7, v6 -; GCN-NEXT: s_mov_b32 s4, -1 -; GCN-NEXT: s_and_saveexec_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s1, -1 +; GCN-NEXT: s_and_saveexec_b32 s4, s3 ; GCN-NEXT: s_cbranch_execz .LBB2_1 ; GCN-NEXT: ; %bb.5: ; %latch ; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s0, v3 -; GCN-NEXT: v_mov_b32_e32 v7, v0 ; GCN-NEXT: s_add_i32 s0, s0, 1 -; GCN-NEXT: s_orn2_b32 s4, vcc_lo, exec_lo +; GCN-NEXT: s_orn2_b32 s1, vcc_lo, exec_lo ; GCN-NEXT: s_branch .LBB2_1 ; GCN-NEXT: .LBB2_6: ; %end ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GCN-NEXT: v_mov_b32_e32 v0, v7 ; GCN-NEXT: v_mov_b32_e32 v1, v6 ; GCN-NEXT: ; return to shader part epilog entry: diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll index 10a3e65e5f57d..385e37e2750d1 100644 --- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll +++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll @@ -28,7 +28,7 @@ define amdgpu_kernel void @loop_subregion_misordered(ptr addrspace(1) %arg0) #0 ; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, ptr addrspace(1) [[GEP]], align 4 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: LOOP.HEADER: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[FLOW3:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW3:%.*]] ] ; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) null, i64 [[TMP12]] @@ -49,8 +49,8 @@ define amdgpu_kernel void @loop_subregion_misordered(ptr addrspace(1) %arg0) #0 ; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: Flow2: -; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP7:%.*]], [[FLOW]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP9:%.*]], [[FLOW]] ] +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP6:%.*]], [[FLOW]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ] ; CHECK-NEXT: br i1 [[TMP4]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]] ; CHECK: INNER_LOOP: ; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ] @@ -66,20 +66,19 @@ define amdgpu_kernel void @loop_subregion_misordered(ptr addrspace(1) %arg0) #0 ; CHECK-NEXT: [[LOAD13:%.*]] = icmp uge i32 [[TMP16]], 271 ; CHECK-NEXT: br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW1:%.*]] ; CHECK: Flow3: -; CHECK-NEXT: [[TMP5]] = phi i32 [ [[TMP3]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW2]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ] -; CHECK-NEXT: br i1 [[TMP6]], label [[FLOW4:%.*]], label [[LOOP_HEADER]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW4:%.*]], label [[LOOP_HEADER]] ; CHECK: Flow4: -; CHECK-NEXT: br i1 [[TMP8:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]] ; CHECK: bb64: ; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #[[ATTR0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP7]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ] -; CHECK-NEXT: br i1 [[TMP10]], label [[BB18]], label [[FLOW2]] +; CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW2]] ; CHECK: INCREMENT_I: ; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336 diff --git a/llvm/test/Transforms/StructurizeCFG/loop-break-phi.ll b/llvm/test/Transforms/StructurizeCFG/loop-break-phi.ll index c832b7d1394a8..46881ec827286 100644 --- a/llvm/test/Transforms/StructurizeCFG/loop-break-phi.ll +++ b/llvm/test/Transforms/StructurizeCFG/loop-break-phi.ll @@ -7,8 +7,8 @@ define float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[HEADER:.*]] ; CHECK: [[HEADER]]: -; CHECK-NEXT: [[V_1:%.*]] = phi float [ [[V]], %[[ENTRY]] ], [ [[TMP7:%.*]], %[[FLOW2:.*]] ] -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP6:%.*]], %[[FLOW2]] ] +; CHECK-NEXT: [[V_1:%.*]] = phi float [ [[V]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FLOW2:.*]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FLOW2]] ] ; CHECK-NEXT: [[CC:%.*]] = icmp sge i32 [[IND]], [[X]] ; CHECK-NEXT: br i1 [[CC]], label %[[ELSE:.*]], label %[[FLOW:.*]] ; CHECK: [[FLOW]]: @@ -23,20 +23,17 @@ define float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 { ; CHECK-NEXT: [[CC2]] = icmp slt i32 [[IND]], [[Y]] ; CHECK-NEXT: br label %[[FLOW]] ; CHECK: [[FLOW1]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi float [ undef, %[[IF]] ], [ [[TMP0]], %[[FLOW]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[V_IF]], %[[IF]] ], [ [[TMP0]], %[[FLOW]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, %[[IF]] ], [ [[TMP1]], %[[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP5]], label %[[LATCH:.*]], label %[[FLOW2]] +; CHECK-NEXT: [[TMP8]] = phi float [ [[V_IF]], %[[IF]] ], [ [[TMP0]], %[[FLOW]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, %[[IF]] ], [ [[TMP1]], %[[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP4]], label %[[LATCH:.*]], label %[[FLOW2]] ; CHECK: [[LATCH]]: ; CHECK-NEXT: [[IND_INC:%.*]] = add i32 [[IND]], 1 ; CHECK-NEXT: [[CC3:%.*]] = icmp slt i32 [[IND]], [[Z]] ; CHECK-NEXT: br label %[[FLOW2]] ; CHECK: [[FLOW2]]: -; CHECK-NEXT: [[TMP6]] = phi i32 [ [[IND_INC]], %[[LATCH]] ], [ undef, %[[FLOW1]] ] -; CHECK-NEXT: [[TMP7]] = phi float [ [[TMP4]], %[[LATCH]] ], [ undef, %[[FLOW1]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP4]], %[[LATCH]] ], [ [[TMP3]], %[[FLOW1]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[CC3]], %[[LATCH]] ], [ true, %[[FLOW1]] ] -; CHECK-NEXT: br i1 [[TMP9]], label %[[END:.*]], label %[[HEADER]] +; CHECK-NEXT: [[TMP5]] = phi i32 [ [[IND_INC]], %[[LATCH]] ], [ undef, %[[FLOW1]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[CC3]], %[[LATCH]] ], [ true, %[[FLOW1]] ] +; CHECK-NEXT: br i1 [[TMP6]], label %[[END:.*]], label %[[HEADER]] ; CHECK: [[END]]: ; CHECK-NEXT: ret float [[TMP8]] ; @@ -75,8 +72,8 @@ define float @while_break2(i32 %z, float %v, i32 %x, i32 %y) #0 { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[HEADER:.*]] ; CHECK: [[HEADER]]: -; CHECK-NEXT: [[V_1:%.*]] = phi float [ [[V]], %[[ENTRY]] ], [ [[TMP7:%.*]], %[[FLOW2:.*]] ] -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP6:%.*]], %[[FLOW2]] ] +; CHECK-NEXT: [[V_1:%.*]] = phi float [ [[V]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FLOW2:.*]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FLOW2]] ] ; CHECK-NEXT: [[CC:%.*]] = icmp sge i32 [[IND]], [[X]] ; CHECK-NEXT: br i1 [[CC]], label %[[IF:.*]], label %[[FLOW:.*]] ; CHECK: [[IF]]: @@ -91,20 +88,17 @@ define float @while_break2(i32 %z, float %v, i32 %x, i32 %y) #0 { ; CHECK-NEXT: [[CC2:%.*]] = icmp slt i32 [[IND]], [[Y]] ; CHECK-NEXT: br label %[[FLOW1]] ; CHECK: [[FLOW1]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[V_1]], %[[ELSE]] ], [ undef, %[[FLOW]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[V_1]], %[[ELSE]] ], [ [[TMP0]], %[[FLOW]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CC2]], %[[ELSE]] ], [ [[TMP1]], %[[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP5]], label %[[LATCH:.*]], label %[[FLOW2]] +; CHECK-NEXT: [[TMP8]] = phi float [ [[V_1]], %[[ELSE]] ], [ [[TMP0]], %[[FLOW]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[CC2]], %[[ELSE]] ], [ [[TMP1]], %[[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP4]], label %[[LATCH:.*]], label %[[FLOW2]] ; CHECK: [[LATCH]]: ; CHECK-NEXT: [[IND_INC:%.*]] = add i32 [[IND]], 1 ; CHECK-NEXT: [[CC3:%.*]] = icmp slt i32 [[IND]], [[Z]] ; CHECK-NEXT: br label %[[FLOW2]] ; CHECK: [[FLOW2]]: -; CHECK-NEXT: [[TMP6]] = phi i32 [ [[IND_INC]], %[[LATCH]] ], [ undef, %[[FLOW1]] ] -; CHECK-NEXT: [[TMP7]] = phi float [ [[TMP4]], %[[LATCH]] ], [ undef, %[[FLOW1]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP4]], %[[LATCH]] ], [ [[TMP3]], %[[FLOW1]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[CC3]], %[[LATCH]] ], [ true, %[[FLOW1]] ] -; CHECK-NEXT: br i1 [[TMP9]], label %[[END:.*]], label %[[HEADER]] +; CHECK-NEXT: [[TMP5]] = phi i32 [ [[IND_INC]], %[[LATCH]] ], [ undef, %[[FLOW1]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[CC3]], %[[LATCH]] ], [ true, %[[FLOW1]] ] +; CHECK-NEXT: br i1 [[TMP6]], label %[[END:.*]], label %[[HEADER]] ; CHECK: [[END]]: ; CHECK-NEXT: ret float [[TMP8]] ; @@ -143,9 +137,9 @@ define < 2 x float> @while_break_two_chains_of_phi(float %v, i32 %x, i32 %y, i32 ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[HEADER:.*]] ; CHECK: [[HEADER]]: -; CHECK-NEXT: [[V_1:%.*]] = phi float [ [[V]], %[[ENTRY]] ], [ [[TMP6:%.*]], %[[FLOW1:.*]] ] -; CHECK-NEXT: [[V_COPY:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FLOW1]] ] -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP4:%.*]], %[[FLOW1]] ] +; CHECK-NEXT: [[V_1:%.*]] = phi float [ [[V]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FLOW1:.*]] ] +; CHECK-NEXT: [[V_COPY:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP7:%.*]], %[[FLOW1]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[FLOW1]] ] ; CHECK-NEXT: [[CC:%.*]] = icmp slt i32 [[IND]], [[X]] ; CHECK-NEXT: [[CC_INV:%.*]] = xor i1 [[CC]], true ; CHECK-NEXT: br i1 [[CC]], label %[[IF:.*]], label %[[FLOW:.*]] @@ -156,23 +150,18 @@ define < 2 x float> @while_break_two_chains_of_phi(float %v, i32 %x, i32 %y, i32 ; CHECK-NEXT: [[CC2:%.*]] = icmp slt i32 [[IND]], [[Y]] ; CHECK-NEXT: br label %[[FLOW]] ; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[V_IF]], %[[IF]] ], [ undef, %[[HEADER]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi float [ [[V_IF]], %[[IF]] ], [ [[V_COPY]], %[[HEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi float [ [[V_IF]], %[[IF]] ], [ [[V_1]], %[[HEADER]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[CC2]], %[[IF]] ], [ [[CC_INV]], %[[HEADER]] ] -; CHECK-NEXT: br i1 [[TMP3]], label %[[LATCH:.*]], label %[[FLOW1]] +; CHECK-NEXT: [[TMP7]] = phi float [ [[V_IF]], %[[IF]] ], [ [[V_COPY]], %[[HEADER]] ] +; CHECK-NEXT: [[TMP8]] = phi float [ [[V_IF]], %[[IF]] ], [ [[V_1]], %[[HEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[CC2]], %[[IF]] ], [ [[CC_INV]], %[[HEADER]] ] +; CHECK-NEXT: br i1 [[TMP2]], label %[[LATCH:.*]], label %[[FLOW1]] ; CHECK: [[LATCH]]: ; CHECK-NEXT: [[IND_INC:%.*]] = add i32 [[IND]], 1 ; CHECK-NEXT: [[CC3:%.*]] = icmp slt i32 [[IND]], [[Z]] ; CHECK-NEXT: br label %[[FLOW1]] ; CHECK: [[FLOW1]]: -; CHECK-NEXT: [[TMP4]] = phi i32 [ [[IND_INC]], %[[LATCH]] ], [ undef, %[[FLOW]] ] -; CHECK-NEXT: [[TMP5]] = phi float [ [[TMP1]], %[[LATCH]] ], [ undef, %[[FLOW]] ] -; CHECK-NEXT: [[TMP6]] = phi float [ [[TMP2]], %[[LATCH]] ], [ undef, %[[FLOW]] ] -; CHECK-NEXT: [[TMP7:%.*]] = phi float [ [[TMP1]], %[[LATCH]] ], [ [[TMP0]], %[[FLOW]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP2]], %[[LATCH]] ], [ [[TMP0]], %[[FLOW]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[CC3]], %[[LATCH]] ], [ true, %[[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP9]], label %[[END:.*]], label %[[HEADER]] +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[IND_INC]], %[[LATCH]] ], [ undef, %[[FLOW]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[CC3]], %[[LATCH]] ], [ true, %[[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP4]], label %[[END:.*]], label %[[HEADER]] ; CHECK: [[END]]: ; CHECK-NEXT: [[PACKED0:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 ; CHECK-NEXT: [[PACKED1:%.*]] = insertelement <2 x float> [[PACKED0]], float [[TMP7]], i32 1