Skip to content

Commit 8460dbb

Browse files
committed
[VPlan] Mark VPInstruction::Broadcast as not reading/writing memory.
This enables additional DCE/CSE opportunities and ensures that we don't end up with multiple redundant users of a VPInstruction using EVL. It fixes a verifier error in the added test_3_inductions test.
1 parent 17a66ea commit 8460dbb

File tree

10 files changed

+118
-70
lines changed

10 files changed

+118
-70
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
12141214
case Instruction::Select:
12151215
case Instruction::PHI:
12161216
case VPInstruction::AnyOf:
1217+
case VPInstruction::Broadcast:
12171218
case VPInstruction::BuildStructVector:
12181219
case VPInstruction::BuildVector:
12191220
case VPInstruction::CalculateTripCountMinusVF:

llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,81 @@ exit:
119119
ret void
120120
}
121121

122+
define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 {
123+
; CHECK-LABEL: define void @test_3_inductions(
124+
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
125+
; CHECK-NEXT: [[ENTRY:.*:]]
126+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
127+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
128+
; CHECK: [[VECTOR_PH]]:
129+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
130+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
131+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
132+
; CHECK-NEXT: [[TMP2:%.*]] = mul <vscale x 2 x i32> [[TMP1]], splat (i32 2)
133+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> splat (i32 1), [[TMP2]]
134+
; CHECK-NEXT: [[INDUCTION1:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP2]]
135+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
136+
; CHECK: [[VECTOR_BODY]]:
137+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
138+
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ]
139+
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
140+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
141+
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 2, [[TMP3]]
142+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP4]], i64 0
143+
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
144+
; CHECK-NEXT: [[TMP5:%.*]] = or <vscale x 2 x i32> [[VEC_IND2]], [[VEC_IND]]
145+
; CHECK-NEXT: [[TMP6:%.*]] = sext <vscale x 2 x i32> [[TMP5]] to <vscale x 2 x i64>
146+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], <vscale x 2 x i64> [[TMP6]]
147+
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[TMP7]], <vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
148+
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP3]] to i64
149+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
150+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT4]]
151+
; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <vscale x 2 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT4]]
152+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
153+
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
154+
; CHECK: [[MIDDLE_BLOCK]]:
155+
; CHECK-NEXT: br label %[[EXIT:.*]]
156+
; CHECK: [[SCALAR_PH:.*]]:
157+
; CHECK-NEXT: br label %[[LOOP:.*]]
158+
; CHECK: [[LOOP]]:
159+
; CHECK-NEXT: [[IV_0:%.*]] = phi i32 [ 1, %[[SCALAR_PH]] ], [ [[IV_0_NEXT:%.*]], %[[LOOP]] ]
160+
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
161+
; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
162+
; CHECK-NEXT: [[IV_OR:%.*]] = or i32 [[IV_2]], [[IV_0]]
163+
; CHECK-NEXT: [[IV_OR_EXT:%.*]] = sext i32 [[IV_OR]] to i64
164+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_OR_EXT]]
165+
; CHECK-NEXT: store ptr [[GEP_SRC]], ptr [[DST]], align 8
166+
; CHECK-NEXT: [[IV_0_NEXT]] = add i32 [[IV_0]], 2
167+
; CHECK-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], 1
168+
; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 2
169+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_1]], [[N]]
170+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]]
171+
; CHECK: [[EXIT]]:
172+
; CHECK-NEXT: ret void
173+
;
174+
entry:
175+
br label %loop
176+
177+
loop:
178+
%iv.0 = phi i32 [ 1, %entry ], [ %iv.0.next, %loop ]
179+
%iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ]
180+
%iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ]
181+
%iv.or = or i32 %iv.2, %iv.0
182+
%iv.or.ext = sext i32 %iv.or to i64
183+
%gep.src = getelementptr i8, ptr %src, i64 %iv.or.ext
184+
store ptr %gep.src, ptr %dst, align 8
185+
%iv.0.next = add i32 %iv.0, 2
186+
%iv.1.next = add i64 %iv.1, 1
187+
%iv.2.next = add i32 %iv.2, 2
188+
%ec = icmp eq i64 %iv.1, %n
189+
br i1 %ec, label %exit, label %loop
190+
191+
exit:
192+
ret void
193+
}
194+
122195
attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
196+
attributes #1 = { "target-cpu"="sifive-p670" }
123197
;.
124198
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
125199
; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
@@ -131,4 +205,5 @@ attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
131205
; CHECK: [[META7]] = !{!"llvm.loop.isvectorized", i32 1}
132206
; CHECK: [[META8]] = !{!"llvm.loop.unroll.runtime.disable"}
133207
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]]}
208+
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META8]]}
134209
;.

llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -145,11 +145,9 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
145145
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
146146
; CHECK-NEXT: [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
147147
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
148-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
149-
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
150-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
148+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
151149
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
152-
; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[DOTSPLAT]]
150+
; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[TMP2]]
153151
; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP11]]
154152
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
155153
; CHECK: vector.body:
@@ -158,10 +156,10 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
158156
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i16> [[VEC_IND]], [[TMP1]]
159157
; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <16 x i16> [[STEP_ADD]], [[TMP1]]
160158
; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <16 x i16> [[STEP_ADD_2]], [[TMP1]]
161-
; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT3]]
162-
; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT3]]
163-
; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[BROADCAST_SPLAT3]]
164-
; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[BROADCAST_SPLAT3]]
159+
; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[DOTSPLAT]]
160+
; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[DOTSPLAT]]
161+
; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[DOTSPLAT]]
162+
; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[DOTSPLAT]]
165163
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[INDEX]]
166164
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16
167165
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32

llvm/test/Transforms/LoopVectorize/X86/induction-step.ll

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
1717
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
1818
; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DOTCAST]], [[O_1]]
1919
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]]
20-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
21-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
22-
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
20+
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[BROADCAST_SPLAT]]
2321
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP2]]
2422
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2523
; CHECK: vector.body:
@@ -89,19 +87,17 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
8987
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
9088
; CHECK-NEXT: [[TMP1:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
9189
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]]
92-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
93-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
94-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
90+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
9591
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
96-
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
92+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[BROADCAST_SPLAT]]
9793
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP3]]
9894
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
9995
; CHECK: vector.body:
10096
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
10197
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
10298
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], [[TMP2]]
103-
; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT2]]
104-
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
99+
; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[DOTSPLAT]]
100+
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[DOTSPLAT]]
105101
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]]
106102
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4
107103
; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP6]], align 2

llvm/test/Transforms/LoopVectorize/blend-in-header.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,6 @@ define i64 @invar_cond(i1 %c) {
111111
; CHECK-NEXT: entry:
112112
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
113113
; CHECK: vector.ph:
114-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
115-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
116114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
117115
; CHECK: vector.body:
118116
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -164,8 +162,6 @@ define i64 @invar_cond_incoming_ops_reordered(i1 %c) {
164162
; CHECK-NEXT: entry:
165163
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
166164
; CHECK: vector.ph:
167-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
168-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
169165
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
170166
; CHECK: vector.body:
171167
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@ define void @test(ptr %dst) personality ptr null {
1818
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1919
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 160, [[STEP]]
2020
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> splat (i32 4), [[BROADCAST_SPLAT]]
21-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
22-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
23-
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
21+
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[BROADCAST_SPLAT]]
2422
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]]
2523
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2624
; CHECK: [[VECTOR_BODY]]:

0 commit comments

Comments
 (0)