Skip to content

Commit 82dfa00

Browse files
committed
[LV] Always add uniform pointers to uniforms list.
Always add pointers proven to be uniform via Legal/SCEV to the worklist. This extends the existing logic to handle a few more pointers known to be uniform. (cherry picked from commit 0c028bb)
1 parent af0ba32 commit 82dfa00

File tree

2 files changed

+16
-27
lines changed

2 files changed

+16
-27
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3316,6 +3316,11 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
33163316
if (!Ptr)
33173317
continue;
33183318

3319+
// If the pointer can be proven to be uniform, always add it to the
3320+
// worklist.
3321+
if (isa<Instruction>(Ptr) && Legal->isUniform(Ptr, VF))
3322+
AddToWorklistIfAllowed(cast<Instruction>(Ptr));
3323+
33193324
if (IsUniformMemOpUse(&I))
33203325
AddToWorklistIfAllowed(&I);
33213326

llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll

Lines changed: 11 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -155,34 +155,18 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
155155
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
156156
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
157157
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
158-
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
159-
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
160158
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
161-
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
162-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer
163-
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
164-
; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[TMP0]], 1
165-
; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP1]], 1
166-
; CHECK-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP2]], 1
167-
; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP3]], 1
168-
; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i8>
169-
; CHECK-NEXT: [[TMP13:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i8>
170-
; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP8]] to i64
171-
; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP9]] to i64
172-
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
173-
; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP12]] to i64
174-
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP14]]
175-
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP15]]
176-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP16]]
177-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
178-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i8> [[TMP11]], i32 0
179-
; CHECK-NEXT: store i8 [[TMP20]], ptr [[TMP18]], align 1
180-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
181-
; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP19]], align 1
182-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i8> [[TMP13]], i32 0
183-
; CHECK-NEXT: store i8 [[TMP26]], ptr [[TMP21]], align 1
184-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i8> [[TMP13]], i32 1
185-
; CHECK-NEXT: store i8 [[TMP23]], ptr [[TMP25]], align 1
159+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
160+
; CHECK-NEXT: [[TMP10:%.*]] = lshr i32 [[INDEX]], 1
161+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
162+
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
163+
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP10]] to i64
164+
; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP3]] to i64
165+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP5]]
166+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP6]]
167+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1
168+
; CHECK-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
169+
; CHECK-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
186170
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
187171
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
188172
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128

0 commit comments

Comments (0)