Skip to content

Commit e089d48

Browse files
committed
[VPlan] VPWidenGEPRecipe uses first lane of invariant indices (NFC)
Update VPWidenGEPRecipe::onlyFirstLaneUsed to also return true for index operands that are defined outside the loop regions when the base pointer is not loop-invariant.
1 parent 2849b12 commit e089d48

File tree

2 files changed

+67
-12
lines changed

2 files changed

+67
-12
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1697,7 +1697,10 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
16971697
bool onlyFirstLaneUsed(const VPValue *Op) const override {
16981698
assert(is_contained(operands(), Op) &&
16991699
"Op must be an operand of the recipe");
1700-
return Op == getOperand(0) && isPointerLoopInvariant();
1700+
if (Op == getOperand(0))
1701+
return isPointerLoopInvariant();
1702+
else
1703+
return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
17011704
}
17021705
};
17031706

llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,22 +66,19 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n
6666
; CHECK-NEXT: entry:
6767
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6868
; CHECK: vector.ph:
69-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
70-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
7169
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
7270
; CHECK: vector.body:
7371
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
7472
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8
75-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
76-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT1]], <4 x ptr> poison, <4 x i32> zeroinitializer
77-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[BROADCAST_SPLAT]], i32 0
78-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT2]], i64 [[TMP1]]
79-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]]
80-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr ptr, ptr [[TMP3]], i32 0
81-
; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP4]], align 8
73+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
74+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
75+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]]
76+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]]
77+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i32 0
78+
; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP3]], align 8
8279
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
83-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
84-
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
80+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
81+
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
8582
; CHECK: middle.block:
8683
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
8784
; CHECK: scalar.ph:
@@ -115,3 +112,58 @@ loop:
115112
exit:
116113
ret void
117114
}
115+
116+
; Widened GEP with two indices: %x is a function argument (defined outside
; the loop) and %iv is the loop induction variable. The base pointer %l is
; loaded inside the loop body. The expected vector body uses %x directly as
; a scalar i32 index of the wide GEP (vector.ph contains no broadcast of it),
; while the induction is passed as a <4 x i64> vector index.
define void @wide_gep_multiple_indices_some_invariant(ptr noalias %dst, ptr noalias %src, i32 %x) {
117+
; CHECK-LABEL: define void @wide_gep_multiple_indices_some_invariant
118+
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i32 [[X:%.*]]) {
119+
; CHECK-NEXT: entry:
120+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
121+
; CHECK: vector.ph:
122+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
123+
; CHECK: vector.body:
124+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
125+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
126+
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8
127+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
128+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
129+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [10 x float], <4 x ptr> [[BROADCAST_SPLAT]], i32 [[X]], <4 x i64> [[VEC_IND]]
130+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]]
131+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i32 0
132+
; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP3]], align 8
133+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
134+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
135+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
136+
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
137+
; CHECK: middle.block:
138+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
139+
; CHECK: scalar.ph:
140+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
141+
; CHECK-NEXT: br label [[LOOP:%.*]]
142+
; CHECK: loop:
143+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
144+
; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[SRC]], align 8
145+
; CHECK-NEXT: [[GEP_L:%.*]] = getelementptr [10 x float], ptr [[L]], i32 [[X]], i64 [[IV]]
146+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]]
147+
; CHECK-NEXT: store ptr [[GEP_L]], ptr [[GEP_DST]], align 8
148+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
149+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
150+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
151+
; CHECK: exit:
152+
; CHECK-NEXT: ret void
153+
;
154+
entry:
155+
br label %loop
156+
157+
loop:
158+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
159+
; The GEP base pointer is loaded inside the loop body.
%l = load ptr, ptr %src, align 8
160+
; First index %x comes from outside the loop; last index %iv changes every iteration.
%gep.l = getelementptr [10 x float], ptr %l, i32 %x, i64 %iv
161+
%gep.dst = getelementptr ptr, ptr %dst, i64 %iv
162+
; The computed pointer itself is the stored value.
store ptr %gep.l, ptr %gep.dst, align 8
163+
%iv.next = add nuw nsw i64 %iv, 1
164+
; Fixed trip count of 100 iterations.
%ec = icmp eq i64 %iv.next, 100
165+
br i1 %ec, label %exit, label %loop
166+
167+
exit:
168+
ret void
169+
}

0 commit comments

Comments
 (0)