Skip to content

Commit 89ca371

Browse files
committed
[VPlan] Optimize WidenGEP with single-scalar operands
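This patch is similar in spirit to b0b4616 ([VPlan] Handle single-scalar conds in VPWidenSelectRecipe): VPWidenGEPRecipe::execute now checks whether all operands are single scalars (vputils::isSingleScalar) instead of whether they are all defined outside the loop regions, so the areAllOperandsInvariant helper is removed. In the updated tests, such GEPs are emitted as a scalar GEP whose result is then broadcast, rather than broadcasting an operand and emitting a vector GEP.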
1 parent f9360e3 commit 89ca371

File tree

4 files changed

+13
-19
lines changed

4 files changed

+13
-19
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,12 +1786,6 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
17861786
return getOperand(I + 1)->isDefinedOutsideLoopRegions();
17871787
}
17881788

1789-
bool areAllOperandsInvariant() const {
1790-
return all_of(operands(), [](VPValue *Op) {
1791-
return Op->isDefinedOutsideLoopRegions();
1792-
});
1793-
}
1794-
17951789
public:
17961790
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands)
17971791
: VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2507,8 +2507,8 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
25072507
// is vector-typed. Thus, to keep the representation compact, we only use
25082508
// vector-typed operands for loop-varying values.
25092509

2510-
if (areAllOperandsInvariant()) {
2511-
// If we are vectorizing, but the GEP has only loop-invariant operands,
2510+
if (all_of(operands(), vputils::isSingleScalar)) {
2511+
// If we are vectorizing, but the GEP has only single-scalar operands,
25122512
// the GEP we build (by only using vector-typed operands for
25132513
// loop-varying values) would be a scalar pointer. Thus, to ensure we
25142514
// produce a vector of pointers, we need to either arbitrarily pick an

llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
6363
; CHECK-NEXT: store i32 [[STORE]], ptr [[NBRBOXES]], align 4
6464
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
6565
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]]
66-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
66+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
6767
; CHECK: exit:
6868
; CHECK-NEXT: ret void
6969
;
@@ -114,7 +114,7 @@ define void @predicated_strided_store(ptr %start) {
114114
; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
115115
; RVA23-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
116116
; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
117-
; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
117+
; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
118118
; RVA23: middle.block:
119119
; RVA23-NEXT: br label [[LOOP:%.*]]
120120
; RVA23: exit:
@@ -141,7 +141,7 @@ define void @predicated_strided_store(ptr %start) {
141141
; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
142142
; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
143143
; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
144-
; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
144+
; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
145145
; RVA23ZVL1024B: middle.block:
146146
; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]]
147147
; RVA23ZVL1024B: exit:
@@ -185,16 +185,16 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no
185185
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]]
186186
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
187187
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4
188-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
189-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
190-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
188+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]]
189+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP8]], i64 0
190+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <vscale x 2 x ptr> [[DOTSPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
191191
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
192192
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
193193
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
194194
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]]
195195
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
196196
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
197-
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
197+
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
198198
; CHECK: middle.block:
199199
; CHECK-NEXT: br label [[LOOP:%.*]]
200200
; CHECK: exit:

llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n
5555
; CHECK: vector.body:
5656
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5757
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8
58-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
59-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
60-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]]
58+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[N]]
59+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0
60+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
6161
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]]
62-
; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8
62+
; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP2]], align 8
6363
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6464
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6565
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]

0 commit comments

Comments
 (0)