Skip to content

Commit 9bfaf12

Browse files
authored
[VPlan] Handle more replicates in isUniformAcrossVFsAndUFs (llvm#162342)
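A single-scalar replicate recipe that has no side effects and whose operands are all uniform is itself uniform across VFs and UFs. Assumes and stores do have side effects, but are special-cased because they are known to be uniform.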
1 parent 5b5eacc commit 9bfaf12

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -113,12 +113,12 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
113113
return TypeSwitch<const VPRecipeBase *, bool>(R)
114114
.Case<VPDerivedIVRecipe>([](const auto *R) { return true; })
115115
.Case<VPReplicateRecipe>([](const auto *R) {
116-
// Loads and stores that are uniform across VF lanes are handled by
117-
// VPReplicateRecipe.IsUniform. They are also uniform across UF parts if
118-
// all their operands are invariant.
119-
// TODO: Further relax the restrictions.
116+
// Be conservative about side-effects, except for the
117+
// known-side-effecting assumes and stores, which we know will be
118+
// uniform.
120119
return R->isSingleScalar() &&
121-
(isa<LoadInst, StoreInst>(R->getUnderlyingValue())) &&
120+
(!R->mayHaveSideEffects() ||
121+
isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
122122
all_of(R->operands(), isUniformAcrossVFsAndUFs);
123123
})
124124
.Case<VPInstruction>([](const auto *VPI) {

llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll

Lines changed: 19 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -66,8 +66,9 @@ define void @replicating_load_used_as_store_addr_2(ptr noalias %invar.dst, ptr n
6666
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6767
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
6868
; CHECK: [[MIDDLE_BLOCK]]:
69-
; CHECK-NEXT: br [[EXIT:label %.*]]
70-
; CHECK: [[SCALAR_PH:.*:]]
69+
; CHECK-NEXT: br label %[[EXIT:.*]]
70+
; CHECK: [[EXIT]]:
71+
; CHECK-NEXT: ret void
7172
;
7273
entry:
7374
br label %loop
@@ -107,15 +108,15 @@ define void @replicating_load_used_as_store_addr_3(ptr noalias %src, ptr noalias
107108
; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
108109
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
109110
; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1
110-
; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1
111111
; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP5]] to i8
112112
; CHECK-NEXT: store i8 [[TMP8]], ptr [[INVAR_DST]], align 1
113113
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
114114
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
115115
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
116116
; CHECK: [[MIDDLE_BLOCK]]:
117-
; CHECK-NEXT: br [[EXIT:label %.*]]
118-
; CHECK: [[SCALAR_PH:.*:]]
117+
; CHECK-NEXT: br label %[[EXIT:.*]]
118+
; CHECK: [[EXIT]]:
119+
; CHECK-NEXT: ret void
119120
;
120121
entry:
121122
br label %loop
@@ -466,21 +467,21 @@ define void @test_prefer_vector_addressing(ptr %start, ptr %ms, ptr noalias %src
466467
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP11]]
467468
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP12]]
468469
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP13]]
469-
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[NEXT_GEP]], align 1, !tbaa [[LONG_LONG_TBAA14:![0-9]+]]
470-
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[NEXT_GEP3]], align 1, !tbaa [[LONG_LONG_TBAA14]]
471-
; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[NEXT_GEP4]], align 1, !tbaa [[LONG_LONG_TBAA14]]
472-
; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[NEXT_GEP5]], align 1, !tbaa [[LONG_LONG_TBAA14]]
470+
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[NEXT_GEP]], align 1, !tbaa [[LONG_LONG_TBAA12:![0-9]+]]
471+
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[NEXT_GEP3]], align 1, !tbaa [[LONG_LONG_TBAA12]]
472+
; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[NEXT_GEP4]], align 1, !tbaa [[LONG_LONG_TBAA12]]
473+
; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[NEXT_GEP5]], align 1, !tbaa [[LONG_LONG_TBAA12]]
473474
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]]
474475
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP15]]
475476
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP16]]
476477
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP17]]
477-
; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4, !tbaa [[INT_TBAA19:![0-9]+]]
478-
; CHECK-NEXT: store i32 0, ptr [[TMP19]], align 4, !tbaa [[INT_TBAA19]]
479-
; CHECK-NEXT: store i32 0, ptr [[TMP20]], align 4, !tbaa [[INT_TBAA19]]
480-
; CHECK-NEXT: store i32 0, ptr [[TMP21]], align 4, !tbaa [[INT_TBAA19]]
478+
; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4, !tbaa [[INT_TBAA17:![0-9]+]]
479+
; CHECK-NEXT: store i32 0, ptr [[TMP19]], align 4, !tbaa [[INT_TBAA17]]
480+
; CHECK-NEXT: store i32 0, ptr [[TMP20]], align 4, !tbaa [[INT_TBAA17]]
481+
; CHECK-NEXT: store i32 0, ptr [[TMP21]], align 4, !tbaa [[INT_TBAA17]]
481482
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
482483
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
483-
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
484+
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
484485
; CHECK: [[MIDDLE_BLOCK]]:
485486
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]]
486487
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
@@ -581,10 +582,11 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
581582
; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]]
582583
; CHECK-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]])
583584
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
584-
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
585+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
585586
; CHECK: [[MIDDLE_BLOCK]]:
586-
; CHECK-NEXT: br [[EXIT:label %.*]]
587-
; CHECK: [[SCALAR_PH:.*:]]
587+
; CHECK-NEXT: br label %[[EXIT:.*]]
588+
; CHECK: [[EXIT]]:
589+
; CHECK-NEXT: ret double [[TMP21]]
588590
;
589591
entry:
590592
br label %loop

0 commit comments

Comments
 (0)