Skip to content

Commit 246913d

Browse files
committed
[VPlan] Refine check for preserving uniformity
The PreservesUniformity lambda in isSingleScalar can be improved by taking inspiration from ValueTracking's isNotCrossLaneOperation, by also forbidding operations that are not uniform across all lanes.
1 parent 3e69545 commit 246913d

File tree

5 files changed

+153
-154
lines changed

5 files changed

+153
-154
lines changed

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,37 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
4040
/// Returns true if \p VPV is a single scalar, either because it produces the
4141
/// same value for all lanes or only has its first lane used.
4242
inline bool isSingleScalar(const VPValue *VPV) {
43-
auto PreservesUniformity = [](unsigned Opcode) -> bool {
44-
if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
43+
// A variant of ValueTracking's isNotCrossLaneOperation that checks that the
44+
// operation is uniform across lanes.
45+
auto PreservesUniformity = [](auto *V) {
46+
Intrinsic::ID ID;
47+
if (const auto *R = dyn_cast<VPWidenIntrinsicRecipe>(V))
48+
ID = R->getVectorIntrinsicID();
49+
if (const auto *R = dyn_cast<VPWidenCallRecipe>(V))
50+
ID = R->getCalledScalarFunction()->getIntrinsicID();
51+
if (const auto *R = dyn_cast<VPReplicateRecipe>(V))
52+
if (const auto *CI = dyn_cast<CallInst>(R->getUnderlyingInstr()))
53+
if (const auto *F = CI->getCalledFunction())
54+
ID = F->getIntrinsicID();
55+
if (isTriviallyVectorizable(ID))
4556
return true;
46-
switch (Opcode) {
47-
case Instruction::GetElementPtr:
48-
case Instruction::ICmp:
49-
case Instruction::FCmp:
50-
case VPInstruction::Broadcast:
51-
case VPInstruction::PtrAdd:
52-
return true;
53-
default:
57+
58+
switch (V->getOpcode()) {
59+
case Instruction::Call:
60+
case Instruction::Invoke:
61+
case Instruction::BitCast:
62+
case Instruction::ShuffleVector:
63+
case Instruction::InsertElement:
64+
case Instruction::ExtractElement:
65+
case VPInstruction::BuildVector:
66+
case VPInstruction::BuildStructVector:
67+
case VPInstruction::ExtractLane:
68+
case VPInstruction::FirstActiveLane:
69+
case VPInstruction::ExtractLastElement:
70+
case VPInstruction::ExtractPenultimateElement:
5471
return false;
72+
default:
73+
return true;
5574
}
5675
};
5776

@@ -66,19 +85,19 @@ inline bool isSingleScalar(const VPValue *VPV) {
6685
// lanes.
6786
if (RegionOfR && RegionOfR->isReplicator())
6887
return false;
69-
return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) &&
88+
return Rep->isSingleScalar() || (PreservesUniformity(Rep) &&
7089
all_of(Rep->operands(), isSingleScalar));
7190
}
7291
if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe,
7392
VPWidenSelectRecipe>(VPV))
7493
return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
7594
if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
76-
return PreservesUniformity(WidenR->getOpcode()) &&
95+
return PreservesUniformity(WidenR) &&
7796
all_of(WidenR->operands(), isSingleScalar);
7897
}
7998
if (auto *VPI = dyn_cast<VPInstruction>(VPV))
8099
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
81-
(PreservesUniformity(VPI->getOpcode()) &&
100+
(PreservesUniformity(VPI) &&
82101
all_of(VPI->operands(), isSingleScalar));
83102

84103
// VPExpandSCEVRecipes must be placed in the entry and are always uniform.

llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -82,90 +82,56 @@ define void @replicate_udiv_with_only_first_lane_used2(i32 %x, ptr %dst, i64 %d)
8282
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
8383
; CHECK: [[PRED_UDIV_IF]]:
8484
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]]
85-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0
8685
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE]]
8786
; CHECK: [[PRED_UDIV_CONTINUE]]:
88-
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_UDIV_IF]] ]
87+
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP2]], %[[PRED_UDIV_IF]] ]
8988
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
9089
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
9190
; CHECK: [[PRED_UDIV_IF1]]:
9291
; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 99, [[D]]
93-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP6]], i32 1
9492
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE2]]
9593
; CHECK: [[PRED_UDIV_CONTINUE2]]:
96-
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ [[TMP4]], %[[PRED_UDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_UDIV_IF1]] ]
9794
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
9895
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
9996
; CHECK: [[PRED_UDIV_IF3]]:
10097
; CHECK-NEXT: [[TMP10:%.*]] = udiv i64 99, [[D]]
101-
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i64> [[TMP8]], i64 [[TMP10]], i32 2
10298
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE4]]
10399
; CHECK: [[PRED_UDIV_CONTINUE4]]:
104-
; CHECK-NEXT: [[TMP49:%.*]] = phi <4 x i64> [ [[TMP8]], %[[PRED_UDIV_CONTINUE2]] ], [ [[TMP34]], %[[PRED_UDIV_IF3]] ]
105100
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
106101
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
107102
; CHECK: [[PRED_UDIV_IF5]]:
108103
; CHECK-NEXT: [[TMP14:%.*]] = udiv i64 99, [[D]]
109-
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP49]], i64 [[TMP14]], i32 3
110104
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE6]]
111105
; CHECK: [[PRED_UDIV_CONTINUE6]]:
112-
; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i64> [ [[TMP49]], %[[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], %[[PRED_UDIV_IF5]] ]
113106
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
114107
; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
115108
; CHECK: [[PRED_UDIV_IF7]]:
116109
; CHECK-NEXT: [[TMP18:%.*]] = udiv i64 99, [[D]]
117-
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP18]], i32 0
118110
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE8]]
119111
; CHECK: [[PRED_UDIV_CONTINUE8]]:
120-
; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i64> [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP19]], %[[PRED_UDIV_IF7]] ]
112+
; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP18]], %[[PRED_UDIV_IF7]] ]
121113
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
122114
; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
123115
; CHECK: [[PRED_UDIV_IF9]]:
124116
; CHECK-NEXT: [[TMP22:%.*]] = udiv i64 99, [[D]]
125-
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP22]], i32 1
126117
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE10]]
127118
; CHECK: [[PRED_UDIV_CONTINUE10]]:
128-
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i64> [ [[TMP20]], %[[PRED_UDIV_CONTINUE8]] ], [ [[TMP23]], %[[PRED_UDIV_IF9]] ]
129119
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
130120
; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
131121
; CHECK: [[PRED_UDIV_IF11]]:
132122
; CHECK-NEXT: [[TMP26:%.*]] = udiv i64 99, [[D]]
133-
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i64> [[TMP24]], i64 [[TMP26]], i32 2
134123
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE12]]
135124
; CHECK: [[PRED_UDIV_CONTINUE12]]:
136-
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i64> [ [[TMP24]], %[[PRED_UDIV_CONTINUE10]] ], [ [[TMP27]], %[[PRED_UDIV_IF11]] ]
137125
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
138126
; CHECK-NEXT: br i1 [[TMP29]], label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
139127
; CHECK: [[PRED_UDIV_IF13]]:
140128
; CHECK-NEXT: [[TMP30:%.*]] = udiv i64 99, [[D]]
141-
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i64> [[TMP28]], i64 [[TMP30]], i32 3
142129
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE14]]
143130
; CHECK: [[PRED_UDIV_CONTINUE14]]:
144-
; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i64> [ [[TMP28]], %[[PRED_UDIV_CONTINUE12]] ], [ [[TMP31]], %[[PRED_UDIV_IF13]] ]
145-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP16]]
146-
; CHECK-NEXT: [[PREDPHI15:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP32]]
147-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 0
148-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP33]]
149-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 1
150-
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP35]]
151-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 2
152-
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP37]]
153-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3
154-
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP39]]
155-
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 0
156-
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP41]]
157-
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 1
158-
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP43]]
159-
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 2
131+
; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[C]], i64 0, i64 [[TMP3]]
132+
; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[C]], i64 0, i64 [[TMP15]]
160133
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP45]]
161-
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 3
162134
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP47]]
163-
; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2
164-
; CHECK-NEXT: store i16 0, ptr [[TMP36]], align 2
165-
; CHECK-NEXT: store i16 0, ptr [[TMP38]], align 2
166-
; CHECK-NEXT: store i16 0, ptr [[TMP40]], align 2
167-
; CHECK-NEXT: store i16 0, ptr [[TMP42]], align 2
168-
; CHECK-NEXT: store i16 0, ptr [[TMP44]], align 2
169135
; CHECK-NEXT: store i16 0, ptr [[TMP46]], align 2
170136
; CHECK-NEXT: store i16 0, ptr [[TMP48]], align 2
171137
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8

0 commit comments

Comments
 (0)