Skip to content

Commit 66b4209

Browse files
artagnon, fhahn, lukel97
committed
[VPlan] Fix GEP source-element-ty issues
Co-authored-by: Florian Hahn <[email protected]>
Co-authored-by: Luke Lau <[email protected]>
1 parent 1cbd96f commit 66b4209

File tree

3 files changed

+29
-6
lines changed

3 files changed

+29
-6
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1902,6 +1902,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
19021902

19031903
void execute(VPTransformState &State) override;
19041904

1905+
/// Accessor for the type stored in IndexedTy, exposed as this recipe's
/// source element type.
Type *getSourceElementType() const {
  return IndexedTy;
}
1906+
19051907
bool onlyFirstLaneUsed(const VPValue *Op) const override {
19061908
assert(is_contained(operands(), Op) &&
19071909
"Op must be an operand of the recipe");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,6 +1986,22 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
19861986
.Default([](auto *) { return std::nullopt; });
19871987
}
19881988

1989+
/// If recipe \p R will lower to a GEP with a non-i8 source element type,
1990+
/// return that source element type.
1991+
static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
1992+
// All VPInstructions that lower to GEPs must have the i8 source element
1993+
// type (as they are PtrAdds), so we omit it.
1994+
return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
1995+
.Case<VPReplicateRecipe, VPWidenGEPRecipe>([](auto *I) -> Type * {
1996+
if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
1997+
return GEP->getSourceElementType();
1998+
return nullptr;
1999+
})
2000+
.Case<VPVectorPointerRecipe>(
2001+
[](auto *I) { return I->getSourceElementType(); })
2002+
.Default([](auto *) { return nullptr; });
2003+
}
2004+
19892005
/// Returns true if recipe \p Def can be safely handed for CSE.
19902006
static bool canHandle(const VPSingleDefRecipe *Def) {
19912007
// We can extend the list of handled recipes in the future,
@@ -2015,8 +2031,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
20152031
VPTypeAnalysis TypeInfo(*Plan);
20162032
hash_code Result = hash_combine(
20172033
Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
2018-
TypeInfo.inferScalarType(Def), vputils::isSingleScalar(Def),
2019-
hash_combine_range(Def->operands()));
2034+
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2035+
vputils::isSingleScalar(Def), hash_combine_range(Def->operands()));
20202036
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(Def))
20212037
if (RFlags->hasPredicate())
20222038
return hash_combine(Result, RFlags->getPredicate());
@@ -2029,6 +2045,7 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
20292045
return L == R;
20302046
if (L->getVPDefID() != R->getVPDefID() ||
20312047
getOpcodeOrIntrinsicID(L) != getOpcodeOrIntrinsicID(R) ||
2048+
getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
20322049
vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
20332050
!equal(L->operands(), R->operands()))
20342051
return false;

llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ define void @cse_replicate_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i
1919
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 4
2020
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
2121
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
22-
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
23-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2
22+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]]
23+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i32 4
24+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2
25+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
2426
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
2527
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4
2628
; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP3]], align 4
@@ -74,14 +76,16 @@ define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n
7476
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
7577
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], <4 x i64> [[VEC_IND]]
7678
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <4 x i64> [[STEP_ADD]]
79+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[VEC_IND]]
80+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[STEP_ADD]]
7781
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX1]]
7882
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i32 4
7983
; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8
8084
; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP5]], align 8
8185
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX1]]
8286
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 4
83-
; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP6]], align 8
84-
; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP8]], align 8
87+
; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP6]], align 8
88+
; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP8]], align 8
8589
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8
8690
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
8791
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

0 commit comments

Comments
 (0)