Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 81 additions & 15 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

using namespace llvm;
using namespace VPlanPatternMatch;
using namespace SCEVPatternMatch;

bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
VPlan &Plan,
Expand Down Expand Up @@ -139,14 +140,77 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
return true;
}

// Check if a memory operation doesn't alias with memory operations in blocks
// between FirstBB and LastBB using scoped noalias metadata.
// For load hoisting, we only check writes in one direction.
// For store sinking, we check both reads and writes bidirectionally.
static bool canHoistOrSinkWithNoAliasCheck(
const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
bool CheckReads,
const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
/// Helper for extra no-alias checks when sinking stores. A recipe can be
/// skipped by the alias walk either because it is in a caller-provided set of
/// known-safe recipes, or because SCEV proves that it cannot overlap with the
/// group leader's store for any VF in the plan.
class SinkStoreInfo {
  /// Recipes that never need to be alias-checked.
  const SmallPtrSetImpl<VPRecipeBase *> &ExcludeRecipes;
  /// The store that other replicate recipes are compared against.
  VPReplicateRecipe &GroupLeader;
  ScalarEvolution &SE;
  const Loop &L;
  VPTypeAnalysis &TypeInfo;

  /// Return true if \p A and \p B are known to not alias for all VFs in the
  /// plan, checked via the constant distance between the two accesses.
  /// Returns false whenever no-alias cannot be proven.
  bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B) const {
    // Only store/store pairs are handled.
    if (A->getOpcode() != Instruction::Store ||
        B->getOpcode() != Instruction::Store)
      return false;

    // Operand 1 is used as the address of each store.
    VPValue *AddrA = A->getOperand(1);
    const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L);
    VPValue *AddrB = B->getOperand(1);
    const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L);
    if (isa<SCEVCouldNotCompute>(SCEVA) || isa<SCEVCouldNotCompute>(SCEVB))
      return false;

    // The difference between the two addresses must fold to a constant.
    const APInt *Distance;
    if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance)))
      return false;

    // Operand 0 is used as the stored value; its scalar type gives the size
    // of each individual store.
    const DataLayout &DL = SE.getDataLayout();
    Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
    uint64_t SizeA = DL.getTypeStoreSize(TyA);
    Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
    uint64_t SizeB = DL.getTypeStoreSize(TyB);

    // Use the maximum store size to ensure no overlap from either direction.
    // Currently only handles fixed sizes, as it is only used for
    // replicating VPReplicateRecipes.
    uint64_t MaxStoreSize = std::max(SizeA, SizeB);

    auto VFs = B->getParent()->getPlan()->vectorFactors();
    ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT);
    // getFixedValue() below is only meaningful for fixed element counts. Be
    // conservative and report "may alias" if the largest VF is scalable.
    if (MaxVF.isScalable())
      return false;

    // The accesses are disjoint for every VF if the absolute distance is at
    // least MaxVF * MaxStoreSize.
    return Distance->abs().uge(
        MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue());
  }

public:
  /// \p ExcludeRecipes is the set of recipes to always skip, \p GroupLeader
  /// is the store used as reference for the distance-based check. \p SE,
  /// \p L and \p TypeInfo are used to compute address SCEVs and store sizes.
  /// All arguments are held by reference and must outlive this object.
  SinkStoreInfo(const SmallPtrSetImpl<VPRecipeBase *> &ExcludeRecipes,
                VPReplicateRecipe &GroupLeader, ScalarEvolution &SE,
                const Loop &L, VPTypeAnalysis &TypeInfo)
      : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), SE(SE), L(L),
        TypeInfo(TypeInfo) {}

  /// Return true if \p R should be skipped during alias checking, either
  /// because it's in the exclude set or because no-alias can be proven via
  /// SCEV.
  bool shouldSkip(VPRecipeBase &R) const {
    auto *Store = dyn_cast<VPReplicateRecipe>(&R);
    return ExcludeRecipes.contains(&R) ||
           (Store && isNoAliasViaDistance(Store, &GroupLeader));
  }
};

/// Check if a memory operation doesn't alias with memory operations in blocks
/// between \p FirstBB and \p LastBB using scoped noalias metadata. If
/// \p SinkInfo is std::nullopt, only recipes that may write to memory are
/// checked (for load hoisting). Otherwise recipes that may read from or write
/// to memory are checked, and SCEV is used to prove no-alias between the group
/// leader and other replicate recipes (for store sinking).
static bool
canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc,
VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
std::optional<SinkStoreInfo> SinkInfo = {}) {
bool CheckReads = SinkInfo.has_value();
if (!MemLoc.AATags.Scope)
return false;

Expand All @@ -158,7 +222,7 @@ static bool canHoistOrSinkWithNoAliasCheck(
"Expected at most one successor in block chain");
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
if (ExcludeRecipes && ExcludeRecipes->contains(&R))
if (SinkInfo && SinkInfo->shouldSkip(R))
continue;

// Skip recipes that don't need checking.
Expand Down Expand Up @@ -4273,8 +4337,7 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,

// Check that the load doesn't alias with stores between first and last.
auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB,
/*CheckReads=*/false))
if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB))
continue;

// Collect common metadata from all loads in the group.
Expand All @@ -4301,7 +4364,9 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
}

static bool
canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink,
ScalarEvolution &SE, const Loop &L,
VPTypeAnalysis &TypeInfo) {
auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front());
if (!StoreLoc || !StoreLoc->AATags.Scope)
return false;
Expand All @@ -4313,8 +4378,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {

VPBasicBlock *FirstBB = StoresToSink.front()->getParent();
VPBasicBlock *LastBB = StoresToSink.back()->getParent();
return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB,
/*CheckReads=*/true, &StoresToSinkSet);
SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo);
return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, SinkInfo);
}

void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
Expand All @@ -4325,13 +4390,14 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
return;

VPDominatorTree VPDT(Plan);
VPTypeAnalysis TypeInfo(Plan);

for (auto &Group : Groups) {
sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
return VPDT.properlyDominates(A, B);
});

if (!canSinkStoreWithNoAliasCheck(Group))
if (!canSinkStoreWithNoAliasCheck(Group, SE, *L, TypeInfo))
continue;

// Use the last (most dominated) store's location for the unconditional
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16
Expand All @@ -781,42 +781,30 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]]
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true)
; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00)
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0
; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0
; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[WIDE_LOAD]], <2 x double> [[TMP34]]
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0
; CHECK-NEXT: store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP20]], i32 1
; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP24]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1
; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_IF2]]:
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1
; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP21]], i64 16
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP35]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_CONTINUE3]]:
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
; CHECK: [[PRED_STORE_IF4]]:
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
; CHECK: [[PRED_STORE_CONTINUE5]]:
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
; CHECK: [[PRED_STORE_IF6]]:
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; CHECK: [[PRED_STORE_CONTINUE7]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]]
Expand Down
Loading