Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 35 additions & 11 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,13 @@ class MemoryDepChecker {
MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
DominatorTree *DT, const Loop *L,
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
unsigned MaxTargetVectorWidthInBits)
unsigned MaxTargetVectorWidthInBits,
bool AllowNonPow2StoreLoadForwardDistance)
: PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
SymbolicStrides(SymbolicStrides),
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits),
AllowNonPow2StoreLoadForwardDistance(
AllowNonPow2StoreLoadForwardDistance) {}

/// Register the location (instructions are given increasing numbers)
/// of a write access.
Expand Down Expand Up @@ -223,17 +226,29 @@ class MemoryDepChecker {

/// Return true if there are no store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
return MaxStoreLoadForwardSafeDistanceInBits ==
std::numeric_limits<uint64_t>::max();
return MaxPowerOf2StoreLoadForwardSafeDistanceInBits ==
std::numeric_limits<uint64_t>::max() &&
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits ==
std::numeric_limits<uint64_t>::max();
}

/// Return safe power-of-2 number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits.
uint64_t getStoreLoadForwardSafeDistanceInBits() const {
/// Return safe number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits (power-of-2).
uint64_t getPowerOf2StoreLoadForwardSafeDistanceInBits() const {
assert(!isSafeForAnyStoreLoadForwardDistances() &&
"Expected the distance, that prevent store-load forwarding, to be "
"set.");
return MaxStoreLoadForwardSafeDistanceInBits;
return MaxPowerOf2StoreLoadForwardSafeDistanceInBits;
}

/// Return safe number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits
/// (non-power-of-2).
uint64_t getNonPowerOf2StoreLoadForwardSafeDistanceInBits() const {
assert(!isSafeForAnyStoreLoadForwardDistances() &&
"Expected the distance, that prevent store-load forwarding, to be "
"set.");
return MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits;
}

/// In same cases when the dependency check fails we can still
Expand Down Expand Up @@ -337,9 +352,14 @@ class MemoryDepChecker {
/// restrictive.
uint64_t MaxSafeVectorWidthInBits = -1U;

/// Maximum power-of-2 number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits.
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
/// Maximum number of elements, which do not prevent store-load forwarding,
/// multiplied by the size of the elements in bits (power-of-2).
uint64_t MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();

/// Maximum number of elements, which do not prevent store-load forwarding,
/// multiplied by the size of the elements in bits (non-power-of-2).
uint64_t MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();
Comment on lines +355 to 363
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems redundant to have both, when only one is ever going to be used.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't know before the vectorization which one is going to be used. Need to keep both


/// Whether we should try to vectorize the loop with runtime checks, if the
Expand All @@ -366,6 +386,10 @@ class MemoryDepChecker {
/// backwards-vectorizable or unknown (triggering a runtime check).
unsigned MaxTargetVectorWidthInBits = 0;

/// True if current target supports non-power-of-2 dependence distances,
/// allows to support non-power-of-2 store-load forwarding distance analysis.
bool AllowNonPow2StoreLoadForwardDistance = false;

/// Mapping of SCEV expressions to their expanded pointer bounds (pair of
/// start and end pointer expressions).
DenseMap<std::pair<const SCEV *, Type *>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ class LoopVectorizationLegality {
/// Return safe power-of-2 number of elements, which do not prevent store-load
/// forwarding and safe to operate simultaneously.
uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
return LAI->getDepChecker().getPowerOf2StoreLoadForwardSafeDistanceInBits();
}

/// Returns true if vector representation of the instruction \p I
Expand Down
62 changes: 55 additions & 7 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1838,7 +1838,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// Maximum vector factor.
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
MaxStoreLoadForwardSafeDistanceInBits);
MaxPowerOf2StoreLoadForwardSafeDistanceInBits);
uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 = 0;

// Compute the smallest VF at which the store and load would be misaligned.
for (uint64_t VF = 2 * TypeByteSize;
Expand All @@ -1850,24 +1851,68 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
break;
}
}
// If target supports non-power-of-2 store-load forwarding distances, then it
// supports non-power-of-2 vector factor. So, we iterate in a backward order
// to find largest VF, which allows aligned stores-loads or the number of
// iterations between conflicting memory addresses is not less than 8
// (NumItersForStoreLoadThroughMemory). If the HW supports non-power-of-2
// store-load forwarding distance, we can choose any vector factor, which is
// the whole divider of the MaxVFWithoutSLForwardIssuesNonPowerOf2. Say, if
// MaxVFWithoutSLForwardIssuesNonPowerOf2 is 9, then we can use vector factors
// 3 and 9. If it is 6, we can use vector factors 2, 3, 6. All these are safe.
if (AllowNonPow2StoreLoadForwardDistance) {
MaxVFWithoutSLForwardIssuesNonPowerOf2 =
std::min(8 * VectorizerParams::MaxVectorWidth / TypeByteSize,
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits);

const bool IsSafeForAnyStoreLoadForwardDistances =
isSafeForAnyStoreLoadForwardDistances();
for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we count backwards here while forwards for the power-of-2 case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The power-of-2 case tries to find the minimal supported vector factor. For non-power-of-2 it tries to find the largest (but still legal) dep distance, so it goes backwards

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I see. But then it behaves different to MaxVFWithoutSLForwardIssuesPowerOf2. With MaxVFWithoutSLForwardIssuesPowerOf2 limits the Max VF we can use and we can also use any VF between 1 and MaxVF.

Is MaxVFWithoutSLForwardIssuesNonPowerOf2 a single non-power-of-2 VF we can use, but other VFs between 1 .. MaxVFWithoutSLForwardIssuesNonPowerOf2 may not be used?

Am I understanding correctly for example with, Max pow2 VF = 2, MaxNonPowOf2 VF = 9, LV can either chose 2 or 16 (with limiting VF to 9)?

If that's the case, I think it. would be good to update the comment for MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits to make this difference to the power-of-2 variant clear, as they would behave quite differently.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I see. But then it behaves different to MaxVFWithoutSLForwardIssuesPowerOf2. With MaxVFWithoutSLForwardIssuesPowerOf2 limits the Max VF we can use and we can also use any VF between 1 and MaxVF.

Not quite so. Any power-of-2 VF, but not any VF.

Is MaxVFWithoutSLForwardIssuesNonPowerOf2 a single non-power-of-2 VF we can use, but other VFs between 1 .. MaxVFWithoutSLForwardIssuesNonPowerOf2 may not be used?

Not necessary. We can use any whole divider of the MaxVFWithoutSLForwardIssuesNonPowerOf2. Say, if MaxVFWithoutSLForwardIssuesNonPowerOf2 is 9, then we can use 3 and 9. If it is 6, we can use 2, 3, 6. All these are safe.

Am I understanding correctly for example with, Max pow2 VF = 2, MaxNonPowOf2 VF = 9, LV can either chose 2 or 16 (with limiting VF to 9)?

The vector factor can be 2, 4, 8 or 16. But with non-power-of-2 we need an extra check ( or special instruction), that the number of the processed elements is limited by 9 or 3 elements only.

If that's the case, I think it. would be good to update the comment for MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits to make this difference to the power-of-2 variant clear, as they would behave quite differently.

Suggestions? Any preferences here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestions? Any preferences here?

Thanks for the extra info. I think it would be helpful to update the comment to. include a generalized variant of the extra info you shared regarding what VFs can be picked (highlighted below)

Not necessary. We can use any whole divider of the MaxVFWithoutSLForwardIssuesNonPowerOf2. Say, if MaxVFWithoutSLForwardIssuesNonPowerOf2 is 9, then we can use 3 and 9. If it is 6, we can use 2, 3, 6. All these are safe.

I guess most RISCV HW with EVL support will support store-load forwarding with non-power-of-2 distances?

VF > MaxVFWithoutSLForwardIssuesPowerOf2; VF -= TypeByteSize) {
if (Distance % VF == 0 ||
Distance / VF >= NumItersForStoreLoadThroughMemory) {
uint64_t GCD =
IsSafeForAnyStoreLoadForwardDistances
? VF
: std::gcd(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits,
VF);
MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD;
break;
}
}
}

if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize &&
MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize) {
LLVM_DEBUG(
dbgs() << "LAA: Distance " << Distance
<< " that could cause a store-load forwarding conflict\n");
return true;
}

// Handle non-power-2 store-load forwarding distance, power-of-2 distance can
// be calculated.
if (AllowNonPow2StoreLoadForwardDistance && CommonStride &&
MaxVFWithoutSLForwardIssuesNonPowerOf2 <
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits &&
MaxVFWithoutSLForwardIssuesNonPowerOf2 !=
8 * VectorizerParams::MaxVectorWidth / TypeByteSize) {
uint64_t MaxVF = MaxVFWithoutSLForwardIssuesNonPowerOf2 / CommonStride;
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to compute this separately? Would it instead be possible to always compute the non-power-of-2 version and then have users convert it to the closest power-of-2 if that's what they need?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, it won't work. I tried this, but there might be different results because of CommonStride value

std::min(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
}

if (CommonStride &&
MaxVFWithoutSLForwardIssuesPowerOf2 <
MaxStoreLoadForwardSafeDistanceInBits &&
MaxPowerOf2StoreLoadForwardSafeDistanceInBits &&
MaxVFWithoutSLForwardIssuesPowerOf2 !=
VectorizerParams::MaxVectorWidth * TypeByteSize) {
uint64_t MaxVF =
bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
MaxStoreLoadForwardSafeDistanceInBits =
std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
std::min(MaxPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
}
return false;
}
Expand Down Expand Up @@ -3034,7 +3079,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;

DepChecker = std::make_unique<MemoryDepChecker>(
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits,
TTI && TTI->hasActiveVectorLength());
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
if (canAnalyzeLoop())
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
Expand All @@ -3048,7 +3094,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS << " with a maximum safe vector width of "
<< DC.getMaxSafeVectorWidthInBits() << " bits";
if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
uint64_t SLDist = DC.getNonPowerOf2StoreLoadForwardSafeDistanceInBits();
if (SLDist == std::numeric_limits<uint64_t>::max())
SLDist = DC.getPowerOf2StoreLoadForwardSafeDistanceInBits();
Comment on lines +3097 to +3099
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic would be eliminated by not having two different fields.

OS << ", with a maximum safe store-load forward width of " << SLDist
<< " bits";
}
Expand Down
Loading