
Commit 7f0bf5f

DSE: lift limitation on sizes being non-scalable
As AliasAnalysis now has support for scalable sizes, lift the limitation on analyzing scalable sizes in DeadStoreElimination.
1 parent 4e8eabd
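
Until now, the overwrite check in DSEState bailed out with OW_Unknown whenever either store had a scalable size, so even trivially redundant scalable stores survived. A minimal sketch of the kind of case this lifts (the function name is illustrative, not taken from the commit); running it through opt -passes=dse should now delete the first store, since both sizes scale by the same runtime vscale and 16 * vscale bytes at an address are always covered by 32 * vscale bytes at the same address:

define void @redundant_scalable_store(ptr %p) {
  ; Dead: completely overwritten by the wider scalable store below.
  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 16
  store <vscale x 4 x i64> zeroinitializer, ptr %p, align 32
  ret void
}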

File tree: 2 files changed (+60, -22 lines)

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 57 additions & 13 deletions
@@ -271,6 +271,54 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
   return OW_Unknown;
 }
 
+// Given a fixed/scalable LocationSize for DeadSize, we compute the
+// upper-range(DeadSize), by factoring in VScale.
+uint64_t getDeadSizeFactoringVScale(const LocationSize &DeadSz,
+                                    const Function &F) {
+  APInt DeadSize = APInt(64, DeadSz.getValue().getKnownMinValue());
+  ConstantRange CR = getVScaleRange(&F, 64);
+  if (DeadSz.isScalable()) {
+    bool Overflow;
+    APInt UpperRange = CR.getUnsignedMax().umul_ov(DeadSize, Overflow);
+    if (!Overflow)
+      DeadSize = UpperRange;
+  }
+  return DeadSize.getZExtValue();
+}
+
+// Given fixed/scalable LocationSizes for KillingSize and DeadSize, we compute
+// the lower-range(KillingSize) and upper-range(DeadSize), by factoring in
+// VScale.
+std::pair<uint64_t, uint64_t>
+getSizesFactoringVScale(const LocationSize &KillingSz,
+                        const LocationSize &DeadSz, const Function &F) {
+  APInt KillingSize = APInt(64, KillingSz.getValue().getKnownMinValue());
+  APInt DeadSize = APInt(64, DeadSz.getValue().getKnownMinValue());
+
+  ConstantRange CR = getVScaleRange(&F, 64);
+  bool OverflowL, OverflowU;
+  if (KillingSz.isScalable() && DeadSz.isScalable()) {
+    // We have a special-case when both are scalable, so we ensure that we don't
+    // set one of the values, if UpperRange overflows but LowerRange doesn't, or
+    // vice-versa.
+    APInt LowerRange = CR.getUnsignedMin().umul_ov(KillingSize, OverflowL);
+    APInt UpperRange = CR.getUnsignedMax().umul_ov(DeadSize, OverflowU);
+    if (!OverflowL && !OverflowU) {
+      KillingSize = LowerRange;
+      DeadSize = UpperRange;
+    }
+  } else if (KillingSz.isScalable()) {
+    APInt LowerRange = CR.getUnsignedMin().umul_ov(KillingSize, OverflowL);
+    if (!OverflowL)
+      KillingSize = LowerRange;
+  } else if (DeadSz.isScalable()) {
+    APInt UpperRange = CR.getUnsignedMax().umul_ov(DeadSize, OverflowU);
+    if (!OverflowU)
+      DeadSize = UpperRange;
+  }
+  return {KillingSize.getZExtValue(), DeadSize.getZExtValue()};
+}
+
 /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
 /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
 /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
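
As a worked example of the bounds these helpers produce (an illustration of mine, not part of the diff): in a function annotated vscale_range(1, 2), a scalable killing size is scaled by the minimum vscale, giving the least number of bytes the store is guaranteed to write, while a fixed dead size is taken as-is.

; Assumes vscale_range(1, 2), as in the updated tests below.
define void @lower_bound_sketch(ptr %p) vscale_range(1, 2) {
  %q = getelementptr i64, ptr %p, i64 2   ; byte offset 16
  ; Dead: fixed 16 bytes, occupying [16, 32).
  store <2 x i64> zeroinitializer, ptr %q, align 16
  ; Killing: lower-range = vscale_min(1) * 32 = 32 bytes, so it is
  ; guaranteed to write [0, 32), which covers the dead store.
  store <vscale x 4 x i64> zeroinitializer, ptr %p, align 32
  ret void
}

If a multiplication would overflow, the helpers keep the known-minimum size instead.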
@@ -285,9 +333,11 @@ static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
                                           const MemoryLocation &DeadLoc,
                                           int64_t KillingOff, int64_t DeadOff,
                                           Instruction *DeadI,
-                                          InstOverlapIntervalsTy &IOL) {
-  const uint64_t KillingSize = KillingLoc.Size.getValue();
-  const uint64_t DeadSize = DeadLoc.Size.getValue();
+                                          InstOverlapIntervalsTy &IOL,
+                                          const Function &F) {
+  auto [KillingSize, DeadSize] =
+      getSizesFactoringVScale(KillingLoc.Size, DeadLoc.Size, F);
+
   // We may now overlap, although the overlap is not complete. There might also
   // be other incomplete overlaps, and together, they might cover the complete
   // dead store.
@@ -1063,15 +1113,9 @@ struct DSEState {
       return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
     }
 
-    const TypeSize KillingSize = KillingLocSize.getValue();
-    const TypeSize DeadSize = DeadLoc.Size.getValue();
-    // Bail on doing Size comparison which depends on AA for now
-    // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
-    const bool AnyScalable =
-        DeadSize.isScalable() || KillingLocSize.isScalable();
+    auto [KillingSize, DeadSize] =
+        getSizesFactoringVScale(KillingLocSize, DeadLoc.Size, F);
 
-    if (AnyScalable)
-      return OW_Unknown;
     // Query the alias information
     AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
 
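With the early bail-out gone, the mirror case also goes through this path: when the dead store is the scalable one, its size is scaled by the maximum vscale, so DSE only treats it as fully overwritten if even its largest possible extent is covered. A sketch under the same vscale_range(1, 2) assumption (again mine, not from the diff):

define void @upper_bound_sketch(ptr %p) vscale_range(1, 2) {
  %q = getelementptr i64, ptr %p, i64 2   ; byte offset 16
  ; Dead: upper-range = vscale_max(2) * 16 = 32 bytes,
  ; occupying at most [16, 48).
  store <vscale x 2 x i64> zeroinitializer, ptr %q, align 16
  ; Killing: fixed 64 bytes, writing [0, 64), which covers [16, 48).
  store <8 x i64> zeroinitializer, ptr %p, align 64
  ret void
}
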
@@ -2171,7 +2215,7 @@ struct DSEState {
 
       const Value *Ptr = Loc.Ptr->stripPointerCasts();
       int64_t DeadStart = 0;
-      uint64_t DeadSize = Loc.Size.getValue();
+      uint64_t DeadSize = getDeadSizeFactoringVScale(Loc.Size, F);
       GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
       OverlapIntervalsTy &IntervalMap = OI.second;
       Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
@@ -2422,7 +2466,7 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
       auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
       OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                               KillingOffset, DeadOffset,
-                              DeadLocWrapper.DefInst, IOL);
+                              DeadLocWrapper.DefInst, IOL, F);
     }
     if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
       auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);

llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll

Lines changed: 3 additions & 9 deletions
@@ -658,9 +658,7 @@ exit:
 
 define void @scalable_scalable_redundant_store(ptr %ptr) {
 ; CHECK-LABEL: @scalable_scalable_redundant_store(
-; CHECK-NEXT:    [[GEP_PTR_2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[GEP_PTR_2]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR]], align 32
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
   %gep.ptr.2 = getelementptr i64, ptr %ptr, i64 2
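
Note that @scalable_scalable_redundant_store carries no vscale_range attribute. With an unbounded vscale range the upper-range multiplication overflows, so both sizes fall back to their known minimums; that comparison is still decisive here because both stores scale by the same runtime vscale: offset 16 + dead 16 * vscale <= killing 32 * vscale holds for every vscale >= 1, and the minimum-size check (16 + 16 <= 32) is exactly the tightest case, vscale = 1.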
@@ -697,9 +695,7 @@ define void @scalable_scalable_nonconst_offset_neg(ptr %ptr, i64 %i) {
 
 define void @scalable_fixed_redundant_store(ptr %ptr) vscale_range(1, 2) {
 ; CHECK-LABEL: @scalable_fixed_redundant_store(
-; CHECK-NEXT:    [[GEP_PTR_2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr [[GEP_PTR_2]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR]], align 32
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
   %gep.ptr.2 = getelementptr i64, ptr %ptr, i64 2
@@ -723,9 +719,7 @@ define void @scalable_fixed_neg(ptr %ptr) vscale_range(1, 2) {
 
 define void @fixed_scalable_redundant_store(ptr %ptr) vscale_range(1, 2) {
 ; CHECK-LABEL: @fixed_scalable_redundant_store(
-; CHECK-NEXT:    [[GEP_PTR_2:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[GEP_PTR_2]], align 16
-; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr [[PTR]], align 64
+; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr [[PTR:%.*]], align 64
 ; CHECK-NEXT:    ret void
 ;
   %gep.ptr.2 = getelementptr i64, ptr %ptr, i64 2
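
All three positive cases now collapse to the single wider store, matching the arithmetic sketched above. The neighbouring negative tests visible in the hunk headers (@scalable_scalable_nonconst_offset_neg, @scalable_fixed_neg) remain unchanged by this commit: DSE still cannot prove full coverage there, so both stores must stay. The expectations can be re-checked by running the file through opt -passes=dse -S and comparing against the CHECK lines.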
