4848#include " llvm/Analysis/MustExecute.h"
4949#include " llvm/Analysis/PostDominators.h"
5050#include " llvm/Analysis/TargetLibraryInfo.h"
51+ #include " llvm/Analysis/TargetTransformInfo.h"
5152#include " llvm/Analysis/ValueTracking.h"
5253#include " llvm/IR/Argument.h"
5354#include " llvm/IR/BasicBlock.h"
@@ -560,7 +561,8 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
560561
561562static bool tryToShorten (Instruction *DeadI, int64_t &DeadStart,
562563 uint64_t &DeadSize, int64_t KillingStart,
563- uint64_t KillingSize, bool IsOverwriteEnd) {
564+ uint64_t KillingSize, bool IsOverwriteEnd,
565+ const TargetTransformInfo &TTI) {
564566 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
565567 Align PrefAlign = DeadIntrinsic->getDestAlign ().valueOrOne ();
566568
@@ -612,6 +614,24 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
612614 assert (DeadSize > ToRemoveSize && " Can't remove more than original size" );
613615
614616 uint64_t NewSize = DeadSize - ToRemoveSize;
617+
618+ // Check that we aren't going to pessimize codegen by lowering the length,
619+ // e.g. a memcpy(dst, src, 8) is more efficient than memcpy(dst, src, 7).
620+ // These checks are relatively conservative. We bail out only if ALL hold:
621+ // 1) We are removing less than 1 store (measured by the target's load/store
622+ // vector register width, in bytes).
623+ // 2) The new length needs more loads/stores (assuming one per pow2 block).
624+ // 3) Shortening does not move the length across the inline threshold.
625+ // 4) We are shrinking by less than half of the initial size.
626+ uint64_t PrefVecWidth =
627+ TTI.getLoadStoreVecRegBitWidth (DeadIntrinsic->getDestAddressSpace ()) / 8U ;
628+ uint64_t InlineThresh = TTI.getMaxMemIntrinsicInlineSizeThreshold ();
629+ if (ToRemoveSize < PrefVecWidth &&
630+ popcount (DeadSize) < popcount (DeadSize - ToRemoveSize) &&
631+ (DeadSize <= InlineThresh) == (DeadSize - ToRemoveSize <= InlineThresh) &&
632+ ToRemoveSize < DeadSize / 2U )
633+ return false ;
634+
615635 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
616636 // When shortening an atomic memory intrinsic, the newly shortened
617637 // length must remain an integer multiple of the element size.
@@ -654,7 +674,8 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
654674}
655675
656676static bool tryToShortenEnd (Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
657- int64_t &DeadStart, uint64_t &DeadSize) {
677+ int64_t &DeadStart, uint64_t &DeadSize,
678+ const TargetTransformInfo &TTI) {
658679 if (IntervalMap.empty () || !isShortenableAtTheEnd (DeadI))
659680 return false ;
660681
@@ -672,7 +693,7 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
672693 // be non negative due to preceding checks.
673694 KillingSize >= DeadSize - (uint64_t )(KillingStart - DeadStart)) {
674695 if (tryToShorten (DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
675- true )) {
696+ true , TTI )) {
676697 IntervalMap.erase (OII);
677698 return true ;
678699 }
@@ -682,7 +703,8 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
682703
683704static bool tryToShortenBegin (Instruction *DeadI,
684705 OverlapIntervalsTy &IntervalMap,
685- int64_t &DeadStart, uint64_t &DeadSize) {
706+ int64_t &DeadStart, uint64_t &DeadSize,
707+ const TargetTransformInfo &TTI) {
686708 if (IntervalMap.empty () || !isShortenableAtTheBeginning (DeadI))
687709 return false ;
688710
@@ -701,7 +723,7 @@ static bool tryToShortenBegin(Instruction *DeadI,
701723 assert (KillingSize - (uint64_t )(DeadStart - KillingStart) < DeadSize &&
702724 " Should have been handled as OW_Complete" );
703725 if (tryToShorten (DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
704- false )) {
726+ false , TTI )) {
705727 IntervalMap.erase (OII);
706728 return true ;
707729 }
@@ -824,6 +846,7 @@ struct DSEState {
824846 DominatorTree &DT;
825847 PostDominatorTree &PDT;
826848 const TargetLibraryInfo &TLI;
849+ const TargetTransformInfo &TTI;
827850 const DataLayout &DL;
828851 const LoopInfo &LI;
829852
@@ -868,9 +891,9 @@ struct DSEState {
868891
869892 DSEState (Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
870893 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
871- const LoopInfo &LI)
894+ const TargetTransformInfo &TTI, const LoopInfo &LI)
872895 : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
873- PDT (PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
896+ PDT (PDT), TLI(TLI), TTI(TTI), DL(F.getDataLayout()), LI(LI) {
874897 // Collect blocks with throwing instructions not modeled in MemorySSA and
875898 // alloc-like objects.
876899 unsigned PO = 0 ;
@@ -2066,10 +2089,10 @@ struct DSEState {
20662089 uint64_t DeadSize = Loc.Size .getValue ();
20672090 GetPointerBaseWithConstantOffset (Ptr, DeadStart, DL);
20682091 OverlapIntervalsTy &IntervalMap = OI.second ;
2069- Changed |= tryToShortenEnd (DeadI, IntervalMap, DeadStart, DeadSize);
2092+ Changed |= tryToShortenEnd (DeadI, IntervalMap, DeadStart, DeadSize, TTI );
20702093 if (IntervalMap.empty ())
20712094 continue ;
2072- Changed |= tryToShortenBegin (DeadI, IntervalMap, DeadStart, DeadSize);
2095+ Changed |= tryToShortenBegin (DeadI, IntervalMap, DeadStart, DeadSize, TTI );
20732096 }
20742097 return Changed;
20752098 }
@@ -2137,10 +2160,11 @@ struct DSEState {
21372160static bool eliminateDeadStores (Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
21382161 DominatorTree &DT, PostDominatorTree &PDT,
21392162 const TargetLibraryInfo &TLI,
2163+ const TargetTransformInfo &TTI,
21402164 const LoopInfo &LI) {
21412165 bool MadeChange = false ;
21422166
2143- DSEState State (F, AA, MSSA, DT, PDT, TLI, LI);
2167+ DSEState State (F, AA, MSSA, DT, PDT, TLI, TTI, LI);
21442168 // For each store:
21452169 for (unsigned I = 0 ; I < State.MemDefs .size (); I++) {
21462170 MemoryDef *KillingDef = State.MemDefs [I];
@@ -2332,12 +2356,13 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
23322356PreservedAnalyses DSEPass::run (Function &F, FunctionAnalysisManager &AM) {
23332357 AliasAnalysis &AA = AM.getResult <AAManager>(F);
23342358 const TargetLibraryInfo &TLI = AM.getResult <TargetLibraryAnalysis>(F);
2359+ const TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
23352360 DominatorTree &DT = AM.getResult <DominatorTreeAnalysis>(F);
23362361 MemorySSA &MSSA = AM.getResult <MemorySSAAnalysis>(F).getMSSA ();
23372362 PostDominatorTree &PDT = AM.getResult <PostDominatorTreeAnalysis>(F);
23382363 LoopInfo &LI = AM.getResult <LoopAnalysis>(F);
23392364
2340- bool Changed = eliminateDeadStores (F, AA, MSSA, DT, PDT, TLI, LI);
2365+ bool Changed = eliminateDeadStores (F, AA, MSSA, DT, PDT, TLI, TTI, LI);
23412366
23422367#ifdef LLVM_ENABLE_STATS
23432368 if (AreStatisticsEnabled ())
0 commit comments