13
13
#define DEBUG_TYPE " predictable-memopt"
14
14
15
15
#include " PMOMemoryUseCollector.h"
16
+ #include " swift/Basic/BlotMapVector.h"
16
17
#include " swift/Basic/BlotSetVector.h"
17
18
#include " swift/Basic/FrozenMultiMap.h"
18
19
#include " swift/Basic/STLExtras.h"
20
+ #include " swift/SIL/BasicBlockBits.h"
19
21
#include " swift/SIL/BasicBlockUtils.h"
20
22
#include " swift/SIL/LinearLifetimeChecker.h"
21
23
#include " swift/SIL/OwnershipUtils.h"
22
24
#include " swift/SIL/SILBuilder.h"
23
- #include " swift/SIL/BasicBlockBits.h"
24
25
#include " swift/SILOptimizer/PassManager/Passes.h"
25
26
#include " swift/SILOptimizer/PassManager/Transforms.h"
26
27
#include " swift/SILOptimizer/Utils/CFGOptUtils.h"
27
28
#include " swift/SILOptimizer/Utils/InstOptUtils.h"
29
+ #include " swift/SILOptimizer/Utils/OwnershipOptUtils.h"
28
30
#include " swift/SILOptimizer/Utils/SILSSAUpdater.h"
29
31
#include " swift/SILOptimizer/Utils/ValueLifetime.h"
30
32
#include " llvm/ADT/SmallBitVector.h"
@@ -1959,7 +1961,17 @@ class AllocOptimize {
1959
1961
bool promoteLoadCopy (LoadInst *li);
1960
1962
bool promoteLoadBorrow (LoadBorrowInst *lbi);
1961
1963
bool promoteCopyAddr (CopyAddrInst *cai);
1962
- void promoteLoadTake (LoadInst *Inst, MutableArrayRef<AvailableValue> values);
1964
+
1965
+ // / Promote a load take cleaning up everything except for RAUWing the
1966
+ // / instruction with the aggregated result. The routine returns the new
1967
+ // / aggregated result to the caller and expects the caller to eventually RAUW
1968
+ // / \p inst with the return value. The reason why we do this is to allow for
1969
+ // / the caller to work around invalidation issues by not deleting the load
1970
+ // / [take] until after all load [take] have been cleaned up.
1971
+ // /
1972
+ // / \returns the value that the caller will RAUW with \p inst.
1973
+ SILValue promoteLoadTake (LoadInst *inst,
1974
+ MutableArrayRef<AvailableValue> values);
1963
1975
void promoteDestroyAddr (DestroyAddrInst *dai,
1964
1976
MutableArrayRef<AvailableValue> values);
1965
1977
bool canPromoteTake (SILInstruction *i,
@@ -2293,7 +2305,7 @@ void AllocOptimize::promoteDestroyAddr(
2293
2305
dai->eraseFromParent ();
2294
2306
}
2295
2307
2296
- void AllocOptimize::promoteLoadTake (
2308
+ SILValue AllocOptimize::promoteLoadTake (
2297
2309
LoadInst *li, MutableArrayRef<AvailableValue> availableValues) {
2298
2310
assert (li->getOwnershipQualifier () == LoadOwnershipQualifier::Take &&
2299
2311
" load [copy], load [trivial], load should be handled by "
@@ -2311,15 +2323,15 @@ void AllocOptimize::promoteLoadTake(
2311
2323
AvailableValueAggregator agg (li, availableValues, Uses, deadEndBlocks,
2312
2324
AvailableValueExpectedOwnership::Take);
2313
2325
SILValue newVal = agg.aggregateValues (loadTy, address, firstElt);
2326
+ assert (newVal);
2314
2327
2315
2328
++NumLoadTakePromoted;
2316
2329
2317
2330
LLVM_DEBUG (llvm::dbgs () << " *** Promoting load_take: " << *li);
2318
2331
LLVM_DEBUG (llvm::dbgs () << " To value: " << *newVal);
2319
2332
2320
- // Then perform the RAUW.
2321
- li->replaceAllUsesWith (newVal);
2322
- li->eraseFromParent ();
2333
+ // Our parent RAUWs with newVal/erases li.
2334
+ return newVal;
2323
2335
}
2324
2336
2325
2337
namespace {
@@ -2335,6 +2347,33 @@ struct TakePromotionState {
2335
2347
2336
2348
unsigned size () const { return takeInstIndices.size (); }
2337
2349
2350
+ void verify () {
2351
+ #ifndef NDEBUG
2352
+ for (unsigned i : range (size ())) {
2353
+ SILInstruction *inst;
2354
+ MutableArrayRef<AvailableValue> data;
2355
+ std::tie (inst, data) = getData (i);
2356
+ assert (inst);
2357
+ inst->verifyOperandOwnership ();
2358
+ assert (!data.empty () && " Value without any available values?!" );
2359
+ }
2360
+ #endif
2361
+ }
2362
+
2363
+ void verify (unsigned startOffset) {
2364
+ #ifndef NDEBUG
2365
+ assert (startOffset < size ());
2366
+ for (unsigned i : range (startOffset, size ())) {
2367
+ SILInstruction *inst;
2368
+ MutableArrayRef<AvailableValue> data;
2369
+ std::tie (inst, data) = getData (i);
2370
+ assert (inst);
2371
+ inst->verifyOperandOwnership ();
2372
+ assert (!data.empty () && " Value without any available values?!" );
2373
+ }
2374
+ #endif
2375
+ }
2376
+
2338
2377
void initializeForTakeInst (unsigned takeInstIndex) {
2339
2378
availableValueStartOffsets.push_back (availableValueList.size ());
2340
2379
takeInstIndices.push_back (takeInstIndex);
@@ -2352,9 +2391,8 @@ struct TakePromotionState {
2352
2391
count = availableValueList.size () - startOffset;
2353
2392
}
2354
2393
2355
- MutableArrayRef<AvailableValue> values (&availableValueList[startOffset],
2356
- count);
2357
- return {takeInsts[takeInstIndex], values};
2394
+ auto values = MutableArrayRef<AvailableValue>(availableValueList);
2395
+ return {takeInsts[takeInstIndex], values.slice (startOffset, count)};
2358
2396
}
2359
2397
};
2360
2398
@@ -2424,6 +2462,8 @@ static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
2424
2462
}
2425
2463
2426
2464
bool AllocOptimize::tryToRemoveDeadAllocation () {
2465
+ assert (TheMemory->getFunction ()->hasOwnership () &&
2466
+ " Can only eliminate dead allocations with ownership enabled" );
2427
2467
assert ((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
2428
2468
" Unhandled allocation case" );
2429
2469
@@ -2492,20 +2532,92 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
2492
2532
}
2493
2533
2494
2534
// If we reached this point, we can promote all of our destroy_addr and load
2495
- // take. Since our load [take] may be available values for our destroy_addr,
2496
- // we promote the destroy_addr first.
2535
+ // take. Before we begin, gather up all found available values before we do
2536
+ // anything so we can fix up lifetimes later if we need to.
2537
+ SmallBlotSetVector<SILValue, 32 > valuesNeedingLifetimeCompletion;
2538
+ for (auto pmoMemUse : Uses) {
2539
+ if (pmoMemUse.Inst && pmoMemUse.Kind == PMOUseKind::Initialization) {
2540
+ // Today if we promote, this is always a store, since we would have
2541
+ // blown up the copy_addr otherwise. Given that, always make sure we
2542
+ // clean up the src as appropriate after we optimize.
2543
+ auto *si = cast<StoreInst>(pmoMemUse.Inst );
2544
+ auto src = si->getSrc ();
2545
+
2546
+ // Bail if src has any uses that are forwarding unowned uses. This
2547
+ // allows us to know that we never have to deal with forwarding unowned
2548
+ // instructions like br. These are corner cases that complicate the
2549
+ // logic below.
2550
+ for (auto *use : src->getUses ()) {
2551
+ if (use->getOperandOwnership () == OperandOwnership::ForwardingUnowned)
2552
+ return false ;
2553
+ }
2554
+ valuesNeedingLifetimeCompletion.insert (src);
2555
+ }
2556
+ }
2557
+
2558
+ // Since our load [take] may be available values for our
2559
+ // destroy_addr/load [take], we promote the destroy_addr first and then handle
2560
+ // load [take] with extra rigour later to handle that possibility.
2497
2561
for (unsigned i : range (destroyAddrState.size ())) {
2498
2562
SILInstruction *dai;
2499
2563
MutableArrayRef<AvailableValue> values;
2500
2564
std::tie (dai, values) = destroyAddrState.getData (i);
2501
2565
promoteDestroyAddr (cast<DestroyAddrInst>(dai), values);
2502
2566
// We do not need to unset releases, since we are going to exit here.
2503
2567
}
2568
+
2569
+ llvm::SmallMapVector<LoadInst *, SILValue, 32 > loadsToDelete;
2504
2570
for (unsigned i : range (loadTakeState.size ())) {
2505
2571
SILInstruction *li;
2506
2572
MutableArrayRef<AvailableValue> values;
2507
2573
std::tie (li, values) = loadTakeState.getData (i);
2508
- promoteLoadTake (cast<LoadInst>(li), values);
2574
+
2575
+ for (unsigned i : indices (values)) {
2576
+ auto v = values[i].Value ;
2577
+ auto *li = dyn_cast<LoadInst>(v);
2578
+ if (!li)
2579
+ continue ;
2580
+
2581
+ auto iter = loadsToDelete.find (li);
2582
+ if (iter == loadsToDelete.end ())
2583
+ continue ;
2584
+
2585
+ SILValue newValue = iter->second ;
2586
+ assert (newValue && " We should neer store a nil SILValue into this map" );
2587
+ values[i].Value = newValue;
2588
+ }
2589
+
2590
+ auto *liCast = cast<LoadInst>(li);
2591
+ SILValue result = promoteLoadTake (liCast, values);
2592
+ assert (result);
2593
+
2594
+ // We need to record liCast here before we erase it since a load [take] that
2595
+ // we are promoting could be an available value for another load
2596
+ // [take]. Consider the following SIL:
2597
+ //
2598
+ // %mem = alloc_stack
2599
+ // store %arg to [init] %mem
2600
+ // %0 = load [take] %mem
2601
+ // store %0 to [init] %mem
2602
+ // %1 = load [take] %mem
2603
+ // destroy_value %1
2604
+ // dealloc_stack %mem
2605
+ //
2606
+ // In such a case, we are going to delete %0 here, but %0 is an available
2607
+ // value for %1, so we defer the RAUW/erase of %0 until all load [take] have
+ // been promoted.
2608
+ auto insertIter = loadsToDelete.insert ({liCast, result});
2609
+ valuesNeedingLifetimeCompletion.erase (liCast);
2610
+ (void )insertIter;
2611
+ assert (insertIter.second && " loadTakeState doesn't have unique loads?!" );
2612
+ }
2613
+
2614
+ // Now that we have promoted all of our load [take], perform the actual
2615
+ // RAUW/removal.
2616
+ for (auto p : loadsToDelete) {
2617
+ LoadInst *li = p.first ;
2618
+ SILValue newValue = p.second ;
2619
+ li->replaceAllUsesWith (newValue);
2620
+ li->eraseFromParent ();
2509
2621
}
2510
2622
2511
2623
LLVM_DEBUG (llvm::dbgs () << " *** Removing autogenerated non-trivial alloc: "
@@ -2516,6 +2628,144 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
2516
2628
// caller remove the allocation itself to avoid iterator invalidation.
2517
2629
eraseUsesOfInstruction (TheMemory);
2518
2630
2631
+ // Now look at all of our available values and complete any of their
2632
+ // post-dominating consuming use sets. This can happen if we have an enum that
2633
+ // is known dynamically none along a path. This is dynamically correct, but
2634
+ // can not be represented in OSSA so we insert these destroys along said path.
2635
+ SmallVector<SILBasicBlock *, 32 > consumingUseBlocks;
2636
+ while (!valuesNeedingLifetimeCompletion.empty ()) {
2637
+ auto optV = valuesNeedingLifetimeCompletion.pop_back_val ();
2638
+ if (!optV)
2639
+ continue ;
2640
+ SILValue v = *optV;
2641
+ if (v.getOwnershipKind () != OwnershipKind::Owned)
2642
+ continue ;
2643
+
2644
+ // First see if our value doesn't have any uses. In such a case, just
2645
+ // insert a destroy_value at the next instruction and return.
2646
+ if (v->use_empty ()) {
2647
+ auto *next = v->getNextInstruction ();
2648
+ auto loc = RegularLocation::getAutoGeneratedLocation ();
2649
+ SILBuilderWithScope localBuilder (next);
2650
+ localBuilder.createDestroyValue (loc, v);
2651
+ continue ;
2652
+ }
2653
+
2654
+ // Otherwise, we first see if we have any consuming uses at all. If we do,
2655
+ // then we know that any such consuming uses since we have an owned value
2656
+ // /must/ be strongly control equivalent to our value and unreachable from
2657
+ // each other, so we can just use findJointPostDominatingSet to complete
2658
+ // the set.
2659
+ consumingUseBlocks.clear ();
2660
+ for (auto *use : v->getConsumingUses ())
2661
+ consumingUseBlocks.push_back (use->getParentBlock ());
2662
+
2663
+ if (!consumingUseBlocks.empty ()) {
2664
+ findJointPostDominatingSet (
2665
+ v->getParentBlock (), consumingUseBlocks, [](SILBasicBlock *) {},
2666
+ [&](SILBasicBlock *result) {
2667
+ auto loc = RegularLocation::getAutoGeneratedLocation ();
2668
+ SILBuilderWithScope builder (result);
2669
+ builder.createDestroyValue (loc, v);
2670
+ });
2671
+ continue ;
2672
+ }
2673
+
2674
+ // If we do not have at least one consuming use, we need to do something
2675
+ // different. This situation can occur given a non-trivial enum typed
2676
+ // stack allocation that:
2677
+ //
2678
+ // 1. Had a destroy_addr eliminated along a path where we dynamically know
2679
+ // that the stack allocation is storing a trivial case.
2680
+ //
2681
+ // 2. Had some other paths where due to dead end blocks, no destroy_addr
2682
+ // is needed.
2683
+ //
2684
+ // To fix this, we just treat all uses as consuming blocks and insert
2685
+ // destroys using the joint post dominance set computer and insert
2686
+ // destroys at the end of all input blocks in the post dom set and at the
2687
+ // beginning of any leaking blocks.
2688
+ {
2689
+ // TODO: Can we just pass this in to findJointPostDominatingSet instead
2690
+ // of recomputing it there? Maybe an overload that lets us do this?
2691
+ BasicBlockSet foundUseBlocks (v->getFunction ());
2692
+ for (auto *use : v->getUses ()) {
2693
+ auto *block = use->getParentBlock ();
2694
+ if (!foundUseBlocks.insert (block))
2695
+ continue ;
2696
+ consumingUseBlocks.push_back (block);
2697
+ }
2698
+ }
2699
+ findJointPostDominatingSet (
2700
+ v->getParentBlock (), consumingUseBlocks,
2701
+ [&](SILBasicBlock *foundInputBlock) {
2702
+ // This is a block that is reachable from another use. We are not
2703
+ // interested in these.
2704
+ },
2705
+ [&](SILBasicBlock *leakingBlock) {
2706
+ auto loc = RegularLocation::getAutoGeneratedLocation ();
2707
+ SILBuilderWithScope builder (leakingBlock);
2708
+ builder.createDestroyValue (loc, v);
2709
+ },
2710
+ [&](SILBasicBlock *inputBlockInPostDomSet) {
2711
+ auto *termInst = inputBlockInPostDomSet->getTerminator ();
2712
+ switch (termInst->getTermKind ()) {
2713
+ case TermKind::UnreachableInst:
2714
+ // We do not care about input blocks that end in unreachables. We
2715
+ // are going to leak down them so do not insert a destroy_value
2716
+ // there.
2717
+ return ;
2718
+
2719
+ // NOTE: Given that our input value is owned, our branch can only
2720
+ // accept the use as a non-consuming use if the branch is forwarding
2721
+ // unowned ownership. Luckily for us, we checked early if we had
2722
+ // any such uses and bailed, so we know the branch can not use our
2723
+ // value. This is just avoiding a corner case that we don't need to
2724
+ // handle.
2725
+ case TermKind::BranchInst:
2726
+ LLVM_FALLTHROUGH;
2727
+ // NOTE: We put cond_br here since in OSSA, cond_br can never have
2728
+ // a non-trivial value operand, meaning we can insert before.
2729
+ case TermKind::CondBranchInst:
2730
+ LLVM_FALLTHROUGH;
2731
+ case TermKind::ReturnInst:
2732
+ case TermKind::ThrowInst:
2733
+ case TermKind::UnwindInst:
2734
+ case TermKind::YieldInst: {
2735
+ // These terminators can never be non-consuming uses of an owned
2736
+ // value since we would be leaking the owned value no matter what
2737
+ // we do. Given that, we can assume that what ever the
2738
+ // non-consuming use actually was, must be before this
2739
+ // instruction. So insert the destroy_value at the end of the
2740
+ // block, before the terminator.
2741
+ auto loc = RegularLocation::getAutoGeneratedLocation ();
2742
+ SILBuilderWithScope localBuilder (termInst);
2743
+ localBuilder.createDestroyValue (loc, v);
2744
+ return ;
2745
+ }
2746
+ case TermKind::TryApplyInst:
2747
+ case TermKind::SwitchValueInst:
2748
+ case TermKind::SwitchEnumInst:
2749
+ case TermKind::SwitchEnumAddrInst:
2750
+ case TermKind::DynamicMethodBranchInst:
2751
+ case TermKind::AwaitAsyncContinuationInst:
2752
+ case TermKind::CheckedCastBranchInst:
2753
+ case TermKind::CheckedCastAddrBranchInst:
2754
+ case TermKind::CheckedCastValueBranchInst: {
2755
+ // Otherwise, we insert the destroy_addr /after/ the
2756
+ // terminator. All of these are guaranteed to have each successor
2757
+ // to have the block as its only predecessor block.
2758
+ SILBuilderWithScope::insertAfter (termInst, [&](auto &b) {
2759
+ auto loc = RegularLocation::getAutoGeneratedLocation ();
2760
+ b.createDestroyValue (loc, v);
2761
+ });
2762
+ return ;
2763
+ }
2764
+ }
2765
+ llvm_unreachable (" Case that did not return in its body?!" );
2766
+ });
2767
+ }
2768
+
2519
2769
return true ;
2520
2770
}
2521
2771
@@ -2687,6 +2937,10 @@ class PredictableMemoryAccessOptimizations : public SILFunctionTransform {
2687
2937
2688
2938
class PredictableDeadAllocationElimination : public SILFunctionTransform {
2689
2939
void run () override {
2940
+ // If we are already canonical or do not have ownership, just bail.
2941
+ if (getFunction ()->wasDeserializedCanonical () ||
2942
+ !getFunction ()->hasOwnership ())
2943
+ return ;
2690
2944
if (eliminateDeadAllocations (*getFunction ()))
2691
2945
invalidateAnalysis (SILAnalysis::InvalidationKind::FunctionBody);
2692
2946
}
0 commit comments