Skip to content

Commit 705373c

Browse files
committed
[pred-deadalloc-elim] Fix memory safety issue and handle promoting paths where there is dynamically no value by inserting compensating destroys.
This commit is fixing two things: 1. In certain cases, we are seeing cases where either SILGen or the optimizer are eliminating destroy_addr along paths where we know that an enum is dynamically trivial. This can not be expressed in OSSA, so I added code to pred-deadalloc-elim so that I check if any of our available values after we finish promoting away an allocation now need to have their consuming use set completed. 2. That led me to discover that in certain cases load [take] that we were promoting were available values of other load [take]. This means that we have a memory safety issue if we promote one load before the other. Consider the following SIL: ``` %mem = alloc_stack store %arg to [init] %mem %0 = load [take] %mem store %0 to [init] %mem %1 = load [take] %mem destroy_value %1 dealloc_stack %mem ``` In this case, if we eliminate %0 before we eliminate %1, we will have a stale pointer to %0. I also took this as an opportunity to turn off predictable mem access opt on SIL that was deserialized canonicalized and non-OSSA SIL. We evidently need to still do this for pred mem opts for perf reasons (not sure why). But I am pretty sure this isn't needed and allows me to avoid some nasty code.
1 parent 428ab47 commit 705373c

File tree

4 files changed

+451
-280
lines changed

4 files changed

+451
-280
lines changed

lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp

Lines changed: 266 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,20 @@
1313
#define DEBUG_TYPE "predictable-memopt"
1414

1515
#include "PMOMemoryUseCollector.h"
16+
#include "swift/Basic/BlotMapVector.h"
1617
#include "swift/Basic/BlotSetVector.h"
1718
#include "swift/Basic/FrozenMultiMap.h"
1819
#include "swift/Basic/STLExtras.h"
20+
#include "swift/SIL/BasicBlockBits.h"
1921
#include "swift/SIL/BasicBlockUtils.h"
2022
#include "swift/SIL/LinearLifetimeChecker.h"
2123
#include "swift/SIL/OwnershipUtils.h"
2224
#include "swift/SIL/SILBuilder.h"
23-
#include "swift/SIL/BasicBlockBits.h"
2425
#include "swift/SILOptimizer/PassManager/Passes.h"
2526
#include "swift/SILOptimizer/PassManager/Transforms.h"
2627
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
2728
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
29+
#include "swift/SILOptimizer/Utils/OwnershipOptUtils.h"
2830
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
2931
#include "swift/SILOptimizer/Utils/ValueLifetime.h"
3032
#include "llvm/ADT/SmallBitVector.h"
@@ -1959,7 +1961,17 @@ class AllocOptimize {
19591961
bool promoteLoadCopy(LoadInst *li);
19601962
bool promoteLoadBorrow(LoadBorrowInst *lbi);
19611963
bool promoteCopyAddr(CopyAddrInst *cai);
1962-
void promoteLoadTake(LoadInst *Inst, MutableArrayRef<AvailableValue> values);
1964+
1965+
/// Promote a load take cleaning up everything except for RAUWing the
1966+
/// instruction with the aggregated result. The routine returns the new
1967+
/// aggregated result to the caller and expects the caller to eventually RAUW
1968+
/// \p inst with the return value. The reason why we do this is to allow for
1969+
/// the caller to work around invalidation issues by not deleting the load
1970+
/// [take] until after all load [take] have been cleaned up.
1971+
///
1972+
/// \returns the value that the caller will RAUW with \p inst.
1973+
SILValue promoteLoadTake(LoadInst *inst,
1974+
MutableArrayRef<AvailableValue> values);
19631975
void promoteDestroyAddr(DestroyAddrInst *dai,
19641976
MutableArrayRef<AvailableValue> values);
19651977
bool canPromoteTake(SILInstruction *i,
@@ -2293,7 +2305,7 @@ void AllocOptimize::promoteDestroyAddr(
22932305
dai->eraseFromParent();
22942306
}
22952307

2296-
void AllocOptimize::promoteLoadTake(
2308+
SILValue AllocOptimize::promoteLoadTake(
22972309
LoadInst *li, MutableArrayRef<AvailableValue> availableValues) {
22982310
assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
22992311
"load [copy], load [trivial], load should be handled by "
@@ -2311,15 +2323,15 @@ void AllocOptimize::promoteLoadTake(
23112323
AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
23122324
AvailableValueExpectedOwnership::Take);
23132325
SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);
2326+
assert(newVal);
23142327

23152328
++NumLoadTakePromoted;
23162329

23172330
LLVM_DEBUG(llvm::dbgs() << " *** Promoting load_take: " << *li);
23182331
LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);
23192332

2320-
// Then perform the RAUW.
2321-
li->replaceAllUsesWith(newVal);
2322-
li->eraseFromParent();
2333+
// Our parent RAUWs with newVal/erases li.
2334+
return newVal;
23232335
}
23242336

23252337
namespace {
@@ -2335,6 +2347,33 @@ struct TakePromotionState {
23352347

23362348
unsigned size() const { return takeInstIndices.size(); }
23372349

2350+
void verify() {
2351+
#ifndef NDEBUG
2352+
for (unsigned i : range(size())) {
2353+
SILInstruction *inst;
2354+
MutableArrayRef<AvailableValue> data;
2355+
std::tie(inst, data) = getData(i);
2356+
assert(inst);
2357+
inst->verifyOperandOwnership();
2358+
assert(!data.empty() && "Value without any available values?!");
2359+
}
2360+
#endif
2361+
}
2362+
2363+
void verify(unsigned startOffset) {
2364+
#ifndef NDEBUG
2365+
assert(startOffset < size());
2366+
for (unsigned i : range(startOffset, size())) {
2367+
SILInstruction *inst;
2368+
MutableArrayRef<AvailableValue> data;
2369+
std::tie(inst, data) = getData(i);
2370+
assert(inst);
2371+
inst->verifyOperandOwnership();
2372+
assert(!data.empty() && "Value without any available values?!");
2373+
}
2374+
#endif
2375+
}
2376+
23382377
void initializeForTakeInst(unsigned takeInstIndex) {
23392378
availableValueStartOffsets.push_back(availableValueList.size());
23402379
takeInstIndices.push_back(takeInstIndex);
@@ -2352,9 +2391,8 @@ struct TakePromotionState {
23522391
count = availableValueList.size() - startOffset;
23532392
}
23542393

2355-
MutableArrayRef<AvailableValue> values(&availableValueList[startOffset],
2356-
count);
2357-
return {takeInsts[takeInstIndex], values};
2394+
auto values = MutableArrayRef<AvailableValue>(availableValueList);
2395+
return {takeInsts[takeInstIndex], values.slice(startOffset, count)};
23582396
}
23592397
};
23602398

@@ -2424,6 +2462,8 @@ static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
24242462
}
24252463

24262464
bool AllocOptimize::tryToRemoveDeadAllocation() {
2465+
assert(TheMemory->getFunction()->hasOwnership() &&
2466+
"Can only eliminate dead allocations with ownership enabled");
24272467
assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
24282468
"Unhandled allocation case");
24292469

@@ -2492,20 +2532,92 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
24922532
}
24932533

24942534
// If we reached this point, we can promote all of our destroy_addr and load
2495-
// take. Since our load [take] may be available values for our destroy_addr,
2496-
// we promote the destroy_addr first.
2535+
// take. Before we begin, gather up all found available values before we do
2536+
// anything so we can fix up lifetimes later if we need to.
2537+
SmallBlotSetVector<SILValue, 32> valuesNeedingLifetimeCompletion;
2538+
for (auto pmoMemUse : Uses) {
2539+
if (pmoMemUse.Inst && pmoMemUse.Kind == PMOUseKind::Initialization) {
2540+
// Today if we promote, this is always a store, since we would have
2541+
// blown up the copy_addr otherwise. Given that, always make sure we
2542+
// clean up the src as appropriate after we optimize.
2543+
auto *si = cast<StoreInst>(pmoMemUse.Inst);
2544+
auto src = si->getSrc();
2545+
2546+
// Bail if src has any uses that are forwarding unowned uses. This
2547+
// allows us to know that we never have to deal with forwarding unowned
2548+
// instructions like br. These are corner cases that complicate the
2549+
// logic below.
2550+
for (auto *use : src->getUses()) {
2551+
if (use->getOperandOwnership() == OperandOwnership::ForwardingUnowned)
2552+
return false;
2553+
}
2554+
valuesNeedingLifetimeCompletion.insert(src);
2555+
}
2556+
}
2557+
2558+
// Since our load [take] may be available values for our
2559+
// destroy_addr/load [take], we promote the destroy_addr first and then handle
2560+
// load [take] with extra rigour later to handle that possibility.
24972561
for (unsigned i : range(destroyAddrState.size())) {
24982562
SILInstruction *dai;
24992563
MutableArrayRef<AvailableValue> values;
25002564
std::tie(dai, values) = destroyAddrState.getData(i);
25012565
promoteDestroyAddr(cast<DestroyAddrInst>(dai), values);
25022566
// We do not need to unset releases, since we are going to exit here.
25032567
}
2568+
2569+
llvm::SmallMapVector<LoadInst *, SILValue, 32> loadsToDelete;
25042570
for (unsigned i : range(loadTakeState.size())) {
25052571
SILInstruction *li;
25062572
MutableArrayRef<AvailableValue> values;
25072573
std::tie(li, values) = loadTakeState.getData(i);
2508-
promoteLoadTake(cast<LoadInst>(li), values);
2574+
2575+
for (unsigned i : indices(values)) {
2576+
auto v = values[i].Value;
2577+
auto *li = dyn_cast<LoadInst>(v);
2578+
if (!li)
2579+
continue;
2580+
2581+
auto iter = loadsToDelete.find(li);
2582+
if (iter == loadsToDelete.end())
2583+
continue;
2584+
2585+
SILValue newValue = iter->second;
2586+
assert(newValue && "We should never store a nil SILValue into this map");
2587+
values[i].Value = newValue;
2588+
}
2589+
2590+
auto *liCast = cast<LoadInst>(li);
2591+
SILValue result = promoteLoadTake(liCast, values);
2592+
assert(result);
2593+
2594+
// We can not erase liCast here since a load [take] that
2595+
// we are promoting could be an available value for another load
2596+
// [take]. Consider the following SIL:
2597+
//
2598+
// %mem = alloc_stack
2599+
// store %arg to [init] %mem
2600+
// %0 = load [take] %mem
2601+
// store %0 to [init] %mem
2602+
// %1 = load [take] %mem
2603+
// destroy_value %1
2604+
// dealloc_stack %mem
2605+
//
2606+
// In such a case, we are going to delete %0 here, but %0 is an available
2607+
// value for %1, so we will end up with a stale pointer to %0 if we
// delete %0 before we finish promoting %1.
2608+
auto insertIter = loadsToDelete.insert({liCast, result});
2609+
valuesNeedingLifetimeCompletion.erase(liCast);
2610+
(void)insertIter;
2611+
assert(insertIter.second && "loadTakeState doesn't have unique loads?!");
2612+
}
2613+
2614+
// Now that we have promoted all of our load [take], perform the actual
2615+
// RAUW/removal.
2616+
for (auto p : loadsToDelete) {
2617+
LoadInst *li = p.first;
2618+
SILValue newValue = p.second;
2619+
li->replaceAllUsesWith(newValue);
2620+
li->eraseFromParent();
25092621
}
25102622

25112623
LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
@@ -2516,6 +2628,144 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
25162628
// caller remove the allocation itself to avoid iterator invalidation.
25172629
eraseUsesOfInstruction(TheMemory);
25182630

2631+
// Now look at all of our available values and complete any of their
2632+
// post-dominating consuming use sets. This can happen if we have an enum that
2633+
// is known dynamically none along a path. This is dynamically correct, but
2634+
// can not be represented in OSSA so we insert these destroys along said path.
2635+
SmallVector<SILBasicBlock *, 32> consumingUseBlocks;
2636+
while (!valuesNeedingLifetimeCompletion.empty()) {
2637+
auto optV = valuesNeedingLifetimeCompletion.pop_back_val();
2638+
if (!optV)
2639+
continue;
2640+
SILValue v = *optV;
2641+
if (v.getOwnershipKind() != OwnershipKind::Owned)
2642+
continue;
2643+
2644+
// First see if our value doesn't have any uses. In such a case, just
2645+
// insert a destroy_value at the next instruction and return.
2646+
if (v->use_empty()) {
2647+
auto *next = v->getNextInstruction();
2648+
auto loc = RegularLocation::getAutoGeneratedLocation();
2649+
SILBuilderWithScope localBuilder(next);
2650+
localBuilder.createDestroyValue(loc, v);
2651+
continue;
2652+
}
2653+
2654+
// Otherwise, we first see if we have any consuming uses at all. If we do,
2655+
// then we know that any such consuming uses since we have an owned value
2656+
// /must/ be strongly control equivalent to our value and unreachable from
2657+
// each other, so we can just use findJointPostDominatingSet to complete
2658+
// the set.
2659+
consumingUseBlocks.clear();
2660+
for (auto *use : v->getConsumingUses())
2661+
consumingUseBlocks.push_back(use->getParentBlock());
2662+
2663+
if (!consumingUseBlocks.empty()) {
2664+
findJointPostDominatingSet(
2665+
v->getParentBlock(), consumingUseBlocks, [](SILBasicBlock *) {},
2666+
[&](SILBasicBlock *result) {
2667+
auto loc = RegularLocation::getAutoGeneratedLocation();
2668+
SILBuilderWithScope builder(result);
2669+
builder.createDestroyValue(loc, v);
2670+
});
2671+
continue;
2672+
}
2673+
2674+
// If we do not have at least one consuming use, we need to do something
2675+
// different. This situation can occur given a non-trivial enum typed
2676+
// stack allocation that:
2677+
//
2678+
// 1. Had a destroy_addr eliminated along a path where we dynamically know
2679+
// that the stack allocation is storing a trivial case.
2680+
//
2681+
// 2. Had some other paths where due to dead end blocks, no destroy_addr
2682+
// is needed.
2683+
//
2684+
// To fix this, we just treat all uses as consuming blocks and insert
2685+
// destroys using the joint post dominance set computer and insert
2686+
// destroys at the end of all input blocks in the post dom set and at the
2687+
// beginning of any leaking blocks.
2688+
{
2689+
// TODO: Can we just pass this in to findJointPostDominatingSet instead
2690+
// of recomputing it there? Maybe an overload that lets us do this?
2691+
BasicBlockSet foundUseBlocks(v->getFunction());
2692+
for (auto *use : v->getUses()) {
2693+
auto *block = use->getParentBlock();
2694+
if (!foundUseBlocks.insert(block))
2695+
continue;
2696+
consumingUseBlocks.push_back(block);
2697+
}
2698+
}
2699+
findJointPostDominatingSet(
2700+
v->getParentBlock(), consumingUseBlocks,
2701+
[&](SILBasicBlock *foundInputBlock) {
2702+
// This is a block that is reachable from another use. We are not
2703+
// interested in these.
2704+
},
2705+
[&](SILBasicBlock *leakingBlock) {
2706+
auto loc = RegularLocation::getAutoGeneratedLocation();
2707+
SILBuilderWithScope builder(leakingBlock);
2708+
builder.createDestroyValue(loc, v);
2709+
},
2710+
[&](SILBasicBlock *inputBlockInPostDomSet) {
2711+
auto *termInst = inputBlockInPostDomSet->getTerminator();
2712+
switch (termInst->getTermKind()) {
2713+
case TermKind::UnreachableInst:
2714+
// We do not care about input blocks that end in unreachables. We
2715+
// are going to leak down them so do not insert a destroy_value
2716+
// there.
2717+
return;
2718+
2719+
// NOTE: Given that our input value is owned, our branch can only
2720+
// accept the use as a non-consuming use if the branch is forwarding
2721+
// unowned ownership. Luckily for us, we checked early if we had
2722+
// any such uses and bailed, so we know the branch can not use our
2723+
// value. This is just avoiding a corner case that we don't need to
2724+
// handle.
2725+
case TermKind::BranchInst:
2726+
LLVM_FALLTHROUGH;
2727+
// NOTE: We put cond_br here since in OSSA, cond_br can never have
2728+
// a non-trivial value operand, meaning we can insert before.
2729+
case TermKind::CondBranchInst:
2730+
LLVM_FALLTHROUGH;
2731+
case TermKind::ReturnInst:
2732+
case TermKind::ThrowInst:
2733+
case TermKind::UnwindInst:
2734+
case TermKind::YieldInst: {
2735+
// These terminators can never be non-consuming uses of an owned
2736+
// value since we would be leaking the owned value no matter what
2737+
// we do. Given that, we can assume that what ever the
2738+
// non-consuming use actually was, must be before this
2739+
// instruction. So insert the destroy_value at the end of the
2740+
// block, before the terminator.
2741+
auto loc = RegularLocation::getAutoGeneratedLocation();
2742+
SILBuilderWithScope localBuilder(termInst);
2743+
localBuilder.createDestroyValue(loc, v);
2744+
return;
2745+
}
2746+
case TermKind::TryApplyInst:
2747+
case TermKind::SwitchValueInst:
2748+
case TermKind::SwitchEnumInst:
2749+
case TermKind::SwitchEnumAddrInst:
2750+
case TermKind::DynamicMethodBranchInst:
2751+
case TermKind::AwaitAsyncContinuationInst:
2752+
case TermKind::CheckedCastBranchInst:
2753+
case TermKind::CheckedCastAddrBranchInst:
2754+
case TermKind::CheckedCastValueBranchInst: {
2755+
// Otherwise, we insert the destroy_addr /after/ the
2756+
// terminator. All of these are guaranteed to have each successor
2757+
// to have the block as its only predecessor block.
2758+
SILBuilderWithScope::insertAfter(termInst, [&](auto &b) {
2759+
auto loc = RegularLocation::getAutoGeneratedLocation();
2760+
b.createDestroyValue(loc, v);
2761+
});
2762+
return;
2763+
}
2764+
}
2765+
llvm_unreachable("Case that did not return in its body?!");
2766+
});
2767+
}
2768+
25192769
return true;
25202770
}
25212771

@@ -2687,6 +2937,10 @@ class PredictableMemoryAccessOptimizations : public SILFunctionTransform {
26872937

26882938
class PredictableDeadAllocationElimination : public SILFunctionTransform {
26892939
void run() override {
2940+
// If we are already canonical or do not have ownership, just bail.
2941+
if (getFunction()->wasDeserializedCanonical() ||
2942+
!getFunction()->hasOwnership())
2943+
return;
26902944
if (eliminateDeadAllocations(*getFunction()))
26912945
invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
26922946
}

0 commit comments

Comments
 (0)