Fix EscapeAnalysis::mayReleaseContent

atrick · atrick · commit 23696e4dff86 · 2021-02-18T19:00:22.000-08:00
The previous implementation made extremely subtle and specific
assumptions about how the API is used, which don't apply
everywhere. It was trying very hard to avoid regressing performance
relative to an even older implementation that didn't even try to consider deinitializer side effects.

The aggressive logic was based on the idea that a release must have a
corresponding retain somewhere in the same function and we don't care
if the last release happens early if there are no more aliasing
uses. All the unit tests I wrote previously were based on release
hoisting, which happens to work given the way the API is used.

But this logic is incorrect for retain sinking. In that case sinking
past an "unrelated" release could cause the object to be freed
early. See test/SILOptimizer/arc_crash.swift.

With SemanticARC and other SIL improvements being made, I'm afraid
bugs like this will begin to surface.

To fix it, just remove the subtle logic to leave behind a simple and
sound EscapeAnalysis API. To do better, we will need to rewrite the
AliasAnalysis logic for release side effects, which is currently
a tangled web. In the meantime, SemanticARC can handle many cases without EscapeAnalysis.

Fixes rdar://74469299 (ARC miscompile:
EscapeAnalysis::mayReleaseContent; potential use-after-free)

While fixing this, add support for address-type queries too:

Fixes rdar://74360041 (Assertion failed:
(!releasedReference->getType().isAddress() && "an address is never a
reference"), function mayReleaseContent)
diff --git a/include/swift/SILOptimizer/Analysis/EscapeAnalysis.h b/include/swift/SILOptimizer/Analysis/EscapeAnalysis.h
@@ -1114,6 +1114,10 @@ class EscapeAnalysis : public BottomUpIPAnalysis {
   bool canEscapeToUsePoint(SILValue value, SILInstruction *usePoint,
                            ConnectionGraph *conGraph);
 
+  /// Common implementation for mayReleaseReferenceContent and
+  /// mayReleaseAddressContent.
+  bool mayReleaseContent(SILValue releasedPtr, SILValue liveAddress);
+
   friend struct ::CGForDotView;
 
 public:
@@ -1163,8 +1167,38 @@ class EscapeAnalysis : public BottomUpIPAnalysis {
   bool canEscapeTo(SILValue V, DestroyValueInst *DVI);
 
   /// Return true if \p releasedReference deinitialization may release memory
-  /// pointed to by \p accessedAddress.
-  bool mayReleaseContent(SILValue releasedReference, SILValue accessedAddress);
+  /// pointed to by \p liveAddress.
+  ///
+  /// This determines whether a direct release of \p releasedReference, such as
+  /// destroy_value or strong_release may release memory pointed to by \p
+  /// liveAddress. It can also be used to determine whether passing a
+  /// reference-type call argument may release \p liveAddress.
+  ///
+  /// This does not distinguish between a call that releases \p
+  /// releasedReference directly, vs. a call that releases one of its indirect
+  /// references. The side effects of releasing any object reachable from \p
+  /// releasedReference are a strict subset of the side effects of directly
+  /// releasing the parent reference.
+  bool mayReleaseReferenceContent(SILValue releasedReference,
+                                  SILValue liveAddress) {
+    assert(!releasedReference->getType().isAddress() &&
+           "expected a potentially nontrivial value, not an address");
+    return mayReleaseContent(releasedReference, liveAddress);
+  }
+
+  /// Return true if accessing memory at \p accessedAddress may release memory
+  /// pointed to by \p liveAddress.
+  ///
+  /// This makes sense for determining whether accessing indirect call argument
+  /// \p accessedAddress may release memory pointed to by \p liveAddress.
+  ///
+  /// "Access" to the memory can be any release of a reference pointed to by \p
+  /// accessedAddress, so '@in' and '@inout' are handled the same.
+  bool mayReleaseAddressContent(SILValue accessedAddress,
+                                SILValue liveAddress) {
+    assert(accessedAddress->getType().isAddress() && "expected an address");
+    return mayReleaseContent(accessedAddress, liveAddress);
+  }
 
   /// Returns true if the pointers \p V1 and \p V2 can possibly point to the
   /// same memory.
diff --git a/lib/SILOptimizer/Analysis/AliasAnalysis.cpp b/lib/SILOptimizer/Analysis/AliasAnalysis.cpp
@@ -723,8 +723,18 @@ bool AliasAnalysis::canApplyDecrementRefCount(FullApplySite FAS, SILValue Ptr) {
     if (ArgEffect.mayRelease()) {
       // The function may release this argument, so check if the pointer can
       // escape to it.
-      if (EA->mayReleaseContent(FAS.getArgument(Idx), Ptr))
-        return true;
+      auto arg = FAS.getArgument(Idx);
+      if (arg->getType().isAddress()) {
+        // Handle indirect arguments as if they are a release of any references
+        // pointed to by the argument's address.
+        if (EA->mayReleaseAddressContent(arg, Ptr))
+          return true;
+      } else {
+        // Handle direct arguments as if they are a direct release of the
+        // reference (just like a destroy_value).
+        if (EA->mayReleaseReferenceContent(arg, Ptr))
+          return true;
+      }
     }
   }
   return false;
@@ -744,7 +754,7 @@ bool AliasAnalysis::canBuiltinDecrementRefCount(BuiltinInst *BI, SILValue Ptr) {
     // to be an owned reference and disallows addresses. Conservatively handle
     // address type arguments as and conservatively treat all other values
     // potential owned references.
-    if (Arg->getType().isAddress() || EA->mayReleaseContent(Arg, Ptr))
+    if (Arg->getType().isAddress() || EA->mayReleaseReferenceContent(Arg, Ptr))
       return true;
   }
   return false;
@@ -788,7 +798,7 @@ bool AliasAnalysis::mayValueReleaseInterfereWithInstruction(
   // accessedPointer. Access to any objects beyond the first released refcounted
   // object are irrelevant--they must already have sufficient refcount that they
   // won't be released when releasing Ptr.
-  return EA->mayReleaseContent(releasedReference, accessedPointer);
+  return EA->mayReleaseReferenceContent(releasedReference, accessedPointer);
 }
 
 void AliasAnalysis::initialize(SILPassManager *PM) {
diff --git a/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp b/lib/SILOptimizer/Analysis/EscapeAnalysis.cpp
@@ -2727,95 +2727,115 @@ bool EscapeAnalysis::canPointToSameMemory(SILValue V1, SILValue V2) {
   return true;
 }
 
-// Return true if deinitialization of \p releasedReference may release memory
-// directly pointed to by \p accessAddress.
+// Returns true if deinitialization of \p releasedPtr may release memory
+// directly pointed to by \p livePtr.
 //
-// Note that \p accessedAddress could be a reference itself, an address of a
-// local/argument that contains a reference, or even a pointer to the middle of
-// an object (even if it is an exclusive argument).
-//
-// This is almost the same as asking "is the content node for accessedAddress
-// reachable via releasedReference", with three subtle differences:
-//
-// (1) A locally referenced object can only be freed when deinitializing
-// releasedReference if it is the same object. Indirect references will be kept
-// alive by their distinct local references--ARC can't remove those without
-// inserting a mark_dependence/end_dependence scope.
-//
-// (2) the content of exclusive arguments may be indirectly reachable via
-// releasedReference, but the exclusive argument must have it's own reference
-// count, so cannot be freed via the locally released reference.
+// The implementation is common between mayReleaseReferenceContent and
+// mayReleaseAddressContent, but the semantics are different. For references,
+// this models the release of the reference itself. For addresses, this models
+// the release of any reference pointed to by the address. The caller should
+// explicitly ask for the right one so they aren't surprised. Here we simply
+// switch behavior based on whether \p releasedPtr is an address type.
 //
-// (3) Objects may contain raw pointers into themselves or into other
-// objects. Any access to the raw pointer is not considered a use of the object
-// because that access must be "guarded" by a fix_lifetime or
-// mark_dependence/end_dependence that acts as a placeholder.
+// Note that \p livePtr could be a reference itself, an address of a
+// local/argument that contains a reference, or even a pointer to the middle of
+// an object. (Even an exclusive argument may point to the middle of an object).
 //
-// There are two interesting cases in which a connection graph query can
-// determine that the accessed memory cannot be released:
+// This is similar to asking "is the content of livePtr reachable via
+// releasedPtr". There are two interesting cases in which a connection graph
+// query can determine that the accessed memory cannot be released:
 //
-// Case #1: accessedAddress points to a uniquely identified object that does not
+// Case #1: \p livePtr points to a uniquely identified object that does not
 // escape within this function.
 //
-// Note: A "uniquely identified object" is either a locally allocated object,
-// which is obviously not reachable outside this function, or an exclusive
-// address argument, which *is* reachable outside this function, but must
-// have its own reference count so cannot be released locally.
+// In this case, it is sufficient to ensure that no connection graph path exists
+// from the content of \p livePtr to the content of \p releasedPtr.
+//
+// Note: A "uniquely identified object" is either locally allocated, which is
+// obviously not reachable outside this function, or an exclusive address, which
+// *is* reachable outside this function, but must have its own reference count
+// so cannot be released in this function or its callees.
 //
 // Case #2: The released reference points to a local object and no connection
 // graph path exists from the referenced object to a global-escaping or
-// argument-escaping node without traversing a non-interior edge.
+// argument-escaping node.
+//
+// TODO: This API is ineffective for release hoisting, because the release
+// itself is often the only place that an object's contents may escape. We can't
+// currently determine that since the contents cannot escape prior to \p
+// releasedPtr, then livePtr cannot possibly point to the same memory!
 //
-// In both cases, the connection graph is sufficient to determine if the
-// accessed content may be released. To prove that the accessed memory is
-// distinct from any released memory it is now sufficient to check that no
-// connection graph path exists from the released object's node to the accessed
-// content node without traversing a non-interior edge.
-bool EscapeAnalysis::mayReleaseContent(SILValue releasedReference,
-                                       SILValue accessedAddress) {
-  assert(!releasedReference->getType().isAddress()
-         && "an address is never a reference");
-
-  SILFunction *f = getCommonFunction(releasedReference, accessedAddress);
+// TODO: In the future, we may have an AliasAnalysis query that distinguishes
+// between retain-sinking vs. release-hoisting. With SemanticARC, we may not
+// need to do this, but it is possible to be much more aggressive with
+// release-hoisting. This is because, for a retain/release pair, it's always ok
+// to release earlier as long as there are no subsequent aliasing uses. If the
+// caller is only concerned with release hoisting and knows there are no
+// subsequent aliasing uses protected by a local release, then the connection
+// graph reachability check here only needs to search within the current object
+// (it can stop at a non-interior edge). This would assume that any indirectly
+// released reference needs to be kept alive by some distinct local
+// references--ARC can't remove those without inserting a
+// mark_dependence/end_dependence scope. It would also ignore the fact that
+// objects may contain raw pointers into themselves or into other objects. Any
+// access to the raw pointer is not considered a use of the object because that
+// access must be "guarded" by a fix_lifetime or mark_dependence/end_dependence
+// that acts as a placeholder.
+bool EscapeAnalysis::mayReleaseContent(SILValue releasedPtr, SILValue livePtr) {
+  SILFunction *f = getCommonFunction(releasedPtr, livePtr);
   if (!f)
     return true;
 
   auto *conGraph = getConnectionGraph(f);
 
-  CGNode *addrContentNode = conGraph->getValueContent(accessedAddress);
-  if (!addrContentNode)
+  CGNode *liveContentNode = conGraph->getValueContent(livePtr);
+  if (!liveContentNode)
     return true;
 
-  // Case #1: Unique accessedAddress whose content does not escape.
-  bool isAccessUniq =
-      isUniquelyIdentified(accessedAddress)
-      && !addrContentNode->valueEscapesInsideFunction(accessedAddress);
-
-  // Case #2: releasedReference points to a local object.
-  if (!isAccessUniq && !pointsToLocalObject(releasedReference))
+  // Case #1: Unique livePtr whose content does not escape.
+  //
+  // If \p livePtr is an exclusive function argument, it may be indirectly
+  // reachable via releasedPtr, but the exclusive argument must have its own
+  // reference count retained by the caller. We consider \p livePtr unique since
+  // it cannot be freed via a release of \p releasedPtr within this function
+  // or its callees.
+  bool isLiveAddressUnique =
+      isUniquelyIdentified(livePtr)
+      && !liveContentNode->valueEscapesInsideFunction(livePtr);
+
+  // Case #2: releasedPtr points to a local object.
+  if (!isLiveAddressUnique && !pointsToLocalObject(releasedPtr))
     return true;
 
-  CGNode *releasedObjNode = conGraph->getValueContent(releasedReference);
+  // If \p releasedPtr is an address, then its released content is at least two
+  // levels away: the address points to a reference, which points to an object.
+  // CGNode *releasedObjNode = nullptr;
+  CGNode *releasedObjNode = nullptr;
+  if (releasedPtr->getType().isAddress()) {
+    CGNode *addrContentObjNode = conGraph->getValueContent(releasedPtr);
+    if (!addrContentObjNode)
+      return true;
+    releasedObjNode = conGraph->getOrCreateUnknownContent(addrContentObjNode);
+  } else {
+    releasedObjNode = conGraph->getValueContent(releasedPtr);
+  }
   // Make sure we have at least one value CGNode for releasedReference.
   if (!releasedObjNode)
     return true;
 
-  // Check for reachability from releasedObjNode to addrContentNode.
+  // Check for reachability from releasedObjNode to liveContentNode.
   // A pointsTo cycle is equivalent to a null pointsTo.
   CGNodeWorklist worklist(conGraph);
   for (CGNode *releasedNode = releasedObjNode;
        releasedNode && worklist.tryPush(releasedNode);
        releasedNode = releasedNode->getContentNodeOrNull()) {
     // A path exists from released content to accessed content.
-    if (releasedNode == addrContentNode)
+    if (releasedNode == liveContentNode)
       return true;
 
     // A path exists to an escaping node.
-    if (!isAccessUniq && releasedNode->escapesInsideFunction())
+    if (!isLiveAddressUnique && releasedNode->escapesInsideFunction())
       return true;
-
-    if (!releasedNode->isInterior())
-      break;
   }
   return false; // no path to escaping memory that may be freed.
 }
diff --git a/test/SILOptimizer/arc_crash.swift b/test/SILOptimizer/arc_crash.swift
@@ -0,0 +1,50 @@
+// RUN: %target-swift-frontend -O %s -parse-as-library -emit-sil -enforce-exclusivity=none -Xllvm -sil-disable-pass=function-signature-opts | %FileCheck %s
+
+// Test ARC optimizations on source level tests that have been
+// miscompiled and crash (e.g. because of use-after-free).
+
+// -----------------------------------------------------------------------------
+// rdar://74469299 (ARC miscompile: EscapeAnalysis::mayReleaseContent;
+// potential use-after-free)
+// -----------------------------------------------------------------------------
+
+public class Base {
+  var i = 3
+  init() {}
+}
+public class Node : Base {
+  var node: Base
+
+  init(node: Base) { self.node = node }
+}
+struct Queue {
+  var node: Node
+}
+
+@inline(never)
+func useQueue(q: __owned Queue) {}
+
+@inline(never)
+func useNode(n: Base) -> Int {
+  return n.i
+}
+
+// CHECK-LABEL: sil [noinline] @$s9arc_crash14testMayReleaseAA4BaseCyF : $@convention(thin) () -> @owned Base {
+// CHECK:   [[BASE:%.*]] = alloc_ref $Base
+// CHECK:   strong_retain [[BASE]] : $Base
+// CHECK:   apply %{{.*}} : $@convention(thin) (@owned Queue) -> ()
+// CHECK-LABEL: } // end sil function '$s9arc_crash14testMayReleaseAA4BaseCyF'
+@inline(never)
+public func testMayRelease() -> Base {
+  let n2 = Base()
+  let n1 = Node(node: n2)
+  let q = Queue(node: n1)
+  // n2 must not be released before useQueue.
+  useQueue(q: q)
+  return n2
+}
+
+// This crashes when testMayRelease releases the object too early.
+// print("Object:")
+// print(testMayRelease())
+// -----------------------------------------------------------------------------
diff --git a/test/SILOptimizer/retain_release_code_motion.sil b/test/SILOptimizer/retain_release_code_motion.sil