Skip to content

Commit 5ca16fe

Browse files
authored
[NFC] Avoid repeated work in Precompute of GC allocations (#7763)
When we precompute struct.new, we keep a cache of allocations, since the identity of each allocation must remain the same. The logic for managing that cache, however, first created a new instance and only then discarded it if a cached value existed — so we still visited and computed all the children each time, leading to exponential wasted work in the worst case. To fix that, check the cache first, before doing any computation. Fixes #7760
1 parent 2c9103e commit 5ca16fe

File tree

2 files changed

+71
-24
lines changed

2 files changed

+71
-24
lines changed

src/passes/Precompute.cpp

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,7 @@ class PrecomputingExpressionRunner
126126

127127
// TODO: Use immutability for values
128128
Flow visitStructNew(StructNew* curr) {
129-
auto flow = Super::visitStructNew(curr);
130-
if (flow.breaking()) {
131-
return flow;
132-
}
133-
return getHeapCreationFlow(flow, curr);
129+
return getGCAllocation(curr, [&]() { return Super::visitStructNew(curr); });
134130
}
135131
Flow visitStructSet(StructSet* curr) { return Flow(NONCONSTANT_FLOW); }
136132
Flow visitStructGet(StructGet* curr) {
@@ -167,18 +163,11 @@ class PrecomputingExpressionRunner
167163
return Super::visitStructGet(curr);
168164
}
169165
Flow visitArrayNew(ArrayNew* curr) {
170-
auto flow = Super::visitArrayNew(curr);
171-
if (flow.breaking()) {
172-
return flow;
173-
}
174-
return getHeapCreationFlow(flow, curr);
166+
return getGCAllocation(curr, [&]() { return Super::visitArrayNew(curr); });
175167
}
176168
Flow visitArrayNewFixed(ArrayNewFixed* curr) {
177-
auto flow = Super::visitArrayNewFixed(curr);
178-
if (flow.breaking()) {
179-
return flow;
180-
}
181-
return getHeapCreationFlow(flow, curr);
169+
return getGCAllocation(curr,
170+
[&]() { return Super::visitArrayNewFixed(curr); });
182171
}
183172
Flow visitArraySet(ArraySet* curr) { return Flow(NONCONSTANT_FLOW); }
184173
Flow visitArrayGet(ArrayGet* curr) {
@@ -197,18 +186,22 @@ class PrecomputingExpressionRunner
197186
Flow visitArrayCopy(ArrayCopy* curr) { return Flow(NONCONSTANT_FLOW); }
198187

199188
// Generates heap info for a heap-allocating expression.
200-
template<typename T> Flow getHeapCreationFlow(Flow flow, T* curr) {
189+
Flow getGCAllocation(Expression* curr, std::function<Flow()> visitFunc) {
201190
// We must return a literal that refers to the canonical location for this
202-
// source expression, so that each time we compute a specific struct.new
191+
// source expression, so that each time we compute a specific *.new then
203192
// we get the same identity.
204-
std::shared_ptr<GCData>& canonical = heapValues[curr];
205-
std::shared_ptr<GCData> newGCData = flow.getSingleValue().getGCData();
206-
if (!canonical) {
207-
canonical = std::make_shared<GCData>(*newGCData);
208-
} else {
209-
*canonical = *newGCData;
193+
auto iter = heapValues.find(curr);
194+
if (iter != heapValues.end()) {
195+
// Refer to the same canonical GCData that we already created.
196+
return Literal(iter->second, curr->type.getHeapType());
210197
}
211-
return Literal(canonical, curr->type.getHeapType());
198+
// Only call the visitor function here, so we do it once per allocation.
199+
auto flow = visitFunc();
200+
if (flow.breaking()) {
201+
return flow;
202+
}
203+
heapValues[curr] = flow.getSingleValue().getGCData();
204+
return flow;
212205
}
213206

214207
Flow visitStringNew(StringNew* curr) {

test/lit/ctor-eval/slow.wast

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
2+
;; RUN: wasm-ctor-eval %s --ctors=test --quiet -all -S -o - | filecheck %s
3+
4+
;; We will execute the arrays here when we try to precompute them. Each such
5+
;; computation should be done once, so that we preserve identity (which other
6+
;; tests handle). We should handle that without repeated work: once we compute a
7+
;; thing, we can reuse that computation. If we do not, this test will still
8+
;; pass, but it will take many minutes to run (exponential time), hopefully
9+
;; timing out the bots. With the optimization, it takes only ms.
10+
11+
(module
12+
(type $array (sub (shared (array (ref null $array)))))
13+
14+
;; CHECK: (type $0 (func (param f32)))
15+
16+
;; CHECK: (type $1 (func))
17+
18+
;; CHECK: (import "" "" (func $imported (type $0) (param f32)))
19+
(import "" "" (func $imported (param f32)))
20+
21+
(func $test (export "test")
22+
(local $local (ref $array))
23+
(local.set $local
24+
(array.new $array
25+
(array.new $array
26+
(array.new $array
27+
(array.new $array
28+
(array.new $array
29+
(array.new_default $array
30+
(i32.const 0)
31+
)
32+
(i32.const 100)
33+
)
34+
(i32.const 100)
35+
)
36+
(i32.const 100)
37+
)
38+
(i32.const 100)
39+
)
40+
(i32.const 100)
41+
)
42+
)
43+
(call $imported
44+
(f32.const 0)
45+
)
46+
)
47+
)
48+
;; CHECK: (export "test" (func $test_2))
49+
50+
;; CHECK: (func $test_2 (type $1)
51+
;; CHECK-NEXT: (call $imported
52+
;; CHECK-NEXT: (f32.const 0)
53+
;; CHECK-NEXT: )
54+
;; CHECK-NEXT: )

0 commit comments

Comments
 (0)