Skip to content

Commit 5ca16fe

Browse files
authored
[NFC] Avoid repeated work in Precompute of GC allocations (#7763)
When we precompute struct.new, we keep a cache of allocations, since the identity of each allocation must remain the same. The logic for managing that cache, however, first created a new instance and only then discarded it if a cached value existed — so we still visited and computed all the children each time, leading to exponential wasted work in the worst case. To fix that, check the cache first, before doing any computation. Fixes #7760
1 parent 2c9103e commit 5ca16fe

File tree

2 files changed

+71
-24
lines changed

2 files changed

+71
-24
lines changed

src/passes/Precompute.cpp

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,7 @@ class PrecomputingExpressionRunner
126126

127127
// TODO: Use immutability for values
128128
Flow visitStructNew(StructNew* curr) {
129-
auto flow = Super::visitStructNew(curr);
130-
if (flow.breaking()) {
131-
return flow;
132-
}
133-
return getHeapCreationFlow(flow, curr);
129+
return getGCAllocation(curr, [&]() { return Super::visitStructNew(curr); });
134130
}
135131
Flow visitStructSet(StructSet* curr) { return Flow(NONCONSTANT_FLOW); }
136132
Flow visitStructGet(StructGet* curr) {
@@ -167,18 +163,11 @@ class PrecomputingExpressionRunner
167163
return Super::visitStructGet(curr);
168164
}
169165
Flow visitArrayNew(ArrayNew* curr) {
170-
auto flow = Super::visitArrayNew(curr);
171-
if (flow.breaking()) {
172-
return flow;
173-
}
174-
return getHeapCreationFlow(flow, curr);
166+
return getGCAllocation(curr, [&]() { return Super::visitArrayNew(curr); });
175167
}
176168
Flow visitArrayNewFixed(ArrayNewFixed* curr) {
177-
auto flow = Super::visitArrayNewFixed(curr);
178-
if (flow.breaking()) {
179-
return flow;
180-
}
181-
return getHeapCreationFlow(flow, curr);
169+
return getGCAllocation(curr,
170+
[&]() { return Super::visitArrayNewFixed(curr); });
182171
}
183172
Flow visitArraySet(ArraySet* curr) { return Flow(NONCONSTANT_FLOW); }
184173
Flow visitArrayGet(ArrayGet* curr) {
@@ -197,18 +186,22 @@ class PrecomputingExpressionRunner
197186
Flow visitArrayCopy(ArrayCopy* curr) { return Flow(NONCONSTANT_FLOW); }
198187

199188
// Generates heap info for a heap-allocating expression.
200-
template<typename T> Flow getHeapCreationFlow(Flow flow, T* curr) {
189+
Flow getGCAllocation(Expression* curr, std::function<Flow()> visitFunc) {
201190
// We must return a literal that refers to the canonical location for this
202-
// source expression, so that each time we compute a specific struct.new
191+
// source expression, so that each time we compute a specific *.new then
203192
// we get the same identity.
204-
std::shared_ptr<GCData>& canonical = heapValues[curr];
205-
std::shared_ptr<GCData> newGCData = flow.getSingleValue().getGCData();
206-
if (!canonical) {
207-
canonical = std::make_shared<GCData>(*newGCData);
208-
} else {
209-
*canonical = *newGCData;
193+
auto iter = heapValues.find(curr);
194+
if (iter != heapValues.end()) {
195+
// Refer to the same canonical GCData that we already created.
196+
return Literal(iter->second, curr->type.getHeapType());
210197
}
211-
return Literal(canonical, curr->type.getHeapType());
198+
// Only call the visitor function here, so we do it once per allocation.
199+
auto flow = visitFunc();
200+
if (flow.breaking()) {
201+
return flow;
202+
}
203+
heapValues[curr] = flow.getSingleValue().getGCData();
204+
return flow;
212205
}
213206

214207
Flow visitStringNew(StringNew* curr) {

test/lit/ctor-eval/slow.wast

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
2+
;; RUN: wasm-ctor-eval %s --ctors=test --quiet -all -S -o - | filecheck %s
3+
4+
;; We will execute the arrays here when we try to precompute them. Each such
5+
;; computation should be done once, so that we preserve identity (which other
6+
;; tests handle). We should handle that without repeated work: once we compute a
7+
;; thing, we can reuse that computation. If we do not, this test will still
8+
;; pass, but it will take many minutes to run (exponential time), hopefully
9+
;; timing out the bots. With the optimization, it takes only ms.
10+
11+
(module
12+
(type $array (sub (shared (array (ref null $array)))))
13+
14+
;; CHECK: (type $0 (func (param f32)))
15+
16+
;; CHECK: (type $1 (func))
17+
18+
;; CHECK: (import "" "" (func $imported (type $0) (param f32)))
19+
(import "" "" (func $imported (param f32)))
20+
21+
(func $test (export "test")
22+
(local $local (ref $array))
23+
(local.set $local
24+
(array.new $array
25+
(array.new $array
26+
(array.new $array
27+
(array.new $array
28+
(array.new $array
29+
(array.new_default $array
30+
(i32.const 0)
31+
)
32+
(i32.const 100)
33+
)
34+
(i32.const 100)
35+
)
36+
(i32.const 100)
37+
)
38+
(i32.const 100)
39+
)
40+
(i32.const 100)
41+
)
42+
)
43+
(call $imported
44+
(f32.const 0)
45+
)
46+
)
47+
)
48+
;; CHECK: (export "test" (func $test_2))
49+
50+
;; CHECK: (func $test_2 (type $1)
51+
;; CHECK-NEXT: (call $imported
52+
;; CHECK-NEXT: (f32.const 0)
53+
;; CHECK-NEXT: )
54+
;; CHECK-NEXT: )

0 commit comments

Comments
 (0)