
Commit 93399c3

[Custom Descriptors] Optimize ref.cast_desc (#7718)
Update the cast optimizations in OptimizeInstructions to correctly optimize ref.cast_desc. These optimizations differ from those for normal casts in two ways: they must be careful not to skip any side effects in the descriptor operand, and type information alone is not sufficient to prove that a descriptor cast will succeed.
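
For concreteness, a minimal hypothetical sketch (the names $Sub, $val, and $get-desc are invented for illustration, not taken from this commit). A ref.cast_desc succeeds only if the value's runtime descriptor matches the descriptor operand, so a statically type-correct cast can still fail, and the descriptor operand may carry side effects that must not be skipped:

(ref.cast_desc (ref $Sub)
  (local.get $val)    ;; value being cast: its static type alone cannot prove success
  (call $get-desc))   ;; descriptor operand: may have side effects, must be preserved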
1 parent c017f8d commit 93399c3

6 files changed: +1206 −203 lines

src/passes/OptimizeInstructions.cpp

Lines changed: 147 additions & 80 deletions
@@ -48,6 +48,7 @@
 #include <wasm.h>
 
 #include "call-utils.h"
+#include "support/utilities.h"
 
 // TODO: Use the new sign-extension opcodes where appropriate. This needs to be
 // conditionalized on the availability of atomics.
@@ -1613,10 +1614,12 @@ struct OptimizeInstructions
   }
 
   // Appends a result after the dropped children, if we need them.
-  Expression* getDroppedChildrenAndAppend(Expression* curr,
-                                          Expression* result) {
+  Expression*
+  getDroppedChildrenAndAppend(Expression* curr,
+                              Expression* result,
+                              DropMode mode = DropMode::NoticeParentEffects) {
     return wasm::getDroppedChildrenAndAppend(
-      curr, *getModule(), getPassOptions(), result);
+      curr, *getModule(), getPassOptions(), result, mode);
   }
 
   Expression* getDroppedChildrenAndAppend(Expression* curr, Literal value) {
@@ -2269,54 +2272,40 @@ struct OptimizeInstructions
     // TODO: more opts like StructCmpxchg
   }
 
-  void visitRefCast(RefCast* curr) {
-    // Note we must check the ref's type here and not our own, since we only
-    // refinalize at the end, which means our type may not have been updated yet
-    // after a change in the child.
-    // TODO: we could update unreachability up the stack perhaps, or just move
-    // all patterns that can add unreachability to a pass that does so
-    // already like vacuum or dce.
-    if (curr->ref->type == Type::unreachable) {
-      return;
-    }
-
-    if (curr->type.isNonNullable() && trapOnNull(curr, curr->ref)) {
-      return;
-    }
-
+  bool optimizeKnownCastResult(RefCast* curr, Type refType) {
     Builder builder(*getModule());
-
-    // Look at all the fallthrough values to get the most precise possible type
-    // of the value we are casting. local.tee, br_if, and blocks can all "lose"
-    // type information, so looking at all the fallthrough values can give us a
-    // more precise type than is stored in the IR.
-    Type refType = getFallthroughType(curr->ref);
-
-    // As a first step, we can tighten up the cast type to be the greatest lower
-    // bound of the original cast type and the type we know the cast value to
-    // have. We know any less specific type either cannot appear or will fail
-    // the cast anyways.
-    auto glb = Type::getGreatestLowerBound(curr->type, refType);
-    if (glb != Type::unreachable && glb != curr->type) {
-      curr->type = glb;
-      refinalize = true;
-      // Call replaceCurrent() to make us re-optimize this node, as we may have
-      // just unlocked further opportunities. (We could just continue down to
-      // the rest, but we'd need to do more work to make sure all the local
-      // state in this function is in sync with this change; it's easier to
-      // just do another clean pass on this node.)
-      replaceCurrent(curr);
-      return;
-    }
-
     // Given what we know about the type of the value, determine what we know
     // about the results of the cast and optimize accordingly.
     switch (GCTypeUtils::evaluateCastCheck(refType, curr->type)) {
       case GCTypeUtils::Unknown:
         // The cast may or may not succeed, so we cannot optimize.
-        break;
+        return false;
       case GCTypeUtils::Success:
      case GCTypeUtils::SuccessOnlyIfNonNull: {
+        // Knowing the types match is not sufficient to know a descriptor cast
+        // succeeds. We must also know that the descriptor values match.
+        // However, if traps never happen, we can assume the descriptors will
+        // match and optimize anyway.
+        // TODO: Maybe we can determine that the descriptor values match in
+        // some cases.
+        if (curr->desc && !getPassOptions().trapsNeverHappen) {
+          // As a special case, we can still optimize if we know the value is
+          // null, because then we never get around to comparing the
+          // descriptors. We still need to preserve the trap on null
+          // descriptors, though.
+          if (refType.isNull()) {
+            assert(curr->type.isNullable());
+            if (curr->desc->type.isNullable()) {
+              curr->desc = builder.makeRefAs(RefAsNonNull, curr->desc);
+            }
+            replaceCurrent(getDroppedChildrenAndAppend(
+              curr,
+              builder.makeRefNull(curr->type.getHeapType()),
+              DropMode::IgnoreParentEffects));
+            return true;
+          }
+          return false;
+        }
         // We know the cast will succeed, or at most requires a null check, so
         // we can try to optimize it out. Find the best-typed fallthrough value
         // to propagate.
@@ -2341,11 +2330,21 @@
         // exactness.
         if (ref == curr->ref && !needsExactCast) {
           if (needsNullCheck) {
-            replaceCurrent(builder.makeRefAs(RefAsNonNull, curr->ref));
+            curr->ref = builder.makeRefAs(RefAsNonNull, curr->ref);
+          }
+          if (curr->desc) {
+            // We must move the ref past the descriptor operand.
+            auto* block =
+              ChildLocalizer(
+                curr, getFunction(), *getModule(), getPassOptions())
+                .getChildrenReplacement();
+            block->list.push_back(curr->ref);
+            block->type = curr->ref->type;
+            replaceCurrent(block);
           } else {
-            replaceCurrent(ref);
+            replaceCurrent(curr->ref);
           }
-          return;
+          return true;
         }
         // Otherwise we can't just remove the cast and replace it with `ref`
         // because the intermediate expressions might have had side effects or
@@ -2370,17 +2369,19 @@
           // Unreachable, so we'll not hit this assertion.
           assert(curr->type.isNullable());
           auto nullType = curr->type.getHeapType().getBottom();
-          replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
-                                              builder.makeRefNull(nullType)));
-          return;
+          replaceCurrent(
+            getDroppedChildrenAndAppend(curr,
+                                        builder.makeRefNull(nullType),
+                                        DropMode::IgnoreParentEffects));
+          return true;
         }
 
         // At this point we know the cast will succeed as long as nullability
         // works out, but we still need the cast to recover the exactness that
         // is not present in the value's static type, so there's nothing we
         // can do.
         if (needsExactCast) {
-          return;
+          return false;
         }
 
         // We need to use a tee to return the value since we can't materialize
@@ -2391,9 +2392,9 @@
         if (needsNullCheck) {
           get = builder.makeRefAs(RefAsNonNull, get);
         }
-        replaceCurrent(
-          builder.makeSequence(builder.makeDrop(curr->ref), get));
-        return;
+        replaceCurrent(getDroppedChildrenAndAppend(
+          curr, get, DropMode::IgnoreParentEffects));
+        return true;
       }
       // If we get here, then we know that the heap type of the cast input is
       // more refined than the heap type of the best available fallthrough
@@ -2417,48 +2418,114 @@
         // The cast either returns null or traps. In trapsNeverHappen mode
         // we know the result, since by assumption it will not trap.
         if (getPassOptions().trapsNeverHappen) {
-          replaceCurrent(
-            builder.makeBlock({builder.makeDrop(curr->ref),
-                               builder.makeRefNull(curr->type.getHeapType())},
-                              curr->type));
-          return;
+          replaceCurrent(getDroppedChildrenAndAppend(
+            curr,
+            builder.makeRefNull(curr->type.getHeapType()),
+            DropMode::IgnoreParentEffects));
+          return true;
         }
-        // Otherwise, we should have already refined the cast type to cast
-        // directly to null. We do not further refine the cast type to exact
-        // null because the extra precision is not useful and doing so would
-        // increase the size of the instruction encoding.
-        assert(curr->type.isNull());
-        break;
+        return false;
       }
       case GCTypeUtils::Unreachable:
       case GCTypeUtils::Failure:
        // This cast cannot succeed, or it cannot even be reached, so we can
-        // trap. Make sure to emit a block with the same type as us; leave
-        // updating types for other passes.
-        replaceCurrent(builder.makeBlock(
-          {builder.makeDrop(curr->ref), builder.makeUnreachable()},
-          curr->type));
-        return;
+        // trap.
+        replaceCurrent(getDroppedChildrenAndAppend(
+          curr, builder.makeUnreachable(), DropMode::IgnoreParentEffects));
+        return true;
+    }
+    WASM_UNREACHABLE("unexpected result");
+  }
+
+  void visitRefCast(RefCast* curr) {
+    // Note we must check the ref's type here and not our own, since we only
+    // refinalize at the end, which means our type may not have been updated yet
+    // after a change in the child.
+    // TODO: we could update unreachability up the stack perhaps, or just move
+    // all patterns that can add unreachability to a pass that does so
+    // already like vacuum or dce.
+    if (curr->ref->type == Type::unreachable ||
+        (curr->desc && curr->desc->type == Type::unreachable)) {
+      return;
+    }
+
+    if (curr->type.isNonNullable() && trapOnNull(curr, curr->ref)) {
+      return;
+    }
+
+    if (curr->desc && trapOnNull(curr, curr->desc)) {
+      return;
+    }
+
+    Builder builder(*getModule());
+
+    // Look at all the fallthrough values to get the most precise possible type
+    // of the value we are casting.
+    Type refType = getFallthroughType(curr->ref);
+
+    // As a first step, we can tighten up the cast type. For normal casts we can
+    // use the greatest lower bound of the original cast type and the type we
+    // know the cast value to have. For descriptor casts we cannot change the
+    // target heap type because it is controlled by the descriptor operand, but
+    // we can improve nullability.
+    Type improvedType = curr->type;
+    if (curr->desc) {
+      if (curr->type.isNullable() && refType.isNonNullable()) {
+        improvedType = curr->type.with(NonNullable);
+      }
+    } else {
+      improvedType = Type::getGreatestLowerBound(curr->type, refType);
+    }
+    if (improvedType != Type::unreachable && improvedType != curr->type) {
+      curr->type = improvedType;
+      refinalize = true;
+      // Call replaceCurrent() to make us re-optimize this node, as we may
+      // have just unlocked further opportunities. (We could just continue
+      // down to the rest, but we'd need to do more work to make sure all the
+      // local state in this function is in sync with this change; it's
+      // easier to just do another clean pass on this node.)
+      replaceCurrent(curr);
+      return;
+    }
+
+    // Try to optimize based on what we know statically about the result of the
+    // cast.
+    if (optimizeKnownCastResult(curr, refType)) {
+      return;
     }
 
     // If we got past the optimizations above, it must be the case that we
-    // cannot tell from the static types whether the cast will succeed or not,
-    // which means we must have a proper down cast.
-    assert(Type::isSubType(curr->type, curr->ref->type));
+    // cannot tell statically whether the cast will succeed or not.
 
     if (auto* child = curr->ref->dynCast<RefCast>()) {
-      // Repeated casts can be removed, leaving just the most demanding of them.
-      // Since we know the current cast is a downcast, it must be strictly
-      // stronger than its child cast and we can remove the child cast entirely.
-      curr->ref = child->ref;
-      return;
+      // If the current cast is at least as strong as the child cast, then we
+      // can remove the child cast. If the child cast is a descriptor cast and
+      // traps are allowed, then we cannot remove the potentially-trapping
+      // child, though.
+      bool notWeaker = Type::isSubType(curr->type, child->type);
+      bool safe = !child->desc || getPassOptions().trapsNeverHappen;
+      if (notWeaker && safe) {
+        if (child->desc) {
+          // Reorder the child's reference past its dropped descriptor if
          // necessary.
+          auto* block =
+            ChildLocalizer(child, getFunction(), *getModule(), getPassOptions())
+              .getChildrenReplacement();
+          block->list.push_back(child->ref);
+          block->type = child->ref->type;
+          curr->ref = block;
+        } else {
+          curr->ref = child->ref;
+        }
+        return;
+      }
    }
 
     // Similarly, ref.cast can be combined with ref.as_non_null.
     //
-    // (ref.cast null (ref.as_non_null ..))
+    // (ref.cast (ref null T) (ref.as_non_null ..))
     // =>
-    // (ref.cast ..)
+    // (ref.cast (ref T) ..)
     //
     if (auto* as = curr->ref->dynCast<RefAs>(); as && as->op == RefAsNonNull) {
       curr->ref = as->value;
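
To illustrate the new null special case above, here is a hedged before/after sketch (hypothetical type and local names, reconstructed from the logic in the patch rather than taken from its tests). When the cast value is known to be null, the result is null without ever comparing descriptors, but the trap on a null descriptor must be preserved:

;; Before: the value is known to be null and the descriptor is nullable.
(ref.cast_desc (ref null $Struct)
  (ref.null none)
  (local.get $desc))

;; After: the effect-free value child is dropped away entirely, the
;; descriptor keeps its null check, and the known null result is
;; materialized directly.
(block (result (ref null $Struct))
  (drop (ref.as_non_null (local.get $desc)))
  (ref.null none))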

test/lit/passes/optimize-instructions-call_ref.wast

Lines changed: 2 additions & 7 deletions
@@ -27,7 +27,7 @@
   (elem $elem-1 (table $table-1) (i32.const 0) (ref null $i32_i32_=>_none)
     (ref.func $foo))
 
-  ;; CHECK: (elem declare func $bar $fallthrough-no-params $fallthrough-non-nullable $return-nothing)
+  ;; CHECK: (elem declare func $bar $fallthrough-no-params $fallthrough-non-nullable)
 
   ;; CHECK: (func $foo (type $i32_i32_=>_none) (param $0 i32) (param $1 i32)
   ;; CHECK-NEXT: (unreachable)
@@ -163,12 +163,7 @@
   ;; CHECK: (func $fallthrough-bad-type (type $none_=>_i32) (result i32)
   ;; CHECK-NEXT: (block ;; (replaces unreachable CallRef we can't emit)
   ;; CHECK-NEXT: (drop
-  ;; CHECK-NEXT: (block (result (ref nofunc))
-  ;; CHECK-NEXT: (drop
-  ;; CHECK-NEXT: (ref.func $return-nothing)
-  ;; CHECK-NEXT: )
-  ;; CHECK-NEXT: (unreachable)
-  ;; CHECK-NEXT: )
+  ;; CHECK-NEXT: (unreachable)
   ;; CHECK-NEXT: )
   ;; CHECK-NEXT: (unreachable)
   ;; CHECK-NEXT: )
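
The tightened CHECK lines show the practical effect of the reworked child-dropping in this commit: a dropped child with no side effects, like the ref.func here, no longer survives inside a wrapper block (the call site responsible lies in a part of the change not shown on this page). Roughly, as reconstructed from the CHECK lines above:

;; Before: the dropped ref.func survived inside a wrapper block.
(drop
  (block (result (ref nofunc))
    (drop (ref.func $return-nothing))
    (unreachable)))

;; After: the effect-free child is removed outright.
(drop
  (unreachable))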
