diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index e2b728fc10b..6f3514275b4 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -79,6 +79,8 @@ struct GlobalStructInference : public Pass { // optimizable it will have an entry here, and not if not. std::unordered_map<HeapType, std::vector<Name>> typeGlobals; + std::unique_ptr<SubTypes> subTypes; + void run(Module* module) override { if (!module->features.hasGC()) { return; } @@ -208,6 +210,12 @@ struct GlobalStructInference : public Pass { return; } + // When CD is enabled, we can optimize to ref.cast_desc, depending on the + // presence of subtypes. + if (module->features.hasCustomDescriptors()) { + subTypes = std::make_unique<SubTypes>(*module); + } + // The above loop on typeGlobalsCopy is on an unsorted data structure, and // that can lead to nondeterminism in typeGlobals. Sort the vectors there to // ensure determinism. @@ -528,6 +536,52 @@ struct GlobalStructInference : public Pass { right)); } + void visitRefCast(RefCast* curr) { + // When we see (ref.cast $T), and the type has a descriptor, and that + // descriptor only has a single global, then we can do (ref.cast_desc) + // using the descriptor. Descriptor casts are usually more efficient + // than normal ones (and even more so if we get lucky and are in a loop, + // where the global.get of the descriptor can be hoisted). + // TODO: only do this when shrinkLevel == 0? + + // Check if we have a descriptor. + auto type = curr->type; + if (type == Type::unreachable) { + return; + } + auto heapType = type.getHeapType(); + auto desc = heapType.getDescriptorType(); + if (!desc) { + return; + } + + // Check if the type has no (relevant) subtypes, as a ref.cast_desc will + // find precisely that type and nothing else. + if (!type.isExact() && + !parent.subTypes->getStrictSubTypes(heapType).empty()) { + return; + } + + // Check if we have a single global for the descriptor. 
+ auto iter = parent.typeGlobals.find(*desc); + if (iter == parent.typeGlobals.end()) { + return; + } + const auto& globals = iter->second; + if (globals.size() != 1) { + return; + } + + // We can optimize! + auto global = globals[0]; + auto& wasm = *getModule(); + Builder builder(wasm); + auto* getGlobal = + builder.makeGlobalGet(global, wasm.getGlobal(global)->type); + auto* castDesc = builder.makeRefCast(curr->ref, getGlobal, curr->type); + replaceCurrent(castDesc); + } + void visitFunction(Function* func) { if (refinalize) { ReFinalize().walkFunctionInModule(func, getModule()); diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 0ca1806b568..c109bd5ca5e 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -186,3 +186,322 @@ ) ) +;; Two types with descriptors and subtyping between them. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub $A (descriptor $B.desc (struct)))) + (type $B (sub $A (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; 
CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The second cast here is optimizable: it can only be to a single type with + ;; no subtypes, so we can use ref.cast_desc. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) + + ;; CHECK: (func $test-exact (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref (exact $A)) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-exact (param $any anyref) + ;; When using exact casts, we can optimize both. TODO: atm we do not + ;; optimize $A, as we propagate on |typeGlobals|. + (drop + (ref.cast (ref (exact $A)) + (local.get $any) + ) + ) + (drop + (ref.cast (ref (exact $B)) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A and $B. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We still cannot optimize $A: while $A has no subtypes, the descriptor + ;; $A.desc has a subtype. We could optimize this TODO + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A.desc and $B.desc. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub (describes $B (struct)))) + (type $B.desc (sub (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We can fully optimize these two independent cases. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; Zero descriptor instances in globals. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could make this trap + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + +;; Two descriptor instances in globals. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $A.desc2 (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc2 (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block ;; (replaces unreachable RefCast we can't emit) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could with a select + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ;; We do not error on unreachable casts. 
+ (drop + (ref.cast (ref $A) + (unreachable) + ) + ) + ) +) + +(module + ;; CHECK: (type $0 (func (param anyref))) + + ;; CHECK: (type $A (sub (struct))) + (type $A (sub (struct))) + + ;; CHECK: (func $test (type $0) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not handle casts to types without descriptors. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + +;; Nullable cast. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref null $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The cast is nullable, which we can still optimize: null will succeed as + ;; expected. + (drop + (ref.cast (ref null $A) + (local.get $any) + ) + ) + ) +) +