Skip to content

Commit d93e394

Browse files
committed
Merge branch 'validate-type-def-features' into type-uniquer
2 parents 739d181 + f7131c6 commit d93e394

File tree

10 files changed

+193
-46
lines changed

10 files changed

+193
-46
lines changed

src/wasm-type-shape.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#include <functional>
2121
#include <list>
22-
#include <unordered_set>
22+
#include <unordered_map>
2323
#include <vector>
2424

2525
#include "wasm-features.h"
@@ -162,17 +162,22 @@ struct BrandTypeIterator {
162162
// with an extra brand type at the end of the group that differentiates it from
163163
// previous group.
164164
struct UniqueRecGroups {
165-
std::list<std::vector<HeapType>> groups;
166-
std::unordered_set<RecGroupShape> shapes;
165+
using Groups = std::list<std::vector<HeapType>>;
166+
Groups groups;
167+
std::unordered_map<RecGroupShape, Groups::iterator> shapes;
167168

168169
FeatureSet features;
169170

170171
UniqueRecGroups(FeatureSet features) : features(features) {}
171172

172173
// Insert a rec group. If it is already unique, return the original types.
173-
// Otherwise rebuild the group make it unique and return the rebuilt types,
174+
// Otherwise rebuild the group to make it unique and return the rebuilt types,
174175
// including the brand.
175176
const std::vector<HeapType>& insert(std::vector<HeapType> group);
177+
178+
// If the group is unique, insert it and return the types. Otherwise, return
179+
// the types that already have this shape.
180+
const std::vector<HeapType>& insertOrGet(std::vector<HeapType> group);
176181
};
177182

178183
} // namespace wasm

src/wasm-type.h

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,11 @@ class HeapType {
264264
// Returns the feature set required to use this type.
265265
FeatureSet getFeatures() const;
266266

267+
// We support more precise types in the IR than the enabled feature set would
268+
// suggest. Get the generalized version of the type that will be written by
269+
// the binary writer given the feature set.
270+
inline HeapType asWrittenGivenFeatures(FeatureSet feats) const;
271+
267272
// Helper allowing the value of `print(...)` to be sent to an ostream. Stores
268273
// a `TypeID` because `Type` is incomplete at this point and using a reference
269274
// makes it less convenient to use.
@@ -283,6 +288,16 @@ class HeapType {
283288
std::string toString() const;
284289
};
285290

291+
HeapType HeapType::asWrittenGivenFeatures(FeatureSet feats) const {
292+
// Without GC, only top types like func and extern are supported. The
293+
// exception is string, since stringref can be enabled without GC and we still
294+
// expect to write stringref types in that case.
295+
if (!feats.hasGC() && *this != HeapType::string) {
296+
return getTop();
297+
}
298+
return *this;
299+
}
300+
286301
class Type {
287302
// The `id` uniquely represents each type, so type equality is just a
288303
// comparison of the ids. The basic types are packed at the bottom of the
@@ -419,7 +434,7 @@ class Type {
419434
return isRef() && getHeapType().isContinuation();
420435
}
421436
bool isDefaultable() const;
422-
bool isCastable();
437+
bool isCastable() const;
423438

424439
// TODO: Allow this only for reference types.
425440
Nullability getNullability() const {
@@ -450,6 +465,11 @@ class Type {
450465
return !isExact() || feats.hasCustomDescriptors() ? *this : with(Inexact);
451466
}
452467

468+
// We support more precise types in the IR than the enabled feature set would
469+
// suggest. Get the generalized version of the type that will be written by
470+
// the binary writer given the feature set.
471+
inline Type asWrittenGivenFeatures(FeatureSet feats) const;
472+
453473
private:
454474
template<bool (Type::*pred)() const> bool hasPredicate() {
455475
for (const auto& type : *this) {
@@ -578,6 +598,20 @@ class Type {
578598
const Type& operator[](size_t i) const { return *Iterator{{this, i}}; }
579599
};
580600

601+
Type Type::asWrittenGivenFeatures(FeatureSet feats) const {
602+
if (!isRef()) {
603+
return *this;
604+
}
605+
auto type = with(getHeapType().asWrittenGivenFeatures(feats));
606+
if (!feats.hasGC()) {
607+
type = type.with(Nullable);
608+
}
609+
if (!feats.hasCustomDescriptors()) {
610+
type = type.with(Inexact);
611+
}
612+
return type;
613+
}
614+
581615
namespace HeapTypes {
582616

583617
constexpr HeapType ext = HeapType::ext;
@@ -878,6 +912,9 @@ struct TypeBuilder {
878912
InvalidUnsharedDescribes,
879913
// The custom descriptors feature is missing.
880914
RequiresCustomDescriptors,
915+
// Two rec groups with different shapes would have the same shapes after
916+
// the binary writer generalizes refined types that use disabled features.
917+
RecGroupCollision,
881918
};
882919

883920
struct Error {

src/wasm/wasm-binary.cpp

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1798,23 +1798,8 @@ void WasmBinaryWriter::writeInlineBuffer(const char* data, size_t size) {
17981798
}
17991799

18001800
void WasmBinaryWriter::writeType(Type type) {
1801+
type = type.asWrittenGivenFeatures(wasm->features);
18011802
if (type.isRef()) {
1802-
// The only reference types allowed without GC are funcref, externref, and
1803-
// exnref. We internally use more refined versions of those types, but we
1804-
// cannot emit those without GC.
1805-
if (!wasm->features.hasGC()) {
1806-
auto ht = type.getHeapType();
1807-
if (ht.isMaybeShared(HeapType::string)) {
1808-
// Do not overgeneralize stringref to anyref. We have tests that when a
1809-
// stringref is expected, we actually get a stringref. If we see a
1810-
// string, the stringref feature must be enabled.
1811-
type = Type(HeapTypes::string.getBasic(ht.getShared()), Nullable);
1812-
} else {
1813-
// Only the top type (func, extern, exn) is available, and only the
1814-
// nullable version.
1815-
type = Type(type.getHeapType().getTop(), Nullable);
1816-
}
1817-
}
18181803
auto heapType = type.getHeapType();
18191804
if (type.isNullable() && heapType.isBasic() && !heapType.isShared()) {
18201805
switch (heapType.getBasic(Unshared)) {
@@ -1902,15 +1887,10 @@ void WasmBinaryWriter::writeType(Type type) {
19021887
}
19031888

19041889
void WasmBinaryWriter::writeHeapType(HeapType type, Exactness exactness) {
1905-
// ref.null always has a bottom heap type in Binaryen IR, but those types are
1906-
// only actually valid with GC. Otherwise, emit the corresponding valid top
1907-
// types instead.
1890+
type = type.asWrittenGivenFeatures(wasm->features);
19081891
if (!wasm->features.hasCustomDescriptors()) {
19091892
exactness = Inexact;
19101893
}
1911-
if (!wasm->features.hasGC()) {
1912-
type = type.getTop();
1913-
}
19141894
assert(!type.isBasic() || exactness == Inexact);
19151895
if (exactness == Exact) {
19161896
o << uint8_t(BinaryConsts::EncodedType::Exact);

src/wasm/wasm-type-shape.cpp

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ template<typename CompareTypes> struct RecGroupComparator {
136136
}
137137

138138
Comparison compare(Type a, Type b) {
139+
// Compare types as they will eventually be written out, not as they are in
140+
// the IR.
141+
a = a.asWrittenGivenFeatures(features);
142+
b = b.asWrittenGivenFeatures(features);
139143
if (a.isBasic() != b.isBasic()) {
140144
return b.isBasic() < a.isBasic() ? LT : GT;
141145
}
@@ -152,14 +156,12 @@ template<typename CompareTypes> struct RecGroupComparator {
152156
return compare(a.getTuple(), b.getTuple());
153157
}
154158
assert(a.isRef() && b.isRef());
155-
// Only consider exactness if custom descriptors are enabled. Otherwise, it
156-
// will be erased when the types are written, so we ignore it here, too.
157-
if (features.hasCustomDescriptors() && a.isExact() != b.isExact()) {
158-
return a.isExact() < b.isExact() ? LT : GT;
159-
}
160159
if (a.isNullable() != b.isNullable()) {
161160
return a.isNullable() < b.isNullable() ? LT : GT;
162161
}
162+
if (a.isExact() != b.isExact()) {
163+
return a.isExact() < b.isExact() ? LT : GT;
164+
}
163165
return compare(a.getHeapType(), b.getHeapType());
164166
}
165167

@@ -294,6 +296,9 @@ struct RecGroupHasher {
294296
}
295297

296298
size_t hash(Type type) {
299+
// Hash types as they will eventually be written out, not as they are in the
300+
// IR.
301+
type = type.asWrittenGivenFeatures(features);
297302
size_t digest = wasm::hash(type.isBasic());
298303
if (type.isBasic()) {
299304
wasm::rehash(digest, type.getBasic());
@@ -305,10 +310,8 @@ struct RecGroupHasher {
305310
return digest;
306311
}
307312
assert(type.isRef());
308-
if (features.hasCustomDescriptors()) {
309-
wasm::rehash(digest, type.isExact());
310-
}
311313
wasm::rehash(digest, type.isNullable());
314+
wasm::rehash(digest, type.isExact());
312315
hash_combine(digest, hash(type.getHeapType()));
313316
return digest;
314317
}
@@ -372,15 +375,16 @@ bool ComparableRecGroupShape::operator>(const RecGroupShape& other) const {
372375

373376
const std::vector<HeapType>&
374377
UniqueRecGroups::insert(std::vector<HeapType> types) {
375-
auto& group = *groups.emplace(groups.end(), std::move(types));
376-
if (shapes.emplace(RecGroupShape(group, features)).second) {
378+
auto groupIt = groups.emplace(groups.end(), std::move(types));
379+
auto& group = *groupIt;
380+
if (shapes.emplace(RecGroupShape(group, features), groupIt).second) {
377381
// The types are already unique.
378382
return group;
379383
}
380384
// There is a conflict. Find a brand that makes the group unique.
381385
BrandTypeIterator brand;
382386
group.push_back(*brand);
383-
while (!shapes.emplace(RecGroupShape(group, features)).second) {
387+
while (!shapes.emplace(RecGroupShape(group, features), groupIt).second) {
384388
group.back() = *++brand;
385389
}
386390
// Rebuild the rec group to include the brand. Map the old types (excluding
@@ -405,6 +409,17 @@ UniqueRecGroups::insert(std::vector<HeapType> types) {
405409
return group;
406410
}
407411

412+
const std::vector<HeapType>&
413+
UniqueRecGroups::insertOrGet(std::vector<HeapType> types) {
414+
auto groupIt = groups.emplace(groups.end(), std::move(types));
415+
auto [it, inserted] =
416+
shapes.emplace(RecGroupShape(*groupIt, features), groupIt);
417+
if (!inserted) {
418+
groups.erase(groupIt);
419+
}
420+
return *it->second;
421+
}
422+
408423
} // namespace wasm
409424

410425
namespace std {

src/wasm/wasm-type.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,15 @@
1515
*/
1616

1717
#include <algorithm>
18-
#include <array>
1918
#include <cassert>
20-
#include <map>
21-
#include <shared_mutex>
2219
#include <sstream>
2320
#include <unordered_map>
2421
#include <unordered_set>
25-
#include <variant>
2622

27-
#include "compiler-support.h"
2823
#include "support/hash.h"
29-
#include "support/insert_ordered.h"
3024
#include "wasm-features.h"
3125
#include "wasm-type-printing.h"
26+
#include "wasm-type-shape.h"
3227
#include "wasm-type.h"
3328

3429
#define TRACE_CANONICALIZATION 0
@@ -623,7 +618,7 @@ bool Type::isDefaultable() const {
623618
return isConcrete() && !isNonNullable();
624619
}
625620

626-
bool Type::isCastable() { return isRef() && getHeapType().isCastable(); }
621+
bool Type::isCastable() const { return isRef() && getHeapType().isCastable(); }
627622

628623
unsigned Type::getByteSize() const {
629624
// TODO: alignment?
@@ -1468,6 +1463,9 @@ std::ostream& operator<<(std::ostream& os, TypeBuilder::ErrorReason reason) {
14681463
return os << "Heap type describes an invalid unshared type";
14691464
case TypeBuilder::ErrorReason::RequiresCustomDescriptors:
14701465
return os << "custom descriptors required but not enabled";
1466+
case TypeBuilder::ErrorReason::RecGroupCollision:
1467+
return os
1468+
<< "distinct rec groups would be identical after binary writing";
14711469
}
14721470
WASM_UNREACHABLE("Unexpected error reason");
14731471
}
@@ -2260,7 +2258,19 @@ struct TypeBuilder::Impl {
22602258
// We will validate features as we go.
22612259
FeatureSet features;
22622260

2263-
Impl(size_t n, FeatureSet features) : entries(n), features(features) {}
2261+
// We allow some types to be used even if their corresponding features are not
2262+
// enabled. For example, we allow exact references without custom descriptors
2263+
// and typed function references without GC. Allowing these more-refined types
2264+
// in the IR helps the optimizer be more powerful. However, these disallowed
2265+
// refinements will be erased when a module is written out as a binary, which
2266+
// could cause distinct rec groups to become identical and potentially change
2267+
// the results of casts, etc. To avoid this, we must disallow building rec
2268+
// groups that vary only in some refinement that will be removed in binary
2269+
// writing. Track this with a UniqueRecGroups set, which is feature-aware.
2270+
UniqueRecGroups unique;
2271+
2272+
Impl(size_t n, FeatureSet features)
2273+
: entries(n), features(features), unique(features) {}
22642274
};
22652275

22662276
TypeBuilder::TypeBuilder(size_t n, FeatureSet features) {
@@ -2692,6 +2702,17 @@ TypeBuilder::BuildResult TypeBuilder::build() {
26922702
assert(built->size() == groupSize);
26932703
results.insert(results.end(), built->begin(), built->end());
26942704

2705+
// If we are building multiple groups, make sure there will be no conflicts
2706+
// after disallowed features are taken into account.
2707+
if (groupSize > 0 && groupSize != entryCount) {
2708+
auto expectedFirst = (*built)[0];
2709+
auto& types = impl->unique.insertOrGet(*built);
2710+
if (types[0] != expectedFirst) {
2711+
return {TypeBuilder::Error{
2712+
groupStart, TypeBuilder::ErrorReason::RecGroupCollision}};
2713+
}
2714+
}
2715+
26952716
groupStart += groupSize;
26962717
}
26972718

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
2+
;; RUN: wasm-opt %s -all --disable-custom-descriptors --roundtrip -S -o - | filecheck %s
3+
4+
(module
5+
;; CHECK: (type $foo (struct))
6+
(type $foo (struct))
7+
;; CHECK: (func $trivial-exact-cast (type $1) (param $x i32)
8+
;; CHECK-NEXT: (drop
9+
;; CHECK-NEXT: (ref.cast (ref $foo)
10+
;; CHECK-NEXT: (if (result (ref $foo))
11+
;; CHECK-NEXT: (local.get $x)
12+
;; CHECK-NEXT: (then
13+
;; CHECK-NEXT: (struct.new_default $foo)
14+
;; CHECK-NEXT: )
15+
;; CHECK-NEXT: (else
16+
;; CHECK-NEXT: (struct.new_default $foo)
17+
;; CHECK-NEXT: )
18+
;; CHECK-NEXT: )
19+
;; CHECK-NEXT: )
20+
;; CHECK-NEXT: )
21+
;; CHECK-NEXT: )
22+
(func $trivial-exact-cast (param $x i32)
23+
(drop
24+
;; We allow trivial exact casts even without custom descriptors enabled,
25+
;; so this is valid. However, when we round trip, the exactness in the if
26+
;; result will be erased. If we fail to erase the exactness in the cast,
27+
;; we will be emitting a binary that uses a disabled feature, and also we
28+
;; will fail validation when we read the module back in because the cast
29+
;; will no longer be trivial.
30+
(ref.cast (ref (exact $foo))
31+
(if (result (ref (exact $foo)))
32+
(local.get $x)
33+
(then (struct.new $foo))
34+
(else (struct.new $foo))
35+
)
36+
)
37+
)
38+
)
39+
)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
2+
;; RUN: wasm-opt %s -all --disable-gc -S -o - | filecheck %s
3+
4+
;; No collision - we should not write a stringref as an externref.
5+
(module
6+
;; CHECK: (type $A (func (param externref)))
7+
(type $A (func (param externref)))
8+
;; CHECK: (type $B (func (param stringref)))
9+
(type $B (func (param stringref)))
10+
11+
;; CHECK: (func $a (param $0 externref)
12+
;; CHECK-NEXT: (nop)
13+
;; CHECK-NEXT: )
14+
(func $a (type $A)
15+
(nop)
16+
)
17+
18+
;; CHECK: (func $b (param $0 stringref)
19+
;; CHECK-NEXT: (nop)
20+
;; CHECK-NEXT: )
21+
(func $b (type $B)
22+
(nop)
23+
)
24+
)

0 commit comments

Comments
 (0)