Skip to content

Commit 97178d0

Browse files
authored
TypeSSA: Handle collisions by adding a hash to ensure a fresh rec group (#5724)
Fixes #5720
1 parent b7b1d0d commit 97178d0

File tree

3 files changed

+133
-22
lines changed

3 files changed

+133
-22
lines changed

src/passes/TypeSSA.cpp

Lines changed: 96 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,104 @@
5252
#include "ir/names.h"
5353
#include "ir/possible-constant.h"
5454
#include "pass.h"
55+
#include "support/hash.h"
5556
#include "wasm-builder.h"
5657
#include "wasm.h"
5758

5859
namespace wasm {
5960

6061
namespace {
6162

63+
// Given some TypeBuilder items that we want to build new types with, this
64+
// function builds the types in a new rec group.
65+
//
66+
// This is almost the same as just calling build(), but there is a risk of a
67+
// collision with an existing rec group. This function handles that by finding a
68+
// way to ensure that the new types are in fact in a new rec group.
69+
//
70+
// TODO: Move this outside if we find more uses.
71+
std::vector<HeapType> ensureTypesAreInNewRecGroup(RecGroup recGroup,
72+
Module& wasm) {
73+
auto num = recGroup.size();
74+
75+
std::vector<HeapType> types;
76+
types.reserve(num);
77+
for (auto type : recGroup) {
78+
types.push_back(type);
79+
}
80+
81+
// Find all the heap types present before we create the new ones. The new
82+
// types must not appear in |existingSet|.
83+
std::vector<HeapType> existing = ModuleUtils::collectHeapTypes(wasm);
84+
std::unordered_set<HeapType> existingSet(existing.begin(), existing.end());
85+
86+
// Check for a collision with an existing rec group. Note that it is enough to
87+
// check one of the types: either the entire rec group gets merged, so they
88+
// are all merged, or not.
89+
if (existingSet.count(types[0])) {
90+
// Unfortunately there is a conflict. Handle it by adding a "hash" - a
91+
// "random" extra item in the rec group that is so outlandish it will
92+
// surely (?) never collide with anything. We must loop while doing so,
93+
// until we find a hash that does not collide.
94+
auto hashSize = num + 10;
95+
size_t random = num;
96+
while (1) {
97+
// Make a builder and add a slot for the hash.
98+
TypeBuilder builder(num + 1);
99+
for (Index i = 0; i < num; i++) {
100+
auto type = types[i];
101+
if (type.isStruct()) {
102+
builder[i] = type.getStruct();
103+
} else {
104+
// Atm this pass only needs struct and array types. If we refactor
105+
// this function to be general purpose we'd need to extend that. TODO
106+
assert(type.isArray());
107+
builder[i] = type.getArray();
108+
}
109+
if (auto super = type.getSuperType()) {
110+
builder[i].subTypeOf(*super);
111+
}
112+
}
113+
114+
// Implement the hash as a struct with "random" fields, and add it.
115+
Struct hashStruct;
116+
for (Index i = 0; i < hashSize; i++) {
117+
// TODO: a denser encoding?
118+
auto type = (random & 1) ? Type::i32 : Type::f64;
119+
hash_combine(random, hashSize + i);
120+
hashStruct.fields.push_back(Field(type, Mutable));
121+
}
122+
builder[num] = hashStruct;
123+
124+
// Build and hope for the best.
125+
builder.createRecGroup(0, num + 1);
126+
auto result = builder.build();
127+
assert(!result.getError());
128+
types = *result;
129+
assert(types.size() == num + 1);
130+
131+
if (existingSet.count(types[0])) {
132+
// There is still a collision. Exponentially use larger hashes to
133+
// quickly find one that works. Note that we also use different
134+
// pseudorandom values while doing so in the for-loop above.
135+
hashSize *= 2;
136+
} else {
137+
// Success! Leave the loop.
138+
break;
139+
}
140+
}
141+
}
142+
143+
#ifndef NDEBUG
144+
// Verify the lack of a collision, just to be safe.
145+
for (auto newType : types) {
146+
assert(!existingSet.count(newType));
147+
}
148+
#endif
149+
150+
return types;
151+
}
152+
62153
// A vector of struct.new or one of the variations on array.new.
63154
using News = std::vector<Expression*>;
64155

@@ -143,6 +234,8 @@ struct TypeSSA : public Pass {
143234
// instruction that we found. In the isorecursive type system there isn't an
144235
// explicit way to do so, but at least if the new rec group is very large
145236
// the risk of collision with another rec group in the program is small.
237+
// (If a collision does happen, though, then that is very dangerous, as
238+
// casts on the new types could succeed in ways the program doesn't expect.)
146239
// Note that the risk of collision with things outside of this module is
147240
// also a possibility, and so for that reason this pass is likely mostly
148241
// useful in the closed-world scenario.
@@ -164,27 +257,9 @@ struct TypeSSA : public Pass {
164257
auto newTypes = *result;
165258
assert(newTypes.size() == num);
166259

167-
// The new types must not overlap with any existing ones. If they do, then
168-
// it would be unsafe to apply this optimization (if casts exist to the
169-
// existing types, the new types merged with them would now succeed on those
170-
// casts). We could try to create a "weird" rec group to avoid this (e.g. we
171-
// could make a rec group larger than any existing one, or with an initial
172-
// member that is "random"), but hopefully this is rare, so just error for
173-
// now.
174-
//
175-
// Note that it is enough to check one of the types: either the entire rec
176-
// group gets merged, so they are all merged, or not.
177-
std::vector<HeapType> typesVec = ModuleUtils::collectHeapTypes(*module);
178-
std::unordered_set<HeapType> typesSet(typesVec.begin(), typesVec.end());
179-
if (typesSet.count(newTypes[0])) {
180-
Fatal() << "Rec group collision in TypeSSA! Please file a bug";
181-
}
182-
#ifndef NDEBUG
183-
// Verify the above assumption, just to be safe.
184-
for (auto newType : newTypes) {
185-
assert(!typesSet.count(newType));
186-
}
187-
#endif
260+
// Make sure this is actually a new rec group.
261+
auto recGroup = newTypes[0].getRecGroup();
262+
newTypes = ensureTypesAreInNewRecGroup(recGroup, *module);
188263

189264
// Success: we can apply the new types.
190265

src/support/hash.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ template<typename T> inline std::size_t hash(const T& value) {
2828
}
2929

3030
// Combines two digests into the first digest. Use instead of `rehash` if
31-
// `otherDigest` is another digest and not a `size_t` value.
31+
// `otherDigest` is another digest and not a `size_t` value. This is also useful
32+
// when you want deterministic behavior across systems, as this method does not
33+
// call std::hash, so it does not depend on the behavior of the local machine's
34+
// C++ standard library implementation.
3235
inline void hash_combine(std::size_t& digest, const std::size_t otherDigest) {
3336
// see: boost/container_hash/hash.hpp
3437
// The constant is the N-bits reciprocal of the golden ratio:

test/lit/passes/type-ssa.wast

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,3 +361,36 @@
361361
)
362362
)
363363
)
364+
365+
(module
366+
;; CHECK: (type $array (array (mut f32)))
367+
(type $array (array (mut f32)))
368+
369+
;; CHECK: (type $subarray (array_subtype (mut f32) $array))
370+
(type $subarray (array_subtype (mut f32) $array))
371+
372+
;; CHECK: (type $ref|$subarray|_=>_none (func (param (ref $subarray))))
373+
374+
;; CHECK: (rec
375+
;; CHECK-NEXT: (type $array$1 (array_subtype (mut f32) $array))
376+
377+
;; CHECK: (type ${mut:i32_mut:i32_mut:f64_mut:f64_mut:i32_mut:f64_mut:f64_mut:i32_mut:i32_mut:i32_mut:i32} (struct (field (mut i32)) (field (mut i32)) (field (mut f64)) (field (mut f64)) (field (mut i32)) (field (mut f64)) (field (mut f64)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32))))
378+
379+
;; CHECK: (func $1 (type $ref|$subarray|_=>_none) (param $ref (ref $subarray))
380+
;; CHECK-NEXT: (drop
381+
;; CHECK-NEXT: (array.new_default $array$1
382+
;; CHECK-NEXT: (i32.const 64)
383+
;; CHECK-NEXT: )
384+
;; CHECK-NEXT: )
385+
;; CHECK-NEXT: )
386+
(func $1 (param $ref (ref $subarray))
387+
;; TypeSSA will create another subtype of array, which will happen to
388+
;; conflict with $subarray. We will need to create a new "weird" rec group
389+
;; with a "hash" in it to avoid the conflict.
390+
(drop
391+
(array.new_default $array
392+
(i32.const 64)
393+
)
394+
)
395+
)
396+
)

0 commit comments

Comments
 (0)