Skip to content

Commit 8d4f96e

Browse files
authored
Require specific array types for string ops (#7810)
The operations for creating strings from arrays and decoding strings into arrays previously accepted any array types with i8 or i16 elements, depending on the specific operation. This violates the principal types properties of the WebAssembly type system and was a bug in the design of the defunct stringref proposal. In practice, these operations are only useful when lowered to calls to the standard JS string builtins, which have strict requirements about what array types they accept. Reflect those requirements by restricting the array types we allow to be used with the string Expressions in our IR. This allows us to remove the special casing for these operations in ChildTyper. Since Precompute only worked on string operations taking immutable arrays, but those arrays are now invalid to use with string operations, let it precompute on mutable arrays in the special case where the array allocation is an immediate child of the string operation so it is known not to escape elsewhere. Hopefully this is enough to prevent serious optimization regressions.
1 parent 48be223 commit 8d4f96e

14 files changed

+243
-237
lines changed

src/ir/child-typer.h

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -103,18 +103,6 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {
103103
self().noteAnyTupleType(childp, arity);
104104
}
105105

106-
// Used only for string.new_lossy_utf8_array to work around a missing type
107-
// annotation in the stringref spec.
108-
void noteAnyI8ArrayReferenceType(Expression** childp) {
109-
self().noteAnyI8ArrayReferenceType(childp);
110-
}
111-
112-
// Used only for string.new_wtf16_array to work around a missing type
113-
// annotation in the stringref spec.
114-
void noteAnyI16ArrayReferenceType(Expression** childp) {
115-
self().noteAnyI16ArrayReferenceType(childp);
116-
}
117-
118106
Type getLabelType(Name label) { return self().getLabelType(label); }
119107

120108
bool skipUnreachable() { return false; }
@@ -1180,16 +1168,18 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {
11801168

11811169
void visitStringNew(StringNew* curr) {
11821170
switch (curr->op) {
1183-
case StringNewLossyUTF8Array:
1184-
noteAnyI8ArrayReferenceType(&curr->ref);
1171+
case StringNewLossyUTF8Array: {
1172+
note(&curr->ref, Type(HeapTypes::getMutI8Array(), Nullable));
11851173
note(&curr->start, Type::i32);
11861174
note(&curr->end, Type::i32);
11871175
return;
1188-
case StringNewWTF16Array:
1189-
noteAnyI16ArrayReferenceType(&curr->ref);
1176+
}
1177+
case StringNewWTF16Array: {
1178+
note(&curr->ref, Type(HeapTypes::getMutI16Array(), Nullable));
11901179
note(&curr->start, Type::i32);
11911180
note(&curr->end, Type::i32);
11921181
return;
1182+
}
11931183
case StringNewFromCodePoint:
11941184
note(&curr->ref, Type::i32);
11951185
return;
@@ -1203,16 +1193,18 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {
12031193
note(&curr->ref, Type(HeapType::string, Nullable));
12041194
}
12051195

1206-
void visitStringEncode(StringEncode* curr,
1207-
std::optional<HeapType> ht = std::nullopt) {
1208-
if (!ht) {
1209-
if (self().skipUnreachable() && !curr->array->type.isRef()) {
1210-
return;
1196+
void visitStringEncode(StringEncode* curr) {
1197+
note(&curr->str, Type(HeapType::string, Nullable));
1198+
switch (curr->op) {
1199+
case StringEncodeLossyUTF8Array: {
1200+
note(&curr->array, Type(HeapTypes::getMutI8Array(), Nullable));
1201+
break;
1202+
}
1203+
case StringEncodeWTF16Array: {
1204+
note(&curr->array, Type(HeapTypes::getMutI16Array(), Nullable));
1205+
break;
12111206
}
1212-
ht = curr->array->type.getHeapType();
12131207
}
1214-
note(&curr->str, Type(HeapType::string, Nullable));
1215-
note(&curr->array, Type(*ht, Nullable));
12161208
note(&curr->start, Type::i32);
12171209
}
12181210

src/passes/Precompute.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -217,19 +217,19 @@ class PrecomputingExpressionRunner
217217
return Flow(NONCONSTANT_FLOW);
218218
}
219219

220-
// string.encode_wtf16_array is effectively an Array read operation, so
221-
// just like ArrayGet above we must check for immutability.
220+
// string.new_wtf16_array is effectively an Array read operation, so
221+
// we cannot optimize mutable arrays. Unfortunately, it is only valid with
222+
// mutable arrays, so we cannot generally precompute it. As a special
223+
// exception, we can precompute if the child is an array allocation because
224+
// then we know the allocation will not escape anywhere else.
222225
auto refType = curr->ref->type;
223-
if (refType.isRef()) {
224-
auto heapType = refType.getHeapType();
225-
if (heapType.isArray()) {
226-
if (heapType.getArray().element.mutable_ == Immutable) {
227-
return Super::visitStringNew(curr);
228-
}
229-
}
226+
if (refType.isRef() &&
227+
(curr->ref->is<ArrayNew>() || curr->ref->is<ArrayNewData>() ||
228+
curr->ref->is<ArrayNewFixed>())) {
229+
return Super::visitStringNew(curr);
230230
}
231231

232-
// Otherwise, this is mutable or unreachable or otherwise uninteresting.
232+
// TODO: Handle more general cases as well.
233233
return Flow(NONCONSTANT_FLOW);
234234
}
235235

src/passes/StringLowering.cpp

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@
3737

3838
#include "ir/module-utils.h"
3939
#include "ir/names.h"
40-
#include "ir/subtype-exprs.h"
4140
#include "ir/type-updating.h"
4241
#include "ir/utils.h"
4342
#include "pass.h"
4443
#include "passes/string-utils.h"
4544
#include "support/string.h"
4645
#include "wasm-builder.h"
46+
#include "wasm-type.h"
4747
#include "wasm.h"
4848

4949
namespace wasm {
@@ -283,7 +283,7 @@ struct StringLowering : public StringGathering {
283283
}
284284

285285
// Common types used in imports.
286-
Type nullArray16 = Type(Array(Field(Field::i16, Mutable)), Nullable);
286+
Type nullArray16 = Type(HeapTypes::getMutI16Array(), Nullable);
287287
Type nullExt = Type(HeapType::ext, Nullable);
288288
Type nnExt = Type(HeapType::ext, NonNullable);
289289

@@ -337,22 +337,6 @@ struct StringLowering : public StringGathering {
337337
// Strings turn into externref.
338338
updates[HeapType::string] = HeapType::ext;
339339

340-
// The module may have its own array16 type inside a big rec group, but
341-
// imported strings expects that type in its own rec group as part of the
342-
// ABI. Fix that up here. (This is valid to do as this type has no sub- or
343-
// super-types anyhow; it is "plain old data" for communicating with the
344-
// outside.)
345-
auto allTypes = ModuleUtils::collectHeapTypes(*module);
346-
auto array16 = nullArray16.getHeapType();
347-
auto array16Element = array16.getArray().element;
348-
for (auto type : allTypes) {
349-
// Match an array type with no super and that is closed.
350-
if (type.isArray() && !type.getDeclaredSuperType() && !type.isOpen() &&
351-
type.getArray().element == array16Element) {
352-
updates[type] = array16;
353-
}
354-
}
355-
356340
TypeMapper(*module, updates).map();
357341
}
358342

src/tools/fuzzing/fuzzing.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3677,7 +3677,7 @@ Expression* TranslateToFuzzReader::makeCompoundRef(Type type) {
36773677
}
36783678

36793679
Expression* TranslateToFuzzReader::makeStringNewArray() {
3680-
auto* array = makeTrappingRefUse(getArrayTypeForString());
3680+
auto* array = makeTrappingRefUse(HeapTypes::getMutI16Array());
36813681
auto* start = make(Type::i32);
36823682
auto* end = make(Type::i32);
36833683
return builder.makeStringNew(StringNewWTF16Array, array, start, end);
@@ -5161,7 +5161,7 @@ Expression* TranslateToFuzzReader::makeStringEncode(Type type) {
51615161
assert(type == Type::i32);
51625162

51635163
auto* ref = makeTrappingRefUse(HeapType::string);
5164-
auto* array = makeTrappingRefUse(getArrayTypeForString());
5164+
auto* array = makeTrappingRefUse(HeapTypes::getMutI16Array());
51655165
auto* start = make(Type::i32);
51665166

51675167
// Only rarely emit without a bounds check, which might trap. See related
@@ -5625,12 +5625,6 @@ Type TranslateToFuzzReader::getSuperType(Type type) {
56255625
return superType;
56265626
}
56275627

5628-
HeapType TranslateToFuzzReader::getArrayTypeForString() {
5629-
// Emit an array that can be used with JS-style strings, containing 16-bit
5630-
// elements. For now, this must be a mutable type as that is all V8 accepts.
5631-
return HeapType(Array(Field(Field::PackedType::i16, Mutable)));
5632-
}
5633-
56345628
Name TranslateToFuzzReader::getTargetName(Expression* target) {
56355629
if (auto* block = target->dynCast<Block>()) {
56365630
return block->name;

src/wasm-type.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,11 @@ constexpr HeapType nofunc = HeapType::nofunc;
591591
constexpr HeapType nocont = HeapType::nocont;
592592
constexpr HeapType noexn = HeapType::noexn;
593593

594+
// Certain heap types are used by standard operations. Provide central accessors
595+
// for them to avoid having to build them everywhere they are used.
596+
HeapType getMutI8Array();
597+
HeapType getMutI16Array();
598+
594599
} // namespace HeapTypes
595600

596601
// A recursion group consisting of one or more HeapTypes. HeapTypes with single

src/wasm/wasm-ir-builder.cpp

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -518,24 +518,6 @@ struct IRBuilder::ChildPopper
518518
if (!Type::isSubType(type, *bound)) {
519519
return true;
520520
}
521-
} else if (constraint.isAnyI8ArrayReference()) {
522-
bool isI8Array =
523-
type.isRef() && type.getHeapType().isArray() &&
524-
type.getHeapType().getArray().element.packedType == Field::i8;
525-
bool isNone =
526-
type.isRef() && type.getHeapType().isMaybeShared(HeapType::none);
527-
if (!isI8Array && !isNone && type != Type::unreachable) {
528-
return true;
529-
}
530-
} else if (constraint.isAnyI16ArrayReference()) {
531-
bool isI16Array =
532-
type.isRef() && type.getHeapType().isArray() &&
533-
type.getHeapType().getArray().element.packedType == Field::i16;
534-
bool isNone =
535-
type.isRef() && type.getHeapType().isMaybeShared(HeapType::none);
536-
if (!isI16Array && !isNone && type != Type::unreachable) {
537-
return true;
538-
}
539521
} else {
540522
WASM_UNREACHABLE("unexpected constraint");
541523
}
@@ -686,13 +668,6 @@ struct IRBuilder::ChildPopper
686668
return popConstrainedChildren(children);
687669
}
688670

689-
Result<> visitStringEncode(StringEncode* curr,
690-
std::optional<HeapType> ht = std::nullopt) {
691-
std::vector<Child> children;
692-
ConstraintCollector{builder, children}.visitStringEncode(curr, ht);
693-
return popConstrainedChildren(children);
694-
}
695-
696671
Result<> visitCallRef(CallRef* curr,
697672
std::optional<HeapType> ht = std::nullopt) {
698673
std::vector<Child> children;
@@ -2488,11 +2463,7 @@ Result<> IRBuilder::makeStringMeasure(StringMeasureOp op) {
24882463
Result<> IRBuilder::makeStringEncode(StringEncodeOp op) {
24892464
StringEncode curr;
24902465
curr.op = op;
2491-
// There's no type annotation on these instructions due to a bug in the
2492-
// stringref proposal, so we just fudge it and pass `array` instead of a
2493-
// defined heap type. This will allow us to pop a child with an invalid
2494-
// array type, but that's just too bad.
2495-
CHECK_ERR(ChildPopper{*this}.visitStringEncode(&curr, HeapType::array));
2466+
CHECK_ERR(visitStringEncode(&curr));
24962467
push(builder.makeStringEncode(op, curr.str, curr.array, curr.start));
24972468
return Ok{};
24982469
}

src/wasm/wasm-type.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2721,22 +2721,28 @@ void TypeBuilder::dump() {
27212721
}
27222722

27232723
std::unordered_set<HeapType> getIgnorablePublicTypes() {
2724-
auto array8 = Array(Field(Field::i8, Mutable));
2725-
auto array16 = Array(Field(Field::i16, Mutable));
2726-
TypeBuilder builder(2);
2727-
builder[0] = array8;
2728-
builder[1] = array16;
2729-
auto result = builder.build();
2730-
assert(result);
2731-
std::unordered_set<HeapType> ret;
2732-
for (auto type : *result) {
2733-
ret.insert(type);
2734-
}
2735-
return ret;
2724+
std::unordered_set<HeapType> set;
2725+
set.insert(HeapTypes::getMutI8Array());
2726+
set.insert(HeapTypes::getMutI16Array());
2727+
return set;
27362728
}
27372729

27382730
} // namespace wasm
27392731

2732+
namespace wasm::HeapTypes {
2733+
2734+
HeapType getMutI8Array() {
2735+
static HeapType i8Array = Array(Field(Field::i8, Mutable));
2736+
return i8Array;
2737+
}
2738+
2739+
HeapType getMutI16Array() {
2740+
static HeapType i16Array = Array(Field(Field::i16, Mutable));
2741+
return i16Array;
2742+
}
2743+
2744+
} // namespace wasm::HeapTypes
2745+
27402746
namespace std {
27412747

27422748
template<> class hash<wasm::Tuple> {

src/wasm/wasm-validator.cpp

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "ir/stack-utils.h"
3232
#include "ir/utils.h"
3333
#include "support/colors.h"
34+
#include "wasm-type.h"
3435
#include "wasm-validator.h"
3536
#include "wasm.h"
3637

@@ -3851,11 +3852,19 @@ void FunctionValidator::visitStringNew(StringNew* curr) {
38513852
refType.isRef(), curr, "string.new input must have array type")) {
38523853
return;
38533854
}
3854-
auto heapType = refType.getHeapType();
3855-
if (!shouldBeTrue(heapType.isBottom() || heapType.isArray(),
3856-
curr,
3857-
"string.new input must have array type")) {
3858-
return;
3855+
if (curr->op == StringNewLossyUTF8Array) {
3856+
shouldBeSubType(
3857+
refType,
3858+
Type(HeapTypes::getMutI8Array(), Nullable),
3859+
curr,
3860+
"string.new_lossy_utf8_array input must have proper i8 array type");
3861+
} else {
3862+
assert(curr->op == StringNewWTF16Array);
3863+
shouldBeSubType(
3864+
refType,
3865+
Type(HeapTypes::getMutI16Array(), Nullable),
3866+
curr,
3867+
"string.new_wtf16_array input must have proper i16 array type");
38593868
}
38603869
shouldBeEqualOrFirstIsUnreachable(curr->start->type,
38613870
Type(Type::i32),
@@ -3896,6 +3905,31 @@ void FunctionValidator::visitStringEncode(StringEncode* curr) {
38963905
shouldBeTrue(!getModule() || getModule()->features.hasStrings(),
38973906
curr,
38983907
"string operations require strings [--enable-strings]");
3908+
shouldBeSubTypeIgnoringShared(curr->str->type,
3909+
Type(HeapType::ext, Nullable),
3910+
curr,
3911+
"string.encode input should be an externref");
3912+
switch (curr->op) {
3913+
case StringEncodeLossyUTF8Array:
3914+
shouldBeSubType(
3915+
curr->array->type,
3916+
Type(HeapTypes::getMutI8Array(), Nullable),
3917+
curr,
3918+
"string.encode_lossy_utf8_array should have mutable i8 array");
3919+
break;
3920+
case StringEncodeWTF16Array: {
3921+
shouldBeSubType(
3922+
curr->array->type,
3923+
Type(HeapTypes::getMutI16Array(), Nullable),
3924+
curr,
3925+
"string.encode_wtf16_array should have mutable i16 array");
3926+
break;
3927+
}
3928+
}
3929+
shouldBeSubType(curr->start->type,
3930+
Type(Type::i32),
3931+
curr,
3932+
"string.encode start should be an i32");
38993933
}
39003934

39013935
void FunctionValidator::visitStringConcat(StringConcat* curr) {

0 commit comments

Comments
 (0)