Skip to content

Commit 670b736

Browse files
authored
[Strings] Initial string execution support (#5491)
Store string data as GC data. Inefficient (one Const per char), but ok for now. Implement string.new_wtf16_array and string.const, enough for basic testing. Create strings in makeConstantExpression, which enables ctor-eval support. Print strings in fuzz-exec, which makes testing easier.
1 parent c4d15ef commit 670b736

File tree

12 files changed

+193
-25
lines changed

12 files changed

+193
-25
lines changed

src/ir/properties.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ inline bool isNamedControlFlow(Expression* curr) {
8282
// runtime will be equal as well. TODO: combine this with
8383
// isValidInConstantExpression or find better names(#4845)
8484
inline bool isSingleConstantExpression(const Expression* curr) {
85-
return curr->is<Const>() || curr->is<RefNull>() || curr->is<RefFunc>();
85+
return curr->is<Const>() || curr->is<RefNull>() || curr->is<RefFunc>() ||
86+
curr->is<StringConst>();
8687
}
8788

8889
inline bool isConstantExpression(const Expression* curr) {

src/literal.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ class Literal {
4848
// A reference to GC data, either a Struct or an Array. For both of those
4949
// we store the referred data as a Literals object (which is natural for an
5050
// Array, and for a Struct, is just the fields in order). The type is used
51-
// to indicate whether this is a Struct or an Array, and of what type.
51+
// to indicate whether this is a Struct or an Array, and of what type. We
52+
// also use this to store String data, as it is similarly stored on the
53+
// heap.
5254
std::shared_ptr<GCData> gcData;
5355
// TODO: Literals of type `anyref` can only be `null` currently but we
5456
// will need to represent external values eventually, to
@@ -90,7 +92,10 @@ class Literal {
9092
bool isConcrete() const { return type.isConcrete(); }
9193
bool isNone() const { return type == Type::none; }
9294
bool isFunction() const { return type.isFunction(); }
95+
// Whether this is GC data, that is, something stored on the heap (aside from
96+
// a null or i31). This includes structs, arrays, and also strings.
9397
bool isData() const { return type.isData(); }
98+
bool isString() const { return type.isString(); }
9499

95100
bool isNull() const { return type.isNull(); }
96101

@@ -709,10 +714,10 @@ class Literals : public SmallVector<Literal, 1> {
709714
std::ostream& operator<<(std::ostream& o, wasm::Literal literal);
710715
std::ostream& operator<<(std::ostream& o, wasm::Literals literals);
711716

712-
// A GC Struct or Array is a set of values with a type saying how it should be
713-
// interpreted.
717+
// A GC Struct, Array, or String is a set of values with a type saying how it
718+
// should be interpreted.
714719
struct GCData {
715-
// The type of this struct or array.
720+
// The type of this struct, array, or string.
716721
HeapType type;
717722

718723
// The element or field values.

src/passes/Precompute.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,10 @@ struct Precompute
509509
if (type.isFunction()) {
510510
return true;
511511
}
512+
// We can emit a StringConst for a string constant.
513+
if (type.isString()) {
514+
return true;
515+
}
512516
// All other reference types cannot be precomputed. Even an immutable GC
513517
// reference is not currently something this pass can handle, as it will
514518
// evaluate and reevaluate code multiple times in e.g. propagateLocals, see

src/tools/execution-results.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,15 @@ struct ExecutionResults {
116116
if (values->size() > 0) {
117117
std::cout << "[fuzz-exec] note result: " << exp->name << " => ";
118118
auto resultType = func->getResults();
119-
if (resultType.isRef()) {
119+
if (resultType.isRef() && !resultType.isString()) {
120120
// Don't print reference values, as funcref(N) contains an index
121121
// for example, which is not guaranteed to remain identical after
122122
// optimizations.
123123
std::cout << resultType << '\n';
124124
} else {
125+
// Non-references can be printed in full. So can strings, since we
126+
// always know how to print them and there is just one string
127+
// type.
125128
std::cout << *values << '\n';
126129
}
127130
}

src/wasm-builder.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,14 @@ class Builder {
11531153
if (type.isRef() && type.getHeapType() == HeapType::i31) {
11541154
return makeI31New(makeConst(value.geti31()));
11551155
}
1156+
if (type.isString()) {
1157+
// TODO: more than ascii support
1158+
std::string string;
1159+
for (auto c : value.getGCData()->values) {
1160+
string.push_back(c.getInteger());
1161+
}
1162+
return makeStringConst(string);
1163+
}
11561164
TODO_SINGLE_COMPOUND(type);
11571165
WASM_UNREACHABLE("unsupported constant expression");
11581166
}

src/wasm-interpreter.h

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,8 +1797,54 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
17971797
}
17981798
WASM_UNREACHABLE("unimplemented ref.as_*");
17991799
}
1800-
Flow visitStringNew(StringNew* curr) { WASM_UNREACHABLE("unimp"); }
1801-
Flow visitStringConst(StringConst* curr) { WASM_UNREACHABLE("unimp"); }
1800+
Flow visitStringNew(StringNew* curr) {
1801+
Flow ptr = visit(curr->ptr);
1802+
if (ptr.breaking()) {
1803+
return ptr;
1804+
}
1805+
switch (curr->op) {
1806+
case StringNewWTF16Array: {
1807+
Flow start = visit(curr->start);
1808+
if (start.breaking()) {
1809+
return start;
1810+
}
1811+
Flow end = visit(curr->end);
1812+
if (end.breaking()) {
1813+
return end;
1814+
}
1815+
auto ptrData = ptr.getSingleValue().getGCData();
1816+
if (!ptrData) {
1817+
trap("null ref");
1818+
}
1819+
const auto& ptrDataValues = ptrData->values;
1820+
size_t startVal = start.getSingleValue().getUnsigned();
1821+
size_t endVal = end.getSingleValue().getUnsigned();
1822+
if (endVal > ptrDataValues.size()) {
1823+
trap("array oob");
1824+
}
1825+
Literals contents;
1826+
if (endVal > startVal) {
1827+
contents.reserve(endVal - startVal);
1828+
for (size_t i = startVal; i < endVal; i++) {
1829+
contents.push_back(ptrDataValues[i]);
1830+
}
1831+
}
1832+
auto heapType = curr->type.getHeapType();
1833+
return Literal(std::make_shared<GCData>(heapType, contents), heapType);
1834+
}
1835+
default:
1836+
// TODO: others
1837+
return Flow(NONCONSTANT_FLOW);
1838+
}
1839+
}
1840+
Flow visitStringConst(StringConst* curr) {
1841+
Literals contents;
1842+
for (size_t i = 0; i < curr->string.size(); i++) {
1843+
contents.push_back(Literal(int32_t(curr->string[i])));
1844+
}
1845+
auto heapType = curr->type.getHeapType();
1846+
return Literal(std::make_shared<GCData>(heapType, contents), heapType);
1847+
}
18021848
Flow visitStringMeasure(StringMeasure* curr) { WASM_UNREACHABLE("unimp"); }
18031849
Flow visitStringEncode(StringEncode* curr) { WASM_UNREACHABLE("unimp"); }
18041850
Flow visitStringConcat(StringConcat* curr) { WASM_UNREACHABLE("unimp"); }
@@ -2121,8 +2167,6 @@ class ConstantExpressionRunner : public ExpressionRunner<SubType> {
21212167
NOTE_ENTER("Rethrow");
21222168
return Flow(NONCONSTANT_FLOW);
21232169
}
2124-
Flow visitStringNew(StringNew* curr) { return Flow(NONCONSTANT_FLOW); }
2125-
Flow visitStringConst(StringConst* curr) { return Flow(NONCONSTANT_FLOW); }
21262170
Flow visitStringMeasure(StringMeasure* curr) {
21272171
return Flow(NONCONSTANT_FLOW);
21282172
}

src/wasm-type.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ class Type {
159159
bool isSingle() const { return isConcrete() && !isTuple(); }
160160
bool isRef() const;
161161
bool isFunction() const;
162+
// See literal.h.
162163
bool isData() const;
163164
// Checks whether a type is a reference and is nullable. This returns false
164165
// for a value that is not a reference, that is, for which nullability is
@@ -173,6 +174,7 @@ class Type {
173174
bool isNull() const;
174175
bool isStruct() const;
175176
bool isArray() const;
177+
bool isString() const;
176178
bool isDefaultable() const;
177179

178180
Nullability getNullability() const;
@@ -364,6 +366,7 @@ class HeapType {
364366
bool isSignature() const;
365367
bool isStruct() const;
366368
bool isArray() const;
369+
bool isString() const;
367370
bool isBottom() const;
368371

369372
Signature getSignature() const;

src/wasm/literal.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ Literal::Literal(const uint8_t init[16]) : type(Type::v128) {
7171

7272
Literal::Literal(std::shared_ptr<GCData> gcData, HeapType type)
7373
: gcData(gcData), type(type, NonNullable) {
74-
// The type must be a proper type for GC data.
74+
// The type must be a proper type for GC data: either a struct, array, or
75+
// string; or a null.
7576
assert((isData() && gcData) || (type.isBottom() && !gcData));
7677
}
7778

@@ -577,7 +578,20 @@ std::ostream& operator<<(std::ostream& o, Literal literal) {
577578
case HeapType::struct_:
578579
case HeapType::array:
579580
WASM_UNREACHABLE("invalid type");
580-
case HeapType::string:
581+
case HeapType::string: {
582+
auto data = literal.getGCData();
583+
if (!data) {
584+
o << "nullstring";
585+
} else {
586+
o << "string(\"";
587+
for (auto c : data->values) {
588+
// TODO: more than ascii
589+
o << char(c.getInteger());
590+
}
591+
o << "\")";
592+
}
593+
break;
594+
}
581595
case HeapType::stringview_wtf8:
582596
case HeapType::stringview_wtf16:
583597
case HeapType::stringview_iter:

src/wasm/wasm-type.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -897,7 +897,8 @@ bool Type::isFunction() const {
897897

898898
bool Type::isData() const {
899899
if (isBasic()) {
900-
return false;
900+
// The only basic type that is considered data is a string.
901+
return isString();
901902
} else {
902903
auto* info = getTypeInfo(*this);
903904
return info->isRef() && info->ref.heapType.isData();
@@ -924,6 +925,8 @@ bool Type::isStruct() const { return isRef() && getHeapType().isStruct(); }
924925

925926
bool Type::isArray() const { return isRef() && getHeapType().isArray(); }
926927

928+
bool Type::isString() const { return isRef() && getHeapType().isString(); }
929+
927930
bool Type::isDefaultable() const {
928931
// A variable can get a default value if its type is concrete (unreachable
929932
// and none have no values, hence no default), and if it's a reference, it
@@ -1267,7 +1270,7 @@ bool HeapType::isFunction() const {
12671270

12681271
bool HeapType::isData() const {
12691272
if (isBasic()) {
1270-
return id == struct_ || id == array;
1273+
return id == struct_ || id == array || id == string;
12711274
} else {
12721275
return getHeapTypeInfo(*this)->isData();
12731276
}
@@ -1297,6 +1300,8 @@ bool HeapType::isArray() const {
12971300
}
12981301
}
12991302

1303+
bool HeapType::isString() const { return *this == HeapType::string; }
1304+
13001305
bool HeapType::isBottom() const {
13011306
if (isBasic()) {
13021307
switch (getBasic()) {
@@ -1672,7 +1677,9 @@ bool SubTyper::isSubType(HeapType a, HeapType b) {
16721677
case HeapType::any:
16731678
return a.getBottom() == HeapType::none;
16741679
case HeapType::eq:
1675-
return a == HeapType::i31 || a == HeapType::none || a.isData();
1680+
return a == HeapType::i31 || a == HeapType::none ||
1681+
a == HeapType::struct_ || a == HeapType::array || a.isStruct() ||
1682+
a.isArray();
16761683
case HeapType::i31:
16771684
return a == HeapType::none;
16781685
case HeapType::struct_:

test/lit/exec/strings.wast

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
;; NOTE: Assertions have been generated by update_lit_checks.py --output=fuzz-exec and should not be edited.
2+
3+
;; RUN: wasm-opt %s -all --fuzz-exec -q -o /dev/null 2>&1 | filecheck %s
4+
5+
(module
6+
(type $array16 (array (mut i16)))
7+
8+
;; CHECK: [fuzz-exec] calling new_wtf16_array
9+
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
10+
(func "new_wtf16_array" (result stringref)
11+
(string.new_wtf16_array
12+
(array.init_static $array16
13+
(i32.const 104) ;; h
14+
(i32.const 101) ;; e
15+
(i32.const 108) ;; l
16+
(i32.const 108) ;; l
17+
(i32.const 111) ;; o
18+
)
19+
(i32.const 1) ;; start from index 1, to chop off the 'h'
20+
(i32.const 5)
21+
)
22+
)
23+
24+
;; CHECK: [fuzz-exec] calling const
25+
;; CHECK-NEXT: [fuzz-exec] note result: const => string("world")
26+
(func "const" (result stringref)
27+
(string.const "world")
28+
)
29+
)
30+
;; CHECK: [fuzz-exec] calling new_wtf16_array
31+
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
32+
33+
;; CHECK: [fuzz-exec] calling const
34+
;; CHECK-NEXT: [fuzz-exec] note result: const => string("world")
35+
;; CHECK-NEXT: [fuzz-exec] comparing const
36+
;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array

0 commit comments

Comments
 (0)