Skip to content

Commit 1a8fddc

Browse files
authored
[NFC] Precompute: Don't optimize string literals to string.const if not valid UTF-16 (#7324)
This replaces an interpreter override on string slicing. By considering the output of operations, this is more general, and avoids ever emitting an invalid string constant.
1 parent 5f767b7 commit 1a8fddc

File tree

1 file changed

+30
-37
lines changed

1 file changed

+30
-37
lines changed

src/passes/Precompute.cpp

Lines changed: 30 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -240,38 +240,6 @@ class PrecomputingExpressionRunner
240240
// string.encode_wtf16_array anyhow.)
241241
return Flow(NONCONSTANT_FLOW);
242242
}
243-
244-
// Evaluate a string slice via the base runner, but report a nonconstant flow
// when the resulting slice starts with a low surrogate or ends with a high
// surrogate, i.e. when slicing cut a surrogate pair in half.
Flow visitStringSliceWTF(StringSliceWTF* curr) {
  auto result = Super::visitStringSliceWTF(curr);
  if (result.breaking()) {
    return result;
  }

  auto data = result.getSingleValue().getGCData();
  if (!data) {
    return Flow(NONCONSTANT_FLOW);
  }

  auto& units = data->values;
  auto count = units.size();
  if (count == 0) {
    // An empty slice has no code unit that could be half of a pair.
    return result;
  }

  auto isLowSurrogate = [](auto unit) {
    return unit >= 0xDC00 && unit <= 0xDFFF;
  };
  auto isHighSurrogate = [](auto unit) {
    return unit >= 0xD800 && unit <= 0xDBFF;
  };

  // The sliced string is assumed to be valid UTF-16 to begin with, so only
  // the two ends of the slice can hold a split pair: a leading low surrogate
  // or a trailing high surrogate.
  if (isLowSurrogate(units[0].getInteger()) ||
      isHighSurrogate(units[count - 1].getInteger())) {
    return Flow(NONCONSTANT_FLOW);
  }
  return result;
}
275243
};
276244

277245
struct Precompute
@@ -967,18 +935,17 @@ struct Precompute
967935
if (value.isNull()) {
968936
return true;
969937
}
970-
return canEmitConstantFor(value.type);
971-
}
972938

973-
bool canEmitConstantFor(Type type) {
939+
auto type = value.type;
974940
// A function is fine to emit a constant for - we'll emit a RefFunc, which
975941
// is compact and immutable, so there can't be a problem.
976942
if (type.isFunction()) {
977943
return true;
978944
}
979-
// We can emit a StringConst for a string constant.
945+
// We can emit a StringConst for a string constant if the string is a
946+
// valid UTF-16 string (that is, it contains no unpaired surrogates).
980947
if (type.isString()) {
981-
return true;
948+
return isValidUTF16Literal(value);
982949
}
983950
// All other reference types cannot be precomputed. Even an immutable GC
984951
// reference is not currently something this pass can handle, as it will
@@ -991,6 +958,32 @@ struct Precompute
991958
return true;
992959
}
993960

961+
// TODO: move this logic to src/support/string, and refactor to share code
962+
// with wasm/literal.cpp string printing's conversion from a Literal to a raw
963+
// string.
964+
bool isValidUTF16Literal(const Literal& value) {
965+
bool expectLowSurrogate = false;
966+
for (auto& v : value.getGCData()->values) {
967+
auto c = v.getInteger();
968+
if (c >= 0xDC00 && c <= 0xDFFF) {
969+
if (expectLowSurrogate) {
970+
expectLowSurrogate = false;
971+
continue;
972+
}
973+
// We got a low surrogate but weren't expecting one.
974+
return false;
975+
}
976+
if (expectLowSurrogate) {
977+
// We are expecting a low surrogate but didn't get one.
978+
return false;
979+
}
980+
if (c >= 0xD800 && c <= 0xDBFF) {
981+
expectLowSurrogate = true;
982+
}
983+
}
984+
return !expectLowSurrogate;
985+
}
986+
994987
// Helpers for partial precomputing.
995988

996989
// Given a stack of expressions and the index of an expression in it, find

0 commit comments

Comments
 (0)