Skip to content

Commit aee292b

Browse files
authored
[strings] Add a StringLifting pass (#7389)
This converts imported string constants into string.const, and imported string instructions into string.* expressions. After this pass they are represented using stringref and we can optimize them fully (e.g. precomputing a string.concat of two constants). Typically a user would later lower them back down using StringLowering. This pass allows users to avoid emitting stringref directly, which means they are emitting standard wasm which can run in VMs, leaving wasm-opt entirely optional. Also refactor a few shared constants with StringLowering into a helper file. Left as TODOs: contents of the strings custom section, and casts (see comments in source). Fixes most of #7370
1 parent 6a6e080 commit aee292b

14 files changed

+612
-4
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ full changeset diff at the end of each section.
1515
Current Trunk
1616
-------------
1717

18+
- Add a `--string-lifting` pass that raises imported string operations and
19+
constants into stringref in Binaryen IR (which can then be fully optimized,
20+
and typically lowered back down with `--string-lowering`).
21+
1822
v123
1923
----
2024

scripts/test/fuzzing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@
9393
'names.wast',
9494
# huge amount of locals that make it extremely slow
9595
'too_much_for_liveness.wasm',
96+
# has (ref extern) imports, which the fuzzer cannot create values for when
97+
# it removes unknown imports
98+
'string-lifting.wast',
9699
# TODO: fuzzer support for stack switching
97100
'stack_switching.wast',
98101
'stack_switching_contnew.wast',

src/passes/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ FILE(GLOB passes_HEADERS *.h)
1515
set(passes_SOURCES
1616
param-utils.cpp
1717
pass.cpp
18+
string-utils.cpp
1819
test_passes.cpp
1920
AbstractTypeRefining.cpp
2021
AlignmentLowering.cpp
@@ -96,6 +97,7 @@ set(passes_SOURCES
9697
SignaturePruning.cpp
9798
SignatureRefining.cpp
9899
SignExtLowering.cpp
100+
StringLifting.cpp
99101
StringLowering.cpp
100102
Strip.cpp
101103
StripTargetFeatures.cpp

src/passes/StringLifting.cpp

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
/*
2+
* Copyright 2025 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
//
18+
// Lift JS string imports into wasm strings in Binaryen IR, which can then be
19+
// fully optimized. Typically StringLowering would be run later to lower them
20+
// back down.
21+
//
22+
23+
#include "ir/utils.h"
24+
#include "pass.h"
25+
#include "passes/string-utils.h"
26+
#include "support/string.h"
27+
#include "wasm-builder.h"
28+
#include "wasm.h"
29+
30+
namespace wasm {
31+
32+
struct StringLifting : public Pass {
33+
// Maps the global name of an imported string to the actual string.
34+
std::unordered_map<Name, Name> importedStrings;
35+
36+
// Imported string functions. Imports that do not exist remain null.
37+
Name fromCharCodeArrayImport;
38+
Name intoCharCodeArrayImport;
39+
Name fromCodePointImport;
40+
Name concatImport;
41+
Name equalsImport;
42+
Name compareImport;
43+
Name lengthImport;
44+
Name charCodeAtImport;
45+
Name substringImport;
46+
47+
void run(Module* module) override {
48+
// Whether we found any work to do.
49+
bool found = false;
50+
51+
// Imported string constants look like
52+
//
53+
// (import "\'" "bar" (global $string.bar.internal.name (ref extern)))
54+
//
55+
// That is, they are imported from module "'" and the basename is the
56+
// actual string. Find them all so we can apply them.
57+
//
58+
// TODO: parse the strings section for non-UTF16 strings.
59+
for (auto& global : module->globals) {
60+
if (!global->imported()) {
61+
continue;
62+
}
63+
if (global->module == WasmStringConstsModule) {
64+
importedStrings[global->name] = global->base;
65+
found = true;
66+
}
67+
}
68+
69+
auto array16 = Type(Array(Field(Field::i16, Mutable)), Nullable);
70+
auto refExtern = Type(HeapType::ext, NonNullable);
71+
auto externref = Type(HeapType::ext, Nullable);
72+
auto i32 = Type::i32;
73+
74+
// Find imported string functions.
75+
for (auto& func : module->functions) {
76+
if (!func->imported() || func->module != WasmStringsModule) {
77+
continue;
78+
}
79+
auto sig = func->type.getSignature();
80+
if (func->base == "fromCharCodeArray") {
81+
if (sig != Signature({array16, i32, i32}, refExtern)) {
82+
Fatal() << "StringLifting: bad signature for fromCharCodeArray: "
83+
<< sig;
84+
}
85+
fromCharCodeArrayImport = func->name;
86+
found = true;
87+
} else if (func->base == "fromCodePoint") {
88+
if (sig != Signature(i32, refExtern)) {
89+
Fatal() << "StringLifting: bad signature for fromCodePoint: " << sig;
90+
}
91+
fromCodePointImport = func->name;
92+
found = true;
93+
} else if (func->base == "concat") {
94+
if (sig != Signature({externref, externref}, refExtern)) {
95+
Fatal() << "StringLifting: bad signature for concta: " << sig;
96+
}
97+
concatImport = func->name;
98+
found = true;
99+
} else if (func->base == "intoCharCodeArray") {
100+
if (sig != Signature({externref, array16, i32}, i32)) {
101+
Fatal() << "StringLifting: bad signature for intoCharCodeArray: "
102+
<< sig;
103+
}
104+
intoCharCodeArrayImport = func->name;
105+
found = true;
106+
} else if (func->base == "equals") {
107+
if (sig != Signature({externref, externref}, i32)) {
108+
Fatal() << "StringLifting: bad signature for equals: " << sig;
109+
}
110+
equalsImport = func->name;
111+
found = true;
112+
} else if (func->base == "compare") {
113+
if (sig != Signature({externref, externref}, i32)) {
114+
Fatal() << "StringLifting: bad signature for compare: " << sig;
115+
}
116+
compareImport = func->name;
117+
found = true;
118+
} else if (func->base == "length") {
119+
if (sig != Signature({externref}, i32)) {
120+
Fatal() << "StringLifting: bad signature for length: " << sig;
121+
}
122+
lengthImport = func->name;
123+
found = true;
124+
} else if (func->base == "charCodeAt") {
125+
if (sig != Signature({externref, i32}, i32)) {
126+
Fatal() << "StringLifting: bad signature for charCodeAt: " << sig;
127+
}
128+
charCodeAtImport = func->name;
129+
found = true;
130+
} else if (func->base == "substring") {
131+
if (sig != Signature({externref, i32, i32}, refExtern)) {
132+
Fatal() << "StringLifting: bad signature for substring: " << sig;
133+
}
134+
substringImport = func->name;
135+
found = true;
136+
} else {
137+
std::cerr << "warning: unknown strings import: " << func->base << '\n';
138+
}
139+
}
140+
141+
if (!found) {
142+
// Nothing to do.
143+
return;
144+
}
145+
146+
struct StringApplier : public WalkerPass<PostWalker<StringApplier>> {
147+
bool isFunctionParallel() override { return true; }
148+
149+
const StringLifting& parent;
150+
151+
StringApplier(const StringLifting& parent) : parent(parent) {}
152+
153+
std::unique_ptr<Pass> create() override {
154+
return std::make_unique<StringApplier>(parent);
155+
}
156+
157+
bool modified = false;
158+
159+
void visitGlobalGet(GlobalGet* curr) {
160+
// Replace global.gets of imported strings with string.const.
161+
auto iter = parent.importedStrings.find(curr->name);
162+
if (iter != parent.importedStrings.end()) {
163+
// Encode from WTF-8 to WTF-16.
164+
auto wtf8 = iter->second;
165+
std::stringstream wtf16;
166+
bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.str);
167+
if (!valid) {
168+
Fatal() << "Bad string to lift: " << wtf8;
169+
}
170+
171+
replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.str()));
172+
modified = true;
173+
}
174+
}
175+
176+
void visitCall(Call* curr) {
177+
// Replace calls of imported string methods with stringref operations.
178+
if (curr->target == parent.fromCharCodeArrayImport) {
179+
replaceCurrent(Builder(*getModule())
180+
.makeStringNew(StringNewWTF16Array,
181+
curr->operands[0],
182+
curr->operands[1],
183+
curr->operands[2]));
184+
} else if (curr->target == parent.fromCodePointImport) {
185+
replaceCurrent(
186+
Builder(*getModule())
187+
.makeStringNew(StringNewFromCodePoint, curr->operands[0]));
188+
} else if (curr->target == parent.concatImport) {
189+
replaceCurrent(
190+
Builder(*getModule())
191+
.makeStringConcat(curr->operands[0], curr->operands[1]));
192+
} else if (curr->target == parent.intoCharCodeArrayImport) {
193+
replaceCurrent(Builder(*getModule())
194+
.makeStringEncode(StringEncodeWTF16Array,
195+
curr->operands[0],
196+
curr->operands[1],
197+
curr->operands[2]));
198+
} else if (curr->target == parent.equalsImport) {
199+
replaceCurrent(Builder(*getModule())
200+
.makeStringEq(StringEqEqual,
201+
curr->operands[0],
202+
curr->operands[1]));
203+
} else if (curr->target == parent.compareImport) {
204+
replaceCurrent(Builder(*getModule())
205+
.makeStringEq(StringEqCompare,
206+
curr->operands[0],
207+
curr->operands[1]));
208+
} else if (curr->target == parent.lengthImport) {
209+
replaceCurrent(
210+
Builder(*getModule())
211+
.makeStringMeasure(StringMeasureWTF16, curr->operands[0]));
212+
} else if (curr->target == parent.charCodeAtImport) {
213+
replaceCurrent(
214+
Builder(*getModule())
215+
.makeStringWTF16Get(curr->operands[0], curr->operands[1]));
216+
} else if (curr->target == parent.substringImport) {
217+
replaceCurrent(Builder(*getModule())
218+
.makeStringSliceWTF(curr->operands[0],
219+
curr->operands[1],
220+
curr->operands[2]));
221+
}
222+
}
223+
224+
void visitFunction(Function* curr) {
225+
// If we made modifications then we need to refinalize, as we replace
226+
// externrefs with stringrefs, a subtype.
227+
if (modified) {
228+
ReFinalize().walkFunctionInModule(curr, getModule());
229+
}
230+
}
231+
};
232+
233+
StringApplier applier(*this);
234+
applier.run(getPassRunner(), module);
235+
applier.walkModuleCode(module);
236+
237+
// TODO: Add casts. We generate new string.* instructions, and all their
238+
// string inputs should be stringref, not externref, but we have not
239+
// converted all externrefs to stringrefs (since some externrefs might
240+
// be something else). It is not urgent to fix this as the validator
241+
// accepts externrefs there atm, and since toolchains will lower
242+
// strings out at the end anyhow (which would remove such casts). Note
243+
// that if we add a type import for stringref then this problem would
244+
// become a lot simpler (we'd convert that type to stringref).
245+
246+
// Enable the feature so the module validates.
247+
module->features.enable(FeatureSet::Strings);
248+
}
249+
};
250+
251+
// Factory for the string-lifting pass: allocates and returns a new
// StringLifting instance.
Pass* createStringLiftingPass() {
  return new StringLifting();
}
252+
253+
} // namespace wasm

src/passes/StringLowering.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "ir/type-updating.h"
3939
#include "ir/utils.h"
4040
#include "pass.h"
41+
#include "passes/string-utils.h"
4142
#include "support/string.h"
4243
#include "wasm-builder.h"
4344
#include "wasm.h"
@@ -243,7 +244,7 @@ struct StringLowering : public StringGathering {
243244
std::stringstream utf8;
244245
if (useMagicImports &&
245246
String::convertUTF16ToUTF8(utf8, c->string.str)) {
246-
global->module = "'";
247+
global->module = WasmStringConstsModule;
247248
global->base = Name(utf8.str());
248249
} else {
249250
if (assertUTF8) {
@@ -359,9 +360,6 @@ struct StringLowering : public StringGathering {
359360
Name charCodeAtImport;
360361
Name substringImport;
361362

362-
// The name of the module to import string functions from.
363-
Name WasmStringsModule = "wasm:js-string";
364-
365363
// Creates an imported string function, returning its name (which is equal to
366364
// the true name of the import, if there is no conflict).
367365
Name addImport(Module* module, Name trueName, Type params, Type results) {

src/passes/pass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,9 @@ void PassRegistry::registerPasses() {
518518
registerPass("string-gathering",
519519
"gathers wasm strings to globals",
520520
createStringGatheringPass);
521+
registerPass("string-lifting",
522+
"lift string imports to wasm strings",
523+
createStringLiftingPass);
521524
registerPass("string-lowering",
522525
"lowers wasm strings and operations to imports",
523526
createStringLoweringPass);

src/passes/passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ Pass* createSimplifyLocalsNoStructurePass();
161161
Pass* createSimplifyLocalsNoTeeNoStructurePass();
162162
Pass* createStackCheckPass();
163163
Pass* createStringGatheringPass();
164+
Pass* createStringLiftingPass();
164165
Pass* createStringLoweringPass();
165166
Pass* createStringLoweringMagicImportPass();
166167
Pass* createStringLoweringMagicImportAssertPass();

src/passes/string-utils.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright 2025 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "passes/string-utils.h"
18+
#include "support/name.h"
19+
20+
namespace wasm {
21+
22+
// Import module name for the JS string functions.
const Name WasmStringsModule = "wasm:js-string";
23+
24+
// Import module name for magic imported string constants.
const Name WasmStringConstsModule = "'";
25+
26+
} // namespace wasm

src/passes/string-utils.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright 2025 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef wasm_passes_string_utils_h
18+
#define wasm_passes_string_utils_h
19+
20+
#include "support/name.h"
21+
22+
namespace wasm {
23+
24+
// The name of the module to import from, for imported JS strings. See
25+
// https://github.com/WebAssembly/js-string-builtins/blob/main/proposals/js-string-builtins/Overview.md
26+
extern const Name WasmStringsModule;
27+
28+
// The name of the module to import string constants from, for magical imported
29+
// JS strings.
30+
extern const Name WasmStringConstsModule;
31+
32+
} // namespace wasm
33+
34+
#endif // wasm_passes_string_utils_h

0 commit comments

Comments
 (0)