|
| 1 | +/* |
| 2 | + * Copyright 2025 WebAssembly Community Group participants |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +// |
| 18 | +// Lift JS string imports into wasm strings in Binaryen IR, which can then be |
| 19 | +// fully optimized. Typically StringLowering would be run later to lower them |
| 20 | +// back down. |
| 21 | +// |
| 22 | + |
| 23 | +#include "ir/utils.h" |
| 24 | +#include "pass.h" |
| 25 | +#include "passes/string-utils.h" |
| 26 | +#include "support/string.h" |
| 27 | +#include "wasm-builder.h" |
| 28 | +#include "wasm.h" |
| 29 | + |
| 30 | +namespace wasm { |
| 31 | + |
| 32 | +struct StringLifting : public Pass { |
| 33 | + // Maps the global name of an imported string to the actual string. |
| 34 | + std::unordered_map<Name, Name> importedStrings; |
| 35 | + |
| 36 | + // Imported string functions. Imports that do not exist remain null. |
| 37 | + Name fromCharCodeArrayImport; |
| 38 | + Name intoCharCodeArrayImport; |
| 39 | + Name fromCodePointImport; |
| 40 | + Name concatImport; |
| 41 | + Name equalsImport; |
| 42 | + Name compareImport; |
| 43 | + Name lengthImport; |
| 44 | + Name charCodeAtImport; |
| 45 | + Name substringImport; |
| 46 | + |
| 47 | + void run(Module* module) override { |
| 48 | + // Whether we found any work to do. |
| 49 | + bool found = false; |
| 50 | + |
| 51 | + // Imported string constants look like |
| 52 | + // |
| 53 | + // (import "\'" "bar" (global $string.bar.internal.name (ref extern))) |
| 54 | + // |
| 55 | + // That is, they are imported from module "'" and the basename is the |
| 56 | + // actual string. Find them all so we can apply them. |
| 57 | + // |
| 58 | + // TODO: parse the strings section for non-UTF16 strings. |
| 59 | + for (auto& global : module->globals) { |
| 60 | + if (!global->imported()) { |
| 61 | + continue; |
| 62 | + } |
| 63 | + if (global->module == WasmStringConstsModule) { |
| 64 | + importedStrings[global->name] = global->base; |
| 65 | + found = true; |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + auto array16 = Type(Array(Field(Field::i16, Mutable)), Nullable); |
| 70 | + auto refExtern = Type(HeapType::ext, NonNullable); |
| 71 | + auto externref = Type(HeapType::ext, Nullable); |
| 72 | + auto i32 = Type::i32; |
| 73 | + |
| 74 | + // Find imported string functions. |
| 75 | + for (auto& func : module->functions) { |
| 76 | + if (!func->imported() || func->module != WasmStringsModule) { |
| 77 | + continue; |
| 78 | + } |
| 79 | + auto sig = func->type.getSignature(); |
| 80 | + if (func->base == "fromCharCodeArray") { |
| 81 | + if (sig != Signature({array16, i32, i32}, refExtern)) { |
| 82 | + Fatal() << "StringLifting: bad signature for fromCharCodeArray: " |
| 83 | + << sig; |
| 84 | + } |
| 85 | + fromCharCodeArrayImport = func->name; |
| 86 | + found = true; |
| 87 | + } else if (func->base == "fromCodePoint") { |
| 88 | + if (sig != Signature(i32, refExtern)) { |
| 89 | + Fatal() << "StringLifting: bad signature for fromCodePoint: " << sig; |
| 90 | + } |
| 91 | + fromCodePointImport = func->name; |
| 92 | + found = true; |
| 93 | + } else if (func->base == "concat") { |
| 94 | + if (sig != Signature({externref, externref}, refExtern)) { |
| 95 | + Fatal() << "StringLifting: bad signature for concta: " << sig; |
| 96 | + } |
| 97 | + concatImport = func->name; |
| 98 | + found = true; |
| 99 | + } else if (func->base == "intoCharCodeArray") { |
| 100 | + if (sig != Signature({externref, array16, i32}, i32)) { |
| 101 | + Fatal() << "StringLifting: bad signature for intoCharCodeArray: " |
| 102 | + << sig; |
| 103 | + } |
| 104 | + intoCharCodeArrayImport = func->name; |
| 105 | + found = true; |
| 106 | + } else if (func->base == "equals") { |
| 107 | + if (sig != Signature({externref, externref}, i32)) { |
| 108 | + Fatal() << "StringLifting: bad signature for equals: " << sig; |
| 109 | + } |
| 110 | + equalsImport = func->name; |
| 111 | + found = true; |
| 112 | + } else if (func->base == "compare") { |
| 113 | + if (sig != Signature({externref, externref}, i32)) { |
| 114 | + Fatal() << "StringLifting: bad signature for compare: " << sig; |
| 115 | + } |
| 116 | + compareImport = func->name; |
| 117 | + found = true; |
| 118 | + } else if (func->base == "length") { |
| 119 | + if (sig != Signature({externref}, i32)) { |
| 120 | + Fatal() << "StringLifting: bad signature for length: " << sig; |
| 121 | + } |
| 122 | + lengthImport = func->name; |
| 123 | + found = true; |
| 124 | + } else if (func->base == "charCodeAt") { |
| 125 | + if (sig != Signature({externref, i32}, i32)) { |
| 126 | + Fatal() << "StringLifting: bad signature for charCodeAt: " << sig; |
| 127 | + } |
| 128 | + charCodeAtImport = func->name; |
| 129 | + found = true; |
| 130 | + } else if (func->base == "substring") { |
| 131 | + if (sig != Signature({externref, i32, i32}, refExtern)) { |
| 132 | + Fatal() << "StringLifting: bad signature for substring: " << sig; |
| 133 | + } |
| 134 | + substringImport = func->name; |
| 135 | + found = true; |
| 136 | + } else { |
| 137 | + std::cerr << "warning: unknown strings import: " << func->base << '\n'; |
| 138 | + } |
| 139 | + } |
| 140 | + |
| 141 | + if (!found) { |
| 142 | + // Nothing to do. |
| 143 | + return; |
| 144 | + } |
| 145 | + |
| 146 | + struct StringApplier : public WalkerPass<PostWalker<StringApplier>> { |
| 147 | + bool isFunctionParallel() override { return true; } |
| 148 | + |
| 149 | + const StringLifting& parent; |
| 150 | + |
| 151 | + StringApplier(const StringLifting& parent) : parent(parent) {} |
| 152 | + |
| 153 | + std::unique_ptr<Pass> create() override { |
| 154 | + return std::make_unique<StringApplier>(parent); |
| 155 | + } |
| 156 | + |
| 157 | + bool modified = false; |
| 158 | + |
| 159 | + void visitGlobalGet(GlobalGet* curr) { |
| 160 | + // Replace global.gets of imported strings with string.const. |
| 161 | + auto iter = parent.importedStrings.find(curr->name); |
| 162 | + if (iter != parent.importedStrings.end()) { |
| 163 | + // Encode from WTF-8 to WTF-16. |
| 164 | + auto wtf8 = iter->second; |
| 165 | + std::stringstream wtf16; |
| 166 | + bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.str); |
| 167 | + if (!valid) { |
| 168 | + Fatal() << "Bad string to lift: " << wtf8; |
| 169 | + } |
| 170 | + |
| 171 | + replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.str())); |
| 172 | + modified = true; |
| 173 | + } |
| 174 | + } |
| 175 | + |
| 176 | + void visitCall(Call* curr) { |
| 177 | + // Replace calls of imported string methods with stringref operations. |
| 178 | + if (curr->target == parent.fromCharCodeArrayImport) { |
| 179 | + replaceCurrent(Builder(*getModule()) |
| 180 | + .makeStringNew(StringNewWTF16Array, |
| 181 | + curr->operands[0], |
| 182 | + curr->operands[1], |
| 183 | + curr->operands[2])); |
| 184 | + } else if (curr->target == parent.fromCodePointImport) { |
| 185 | + replaceCurrent( |
| 186 | + Builder(*getModule()) |
| 187 | + .makeStringNew(StringNewFromCodePoint, curr->operands[0])); |
| 188 | + } else if (curr->target == parent.concatImport) { |
| 189 | + replaceCurrent( |
| 190 | + Builder(*getModule()) |
| 191 | + .makeStringConcat(curr->operands[0], curr->operands[1])); |
| 192 | + } else if (curr->target == parent.intoCharCodeArrayImport) { |
| 193 | + replaceCurrent(Builder(*getModule()) |
| 194 | + .makeStringEncode(StringEncodeWTF16Array, |
| 195 | + curr->operands[0], |
| 196 | + curr->operands[1], |
| 197 | + curr->operands[2])); |
| 198 | + } else if (curr->target == parent.equalsImport) { |
| 199 | + replaceCurrent(Builder(*getModule()) |
| 200 | + .makeStringEq(StringEqEqual, |
| 201 | + curr->operands[0], |
| 202 | + curr->operands[1])); |
| 203 | + } else if (curr->target == parent.compareImport) { |
| 204 | + replaceCurrent(Builder(*getModule()) |
| 205 | + .makeStringEq(StringEqCompare, |
| 206 | + curr->operands[0], |
| 207 | + curr->operands[1])); |
| 208 | + } else if (curr->target == parent.lengthImport) { |
| 209 | + replaceCurrent( |
| 210 | + Builder(*getModule()) |
| 211 | + .makeStringMeasure(StringMeasureWTF16, curr->operands[0])); |
| 212 | + } else if (curr->target == parent.charCodeAtImport) { |
| 213 | + replaceCurrent( |
| 214 | + Builder(*getModule()) |
| 215 | + .makeStringWTF16Get(curr->operands[0], curr->operands[1])); |
| 216 | + } else if (curr->target == parent.substringImport) { |
| 217 | + replaceCurrent(Builder(*getModule()) |
| 218 | + .makeStringSliceWTF(curr->operands[0], |
| 219 | + curr->operands[1], |
| 220 | + curr->operands[2])); |
| 221 | + } |
| 222 | + } |
| 223 | + |
| 224 | + void visitFunction(Function* curr) { |
| 225 | + // If we made modifications then we need to refinalize, as we replace |
| 226 | + // externrefs with stringrefs, a subtype. |
| 227 | + if (modified) { |
| 228 | + ReFinalize().walkFunctionInModule(curr, getModule()); |
| 229 | + } |
| 230 | + } |
| 231 | + }; |
| 232 | + |
| 233 | + StringApplier applier(*this); |
| 234 | + applier.run(getPassRunner(), module); |
| 235 | + applier.walkModuleCode(module); |
| 236 | + |
| 237 | + // TODO: Add casts. We generate new string.* instructions, and all their |
| 238 | + // string inputs should be stringref, not externref, but we have not |
| 239 | + // converted all externrefs to stringrefs (since some externrefs might |
| 240 | + // be something else). It is not urgent to fix this as the validator |
| 241 | + // accepts externrefs there atm, and since toolchains will lower |
| 242 | + // strings out at the end anyhow (which would remove such casts). Note |
| 243 | + // that if we add a type import for stringref then this problem would |
| 244 | + // become a lot simpler (we'd convert that type to stringref). |
| 245 | + |
| 246 | + // Enable the feature so the module validates. |
| 247 | + module->features.enable(FeatureSet::Strings); |
| 248 | + } |
| 249 | +}; |
| 250 | + |
| 251 | +Pass* createStringLiftingPass() { return new StringLifting(); } |
| 252 | + |
| 253 | +} // namespace wasm |
0 commit comments