Skip to content

Commit 9bbb657

Browse files
committed
module: detect ESM syntax by trying to recompile as SourceTextModule
Instead of using an async function wrapper, just try compiling code with unknown module format as SourceTextModule when it cannot be compiled as CJS and the error message indicates that it's worth a retry. If it can be parsed as SourceTextModule then it's considered ESM. Also, move shouldRetryAsESM() to C++ completely so that we can reuse it in the CJS module loader for require(esm). Drive-by: move methods that don't belong to ContextifyContext out as static methods and move GetHostDefinedOptions to ModuleWrap.
1 parent 21211a3 commit 9bbb657

File tree

7 files changed

+286
-335
lines changed

7 files changed

+286
-335
lines changed

lib/internal/modules/esm/get_format.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
114114
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
115115
if (getOptionValue('--experimental-detect-module')) {
116116
const format = source ?
117-
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
117+
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
118118
null;
119119
if (format === 'module') {
120120
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
@@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
158158
if (!source) { return null; }
159159
const format = getFormatOfExtensionlessFile(url);
160160
if (format === 'module') {
161-
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
161+
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
162162
}
163163
return format;
164164
}

lib/internal/modules/helpers.js

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,6 @@ const {
1919
} = require('internal/errors').codes;
2020
const { BuiltinModule } = require('internal/bootstrap/realm');
2121

22-
const {
23-
shouldRetryAsESM: contextifyShouldRetryAsESM,
24-
constants: {
25-
syntaxDetectionErrors: {
26-
esmSyntaxErrorMessages,
27-
throwsOnlyInCommonJSErrorMessages,
28-
},
29-
},
30-
} = internalBinding('contextify');
3122
const { validateString } = require('internal/validators');
3223
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
3324
const internalFS = require('internal/fs/utils');
@@ -329,30 +320,6 @@ function normalizeReferrerURL(referrerName) {
329320
}
330321

331322

332-
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
333-
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
334-
/**
335-
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
336-
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
337-
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
338-
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
339-
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
340-
* @param {string} source Module contents
341-
*/
342-
function shouldRetryAsESM(errorMessage, source) {
343-
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
344-
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
345-
return true;
346-
}
347-
348-
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
349-
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
350-
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
351-
}
352-
353-
return false;
354-
}
355-
356323
/**
357324
* @param {string|undefined} url URL to convert to filename
358325
*/
@@ -382,7 +349,6 @@ module.exports = {
382349
loadBuiltinModule,
383350
makeRequireFunction,
384351
normalizeReferrerURL,
385-
shouldRetryAsESM,
386352
stripBOM,
387353
toRealPath,
388354
hasStartedUserCJSExecution() {

lib/internal/modules/run_main.js

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
'use strict';
22

33
const {
4+
ObjectGetPrototypeOf,
5+
SyntaxErrorPrototype,
46
StringPrototypeEndsWith,
57
globalThis,
68
} = primordials;
@@ -159,27 +161,29 @@ function runEntryPointWithESMLoader(callback) {
159161
function executeUserEntryPoint(main = process.argv[1]) {
160162
const resolvedMain = resolveMainPath(main);
161163
const useESMLoader = shouldUseESMLoader(resolvedMain);
162-
164+
let mainURL;
163165
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
164166
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
165167
let retryAsESM = false;
166168
if (!useESMLoader) {
167169
const cjsLoader = require('internal/modules/cjs/loader');
168170
const { Module } = cjsLoader;
169171
if (getOptionValue('--experimental-detect-module')) {
172+
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
170173
try {
171174
// Module._load is the monkey-patchable CJS module loader.
172175
Module._load(main, null, true);
173176
} catch (error) {
174-
const source = cjsLoader.entryPointSource;
175-
const { shouldRetryAsESM } = require('internal/modules/helpers');
176-
retryAsESM = shouldRetryAsESM(error.message, source);
177-
// In case the entry point is a large file, such as a bundle,
178-
// ensure no further references can prevent it being garbage-collected.
179-
cjsLoader.entryPointSource = undefined;
177+
if (ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
178+
const { shouldRetryAsESM } = internalBinding('contextify');
179+
const mainPath = resolvedMain || main;
180+
mainURL = pathToFileURL(mainPath).href;
181+
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainPath);
182+
// In case the entry point is a large file, such as a bundle,
183+
// ensure no further references can prevent it being garbage-collected.
184+
cjsLoader.entryPointSource = undefined;
185+
}
180186
if (!retryAsESM) {
181-
const { enrichCJSError } = require('internal/modules/esm/translators');
182-
enrichCJSError(error, source, resolvedMain);
183187
throw error;
184188
}
185189
}
@@ -190,7 +194,9 @@ function executeUserEntryPoint(main = process.argv[1]) {
190194

191195
if (useESMLoader || retryAsESM) {
192196
const mainPath = resolvedMain || main;
193-
const mainURL = pathToFileURL(mainPath).href;
197+
if (mainURL === undefined) {
198+
mainURL = pathToFileURL(mainPath).href;
199+
}
194200

195201
runEntryPointWithESMLoader((cascadedLoader) => {
196202
// Note that if the graph contains unsettled TLA, this may never resolve

src/module_wrap.cc

Lines changed: 81 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,17 @@ v8::Maybe<bool> ModuleWrap::CheckUnsettledTopLevelAwait() {
142142
return v8::Just(false);
143143
}
144144

145-
// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
145+
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
146+
Isolate* isolate, Local<Symbol> id_symbol) {
147+
Local<PrimitiveArray> host_defined_options =
148+
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
149+
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
150+
return host_defined_options;
151+
}
152+
153+
// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
146154
// new ModuleWrap(url, context, source, lineOffset, columOffset,
147-
// hostDefinedOption)
155+
// idSymbol);
148156
// new ModuleWrap(url, context, exportNames, evaluationCallback[, cjsModule])
149157
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
150158
CHECK(args.IsConstructCall());
@@ -174,18 +182,17 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
174182
int column_offset = 0;
175183

176184
bool synthetic = args[2]->IsArray();
177-
178-
Local<PrimitiveArray> host_defined_options =
179-
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
185+
Local<PrimitiveArray> host_defined_options;
180186
Local<Symbol> id_symbol;
181187
if (synthetic) {
182188
// new ModuleWrap(url, context, exportNames, evaluationCallback[,
183189
// cjsModule])
184190
CHECK(args[3]->IsFunction());
185191
} else {
186-
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
192+
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
193+
// cachedData]);
187194
// new ModuleWrap(url, context, source, lineOffset, columOffset,
188-
// hostDefinedOption)
195+
// idSymbol);
189196
CHECK(args[2]->IsString());
190197
CHECK(args[3]->IsNumber());
191198
line_offset = args[3].As<Int32>()->Value();
@@ -196,7 +203,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
196203
} else {
197204
id_symbol = Symbol::New(isolate, url);
198205
}
199-
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
206+
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);
200207

201208
if (that->SetPrivate(context,
202209
realm->isolate_data()->host_defined_option_symbol(),
@@ -231,36 +238,32 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
231238
module = Module::CreateSyntheticModule(
232239
isolate, url, span, SyntheticModuleEvaluationStepsCallback);
233240
} else {
234-
ScriptCompiler::CachedData* cached_data = nullptr;
241+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
242+
if (id_symbol !=
243+
realm->isolate_data()->source_text_module_default_hdo()) {
244+
// TODO(joyeecheung): when we are compiling for the default loader, this
245+
// will be std::nullopt, and CompileSourceTextModule() should use
246+
// on-disk cache. See: https://github.com/nodejs/node/issues/47472
247+
user_cached_data = nullptr;
248+
}
235249
if (args[5]->IsArrayBufferView()) {
236250
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
237251
uint8_t* data =
238252
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
239-
cached_data =
253+
user_cached_data =
240254
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
241255
cached_data_buf->ByteLength());
242256
}
243-
244257
Local<String> source_text = args[2].As<String>();
245-
ScriptOrigin origin(isolate,
246-
url,
247-
line_offset,
248-
column_offset,
249-
true, // is cross origin
250-
-1, // script id
251-
Local<Value>(), // source map URL
252-
false, // is opaque (?)
253-
false, // is WASM
254-
true, // is ES Module
255-
host_defined_options);
256-
ScriptCompiler::Source source(source_text, origin, cached_data);
257-
ScriptCompiler::CompileOptions options;
258-
if (source.GetCachedData() == nullptr) {
259-
options = ScriptCompiler::kNoCompileOptions;
260-
} else {
261-
options = ScriptCompiler::kConsumeCodeCache;
262-
}
263-
if (!ScriptCompiler::CompileModule(isolate, &source, options)
258+
bool cache_rejected = false;
259+
if (!CompileSourceTextModule(realm,
260+
source_text,
261+
url,
262+
line_offset,
263+
column_offset,
264+
host_defined_options,
265+
user_cached_data,
266+
&cache_rejected)
264267
.ToLocal(&module)) {
265268
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
266269
CHECK(!try_catch.Message().IsEmpty());
@@ -273,8 +276,9 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
273276
}
274277
return;
275278
}
276-
if (options == ScriptCompiler::kConsumeCodeCache &&
277-
source.GetCachedData()->rejected) {
279+
280+
if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
281+
cache_rejected) {
278282
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
279283
realm, "cachedData buffer was rejected");
280284
try_catch.ReThrow();
@@ -317,6 +321,51 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
317321
args.GetReturnValue().Set(that);
318322
}
319323

324+
MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
325+
Realm* realm,
326+
Local<String> source_text,
327+
Local<String> url,
328+
int line_offset,
329+
int column_offset,
330+
Local<PrimitiveArray> host_defined_options,
331+
std::optional<ScriptCompiler::CachedData*> user_cached_data,
332+
bool* cache_rejected) {
333+
Isolate* isolate = realm->isolate();
334+
EscapableHandleScope scope(isolate);
335+
ScriptOrigin origin(isolate,
336+
url,
337+
line_offset,
338+
column_offset,
339+
true, // is cross origin
340+
-1, // script id
341+
Local<Value>(), // source map URL
342+
false, // is opaque (?)
343+
false, // is WASM
344+
true, // is ES Module
345+
host_defined_options);
346+
ScriptCompiler::CachedData* cached_data = nullptr;
347+
if (user_cached_data.has_value()) {
348+
cached_data = user_cached_data.value();
349+
}
350+
ScriptCompiler::Source source(source_text, origin, cached_data);
351+
ScriptCompiler::CompileOptions options;
352+
if (cached_data == nullptr) {
353+
options = ScriptCompiler::kNoCompileOptions;
354+
} else {
355+
options = ScriptCompiler::kConsumeCodeCache;
356+
}
357+
Local<Module> module;
358+
if (!ScriptCompiler::CompileModule(isolate, &source, options)
359+
.ToLocal(&module)) {
360+
return scope.EscapeMaybe(MaybeLocal<Module>());
361+
}
362+
if (cached_data != nullptr) {
363+
*cache_rejected = source.GetCachedData()->rejected;
364+
}
365+
366+
return scope.Escape(module);
367+
}
368+
320369
static Local<Object> createImportAttributesContainer(
321370
Realm* realm,
322371
Isolate* isolate,

src/module_wrap.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
55

6-
#include <unordered_map>
6+
#include <optional>
77
#include <string>
8+
#include <unordered_map>
89
#include <vector>
910
#include "base_object.h"
11+
#include "v8-script.h"
1012

1113
namespace node {
1214

@@ -69,6 +71,23 @@ class ModuleWrap : public BaseObject {
6971
return true;
7072
}
7173

74+
static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
75+
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);
76+
77+
// When user_cached_data is not std::nullopt, use the code cache if it's not
78+
// nullptr, otherwise don't use code cache.
79+
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
80+
// See: https://github.com/nodejs/node/issues/47472
81+
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
82+
Realm* realm,
83+
v8::Local<v8::String> source_text,
84+
v8::Local<v8::String> url,
85+
int line_offset,
86+
int column_offset,
87+
v8::Local<v8::PrimitiveArray> host_defined_options,
88+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
89+
bool* cache_rejected);
90+
7291
private:
7392
ModuleWrap(Realm* realm,
7493
v8::Local<v8::Object> object,

0 commit comments

Comments
 (0)