module: support ESM detection in the CJS loader

This patch:

1. Adds ESM syntax detection to compileFunctionForCJSLoader()
  for --experimental-detect-module and allows it to emit the
  warning for how to load ESM when it's used to parse ESM as
  CJS but detection is not enabled.
2. Moves the ESM detection of --experimental-detect-module for
  the entrypoint from executeUserEntryPoint() into
  Module.prototype._compile() and handles it directly in the
  CJS loader so that the errors thrown during compilation *and
  execution* during the loading of the entrypoint do not
  need to be bubbled all the way up. If the entrypoint doesn't
  parse as CJS, and detection is enabled, the CJS loader will
  re-load the entrypoint as ESM on the spot asynchronously using
  runEntryPointWithESMLoader() and cascadedLoader.import(). This
  is fine for the entrypoint because unlike require(ESM) we don't
  need the namespace of the entrypoint synchronously, and can just
  ignore the returned value. In this case process.mainModule is
  reset to undefined as it is not available for ESM entrypoints.
3. Supports --experimental-detect-module for require(esm).

PR-URL: https://github.com/nodejs/node/pull/52047
Reviewed-By: Geoffrey Booth <webadmin@geoffreybooth.com>
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
pull/52788/head
Joyee Cheung 2024-04-29 22:21:53 +02:00 committed by GitHub
parent d2ebaaa4d2
commit 4d59a9deda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 207 additions and 130 deletions

View File

@ -58,6 +58,8 @@ module.exports = {
'test/es-module/test-esm-example-loader.js',
'test/es-module/test-esm-type-flag.js',
'test/es-module/test-esm-type-flag-alias.js',
'test/es-module/test-require-module-detect-entry-point.js',
'test/es-module/test-require-module-detect-entry-point-aou.js',
],
parserOptions: { sourceType: 'module' },
},

View File

@ -180,9 +180,12 @@ regarding which files are parsed as ECMAScript modules.
If `--experimental-require-module` is enabled, and the ECMAScript module being
loaded by `require()` meets the following requirements:
* Explicitly marked as an ES module with a `"type": "module"` field in
the closest package.json or a `.mjs` extension.
* Fully synchronous (contains no top-level `await`).
* The module is fully synchronous (contains no top-level `await`); and
* One of these conditions is met:
1. The file has a `.mjs` extension.
2. The file has a `.js` extension, and the closest `package.json` contains `"type": "module"`.
3. The file has a `.js` extension, the closest `package.json` does not contain
`"type": "commonjs"`, and `--experimental-detect-module` is enabled.
`require()` will load the requested module as an ES Module, and return
the module name space object. In this case it is similar to dynamic
@ -249,18 +252,27 @@ require(X) from module at path Y
6. LOAD_NODE_MODULES(X, dirname(Y))
7. THROW "not found"
MAYBE_DETECT_AND_LOAD(X)
1. If X parses as a CommonJS module, load X as a CommonJS module. STOP.
2. Else, if `--experimental-require-module` and `--experimental-detect-module` are
enabled, and the source code of X can be parsed as ECMAScript module using
<a href="esm.md#resolver-algorithm-specification">DETECT_MODULE_SYNTAX defined in
the ESM resolver</a>,
a. Load X as an ECMAScript module. STOP.
3. THROW the SyntaxError from attempting to parse X as CommonJS in 1. STOP.
LOAD_AS_FILE(X)
1. If X is a file, load X as its file extension format. STOP
2. If X.js is a file,
a. Find the closest package scope SCOPE to X.
b. If no scope was found, load X.js as a CommonJS module. STOP.
b. If no scope was found
1. MAYBE_DETECT_AND_LOAD(X.js)
c. If the SCOPE/package.json contains "type" field,
1. If the "type" field is "module", load X.js as an ECMAScript module. STOP.
2. Else, load X.js as an CommonJS module. STOP.
2. If the "type" field is "commonjs", load X.js as a CommonJS module. STOP.
d. MAYBE_DETECT_AND_LOAD(X.js)
3. If X.json is a file, load X.json to a JavaScript Object. STOP
4. If X.node is a file, load X.node as binary addon. STOP
5. If X.mjs is a file, and `--experimental-require-module` is enabled,
load X.mjs as an ECMAScript module. STOP
LOAD_INDEX(X)
1. If X/index.js is a file

View File

@ -106,7 +106,6 @@ module.exports = {
kModuleExportNames,
kModuleCircularVisited,
initializeCJS,
entryPointSource: undefined, // Set below.
Module,
wrapSafe,
kIsMainSymbol,
@ -1332,9 +1331,18 @@ function loadESMFromCJS(mod, filename) {
const source = getMaybeCachedSource(mod, filename);
const cascadedLoader = require('internal/modules/esm/loader').getOrInitializeCascadedLoader();
const isMain = mod[kIsMainSymbol];
// TODO(joyeecheung): we may want to invent optional special handling for default exports here.
// For now, it's good enough to be identical to what `import()` returns.
mod.exports = cascadedLoader.importSyncForRequire(mod, filename, source, isMain, mod[kModuleParent]);
if (isMain) {
require('internal/modules/run_main').runEntryPointWithESMLoader((cascadedLoader) => {
const mainURL = pathToFileURL(filename).href;
cascadedLoader.import(mainURL, undefined, { __proto__: null }, true);
});
// ESM won't be accessible via process.mainModule.
setOwnProperty(process, 'mainModule', undefined);
} else {
// TODO(joyeecheung): we may want to invent optional special handling for default exports here.
// For now, it's good enough to be identical to what `import()` returns.
mod.exports = cascadedLoader.importSyncForRequire(mod, filename, source, isMain, mod[kModuleParent]);
}
}
/**
@ -1343,8 +1351,10 @@ function loadESMFromCJS(mod, filename) {
* @param {string} content The content of the file being loaded
* @param {Module} cjsModuleInstance The CommonJS loader instance
* @param {object} codeCache The SEA code cache
* @param {'commonjs'|undefined} format Intended format of the module.
*/
function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
function wrapSafe(filename, content, cjsModuleInstance, codeCache, format) {
assert(format !== 'module'); // ESM should be handled in loadESMFromCJS().
const hostDefinedOptionId = vm_dynamic_import_default_internal;
const importModuleDynamically = vm_dynamic_import_default_internal;
if (patched) {
@ -1374,36 +1384,23 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
};
}
try {
const result = compileFunctionForCJSLoader(content, filename);
const isMain = !!(cjsModuleInstance && cjsModuleInstance[kIsMainSymbol]);
const shouldDetectModule = (format !== 'commonjs' && getOptionValue('--experimental-detect-module'));
const result = compileFunctionForCJSLoader(content, filename, isMain, shouldDetectModule);
// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}
// Cache the source map for the module if present.
if (result.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, result.sourceMapURL);
}
return result;
} catch (err) {
if (process.mainModule === cjsModuleInstance) {
if (getOptionValue('--experimental-detect-module')) {
// For the main entry point, cache the source to potentially retry as ESM.
module.exports.entryPointSource = content;
} else {
// We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're
// retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`.
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(err, content, filename);
}
}
throw err;
// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}
// Cache the source map for the module if present.
if (result.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, result.sourceMapURL);
}
return result;
}
/**
@ -1411,9 +1408,9 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
* `exports`) to the file. Returns exception, if any.
* @param {string} content The source code of the module
* @param {string} filename The file path of the module
* @param {boolean} loadAsESM Whether it's known to be ESM via .mjs or "type" in package.json.
* @param {'module'|'commonjs'|undefined} format Intended format of the module.
*/
Module.prototype._compile = function(content, filename, loadAsESM = false) {
Module.prototype._compile = function(content, filename, format) {
let moduleURL;
let redirects;
const manifest = policy()?.manifest;
@ -1423,17 +1420,24 @@ Module.prototype._compile = function(content, filename, loadAsESM = false) {
manifest.assertIntegrity(moduleURL, content);
}
let compiledWrapper;
if (format !== 'module') {
const result = wrapSafe(filename, content, this, undefined, format);
compiledWrapper = result.function;
if (result.canParseAsESM) {
format = 'module';
}
}
// TODO(joyeecheung): when the module is the entry point, consider allowing TLA.
// Only modules being require()'d really need to avoid TLA.
if (loadAsESM) {
if (format === 'module') {
// Pass the source into the .mjs extension handler indirectly through the cache.
this[kModuleSource] = content;
loadESMFromCJS(this, filename);
return;
}
const { function: compiledWrapper } = wrapSafe(filename, content, this);
// TODO(joyeecheung): the detection below is unnecessarily complex. Using the
// kIsMainSymbol, or a kBreakOnStartSymbol that gets passed from
// higher level instead of doing hacky detection here.
@ -1510,12 +1514,13 @@ Module._extensions['.js'] = function(module, filename) {
// If already analyzed the source, then it will be cached.
const content = getMaybeCachedSource(module, filename);
let format;
if (StringPrototypeEndsWith(filename, '.js')) {
const pkg = packageJsonReader.getNearestParentPackageJSON(filename);
// Function require shouldn't be used in ES modules.
if (pkg?.data.type === 'module') {
if (getOptionValue('--experimental-require-module')) {
module._compile(content, filename, true);
module._compile(content, filename, 'module');
return;
}
@ -1549,10 +1554,14 @@ Module._extensions['.js'] = function(module, filename) {
}
}
throw err;
} else if (pkg?.data.type === 'commonjs') {
format = 'commonjs';
}
} else if (StringPrototypeEndsWith(filename, '.cjs')) {
format = 'commonjs';
}
module._compile(content, filename, false);
module._compile(content, filename, format);
};
/**

View File

@ -4,7 +4,6 @@ const {
ArrayPrototypeMap,
Boolean,
JSONParse,
ObjectGetPrototypeOf,
ObjectKeys,
ObjectPrototypeHasOwnProperty,
ReflectApply,
@ -15,7 +14,6 @@ const {
StringPrototypeReplaceAll,
StringPrototypeSlice,
StringPrototypeStartsWith,
SyntaxErrorPrototype,
globalThis: { WebAssembly },
} = primordials;
@ -30,7 +28,6 @@ function lazyTypes() {
}
const {
containsModuleSyntax,
compileFunctionForCJSLoader,
} = internalBinding('contextify');
@ -62,7 +59,6 @@ const {
const { maybeCacheSourceMap } = require('internal/source_map/source_map_cache');
const moduleWrap = internalBinding('module_wrap');
const { ModuleWrap } = moduleWrap;
const { emitWarningSync } = require('internal/process/warning');
// Lazy-loading to avoid circular dependencies.
let getSourceSync;
@ -107,7 +103,6 @@ function initCJSParseSync() {
const translators = new SafeMap();
exports.translators = translators;
exports.enrichCJSError = enrichCJSError;
let DECODER = null;
/**
@ -169,25 +164,6 @@ translators.set('module', function moduleStrategy(url, source, isMain) {
return module;
});
/**
* Provide a more informative error for CommonJS imports.
* @param {Error | any} err
* @param {string} [content] Content of the file, if known.
* @param {string} [filename] The filename of the erroring module.
*/
function enrichCJSError(err, content, filename) {
if (err != null && ObjectGetPrototypeOf(err) === SyntaxErrorPrototype &&
containsModuleSyntax(content, filename)) {
// Emit the warning synchronously because we are in the middle of handling
// a SyntaxError that will throw and likely terminate the process before an
// asynchronous warning would be emitted.
emitWarningSync(
'To load an ES module, set "type": "module" in the package.json or use ' +
'the .mjs extension.',
);
}
}
/**
* Loads a CommonJS module via the ESM Loader sync CommonJS translator.
* This translator creates its own version of the `require` function passed into CommonJS modules.
@ -197,15 +173,11 @@ function enrichCJSError(err, content, filename) {
* @param {string} source - The source code of the module.
* @param {string} url - The URL of the module.
* @param {string} filename - The filename of the module.
* @param {boolean} isMain - Whether the module is the entrypoint
*/
function loadCJSModule(module, source, url, filename) {
let compileResult;
try {
compileResult = compileFunctionForCJSLoader(source, filename);
} catch (err) {
enrichCJSError(err, source, filename);
throw err;
}
function loadCJSModule(module, source, url, filename, isMain) {
const compileResult = compileFunctionForCJSLoader(source, filename, isMain, false);
// Cache the source map for the cjs module if present.
if (compileResult.sourceMapURL) {
maybeCacheSourceMap(url, source, null, false, undefined, compileResult.sourceMapURL);
@ -283,7 +255,7 @@ function createCJSModuleWrap(url, source, isMain, loadCJS = loadCJSModule) {
debug(`Loading CJSModule ${url}`);
if (!module.loaded) {
loadCJS(module, source, url, filename);
loadCJS(module, source, url, filename, !!isMain);
}
let exports;
@ -315,9 +287,10 @@ translators.set('commonjs-sync', function requireCommonJS(url, source, isMain) {
initCJSParseSync();
assert(!isMain); // This is only used by imported CJS modules.
return createCJSModuleWrap(url, source, isMain, (module, source, url, filename) => {
return createCJSModuleWrap(url, source, isMain, (module, source, url, filename, isMain) => {
assert(module === CJSModule._cache[filename]);
CJSModule._load(filename);
assert(!isMain);
CJSModule._load(filename, null, isMain);
});
});
@ -340,14 +313,9 @@ translators.set('commonjs', async function commonjsStrategy(url, source,
// For backward-compatibility, it's possible to return a nullish value for
// CJS source associated with a file: URL. In this case, the source is
// obtained by calling the monkey-patchable CJS loader.
const cjsLoader = source == null ? (module, source, url, filename) => {
try {
assert(module === CJSModule._cache[filename]);
CJSModule._load(filename);
} catch (err) {
enrichCJSError(err, source, filename);
throw err;
}
const cjsLoader = source == null ? (module, source, url, filename, isMain) => {
assert(module === CJSModule._cache[filename]);
CJSModule._load(filename, undefined, isMain);
} : loadCJSModule;
try {

View File

@ -1,9 +1,7 @@
'use strict';
const {
ObjectGetPrototypeOf,
StringPrototypeEndsWith,
SyntaxErrorPrototype,
globalThis,
} = primordials;
@ -164,35 +162,11 @@ function executeUserEntryPoint(main = process.argv[1]) {
let mainURL;
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
let retryAsESM = false;
if (!useESMLoader) {
const cjsLoader = require('internal/modules/cjs/loader');
const { Module } = cjsLoader;
if (getOptionValue('--experimental-detect-module')) {
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
try {
// Module._load is the monkey-patchable CJS module loader.
Module._load(main, null, true);
} catch (error) {
if (error != null && ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
const { shouldRetryAsESM } = internalBinding('contextify');
const mainPath = resolvedMain || main;
mainURL = pathToFileURL(mainPath).href;
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainURL);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
}
if (!retryAsESM) {
throw error;
}
}
} else { // `--experimental-detect-module` is not passed
Module._load(main, null, true);
}
}
if (useESMLoader || retryAsESM) {
Module._load(main, null, true);
} else {
const mainPath = resolvedMain || main;
if (mainURL === undefined) {
mainURL = pathToFileURL(mainPath).href;

View File

@ -28,8 +28,10 @@
#include "node_errors.h"
#include "node_external_reference.h"
#include "node_internals.h"
#include "node_process.h"
#include "node_sea.h"
#include "node_snapshot_builder.h"
#include "node_url.h"
#include "node_watchdog.h"
#include "util-inl.h"
@ -56,6 +58,7 @@ using v8::Maybe;
using v8::MaybeLocal;
using v8::MeasureMemoryExecution;
using v8::MeasureMemoryMode;
using v8::Message;
using v8::MicrotaskQueue;
using v8::MicrotasksPolicy;
using v8::Name;
@ -1483,50 +1486,109 @@ static MaybeLocal<Function> CompileFunctionForCJSLoader(Environment* env,
return scope.Escape(fn);
}
static bool warned_about_require_esm = false;
// TODO(joyeecheung): this was copied from the warning previously emitted in the
// JS land, but it's not very helpful. There should be specific information
// about which file or which package.json to update.
const char* require_esm_warning =
"To load an ES module, set \"type\": \"module\" in the package.json or use "
"the .mjs extension.";
static bool ShouldRetryAsESM(Realm* realm,
Local<String> message,
Local<String> code,
Local<String> resource_name);
static void CompileFunctionForCJSLoader(
const FunctionCallbackInfo<Value>& args) {
CHECK(args[0]->IsString());
CHECK(args[1]->IsString());
CHECK(args[2]->IsBoolean());
CHECK(args[3]->IsBoolean());
Local<String> code = args[0].As<String>();
Local<String> filename = args[1].As<String>();
bool should_detect_module = args[3].As<Boolean>()->Value();
Isolate* isolate = args.GetIsolate();
Local<Context> context = isolate->GetCurrentContext();
Environment* env = Environment::GetCurrent(context);
Realm* realm = Realm::GetCurrent(context);
Environment* env = realm->env();
bool cache_rejected = false;
Local<Function> fn;
Local<Value> cjs_exception;
Local<Message> cjs_message;
{
ShouldNotAbortOnUncaughtScope no_abort_scope(realm->env());
TryCatchScope try_catch(env);
if (!CompileFunctionForCJSLoader(
env, context, code, filename, &cache_rejected)
.ToLocal(&fn)) {
CHECK(try_catch.HasCaught());
CHECK(!try_catch.HasTerminated());
errors::DecorateErrorStack(env, try_catch);
try_catch.ReThrow();
cjs_exception = try_catch.Exception();
cjs_message = try_catch.Message();
errors::DecorateErrorStack(env, cjs_exception, cjs_message);
}
}
bool can_parse_as_esm = false;
if (!cjs_exception.IsEmpty()) {
// Use the URL to match what would be used in the origin if it's going to
// be reparsed as ESM.
Utf8Value filename_utf8(isolate, filename);
std::string url = url::FromFilePath(filename_utf8.ToStringView());
Local<String> url_value;
if (!String::NewFromUtf8(isolate, url.c_str()).ToLocal(&url_value)) {
return;
}
can_parse_as_esm =
ShouldRetryAsESM(realm, cjs_message->Get(), code, url_value);
if (!can_parse_as_esm) {
// The syntax error is not related to ESM, throw the original error.
isolate->ThrowException(cjs_exception);
return;
}
if (!should_detect_module) {
bool should_throw = true;
if (!warned_about_require_esm) {
// This needs to call process.emit('warning') in JS which can throw if
// the user listener throws. In that case, don't try to throw the syntax
// error.
should_throw =
ProcessEmitWarningSync(env, require_esm_warning).IsJust();
}
if (should_throw) {
isolate->ThrowException(cjs_exception);
}
return;
}
}
Local<Value> undefined = v8::Undefined(isolate);
std::vector<Local<Name>> names = {
env->cached_data_rejected_string(),
env->source_map_url_string(),
env->function_string(),
FIXED_ONE_BYTE_STRING(isolate, "canParseAsESM"),
};
std::vector<Local<Value>> values = {
Boolean::New(isolate, cache_rejected),
fn->GetScriptOrigin().SourceMapUrl(),
fn,
fn.IsEmpty() ? undefined : fn->GetScriptOrigin().SourceMapUrl(),
fn.IsEmpty() ? undefined : fn.As<Value>(),
Boolean::New(isolate, can_parse_as_esm),
};
Local<Object> result = Object::New(
isolate, v8::Null(isolate), names.data(), values.data(), names.size());
args.GetReturnValue().Set(result);
}
static bool ShouldRetryAsESM(Realm* realm,
Local<String> message,
Local<String> code,
Local<String> resource_name) {
bool ShouldRetryAsESM(Realm* realm,
Local<String> message,
Local<String> code,
Local<String> resource_name) {
Isolate* isolate = realm->isolate();
Utf8Value message_value(isolate, message);
@ -1560,7 +1622,7 @@ static bool ShouldRetryAsESM(Realm* realm,
Local<PrimitiveArray> hdo = loader::ModuleWrap::GetHostDefinedOptions(
isolate, realm->isolate_data()->source_text_module_default_hdo());
if (loader::ModuleWrap::CompileSourceTextModule(
realm, code, resource_name, 0, 0, hdo, nullptr, &cache_rejected)
realm, code, resource_name, 0, 0, hdo, std::nullopt, &cache_rejected)
.ToLocal(&module)) {
return true;
}

View File

@ -1138,15 +1138,19 @@ void Initialize(Local<Object> target,
void DecorateErrorStack(Environment* env,
const errors::TryCatchScope& try_catch) {
Local<Value> exception = try_catch.Exception();
DecorateErrorStack(env, try_catch.Exception(), try_catch.Message());
}
void DecorateErrorStack(Environment* env,
Local<Value> exception,
Local<Message> message) {
if (!exception->IsObject()) return;
Local<Object> err_obj = exception.As<Object>();
if (IsExceptionDecorated(env, err_obj)) return;
AppendExceptionLine(env, exception, try_catch.Message(), CONTEXTIFY_ERROR);
AppendExceptionLine(env, exception, message, CONTEXTIFY_ERROR);
TryCatchScope try_catch_scope(env); // Ignore exceptions below.
MaybeLocal<Value> stack = err_obj->Get(env->context(), env->stack_string());
MaybeLocal<Value> maybe_value =

View File

@ -295,6 +295,9 @@ void PerIsolateMessageListener(v8::Local<v8::Message> message,
void DecorateErrorStack(Environment* env,
const errors::TryCatchScope& try_catch);
void DecorateErrorStack(Environment* env,
v8::Local<v8::Value> error,
v8::Local<v8::Message> message);
class PrinterTryCatch : public v8::TryCatch {
public:

View File

@ -0,0 +1,7 @@
// Flags: --experimental-require-module --experimental-detect-module --abort-on-uncaught-exception
// Entry-point test written with ESM syntax (a static `import`). It is run
// with --experimental-detect-module, so it exercises the path where the
// entry point does not parse as CommonJS and has to be loaded as ESM instead.
// NOTE(review): presumably this is the "-aou" variant listed in .eslintrc.js;
// the extra --abort-on-uncaught-exception flag checks that the SyntaxError
// from the initial CommonJS parse attempt does not abort the process - confirm.
import { mustCall } from '../common/index.mjs';
// Wrap the callback with the `mustCall` helper imported from the common
// test utilities, then invoke it once.
const fn = mustCall(() => {
console.log('hello');
});
fn();

View File

@ -0,0 +1,7 @@
// Flags: --experimental-require-module --experimental-detect-module
// Entry-point test written with ESM syntax (a static `import`). It is run
// with --experimental-detect-module, so it exercises the path where the
// entry point does not parse as CommonJS and has to be loaded as ESM instead.
import { mustCall } from '../common/index.mjs';
// Wrap the callback with the `mustCall` helper imported from the common
// test utilities, then invoke it once.
const fn = mustCall(() => {
console.log('hello');
});
fn();

View File

@ -0,0 +1,11 @@
// Flags: --experimental-require-module --experimental-detect-module
'use strict';
// Checks that require() of a `.cjs` fixture still throws a SyntaxError
// mentioning the unexpected `export` token, even though both
// --experimental-require-module and --experimental-detect-module are enabled.
require('../common');
const assert = require('assert');
// The thrown error is matched only on its message.
assert.throws(() => {
require('../fixtures/es-modules/es-note-unexpected-export-1.cjs');
}, {
message: /Unexpected token 'export'/
});

View File

@ -0,0 +1,18 @@
// Flags: --experimental-require-module --experimental-detect-module
'use strict';
// Checks that, with both flags enabled, require() of the two fixtures below
// returns a module namespace object whose only own enumerable export is
// `default: 'module'`.
require('../common');
const assert = require('assert');
const { isModuleNamespaceObject } = require('util/types');
// Fixture 1: a `.js` file.
// NOTE(review): presumably it has no "type" field in scope - confirm in fixture.
{
const mod = require('../fixtures/es-modules/loose.js');
// Spread into a plain object so only the exported bindings are compared.
assert.deepStrictEqual({ ...mod }, { default: 'module' });
assert(isModuleNamespaceObject(mod));
}
// Fixture 2: a specifier with no file extension, under `package-without-type`.
{
const mod = require('../fixtures/es-modules/package-without-type/noext-esm');
// Spread into a plain object so only the exported bindings are compared.
assert.deepStrictEqual({ ...mod }, { default: 'module' });
assert(isModuleNamespaceObject(mod));
}