From 10099bb3f7fd97bb9dd9667188426866b3098e07 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Fri, 5 Jul 2024 21:58:35 +0200 Subject: [PATCH] src: refactor embedded entrypoint loading This patch: 1. Refactor the routines used to compile and run an embedder entrypoint. In JS land special handling for SEA is done directly in main/embedding.js instead of clobbering the CJS loader. Add warnings to remind users that currently the require() in SEA bundled scripts only supports loading builtins. 2. Don't use the bundled SEA code cache when compiling CJS loaded from disk, since in that case we are certainly not compiling the code bundled into the SEA. Use an is_sea_main flag in CompileFunctionForCJSLoader() (which replaces an unused argument) to pass this into the C++ land - the code cache is still read directly from C++ to avoid the overhead of ArrayBuffer creation. 3. Move SEA loading code into MaybeLoadSingleExecutableApplication() which calls LoadEnvironment() with its own StartExecutionCallback(). This avoids more hidden switches in StartExecution() and makes them explicit. Also add some TODOs about how to support ESM in embedded applications. 4. 
Add more comments PR-URL: https://github.com/nodejs/node/pull/53573 Reviewed-By: Geoffrey Booth Reviewed-By: Chengzhong Wu Reviewed-By: Matteo Collina Reviewed-By: James M Snell --- lib/internal/main/check_syntax.js | 2 +- lib/internal/main/embedding.js | 105 +++++++++++++++++++++++- lib/internal/modules/cjs/loader.js | 17 +--- lib/internal/modules/esm/translators.js | 2 +- lib/internal/util/embedding.js | 53 ------------ src/node.cc | 8 ++ src/node_contextify.cc | 60 ++++++++------ src/node_main_instance.cc | 15 +--- src/node_sea.cc | 62 +++++++++----- src/node_sea.h | 7 ++ test/fixtures/sea.js | 12 ++- 11 files changed, 209 insertions(+), 134 deletions(-) delete mode 100644 lib/internal/util/embedding.js diff --git a/lib/internal/main/check_syntax.js b/lib/internal/main/check_syntax.js index 5a7ab5dc19e..aa14dca8999 100644 --- a/lib/internal/main/check_syntax.js +++ b/lib/internal/main/check_syntax.js @@ -75,5 +75,5 @@ async function checkSyntax(source, filename) { return; } - wrapSafe(filename, source); + wrapSafe(filename, source, undefined, 'commonjs'); } diff --git a/lib/internal/main/embedding.js b/lib/internal/main/embedding.js index cc7cb0eee9d..e547e77e909 100644 --- a/lib/internal/main/embedding.js +++ b/lib/internal/main/embedding.js @@ -1,15 +1,116 @@ 'use strict'; + +// This main script is currently only run when LoadEnvironment() +// is run with a non-null StartExecutionCallback or a UTF8 +// main script. Effectively there are two cases where this happens: +// 1. It's a single-executable application *loading* a main script +// bundled into the executable. This is currently done from +// NodeMainInstance::Run(). +// 2. It's an embedder application and LoadEnvironment() is invoked +// as described above. 
+ const { prepareMainThreadExecution, } = require('internal/process/pre_execution'); -const { isExperimentalSeaWarningNeeded } = internalBinding('sea'); +const { isExperimentalSeaWarningNeeded, isSea } = internalBinding('sea'); const { emitExperimentalWarning } = require('internal/util'); -const { embedderRequire, embedderRunCjs } = require('internal/util/embedding'); +const { emitWarningSync } = require('internal/process/warning'); +const { BuiltinModule: { normalizeRequirableId } } = require('internal/bootstrap/realm'); +const { Module } = require('internal/modules/cjs/loader'); +const { compileFunctionForCJSLoader } = internalBinding('contextify'); +const { maybeCacheSourceMap } = require('internal/source_map/source_map_cache'); +const { codes: { + ERR_UNKNOWN_BUILTIN_MODULE, +} } = require('internal/errors'); + +// Don't expand process.argv[1] because in a single-executable application or an +// embedder application, the user main script isn't necessarily provided via the +// command line (e.g. it could be provided via an API or bundled into the executable). prepareMainThreadExecution(false, true); +const isLoadingSea = isSea(); if (isExperimentalSeaWarningNeeded()) { emitExperimentalWarning('Single executable application'); } +// This is roughly the same as: +// +// const mod = new Module(filename); +// mod._compile(content, filename); +// +// but the code has been duplicated because currently there is no way to set the +// value of require.main to module. +// +// TODO(RaisinTen): Find a way to deduplicate this. +function embedderRunCjs(content) { + // The filename of the module (used for CJS module lookup) + // is always the same as the location of the executable itself + // at the time of the loading (which means it changes depending + // on where the executable is in the file system). 
+ const filename = process.execPath; + const customModule = new Module(filename, null); + + const { + function: compiledWrapper, + cachedDataRejected, + sourceMapURL, + } = compileFunctionForCJSLoader( + content, + filename, + isLoadingSea, // is_sea_main + false, // should_detect_module, ESM should be supported differently for embedded code + ); + // Cache the source map for the module if present. + if (sourceMapURL) { + maybeCacheSourceMap( + filename, + content, + customModule, + false, // isGeneratedSource + undefined, // sourceURL, TODO(joyeecheung): should be extracted by V8 + sourceMapURL, + ); + } + + // cachedDataRejected is only set if cache from SEA is used. + if (cachedDataRejected !== false && isLoadingSea) { + emitWarningSync('Code cache data rejected.'); + } + + // Patch the module to make it look almost like a regular CJS module + // instance. + customModule.filename = process.execPath; + customModule.paths = Module._nodeModulePaths(process.execPath); + embedderRequire.main = customModule; + + return compiledWrapper( + customModule.exports, // exports + embedderRequire, // require + customModule, // module + process.execPath, // __filename + customModule.path, // __dirname + ); +} + +let warnedAboutBuiltins = false; + +function embedderRequire(id) { + const normalizedId = normalizeRequirableId(id); + if (!normalizedId) { + if (isLoadingSea && !warnedAboutBuiltins) { + emitWarningSync( + 'Currently the require() provided to the main script embedded into ' + + 'single-executable applications only supports loading built-in modules.\n' + + 'To load a module from disk after the single executable application is ' + + 'launched, use require("module").createRequire().\n' + + 'Support for bundled module loading or virtual file systems are under ' + + 'discussions in https://github.com/nodejs/single-executable'); + warnedAboutBuiltins = true; + } + throw new ERR_UNKNOWN_BUILTIN_MODULE(id); + } + return require(normalizedId); +} + return [process, 
embedderRequire, embedderRunCjs]; diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index 61286fe760b..040af4ace67 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -1343,11 +1343,10 @@ function loadESMFromCJS(mod, filename) { * Wraps the given content in a script and runs it in a new context. * @param {string} filename The name of the file being loaded * @param {string} content The content of the file being loaded - * @param {Module} cjsModuleInstance The CommonJS loader instance - * @param {object} codeCache The SEA code cache + * @param {Module|undefined} cjsModuleInstance The CommonJS loader instance * @param {'commonjs'|undefined} format Intended format of the module. */ -function wrapSafe(filename, content, cjsModuleInstance, codeCache, format) { +function wrapSafe(filename, content, cjsModuleInstance, format) { assert(format !== 'module'); // ESM should be handled in loadESMFromCJS(). const hostDefinedOptionId = vm_dynamic_import_default_internal; const importModuleDynamically = vm_dynamic_import_default_internal; @@ -1378,16 +1377,8 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache, format) { }; } - const isMain = !!(cjsModuleInstance && cjsModuleInstance[kIsMainSymbol]); const shouldDetectModule = (format !== 'commonjs' && getOptionValue('--experimental-detect-module')); - const result = compileFunctionForCJSLoader(content, filename, isMain, shouldDetectModule); - - // cachedDataRejected is only set for cache coming from SEA. - if (codeCache && - result.cachedDataRejected !== false && - internalBinding('sea').isSea()) { - process.emitWarning('Code cache data rejected.'); - } + const result = compileFunctionForCJSLoader(content, filename, false /* is_sea_main */, shouldDetectModule); // Cache the source map for the module if present. 
if (result.sourceMapURL) { @@ -1409,7 +1400,7 @@ Module.prototype._compile = function(content, filename, format) { let compiledWrapper; if (format !== 'module') { - const result = wrapSafe(filename, content, this, undefined, format); + const result = wrapSafe(filename, content, this, format); compiledWrapper = result.function; if (result.canParseAsESM) { format = 'module'; diff --git a/lib/internal/modules/esm/translators.js b/lib/internal/modules/esm/translators.js index 809792a90a6..48d86f611d9 100644 --- a/lib/internal/modules/esm/translators.js +++ b/lib/internal/modules/esm/translators.js @@ -177,7 +177,7 @@ translators.set('module', function moduleStrategy(url, source, isMain) { * @param {boolean} isMain - Whether the module is the entrypoint */ function loadCJSModule(module, source, url, filename, isMain) { - const compileResult = compileFunctionForCJSLoader(source, filename, isMain, false); + const compileResult = compileFunctionForCJSLoader(source, filename, false /* is_sea_main */, false); const { function: compiledWrapper, sourceMapURL } = compileResult; // Cache the source map for the cjs module if present. diff --git a/lib/internal/util/embedding.js b/lib/internal/util/embedding.js deleted file mode 100644 index fde209607c3..00000000000 --- a/lib/internal/util/embedding.js +++ /dev/null @@ -1,53 +0,0 @@ -'use strict'; -const { BuiltinModule: { normalizeRequirableId } } = require('internal/bootstrap/realm'); -const { Module, wrapSafe } = require('internal/modules/cjs/loader'); -const { codes: { - ERR_UNKNOWN_BUILTIN_MODULE, -} } = require('internal/errors'); -const { getCodePath, isSea } = internalBinding('sea'); - -// This is roughly the same as: -// -// const mod = new Module(filename); -// mod._compile(contents, filename); -// -// but the code has been duplicated because currently there is no way to set the -// value of require.main to module. -// -// TODO(RaisinTen): Find a way to deduplicate this. 
- -function embedderRunCjs(contents) { - const filename = process.execPath; - const { function: compiledWrapper } = wrapSafe( - isSea() ? getCodePath() : filename, - contents); - - const customModule = new Module(filename, null); - customModule.filename = filename; - customModule.paths = Module._nodeModulePaths(customModule.path); - - const customExports = customModule.exports; - - embedderRequire.main = customModule; - - const customFilename = customModule.filename; - - const customDirname = customModule.path; - - return compiledWrapper( - customExports, - embedderRequire, - customModule, - customFilename, - customDirname); -} - -function embedderRequire(id) { - const normalizedId = normalizeRequirableId(id); - if (!normalizedId) { - throw new ERR_UNKNOWN_BUILTIN_MODULE(id); - } - return require(normalizedId); -} - -module.exports = { embedderRequire, embedderRunCjs }; diff --git a/src/node.cc b/src/node.cc index a13c5a45496..208c89c5f93 100644 --- a/src/node.cc +++ b/src/node.cc @@ -254,6 +254,14 @@ std::optional CallbackInfoFromArray( CHECK(process_obj->IsObject()); CHECK(require_fn->IsFunction()); CHECK(runcjs_fn->IsFunction()); + // TODO(joyeecheung): some support for running ESM as an entrypoint + // is needed. The simplest API would be to add a run_esm to + // StartExecutionCallbackInfo which compiles, links (to builtins) + // and evaluates a SourceTextModule. + // TODO(joyeecheung): the env pointer should be part of + // StartExecutionCallbackInfo, otherwise embedders are forced to use + // lambdas to pass it into the callback, which can make the code + // difficult to read. 
node::StartExecutionCallbackInfo info{process_obj.As(), require_fn.As(), runcjs_fn.As()}; diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 2b08aee16c8..0752a67326d 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1453,12 +1453,17 @@ static std::vector throws_only_in_cjs_error_messages = { "await is only valid in async functions and " "the top level bodies of modules"}; -static MaybeLocal CompileFunctionForCJSLoader(Environment* env, - Local context, - Local code, - Local filename, - bool* cache_rejected, - bool is_cjs_scope) { +// If cached_data is provided, it would be used for the compilation and +// the on-disk compilation cache from NODE_COMPILE_CACHE (if configured) +// would be ignored. +static MaybeLocal CompileFunctionForCJSLoader( + Environment* env, + Local context, + Local code, + Local filename, + bool* cache_rejected, + bool is_cjs_scope, + ScriptCompiler::CachedData* cached_data) { Isolate* isolate = context->GetIsolate(); EscapableHandleScope scope(isolate); @@ -1475,25 +1480,9 @@ static MaybeLocal CompileFunctionForCJSLoader(Environment* env, false, // is WASM false, // is ES Module hdo); - ScriptCompiler::CachedData* cached_data = nullptr; - - bool used_cache_from_sea = false; -#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION - if (sea::IsSingleExecutable()) { - sea::SeaResource sea = sea::FindSingleExecutableResource(); - if (sea.use_code_cache()) { - std::string_view data = sea.code_cache.value(); - cached_data = new ScriptCompiler::CachedData( - reinterpret_cast(data.data()), - static_cast(data.size()), - v8::ScriptCompiler::CachedData::BufferNotOwned); - used_cache_from_sea = true; - } - } -#endif CompileCacheEntry* cache_entry = nullptr; - if (!used_cache_from_sea && env->use_compile_cache()) { + if (cached_data == nullptr && env->use_compile_cache()) { cache_entry = env->compile_cache_handler()->GetOrInsert( code, filename, CachedCodeType::kCommonJS); } @@ -1559,6 +1548,7 @@ static void 
CompileFunctionForCJSLoader( CHECK(args[3]->IsBoolean()); Local code = args[0].As(); Local filename = args[1].As(); + bool is_sea_main = args[2].As()->Value(); bool should_detect_module = args[3].As()->Value(); Isolate* isolate = args.GetIsolate(); @@ -1571,11 +1561,31 @@ static void CompileFunctionForCJSLoader( Local cjs_exception; Local cjs_message; + ScriptCompiler::CachedData* cached_data = nullptr; +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (is_sea_main) { + sea::SeaResource sea = sea::FindSingleExecutableResource(); + // Use the "main" field in SEA config for the filename. + Local filename_from_sea; + if (!ToV8Value(context, sea.code_path).ToLocal(&filename_from_sea)) { + return; + } + filename = filename_from_sea.As(); + if (sea.use_code_cache()) { + std::string_view data = sea.code_cache.value(); + cached_data = new ScriptCompiler::CachedData( + reinterpret_cast(data.data()), + static_cast(data.size()), + v8::ScriptCompiler::CachedData::BufferNotOwned); + } + } +#endif + { ShouldNotAbortOnUncaughtScope no_abort_scope(realm->env()); TryCatchScope try_catch(env); if (!CompileFunctionForCJSLoader( - env, context, code, filename, &cache_rejected, true) + env, context, code, filename, &cache_rejected, true, cached_data) .ToLocal(&fn)) { CHECK(try_catch.HasCaught()); CHECK(!try_catch.HasTerminated()); @@ -1730,7 +1740,7 @@ static void ContainsModuleSyntax(const FunctionCallbackInfo& args) { TryCatchScope try_catch(env); ShouldNotAbortOnUncaughtScope no_abort_scope(env); if (CompileFunctionForCJSLoader( - env, context, code, filename, &cache_rejected, cjs_var) + env, context, code, filename, &cache_rejected, cjs_var, nullptr) .ToLocal(&fn)) { args.GetReturnValue().Set(false); return; diff --git a/src/node_main_instance.cc b/src/node_main_instance.cc index 22b35e33e8f..4119ac1b002 100644 --- a/src/node_main_instance.cc +++ b/src/node_main_instance.cc @@ -103,20 +103,7 @@ ExitCode NodeMainInstance::Run() { void NodeMainInstance::Run(ExitCode* exit_code, 
Environment* env) { if (*exit_code == ExitCode::kNoFailure) { - bool runs_sea_code = false; -#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION - if (sea::IsSingleExecutable()) { - sea::SeaResource sea = sea::FindSingleExecutableResource(); - if (!sea.use_snapshot()) { - runs_sea_code = true; - std::string_view code = sea.main_code_or_snapshot; - LoadEnvironment(env, code); - } - } -#endif - // Either there is already a snapshot main function from SEA, or it's not - // a SEA at all. - if (!runs_sea_code) { + if (!sea::MaybeLoadSingleExecutableApplication(env)) { LoadEnvironment(env, StartExecutionCallback{}); } diff --git a/src/node_sea.cc b/src/node_sea.cc index bef7fe7c227..8c4e3b3579f 100644 --- a/src/node_sea.cc +++ b/src/node_sea.cc @@ -36,6 +36,7 @@ using v8::FunctionCallbackInfo; using v8::HandleScope; using v8::Isolate; using v8::Local; +using v8::MaybeLocal; using v8::NewStringType; using v8::Object; using v8::ScriptCompiler; @@ -261,25 +262,6 @@ void IsExperimentalSeaWarningNeeded(const FunctionCallbackInfo& args) { sea_resource.flags & SeaFlags::kDisableExperimentalSeaWarning)); } -void GetCodePath(const FunctionCallbackInfo& args) { - DCHECK(IsSingleExecutable()); - - Isolate* isolate = args.GetIsolate(); - - SeaResource sea_resource = FindSingleExecutableResource(); - - Local code_path; - if (!String::NewFromUtf8(isolate, - sea_resource.code_path.data(), - NewStringType::kNormal, - sea_resource.code_path.length()) - .ToLocal(&code_path)) { - return; - } - - args.GetReturnValue().Set(code_path); -} - std::tuple FixupArgsForSEA(int argc, char** argv) { // Repeats argv[0] at position 1 on argv as a replacement for the missing // entry point file path. @@ -617,6 +599,46 @@ void GetAsset(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(ab); } +MaybeLocal LoadSingleExecutableApplication( + const StartExecutionCallbackInfo& info) { + // Here we are currently relying on the fact that in NodeMainInstance::Run(), + // env->context() is entered. 
+ Local context = Isolate::GetCurrent()->GetCurrentContext(); + Environment* env = Environment::GetCurrent(context); + SeaResource sea = FindSingleExecutableResource(); + + CHECK(!sea.use_snapshot()); + // TODO(joyeecheung): this should be an external string. Refactor UnionBytes + // and make it easy to create one based on static content on the fly. + Local main_script = + ToV8Value(env->context(), sea.main_code_or_snapshot).ToLocalChecked(); + return info.run_cjs->Call( + env->context(), Null(env->isolate()), 1, &main_script); +} + +bool MaybeLoadSingleExecutableApplication(Environment* env) { +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (!IsSingleExecutable()) { + return false; + } + + SeaResource sea = FindSingleExecutableResource(); + + if (sea.use_snapshot()) { + // The SEA preparation blob building process should already enforce this, + // this check is just here to guard against the unlikely case where + // the SEA preparation blob has been manually modified by someone. + CHECK(!env->snapshot_deserialize_main().IsEmpty()); + LoadEnvironment(env, StartExecutionCallback{}); + return true; + } + + LoadEnvironment(env, LoadSingleExecutableApplication); + return true; +#endif + return false; +} + void Initialize(Local target, Local unused, Local context, @@ -626,14 +648,12 @@ void Initialize(Local target, target, "isExperimentalSeaWarningNeeded", IsExperimentalSeaWarningNeeded); - SetMethod(context, target, "getCodePath", GetCodePath); SetMethod(context, target, "getAsset", GetAsset); } void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(IsSea); registry->Register(IsExperimentalSeaWarningNeeded); - registry->Register(GetCodePath); registry->Register(GetAsset); } diff --git a/src/node_sea.h b/src/node_sea.h index 6f2f51d997d..f3b3c34d26a 100644 --- a/src/node_sea.h +++ b/src/node_sea.h @@ -14,6 +14,7 @@ #include "node_exit_code.h" namespace node { +class Environment; namespace sea { // A special number that will appear 
at the beginning of the single executable // preparation blobs ready to be injected into the binary. We use this to check @@ -49,6 +50,12 @@ node::ExitCode BuildSingleExecutableBlob( const std::string& config_path, const std::vector& args, const std::vector& exec_args); + +// Try loading the Environment as a single-executable application. +// Returns true if it is loaded as a single-executable application. +// Otherwise returns false and the caller is expected to call LoadEnvironment() +// differently. +bool MaybeLoadSingleExecutableApplication(Environment* env); } // namespace sea } // namespace node diff --git a/test/fixtures/sea.js b/test/fixtures/sea.js index e7b7f46ff00..6dea6960997 100644 --- a/test/fixtures/sea.js +++ b/test/fixtures/sea.js @@ -5,10 +5,14 @@ const createdRequire = createRequire(__filename); // because we set NODE_TEST_DIR=/Users/iojs/node-tmp on Jenkins CI. const { expectWarning, mustNotCall } = createdRequire(process.env.COMMON_DIRECTORY); +const builtinWarning = +`Currently the require() provided to the main script embedded into single-executable applications only supports loading built-in modules. +To load a module from disk after the single executable application is launched, use require("module").createRequire(). +Support for bundled module loading or virtual file systems are under discussions in https://github.com/nodejs/single-executable`; + +expectWarning('Warning', builtinWarning); // Triggered by require() calls below. // This additionally makes sure that no unexpected warnings are emitted. 
-if (createdRequire('./sea-config.json').disableExperimentalSEAWarning) { - process.on('warning', mustNotCall()); -} else { +if (!createdRequire('./sea-config.json').disableExperimentalSEAWarning) { expectWarning('ExperimentalWarning', 'Single executable application is an experimental feature and ' + 'might change at any time'); @@ -22,7 +26,7 @@ const { deepStrictEqual, strictEqual, throws } = require('assert'); const { dirname } = require('node:path'); // Checks that the source filename is used in the error stack trace. -strictEqual(new Error('lol').stack.split('\n')[1], ' at sea.js:25:13'); +strictEqual(new Error('lol').stack.split('\n')[1], ' at sea.js:29:13'); // Should be possible to require a core module that requires using the "node:" // scheme.