src: refactor embedded entrypoint loading

This patch:

1. Refactor the routines used to compile and run an embedder
  entrypoint. In JS land special handling for SEA is done
  directly in main/embedding.js instead of clobbering the CJS
  loader. Add warnings to remind users that currently the
  require() in SEA bundled scripts only supports loading builtins.
2. Don't use the bundled SEA code cache when compiling CJS
  loaded from disk, since in that case we are certainly not
  compiling the code bundled into the SEA. Use an is_sea_main
  flag in CompileFunctionForCJSLoader() (which replaces an unused
  argument) to pass this into the C++ land - the code cache is
  still read directly from C++ to avoid the overhead of
  ArrayBuffer creation.
3. Move SEA loading code into
  MaybeLoadSingleExecutableApplication() which calls
  LoadEnvironment() with its own StartExecutionCallback().
  This avoids more hidden switches in StartExecution() and
  makes them explicit. Also add some TODOs about how to support
  ESM in embedded applications.
4. Add more comments

PR-URL: https://github.com/nodejs/node/pull/53573
Reviewed-By: Geoffrey Booth <webadmin@geoffreybooth.com>
Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
pull/53743/head
Joyee Cheung 2024-07-05 21:58:35 +02:00 committed by GitHub
parent b32c7229d5
commit 10099bb3f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 209 additions and 134 deletions

View File

@ -75,5 +75,5 @@ async function checkSyntax(source, filename) {
return;
}
wrapSafe(filename, source);
wrapSafe(filename, source, undefined, 'commonjs');
}

View File

@ -1,15 +1,116 @@
'use strict';
// This main script is currently only run when LoadEnvironment()
// is run with a non-null StartExecutionCallback or a UTF8
// main script. Effectively there are two cases where this happens:
// 1. It's a single-executable application *loading* a main script
// bundled into the executable. This is currently done from
// NodeMainInstance::Run().
// 2. It's an embedder application and LoadEnvironment() is invoked
// as described above.
const {
prepareMainThreadExecution,
} = require('internal/process/pre_execution');
const { isExperimentalSeaWarningNeeded } = internalBinding('sea');
const { isExperimentalSeaWarningNeeded, isSea } = internalBinding('sea');
const { emitExperimentalWarning } = require('internal/util');
const { embedderRequire, embedderRunCjs } = require('internal/util/embedding');
const { emitWarningSync } = require('internal/process/warning');
const { BuiltinModule: { normalizeRequirableId } } = require('internal/bootstrap/realm');
const { Module } = require('internal/modules/cjs/loader');
const { compileFunctionForCJSLoader } = internalBinding('contextify');
const { maybeCacheSourceMap } = require('internal/source_map/source_map_cache');
const { codes: {
ERR_UNKNOWN_BUILTIN_MODULE,
} } = require('internal/errors');
// Don't expand process.argv[1] because in a single-executable application or an
// embedder application, the user main script isn't necessarily provided via the
// command line (e.g. it could be provided via an API or bundled into the executable).
prepareMainThreadExecution(false, true);
const isLoadingSea = isSea();
if (isExperimentalSeaWarningNeeded()) {
emitExperimentalWarning('Single executable application');
}
/**
 * Compiles the given main script as a CommonJS function and invokes it with
 * a hand-built module object whose filename is the running executable.
 *
 * This is roughly the same as:
 *
 *   const mod = new Module(filename);
 *   mod._compile(content, filename);
 *
 * but it is spelled out here because there is currently no way to make
 * require.main point at the module otherwise.
 *
 * TODO(RaisinTen): Find a way to deduplicate this.
 * @param {string} content Source text of the main script.
 * @returns {any} Whatever the compiled wrapper function returns.
 */
function embedderRunCjs(content) {
  // CJS module lookup is anchored at the executable itself, so the module
  // filename follows the binary wherever it lives on the file system at the
  // time of loading.
  const execPath = process.execPath;
  const mainModule = new Module(execPath, null);

  const {
    function: wrapperFn,
    cachedDataRejected,
    sourceMapURL,
  } = compileFunctionForCJSLoader(
    content,
    execPath,
    isLoadingSea, // is_sea_main
    false, // should_detect_module, ESM should be supported differently for embedded code
  );

  // Remember the source map for this module when the compiler reported one.
  if (sourceMapURL) {
    maybeCacheSourceMap(
      execPath,
      content,
      mainModule,
      false, // isGeneratedSource
      undefined, // sourceURL, TODO(joyeecheung): should be extracted by V8
      sourceMapURL,
    );
  }

  // cachedDataRejected is only meaningful when the SEA code cache was used.
  if (isLoadingSea && cachedDataRejected !== false) {
    emitWarningSync('Code cache data rejected.');
  }

  // Dress the module up so that it resembles a regular CJS module instance,
  // and expose it as require.main.
  mainModule.filename = execPath;
  mainModule.paths = Module._nodeModulePaths(execPath);
  embedderRequire.main = mainModule;

  return wrapperFn(
    mainModule.exports, // exports
    embedderRequire, // require
    mainModule, // module
    execPath, // __filename
    mainModule.path, // __dirname
  );
}
// Ensures the "built-ins only" warning below is emitted at most once.
let warnedAboutBuiltins = false;

/**
 * The require() handed to the embedded main script. It only resolves ids
 * that normalizeRequirableId() accepts.
 * @param {string} id Module id requested by the main script.
 * @returns {any} The result of require() for the normalized id.
 * @throws {ERR_UNKNOWN_BUILTIN_MODULE} When the id cannot be normalized.
 */
function embedderRequire(id) {
  const builtinId = normalizeRequirableId(id);
  if (builtinId) {
    return require(builtinId);
  }

  // When running as a single-executable application, explain the limitation
  // once before failing.
  if (isLoadingSea && !warnedAboutBuiltins) {
    emitWarningSync(
      'Currently the require() provided to the main script embedded into ' +
      'single-executable applications only supports loading built-in modules.\n' +
      'To load a module from disk after the single executable application is ' +
      'launched, use require("module").createRequire().\n' +
      'Support for bundled module loading or virtual file systems are under ' +
      'discussions in https://github.com/nodejs/single-executable');
    warnedAboutBuiltins = true;
  }
  throw new ERR_UNKNOWN_BUILTIN_MODULE(id);
}
return [process, embedderRequire, embedderRunCjs];

View File

@ -1343,11 +1343,10 @@ function loadESMFromCJS(mod, filename) {
* Wraps the given content in a script and runs it in a new context.
* @param {string} filename The name of the file being loaded
* @param {string} content The content of the file being loaded
* @param {Module} cjsModuleInstance The CommonJS loader instance
* @param {object} codeCache The SEA code cache
* @param {Module|undefined} cjsModuleInstance The CommonJS loader instance
* @param {'commonjs'|undefined} format Intended format of the module.
*/
function wrapSafe(filename, content, cjsModuleInstance, codeCache, format) {
function wrapSafe(filename, content, cjsModuleInstance, format) {
assert(format !== 'module'); // ESM should be handled in loadESMFromCJS().
const hostDefinedOptionId = vm_dynamic_import_default_internal;
const importModuleDynamically = vm_dynamic_import_default_internal;
@ -1378,16 +1377,8 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache, format) {
};
}
const isMain = !!(cjsModuleInstance && cjsModuleInstance[kIsMainSymbol]);
const shouldDetectModule = (format !== 'commonjs' && getOptionValue('--experimental-detect-module'));
const result = compileFunctionForCJSLoader(content, filename, isMain, shouldDetectModule);
// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}
const result = compileFunctionForCJSLoader(content, filename, false /* is_sea_main */, shouldDetectModule);
// Cache the source map for the module if present.
if (result.sourceMapURL) {
@ -1409,7 +1400,7 @@ Module.prototype._compile = function(content, filename, format) {
let compiledWrapper;
if (format !== 'module') {
const result = wrapSafe(filename, content, this, undefined, format);
const result = wrapSafe(filename, content, this, format);
compiledWrapper = result.function;
if (result.canParseAsESM) {
format = 'module';

View File

@ -177,7 +177,7 @@ translators.set('module', function moduleStrategy(url, source, isMain) {
* @param {boolean} isMain - Whether the module is the entrypoint
*/
function loadCJSModule(module, source, url, filename, isMain) {
const compileResult = compileFunctionForCJSLoader(source, filename, isMain, false);
const compileResult = compileFunctionForCJSLoader(source, filename, false /* is_sea_main */, false);
const { function: compiledWrapper, sourceMapURL } = compileResult;
// Cache the source map for the cjs module if present.

View File

@ -1,53 +0,0 @@
'use strict';
const { BuiltinModule: { normalizeRequirableId } } = require('internal/bootstrap/realm');
const { Module, wrapSafe } = require('internal/modules/cjs/loader');
const { codes: {
ERR_UNKNOWN_BUILTIN_MODULE,
} } = require('internal/errors');
const { getCodePath, isSea } = internalBinding('sea');
/**
 * Compiles the given script with wrapSafe() and runs it against a module
 * object whose filename is the running executable.
 *
 * This is roughly the same as:
 *
 *   const mod = new Module(filename);
 *   mod._compile(contents, filename);
 *
 * but it is spelled out here because there is currently no way to make
 * require.main point at the module otherwise.
 *
 * TODO(RaisinTen): Find a way to deduplicate this.
 * @param {string} contents Source text of the main script.
 * @returns {any} Whatever the compiled wrapper function returns.
 */
function embedderRunCjs(contents) {
  const execPath = process.execPath;

  // In a single-executable application the script is compiled under the code
  // path reported by the sea binding rather than the executable path.
  const compileName = isSea() ? getCodePath() : execPath;
  const { function: wrapperFn } = wrapSafe(compileName, contents);

  const mainModule = new Module(execPath, null);
  mainModule.filename = execPath;
  mainModule.paths = Module._nodeModulePaths(mainModule.path);
  embedderRequire.main = mainModule;

  return wrapperFn(
    mainModule.exports,
    embedderRequire,
    mainModule,
    mainModule.filename,
    mainModule.path);
}
/**
 * The require() handed to the embedded main script. It only resolves ids
 * that normalizeRequirableId() accepts.
 * @param {string} id Module id requested by the main script.
 * @returns {any} The result of require() for the normalized id.
 * @throws {ERR_UNKNOWN_BUILTIN_MODULE} When the id cannot be normalized.
 */
function embedderRequire(id) {
  const builtinId = normalizeRequirableId(id);
  if (builtinId) {
    return require(builtinId);
  }
  throw new ERR_UNKNOWN_BUILTIN_MODULE(id);
}
module.exports = { embedderRequire, embedderRunCjs };

View File

@ -254,6 +254,14 @@ std::optional<StartExecutionCallbackInfo> CallbackInfoFromArray(
CHECK(process_obj->IsObject());
CHECK(require_fn->IsFunction());
CHECK(runcjs_fn->IsFunction());
// TODO(joyeecheung): some support for running ESM as an entrypoint
// is needed. The simplest API would be to add a run_esm to
// StartExecutionCallbackInfo which compiles, links (to builtins)
// and evaluates a SourceTextModule.
// TODO(joyeecheung): the env pointer should be part of
// StartExecutionCallbackInfo, otherwise embedders are forced to use
// lambdas to pass it into the callback, which can make the code
// difficult to read.
node::StartExecutionCallbackInfo info{process_obj.As<Object>(),
require_fn.As<Function>(),
runcjs_fn.As<Function>()};

View File

@ -1453,12 +1453,17 @@ static std::vector<std::string_view> throws_only_in_cjs_error_messages = {
"await is only valid in async functions and "
"the top level bodies of modules"};
static MaybeLocal<Function> CompileFunctionForCJSLoader(Environment* env,
Local<Context> context,
Local<String> code,
Local<String> filename,
bool* cache_rejected,
bool is_cjs_scope) {
// If cached_data is provided, it would be used for the compilation and
// the on-disk compilation cache from NODE_COMPILE_CACHE (if configured)
// would be ignored.
static MaybeLocal<Function> CompileFunctionForCJSLoader(
Environment* env,
Local<Context> context,
Local<String> code,
Local<String> filename,
bool* cache_rejected,
bool is_cjs_scope,
ScriptCompiler::CachedData* cached_data) {
Isolate* isolate = context->GetIsolate();
EscapableHandleScope scope(isolate);
@ -1475,25 +1480,9 @@ static MaybeLocal<Function> CompileFunctionForCJSLoader(Environment* env,
false, // is WASM
false, // is ES Module
hdo);
ScriptCompiler::CachedData* cached_data = nullptr;
bool used_cache_from_sea = false;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (sea::IsSingleExecutable()) {
sea::SeaResource sea = sea::FindSingleExecutableResource();
if (sea.use_code_cache()) {
std::string_view data = sea.code_cache.value();
cached_data = new ScriptCompiler::CachedData(
reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int>(data.size()),
v8::ScriptCompiler::CachedData::BufferNotOwned);
used_cache_from_sea = true;
}
}
#endif
CompileCacheEntry* cache_entry = nullptr;
if (!used_cache_from_sea && env->use_compile_cache()) {
if (cached_data == nullptr && env->use_compile_cache()) {
cache_entry = env->compile_cache_handler()->GetOrInsert(
code, filename, CachedCodeType::kCommonJS);
}
@ -1559,6 +1548,7 @@ static void CompileFunctionForCJSLoader(
CHECK(args[3]->IsBoolean());
Local<String> code = args[0].As<String>();
Local<String> filename = args[1].As<String>();
bool is_sea_main = args[2].As<Boolean>()->Value();
bool should_detect_module = args[3].As<Boolean>()->Value();
Isolate* isolate = args.GetIsolate();
@ -1571,11 +1561,31 @@ static void CompileFunctionForCJSLoader(
Local<Value> cjs_exception;
Local<Message> cjs_message;
ScriptCompiler::CachedData* cached_data = nullptr;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (is_sea_main) {
sea::SeaResource sea = sea::FindSingleExecutableResource();
// Use the "main" field in SEA config for the filename.
Local<Value> filename_from_sea;
if (!ToV8Value(context, sea.code_path).ToLocal(&filename_from_sea)) {
return;
}
filename = filename_from_sea.As<String>();
if (sea.use_code_cache()) {
std::string_view data = sea.code_cache.value();
cached_data = new ScriptCompiler::CachedData(
reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int>(data.size()),
v8::ScriptCompiler::CachedData::BufferNotOwned);
}
}
#endif
{
ShouldNotAbortOnUncaughtScope no_abort_scope(realm->env());
TryCatchScope try_catch(env);
if (!CompileFunctionForCJSLoader(
env, context, code, filename, &cache_rejected, true)
env, context, code, filename, &cache_rejected, true, cached_data)
.ToLocal(&fn)) {
CHECK(try_catch.HasCaught());
CHECK(!try_catch.HasTerminated());
@ -1730,7 +1740,7 @@ static void ContainsModuleSyntax(const FunctionCallbackInfo<Value>& args) {
TryCatchScope try_catch(env);
ShouldNotAbortOnUncaughtScope no_abort_scope(env);
if (CompileFunctionForCJSLoader(
env, context, code, filename, &cache_rejected, cjs_var)
env, context, code, filename, &cache_rejected, cjs_var, nullptr)
.ToLocal(&fn)) {
args.GetReturnValue().Set(false);
return;

View File

@ -103,20 +103,7 @@ ExitCode NodeMainInstance::Run() {
void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) {
if (*exit_code == ExitCode::kNoFailure) {
bool runs_sea_code = false;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (sea::IsSingleExecutable()) {
sea::SeaResource sea = sea::FindSingleExecutableResource();
if (!sea.use_snapshot()) {
runs_sea_code = true;
std::string_view code = sea.main_code_or_snapshot;
LoadEnvironment(env, code);
}
}
#endif
// Either there is already a snapshot main function from SEA, or it's not
// a SEA at all.
if (!runs_sea_code) {
if (!sea::MaybeLoadSingleExecutableApplication(env)) {
LoadEnvironment(env, StartExecutionCallback{});
}

View File

@ -36,6 +36,7 @@ using v8::FunctionCallbackInfo;
using v8::HandleScope;
using v8::Isolate;
using v8::Local;
using v8::MaybeLocal;
using v8::NewStringType;
using v8::Object;
using v8::ScriptCompiler;
@ -261,25 +262,6 @@ void IsExperimentalSeaWarningNeeded(const FunctionCallbackInfo<Value>& args) {
sea_resource.flags & SeaFlags::kDisableExperimentalSeaWarning));
}
void GetCodePath(const FunctionCallbackInfo<Value>& args) {
DCHECK(IsSingleExecutable());
Isolate* isolate = args.GetIsolate();
SeaResource sea_resource = FindSingleExecutableResource();
Local<String> code_path;
if (!String::NewFromUtf8(isolate,
sea_resource.code_path.data(),
NewStringType::kNormal,
sea_resource.code_path.length())
.ToLocal(&code_path)) {
return;
}
args.GetReturnValue().Set(code_path);
}
std::tuple<int, char**> FixupArgsForSEA(int argc, char** argv) {
// Repeats argv[0] at position 1 on argv as a replacement for the missing
// entry point file path.
@ -617,6 +599,46 @@ void GetAsset(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(ab);
}
// StartExecutionCallback used for single-executable applications that bundle
// a main script (not a snapshot): converts the bundled script into a V8 value
// and passes it to the run_cjs function from the callback info.
MaybeLocal<Value> LoadSingleExecutableApplication(
    const StartExecutionCallbackInfo& info) {
  // This currently relies on NodeMainInstance::Run() having entered
  // env->context() before the callback is invoked.
  Local<Context> current_context = Isolate::GetCurrent()->GetCurrentContext();
  Environment* env = Environment::GetCurrent(current_context);

  SeaResource resource = FindSingleExecutableResource();
  CHECK(!resource.use_snapshot());

  // TODO(joyeecheung): this should be an external string. Refactor UnionBytes
  // and make it easy to create one based on static content on the fly.
  Local<Value> entry_source =
      ToV8Value(env->context(), resource.main_code_or_snapshot)
          .ToLocalChecked();

  return info.run_cjs->Call(
      env->context(), Null(env->isolate()), 1, &entry_source);
}
// Loads the environment as a single-executable application when the binary
// is one. Returns true if LoadEnvironment() was called here, false if the
// caller still has to load the environment itself.
bool MaybeLoadSingleExecutableApplication(Environment* env) {
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
  if (IsSingleExecutable()) {
    SeaResource resource = FindSingleExecutableResource();
    if (!resource.use_snapshot()) {
      // A main script is bundled: run it through the SEA-specific callback.
      LoadEnvironment(env, LoadSingleExecutableApplication);
    } else {
      // The SEA preparation blob building process should already enforce
      // this, the check only guards against the unlikely case where the
      // blob has been manually modified by someone.
      CHECK(!env->snapshot_deserialize_main().IsEmpty());
      LoadEnvironment(env, StartExecutionCallback{});
    }
    return true;
  }
#endif
  return false;
}
void Initialize(Local<Object> target,
Local<Value> unused,
Local<Context> context,
@ -626,14 +648,12 @@ void Initialize(Local<Object> target,
target,
"isExperimentalSeaWarningNeeded",
IsExperimentalSeaWarningNeeded);
SetMethod(context, target, "getCodePath", GetCodePath);
SetMethod(context, target, "getAsset", GetAsset);
}
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(IsSea);
registry->Register(IsExperimentalSeaWarningNeeded);
registry->Register(GetCodePath);
registry->Register(GetAsset);
}

View File

@ -14,6 +14,7 @@
#include "node_exit_code.h"
namespace node {
class Environment;
namespace sea {
// A special number that will appear at the beginning of the single executable
// preparation blobs ready to be injected into the binary. We use this to check
@ -49,6 +50,12 @@ node::ExitCode BuildSingleExecutableBlob(
const std::string& config_path,
const std::vector<std::string>& args,
const std::vector<std::string>& exec_args);
// Try loading the Environment as a single-executable application.
// Returns true if it is loaded as a single-executable application.
// Otherwise returns false and the caller is expected to call LoadEnvironment()
// differently.
bool MaybeLoadSingleExecutableApplication(Environment* env);
} // namespace sea
} // namespace node

12
test/fixtures/sea.js vendored
View File

@ -5,10 +5,14 @@ const createdRequire = createRequire(__filename);
// because we set NODE_TEST_DIR=/Users/iojs/node-tmp on Jenkins CI.
const { expectWarning, mustNotCall } = createdRequire(process.env.COMMON_DIRECTORY);
const builtinWarning =
`Currently the require() provided to the main script embedded into single-executable applications only supports loading built-in modules.
To load a module from disk after the single executable application is launched, use require("module").createRequire().
Support for bundled module loading or virtual file systems are under discussions in https://github.com/nodejs/single-executable`;
expectWarning('Warning', builtinWarning); // Triggered by require() calls below.
// This additionally makes sure that no unexpected warnings are emitted.
if (createdRequire('./sea-config.json').disableExperimentalSEAWarning) {
process.on('warning', mustNotCall());
} else {
if (!createdRequire('./sea-config.json').disableExperimentalSEAWarning) {
expectWarning('ExperimentalWarning',
'Single executable application is an experimental feature and ' +
'might change at any time');
@ -22,7 +26,7 @@ const { deepStrictEqual, strictEqual, throws } = require('assert');
const { dirname } = require('node:path');
// Checks that the source filename is used in the error stack trace.
strictEqual(new Error('lol').stack.split('\n')[1], ' at sea.js:25:13');
strictEqual(new Error('lol').stack.split('\n')[1], ' at sea.js:29:13');
// Should be possible to require a core module that requires using the "node:"
// scheme.