tools: speed up compilation of js2c output

Incremental compilation of Node.js is slow. Currently on a powerful
Linux machine, it takes about 9 seconds and 830 MB of memory to compile
`gen/node_javascript.cc` with g++. This is the longest step when
recompiling after a small change to a JavaScript file.

`gen/node_javascript.cc` contains a lot of large binary literals of our
JavaScript source code. It is well known that embedding large binary
literals as C/C++ arrays is slow. One workaround is to include the data
as string literals instead. This is particularly nice for the JavaScript
included via js2c, which looks better as string literals anyway.

Add a build flag `NODE_JS2C_USE_STRING_LITERALS` to js2c. When this flag
is set, we emit string literals instead of array literals, i.e.:

```c++
// old: static const uint8_t X[] = { ... };
static const uint8_t *X = R"JS2C1b732aee(...)JS2C1b732aee";

// old: static const uint16_t Y[] = { ... };
static const uint16_t *Y = uR"JS2C1b732aee(...)JS2C1b732aee";
```

This requires some modest refactoring in order to deal with the flag
being on or off, but the new code itself is actually shorter.

I only enabled the new flag on Linux/macOS, since those are the systems
that I have available for testing. On my Linux system with gcc, it speeds
up compilation by 5.5s (9.0s -> 3.5s). On my Mac system with clang, it
speeds up compilation by 2.2s (3.7s -> 1.5s). (I don't think this flag
will work with MSVC, but it'd probably speed up clang on Windows.)

The long-term goal here is probably to allow this to occur incrementally
per JavaScript file and in parallel, to avoid recompiling all of
`gen/node_javascript.cc`. Unfortunately the necessary gyp incantations
seem impossible (or at least, far beyond me). Anyway, a 60% speedup is a
nice enough win.

Refs: https://github.com/nodejs/node/issues/47984
PR-URL: https://github.com/nodejs/node/pull/48160
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
pull/48538/head
Keyhan Vakil 2023-06-24 08:52:29 -07:00 committed by GitHub
parent ae9f919880
commit 5c1233dfbc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 95 additions and 31 deletions

View File

@ -1209,6 +1209,9 @@
[ 'node_shared_libuv=="false"', {
'dependencies': [ 'deps/uv/uv.gyp:libuv#host' ],
}],
[ 'OS in "linux mac"', {
'defines': ['NODE_JS2C_USE_STRING_LITERALS'],
}],
[ 'debug_node=="true"', {
'cflags!': [ '-O3' ],
'cflags': [ '-g', '-O0' ],

View File

@ -390,6 +390,19 @@ const std::string& GetCode(uint16_t index) {
return table[index];
}
#ifdef NODE_JS2C_USE_STRING_LITERALS
const char* string_literal_def_template = "static const %s *%s_raw = ";
constexpr std::string_view ascii_string_literal_start =
"reinterpret_cast<const uint8_t*>(R\"JS2C1b732aee(";
constexpr std::string_view utf16_string_literal_start =
"reinterpret_cast<const uint16_t*>(uR\"JS2C1b732aee(";
constexpr std::string_view string_literal_end = ")JS2C1b732aee\");";
#else
const char* array_literal_def_template = "static const %s %s_raw[] = ";
constexpr std::string_view array_literal_start = "{\n";
constexpr std::string_view array_literal_end = "\n};\n\n";
#endif
// Definitions:
// static const uint8_t fs_raw[] = {
// ....
@ -403,38 +416,93 @@ const std::string& GetCode(uint16_t index) {
//
// static StaticExternalTwoByteResource
// internal_cli_table_resource(internal_cli_table_raw, 1234, nullptr);
constexpr std::string_view literal_end = "\n};\n\n";
//
// If NODE_JS2C_USE_STRING_LITERALS is defined, the data is output as C++
// raw strings (i.e. R"JS2C1b732aee(...)JS2C1b732aee") rather than as an
// array. This speeds up compilation for gcc/clang.
template <typename T>
Fragment GetDefinitionImpl(const std::vector<T>& code, const std::string& var) {
size_t count = code.size();
Fragment GetDefinitionImpl(const std::vector<char>& code,
const std::string& var) {
constexpr bool is_two_byte = std::is_same_v<T, uint16_t>;
static_assert(is_two_byte || std::is_same_v<T, char>);
constexpr size_t unit =
(is_two_byte ? 5 : 3) + 1; // 0-65536 or 0-127 and a ","
size_t count = is_two_byte
? simdutf::utf16_length_from_utf8(code.data(), code.size())
: code.size();
constexpr const char* arr_type = is_two_byte ? "uint16_t" : "uint8_t";
constexpr const char* resource_type = is_two_byte
? "StaticExternalTwoByteResource"
: "StaticExternalOneByteResource";
size_t def_size = 256 + (count * unit);
#ifdef NODE_JS2C_USE_STRING_LITERALS
const char* literal_def_template = string_literal_def_template;
size_t def_size = 512 + code.size();
#else
const char* literal_def_template = array_literal_def_template;
constexpr size_t unit =
(is_two_byte ? 5 : 3) + 1; // 0-65536 or 0-127 and a ","
size_t def_size = 512 + count * unit;
#endif
Fragment result(def_size, 0);
int cur = snprintf(result.data(),
def_size,
"static const %s %s_raw[] = {\n",
arr_type,
var.c_str());
int cur = snprintf(
result.data(), def_size, literal_def_template, arr_type, var.c_str());
assert(cur != 0);
for (size_t i = 0; i < count; ++i) {
#ifdef NODE_JS2C_USE_STRING_LITERALS
constexpr std::string_view start_string_view =
is_two_byte ? utf16_string_literal_start : ascii_string_literal_start;
memcpy(
result.data() + cur, start_string_view.data(), start_string_view.size());
cur += start_string_view.size();
memcpy(result.data() + cur, code.data(), code.size());
cur += code.size();
memcpy(result.data() + cur,
string_literal_end.data(),
string_literal_end.size());
cur += string_literal_end.size();
#else
memcpy(result.data() + cur,
array_literal_start.data(),
array_literal_start.size());
cur += array_literal_start.size();
const std::vector<T>* codepoints;
std::vector<uint16_t> utf16_codepoints;
if constexpr (is_two_byte) {
utf16_codepoints.resize(count);
size_t utf16_count = simdutf::convert_utf8_to_utf16(
code.data(),
code.size(),
reinterpret_cast<char16_t*>(utf16_codepoints.data()));
assert(utf16_count != 0);
utf16_codepoints.resize(utf16_count);
Debug("static size %zu\n", utf16_count);
codepoints = &utf16_codepoints;
} else {
// The code is ASCII, so no need to translate.
codepoints = &code;
}
for (size_t i = 0; i < codepoints->size(); ++i) {
// Avoid using snprintf on large chunks of data because it's much slower.
// It's fine to use it on small amount of data though.
const std::string& str = GetCode(static_cast<uint16_t>(code[i]));
const std::string& str = GetCode(static_cast<uint16_t>((*codepoints)[i]));
memcpy(result.data() + cur, str.c_str(), str.size());
cur += str.size();
}
memcpy(result.data() + cur, literal_end.data(), literal_end.size());
cur += literal_end.size();
memcpy(
result.data() + cur, array_literal_end.data(), array_literal_end.size());
cur += array_literal_end.size();
#endif
int end_size = snprintf(result.data() + cur,
result.size() - cur,
@ -455,16 +523,9 @@ Fragment GetDefinition(const std::string& var, const std::vector<char>& code) {
if (is_one_byte) {
Debug("static size %zu\n", code.size());
return GetDefinitionImpl(code, var);
return GetDefinitionImpl<char>(code, var);
} else {
size_t length = simdutf::utf16_length_from_utf8(code.data(), code.size());
std::vector<uint16_t> utf16(length);
size_t utf16_count = simdutf::convert_utf8_to_utf16(
code.data(), code.size(), reinterpret_cast<char16_t*>(utf16.data()));
assert(utf16_count != 0);
utf16.resize(utf16_count);
Debug("static size %zu\n", utf16_count);
return GetDefinitionImpl(utf16, var);
return GetDefinitionImpl<uint16_t>(code, var);
}
}
@ -626,21 +687,21 @@ int JS2C(const FileList& js_files,
const FileList& mjs_files,
const std::string& config,
const std::string& dest) {
Fragments defintions;
defintions.reserve(js_files.size() + mjs_files.size() + 1);
Fragments definitions;
definitions.reserve(js_files.size() + mjs_files.size() + 1);
Fragments initializers;
initializers.reserve(js_files.size() + mjs_files.size());
Fragments registrations;
registrations.reserve(js_files.size() + mjs_files.size() + 1);
for (const auto& filename : js_files) {
int r = AddModule(filename, &defintions, &initializers, &registrations);
int r = AddModule(filename, &definitions, &initializers, &registrations);
if (r != 0) {
return r;
}
}
for (const auto& filename : mjs_files) {
int r = AddModule(filename, &defintions, &initializers, &registrations);
int r = AddModule(filename, &definitions, &initializers, &registrations);
if (r != 0) {
return r;
}
@ -648,11 +709,11 @@ int JS2C(const FileList& js_files,
assert(config == "config.gypi");
// "config.gypi" -> config_raw.
int r = AddGypi("config", config, &defintions);
int r = AddGypi("config", config, &definitions);
if (r != 0) {
return r;
}
Fragment out = Format(defintions, initializers, registrations);
Fragment out = Format(definitions, initializers, registrations);
return WriteIfChanged(out, dest);
}