node/tools/icu/iculslocs.cc

389 lines
11 KiB
C++

/*
**********************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* Created 2014-06-20 by Steven R. Loomis
*
* See: http://bugs.icu-project.org/trac/ticket/10922
*
*/
/*
WHAT IS THIS?
Here's the problem: It's difficult to reconfigure ICU from the command
line without using the full makefiles. You can do a lot, but not
everything.
Consider:
$ icupkg -r 'ja*' icudt53l.dat
Great, you've now removed the (main) Japanese data. But something's
still wrong-- res_index (and thus, getAvailable* functions) still
claim the locale is present.
You are reading the source to a tool (using only public API C code)
that can solve this problem. Use as follows:
$ iculslocs -i . -N icudt53l -b res_index.txt
.. Generates a NEW res_index.txt (by looking at the .dat file, and
figuring out which locales are actually available). It has commented out
the ones which are no longer available:
...
it_SM {""}
// ja {""}
// ja_JP {""}
jgo {""}
...
Then you can build and in-place patch it with existing ICU tools:
$ genrb res_index.txt
$ icupkg -a res_index.res icudt53l.dat
.. Now you have a patched icudt53l.dat that not only doesn't have
Japanese, it doesn't *claim* to have Japanese.
*/
#include "string.h"
#include "charstr.h" // ICU internal header
#include <unicode/ustdio.h>
#include <unicode/ures.h>
#include <unicode/udata.h>
// Program name printed in usage and error messages; main() overwrites it
// with argv[0].
const char* PROG = "iculslocs";
// Data package name, replaced by the -N option. The literal value "NONE"
// makes calculatePackageName() leave the package name empty.
const char* NAME = U_ICUDATA_NAME; // assume ICU data
// Resource tree, replaced by the -T option. "ROOT" means no tree suffix is
// appended to the package name.
const char* TREE = "ROOT";
// Verbosity level; incremented once for every -v on the command line.
int VERBOSE = 0;
// Name of the index bundle that list() opens.
#define RES_INDEX "res_index"
// Key of the table inside the index bundle that lists the locales.
#define INSTALLEDLOCALES "InstalledLocales"
// Package name handed to ures_openDirect(); rebuilt by
// calculatePackageName() from NAME and TREE.
CharString packageName;
const char* locale = RES_INDEX; // locale referring to our index
/**
 * Prints the command-line help text to stdout via u_printf().
 * Uses PROG for the program name and U_ICUDATA_NAME for the default of -N.
 */
void usage() {
  u_printf("Usage: %s [options]\n", PROG);
  u_printf(
      "This program lists and optionally regenerates the locale "
      "manifests\n"
      " in ICU 'res_index.res' files.\n");
  u_printf(
      " -i ICUDATA Set ICUDATA dir to ICUDATA.\n"
      " NOTE: this must be the first option given.\n");
  u_printf(" -h This Help\n");
  u_printf(" -v Verbose Mode on\n");
  u_printf(" -l List locales to stdout\n");
  // The adjacent literals used to concatenate to "(unopenable)locales";
  // the separating space now lives at the end of the first literal.
  u_printf(
      " if Verbose mode, then missing (unopenable) "
      "locales\n"
      " will be listed preceded by a '#'.\n");
  u_printf(
      " -b res_index.txt Write 'corrected' bundle "
      "to res_index.txt\n"
      " missing bundles will be "
      "OMITTED\n");
  u_printf(
      " -T TREE Choose tree TREE\n"
      " (TREE should be one of: \n"
      " ROOT, brkitr, coll, curr, lang, rbnf, region, zone)\n");
  // see ureslocs.h and elsewhere
  u_printf(
      " -N NAME Choose name NAME\n"
      " (default: '%s')\n",
      U_ICUDATA_NAME);
  // Previously rendered as "with  no data" (two spaces) because both
  // adjacent literals carried a space at the seam.
  u_printf(
      "\nNOTE: for best results, this tool ought to be "
      "linked against\n"
      "stubdata. i.e. '%s -l' SHOULD return an error with "
      "no data.\n",
      PROG);
}
/**
 * Error gate for functions that return int and have a local or parameter
 * named `status` of type UErrorCode.
 *
 * If `status` holds a failure code, prints file, line, program name, the
 * ICU error name and `what`, then makes the ENCLOSING function return 1.
 * Otherwise does nothing.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement:
 * it needs the trailing semicolon at the call site and cannot capture a
 * following `else`.
 *
 * @param what C string describing the step that was being performed.
 */
#define ASSERT_SUCCESS(what)                                                   \
  do {                                                                         \
    if (U_FAILURE(status)) {                                                   \
      u_printf("%s:%d: %s: ERROR: %s %s\n",                                    \
               __FILE__,                                                       \
               __LINE__,                                                       \
               PROG,                                                           \
               u_errorName(status),                                            \
               (what));                                                        \
      return 1;                                                                \
    }                                                                          \
  } while (0)
/**
 * Rebuilds the global packageName from NAME and TREE.
 *
 * The result is empty when NAME is "NONE", just NAME when TREE is "ROOT",
 * and NAME + U_TREE_SEPARATOR_STRING + TREE otherwise. In verbose mode the
 * result is echoed to stdout.
 *
 * @param status receives any error reported by the append operations.
 *   Changed from reference to pointer to match node.js style
 */
void calculatePackageName(UErrorCode* status) {
  packageName.clear();

  const bool haveName = (strcmp(NAME, "NONE") != 0);
  if (haveName) {
    packageName.append(NAME, *status);

    // Only non-ROOT trees contribute a "<separator><tree>" suffix.
    const bool haveSubTree = (strcmp(TREE, "ROOT") != 0);
    if (haveSubTree) {
      packageName.append(U_TREE_SEPARATOR_STRING, *status);
      packageName.append(TREE, *status);
    }
  }

  if (VERBOSE != 0) {
    u_printf("packageName: %s\n", packageName.data());
  }
}
/**
* Does the locale exist?
* return zero for false, or nonzero if it was openable.
* Assumes calculatePackageName was called.
* @param exists set to TRUE if exists, FALSE otherwise.
* Changed from reference to pointer to match node.js style
* @return 0 on "OK" (success or resource-missing),
* 1 on "FAILURE" (unexpected error)
*/
int localeExists(const char* loc, UBool* exists) {
UErrorCode status = U_ZERO_ERROR;
if (VERBOSE > 1) {
u_printf("Trying to open %s:%s\n", packageName.data(), loc);
}
LocalUResourceBundlePointer aResource(
ures_openDirect(packageName.data(), loc, &status));
*exists = FALSE;
if (U_SUCCESS(status)) {
*exists = true;
if (VERBOSE > 1) {
u_printf("%s:%s existed!\n", packageName.data(), loc);
}
return 0;
} else if (status == U_MISSING_RESOURCE_ERROR) {
*exists = false;
if (VERBOSE > 1) {
u_printf("%s:%s did NOT exist (%s)!\n",
packageName.data(),
loc,
u_errorName(status));
}
return 0; // "good" failure
} else {
// some other failure..
u_printf("%s:%d: %s: ERROR %s opening %s:%s for test.\n",
__FILE__,
__LINE__,
u_errorName(status),
packageName.data(),
loc);
return 1; // abort
}
}
void printIndent(const LocalUFILEPointer& bf, int indent) {
for (int i = 0; i < indent + 1; i++) {
u_fprintf(bf.getAlias(), " ");
}
}
/**
 * Dumps a table resource contents as text into `bf`.
 * If lev==0, the INSTALLEDLOCALES entry is skipped (the caller regenerates
 * it separately). Only string-valued entries are supported; any other
 * resource type is reported as an error.
 *
 * @param lev nesting level; controls indentation and the skip rule above.
 * @param bund bundle to iterate; its iterator is reset first.
 * @param bf output file the entries are written to.
 * @param status ICU error code, checked after every ICU call.
 * @return 0 for OK, 1 for err
 */
int dumpAllButInstalledLocales(int lev,
                               LocalUResourceBundlePointer& bund,
                               LocalUFILEPointer& bf,
                               UErrorCode& status) {
  ures_resetIterator(bund.getAlias());
  LocalUResourceBundlePointer t;
  while (U_SUCCESS(status) && ures_hasNext(bund.getAlias())) {
    // Hand the previous item back as the fill-in so it is reused.
    t.adoptInstead(ures_getNextResource(bund.getAlias(), t.orphan(), &status));
    ASSERT_SUCCESS("while processing table");
    const char* key = ures_getKey(t.getAlias());
    if (VERBOSE > 1) {
      u_printf("dump@%d: got key %s\n", lev, key);
    }
    // ures_getKey() is documented to return NULL for resources that have no
    // key, so the key is checked before it is handed to strcmp().
    if (lev == 0 && key != NULL && !strcmp(key, INSTALLEDLOCALES)) {
      if (VERBOSE > 1) {
        u_printf("dump: skipping '%s' as it must be evaluated.\n", key);
      }
    } else {
      printIndent(bf, lev);
      u_fprintf(bf.getAlias(), "%s", key);
      switch (ures_getType(t.getAlias())) {
        case URES_STRING: {
          int32_t len = 0;
          const UChar* s = ures_getString(t.getAlias(), &len, &status);
          ASSERT_SUCCESS("getting string");
          u_fprintf(bf.getAlias(), ":string {\"");
          u_file_write(s, len, bf.getAlias());
          u_fprintf(bf.getAlias(), "\"}");
        } break;
        default: {
          u_printf("ERROR: unhandled type in dumpAllButInstalledLocales().\n");
          return 1;
        }
      }
      u_fprintf(bf.getAlias(), "\n");
    }
  }
  return 0;
}
int list(const char* toBundle) {
UErrorCode status = U_ZERO_ERROR;
LocalUFILEPointer bf;
if (toBundle != NULL) {
if (VERBOSE) {
u_printf("writing to bundle %s\n", toBundle);
}
// we write UTF-8 with BOM only. No exceptions.
bf.adoptInstead(u_fopen(toBundle, "w", "en_US_POSIX", "UTF-8"));
if (bf.isNull()) {
u_printf("ERROR: Could not open '%s' for writing.\n", toBundle);
return 1;
}
u_fputc(0xFEFF, bf.getAlias()); // write BOM
u_fprintf(bf.getAlias(), "// -*- Coding: utf-8; -*-\n//\n");
}
// first, calculate the bundle name.
calculatePackageName(&status);
ASSERT_SUCCESS("calculating package name");
if (VERBOSE) {
u_printf("\"locale\": %s\n", locale);
}
LocalUResourceBundlePointer bund(
ures_openDirect(packageName.data(), locale, &status));
ASSERT_SUCCESS("while opening the bundle");
LocalUResourceBundlePointer installedLocales(
ures_getByKey(bund.getAlias(), INSTALLEDLOCALES, NULL, &status));
ASSERT_SUCCESS("while fetching installed locales");
int32_t count = ures_getSize(installedLocales.getAlias());
if (VERBOSE) {
u_printf("Locales: %d\n", count);
}
if (bf.isValid()) {
// write the HEADER
u_fprintf(bf.getAlias(),
"// Warning this file is automatically generated\n"
"// Updated by %s based on %s:%s.txt\n",
PROG,
packageName.data(),
locale);
u_fprintf(bf.getAlias(),
"%s:table(nofallback) {\n"
" // First, everything besides InstalledLocales:\n",
locale);
if (dumpAllButInstalledLocales(0, bund, bf, status)) {
u_printf("Error dumping prolog for %s\n", toBundle);
return 1;
}
ASSERT_SUCCESS("while writing prolog"); // in case an error was missed
u_fprintf(bf.getAlias(),
" %s:table { // %d locales in input %s.res\n",
INSTALLEDLOCALES,
count,
locale);
}
// OK, now list them.
LocalUResourceBundlePointer subkey;
int validCount = 0;
for (int32_t i = 0; i < count; i++) {
subkey.adoptInstead(ures_getByIndex(
installedLocales.getAlias(), i, subkey.orphan(), &status));
ASSERT_SUCCESS("while fetching an installed locale's name");
const char* key = ures_getKey(subkey.getAlias());
if (VERBOSE > 1) {
u_printf("@%d: %s\n", i, key);
}
// now, see if the locale is installed..
UBool exists;
if (localeExists(key, &exists)) {
return 1; // get out.
}
if (exists) {
validCount++;
u_printf("%s\n", key);
if (bf.isValid()) {
u_fprintf(bf.getAlias(), " %s {\"\"}\n", key);
}
} else {
if (bf.isValid()) {
u_fprintf(bf.getAlias(), "// %s {\"\"}\n", key);
}
if (VERBOSE) {
u_printf("#%s\n", key); // verbosity one - '' vs '#'
}
}
}
if (bf.isValid()) {
u_fprintf(bf.getAlias(), " } // %d/%d valid\n", validCount, count);
// write the HEADER
u_fprintf(bf.getAlias(), "}\n");
}
return 0;
}
/**
 * Entry point: processes the command-line options strictly left to right.
 *
 * -l and -b run list() at the moment they are encountered, so only the
 * options that precede them (-v, -T, -N, -i) influence that run.
 *
 * @return 0 on success or after -h/-?, 1 on a bad option or a failed list().
 */
int main(int argc, const char* argv[]) {
  PROG = argv[0];

  for (int pos = 1; pos < argc; pos++) {
    const char* opt = argv[pos];
    // True when at least one more argument follows the current one.
    const bool hasValue = (argc - pos - 1) >= 1;

    if (strcmp(opt, "-v") == 0) {
      VERBOSE++;
    } else if (strcmp(opt, "-i") == 0 && hasValue) {
      if (pos != 1) {
        u_printf("ERROR: -i must be the first argument given.\n");
        usage();
        return 1;
      }
      const char* dataDir = argv[++pos];
      u_setDataDirectory(dataDir);
      if (VERBOSE) {
        u_printf("ICUDATA is now %s\n", dataDir);
      }
    } else if (strcmp(opt, "-T") == 0 && hasValue) {
      TREE = argv[++pos];
      if (VERBOSE) {
        u_printf("TREE is now %s\n", TREE);
      }
    } else if (strcmp(opt, "-N") == 0 && hasValue) {
      NAME = argv[++pos];
      if (VERBOSE) {
        u_printf("NAME is now %s\n", NAME);
      }
    } else if (strcmp(opt, "-?") == 0 || strcmp(opt, "-h") == 0) {
      usage();
      return 0;
    } else if (strcmp(opt, "-l") == 0) {
      if (list(NULL) != 0) {
        return 1;
      }
    } else if (strcmp(opt, "-b") == 0 && hasValue) {
      if (list(argv[++pos]) != 0) {
        return 1;
      }
    } else {
      // Also reached when an option that needs a value is the last argument.
      u_printf("Unknown or malformed option: %s\n", opt);
      usage();
      return 1;
    }
  }

  return 0;
}
// Local Variables:
// compile-command: "icurun iculslocs.cpp"
// End: