node/tools/license2rtf.js

306 lines
7.6 KiB
JavaScript

'use strict';
const assert = require('assert');
const Stream = require('stream');
/*
 * This filter consumes a stream of characters and emits one string per line.
 *
 * Recognized line terminators are "\r\n", "\n\r", "\n" and "\r". The
 * terminators themselves are never part of the emitted strings. A
 * two-character terminator that is split across two chunks (for example a
 * chunk ending in "\r" followed by a chunk starting with "\n") is treated as a
 * single line break rather than two.
 */
class LineSplitter extends Stream {
  constructor() {
    super();
    // Text received after the most recent line terminator.
    this.buffer = '';
    // Holds "\r" or "\n" when the text seen so far ended in that lone
    // character, i.e. it may be the first half of a two-character terminator
    // whose second half arrives with the next chunk. Empty otherwise.
    this.pendingTerminator = '';
    this.writable = true;
  }

  /**
   * Splits a chunk into lines and emits each completed line as 'data'.
   * Text after the last terminator is buffered until more input arrives.
   * @param {string|Buffer} data Chunk of text; it is converted to a string.
   * @returns {boolean} Always true.
   */
  write(data) {
    let chunk = String(data);
    if (chunk === '')
      return true;

    // Complete a terminator whose first half ended the previous chunk. The
    // line break was already emitted, so the second half is simply dropped.
    if (this.pendingTerminator) {
      const counterpart = this.pendingTerminator === '\r' ? '\n' : '\r';
      if (chunk.startsWith(counterpart))
        chunk = chunk.slice(1);
      this.pendingTerminator = '';
    }

    const text = this.buffer + chunk;
    const terminator = /\r\n|\n\r|\n|\r/g;
    let lineStart = 0;
    let lastTerminator = '';
    let match;
    while ((match = terminator.exec(text)) !== null) {
      this.emit('data', text.slice(lineStart, match.index));
      lastTerminator = match[0];
      lineStart = match.index + lastTerminator.length;
    }
    this.buffer = text.slice(lineStart);

    // The text ended exactly on a one-character terminator: remember it so
    // that its counterpart at the start of the next chunk is not counted as
    // another line break.
    if (lineStart === text.length && lastTerminator.length === 1)
      this.pendingTerminator = lastTerminator;

    return true;
  }

  /**
   * Processes an optional final chunk, emits any buffered unterminated line
   * and then emits 'end'.
   * @param {string|Buffer} [data] Optional last chunk of text.
   */
  end(data) {
    this.write(data || '');
    if (this.buffer) {
      this.emit('data', this.buffer);
      this.buffer = '';
    }
    this.pendingTerminator = '';
    this.emit('end');
  }
}
/*
 * This filter consumes lines and emits paragraph objects.
 *
 * A paragraph object has the form { li, level, inLicenseBlock, lines }:
 *   li             - the list marker ("1.", "*" or "-") that introduced the
 *                    paragraph, or '' when there is none.
 *   level          - the amount of leading whitespace on the paragraph's
 *                    first line, divided by two and rounded down.
 *   inLicenseBlock - a flag that starts out false and is toggled by every
 *                    block separator line (three or more '=' or '"').
 *   lines          - the paragraph's lines with comment decoration,
 *                    indentation and list marker removed.
 *
 * A new paragraph is started by a blank line, a separator line, a list
 * marker, or a change in indentation.
 */
class ParagraphParser extends Stream {
  constructor() {
    super();
    this.blockIsLicenseBlock = false;
    this.writable = true;
    this.resetBlock(false);
  }

  /**
   * Parses one line of input.
   * @param {*} data The line; it is converted to a string.
   * @returns {boolean} Always true.
   */
  write(data) {
    this.parseLine(data + '');
    return true;
  }

  /**
   * Parses an optional final line, emits the paragraph that is still being
   * collected (if any) and then emits 'end'.
   * @param {*} [data] Optional last line.
   */
  end(data) {
    if (data)
      this.parseLine(data + '');
    this.flushParagraph();
    this.emit('end');
  }

  // Starts collecting a fresh paragraph. `level` is deliberately left unset;
  // parseLine() fills it in from the first line of the paragraph.
  resetParagraph() {
    // -1 means "indentation of the current paragraph not known yet".
    this.paragraphLineIndent = -1;
    this.paragraph = {
      li: '',
      inLicenseBlock: this.blockIsLicenseBlock,
      lines: []
    };
  }

  // Starts a new block and, with it, a new paragraph.
  resetBlock(isLicenseBlock) {
    this.blockIsLicenseBlock = isLicenseBlock;
    this.blockHasCStyleComment = false;
    this.resetParagraph();
  }

  // Emits the current paragraph unless it is completely empty, then starts
  // a new one.
  flushParagraph() {
    if (this.paragraph.lines.length || this.paragraph.li) {
      this.emit('data', this.paragraph);
    }
    this.resetParagraph();
  }

  /**
   * Classifies a single line and adds its content to the current paragraph,
   * flushing paragraphs where a boundary is detected.
   * @param {string} line The line to parse, without its line terminator.
   */
  parseLine(line) {
    // Strip trailing whitespace
    line = line.trimEnd();

    // Detect block separator
    if (/^\s*(=|"){3,}\s*$/.test(line)) {
      this.flushParagraph();
      this.resetBlock(!this.blockIsLicenseBlock);
      return;
    }

    // Strip comments around block
    if (this.blockIsLicenseBlock) {
      if (!this.blockHasCStyleComment)
        this.blockHasCStyleComment = /^\s*(\/\*)/.test(line);
      if (this.blockHasCStyleComment) {
        const prev = line;
        line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
        // No comment decoration was found on this line; drop two columns of
        // indentation instead.
        if (prev === line)
          line = line.replace(/^\s{2}/, '');
        // The comment was closed on this line.
        if (/\*\//.test(prev))
          this.blockHasCStyleComment = false;
      } else {
        // Strip C++ and perl style comments.
        line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
      }
    }

    // Detect blank line (paragraph separator)
    if (!/\S/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Detect separator "lines" within a block. These mark a paragraph break
    // and are stripped from the output.
    if (/^\s*[=*-]{5,}\s*$/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Find out indentation level and the start of a bulleted or numbered
    // list. Every part of the pattern is optional, so it always matches.
    const result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
    assert.ok(result);

    // The number of characters that will be stripped from the beginning of
    // the line.
    const lineStripLength = result[0].length;
    // The indentation size that will be used to detect indentation jumps.
    // Fudge by 1 space.
    const lineIndent = Math.floor(lineStripLength / 2) * 2;
    // The indentation level that will be exported
    const level = Math.floor(result[1].length / 2);
    // The list indicator that precedes the actual content, if any.
    const lineLi = result[2];

    // Flush the paragraph when there is a li or an indentation jump
    if (lineLi || (lineIndent !== this.paragraphLineIndent &&
                   this.paragraphLineIndent !== -1)) {
      this.flushParagraph();
      this.paragraph.li = lineLi || '';
    }

    // Set the paragraph indent that we use to detect indentation jumps. When
    // we just detected a list indicator, wait for the next line to arrive
    // before setting this. (Previously this assignment was guarded by
    // `paragraphLineIndent !== -1`, which can never hold because the value is
    // only ever reset to -1, so indentation jumps were never detected.)
    if (!lineLi) {
      this.paragraphLineIndent = lineIndent;
    }

    // Set the output indent level if it has not been set yet.
    if (this.paragraph.level === undefined)
      this.paragraph.level = level;

    // Strip leading whitespace and li.
    line = line.slice(lineStripLength);
    if (line)
      this.paragraph.lines.push(line);
  }
}
/*
 * This filter consumes paragraph objects and emits modified paragraph objects.
 * The lines within the paragraph are unwrapped where appropriate. It also
 * replaces multiple consecutive whitespace characters by a single one.
 *
 * The paragraph object and its `lines` array are modified in place; the very
 * same object that was written is emitted again.
 */
class Unwrapper extends Stream {
  constructor() {
    super();
    this.writable = true;
  }

  /**
   * Joins lines that were only broken by ordinary word wrapping, normalizes
   * whitespace and emits the paragraph as 'data'.
   * @param {{lines: string[]}} paragraph Paragraph whose lines get unwrapped.
   * @returns {boolean} Always true.
   */
  write(paragraph) {
    const lines = paragraph.lines;
    const unwrapped = [];

    // Whether the line break in front of the line being visited is kept.
    // The first line always starts a new output line.
    let keepBreak = true;
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (keepBreak)
        unwrapped.push(line);
      else
        unwrapped[unwrapped.length - 1] += ` ${line}`;

      // Decide about the break after this line, always based on the original
      // (unjoined) lines so that decisions stay aligned with their lines.
      keepBreak = false;
      // When a line is really short, the line was probably kept separate for
      // a reason.
      if (i + 1 < lines.length && line.length < 50) {
        // If the first word on the next line really didn't fit after the
        // line, it probably was just ordinary wrapping after all.
        const nextFirstWordLength = lines[i + 1].replace(/\s.*$/, '').length;
        keepBreak = line.length + nextFirstWordLength < 60;
      }
    }

    // Write the result back into the original array. Replace multiple
    // whitespace characters by a single one, and strip trailing whitespace.
    lines.length = 0;
    for (const line of unwrapped)
      lines.push(line.replace(/\s+/g, ' ').replace(/\s+$/, ''));

    this.emit('data', paragraph);
    return true;
  }

  /**
   * Processes an optional final paragraph and then emits 'end'.
   * @param {{lines: string[]}} [data] Optional last paragraph.
   */
  end(data) {
    if (data)
      this.write(data);
    this.emit('end');
  }
}
/**
 * Escapes a string for inclusion as text in an RTF document.
 *
 * - `\`, `{` and `}` are prefixed with a backslash.
 * - Tabs become `\tab `.
 * - Remaining characters in the ranges 0x00-0x1f and 0x7f-0xff become
 *   `\'hh` hexadecimal escapes.
 * - U+FEFF (byte order mark) is removed.
 * - Every other UTF-16 code unit from U+0100 upwards becomes `\uN?`, where N
 *   is the code unit written as a signed 16-bit decimal number, which is the
 *   form the RTF `\u` control word expects, and `?` is the fallback character
 *   for readers that do not understand `\u`.
 *
 * @param {string} string The text to escape.
 * @returns {string} The escaped text.
 */
function rtfEscape(string) {
  function toHex(number, length) {
    return (~~number).toString(16).padStart(length, '0');
  }

  // RTF control word parameters are signed 16-bit decimal numbers, so code
  // units above 32767 have to be written as negative values.
  function toSigned16(codeUnit) {
    return codeUnit > 0x7fff ? codeUnit - 0x10000 : codeUnit;
  }

  return string
    .replace(/[\\{}]/g, (m) => `\\${m}`)
    .replace(/\t/g, () => '\\tab ')
    // eslint-disable-next-line no-control-regex
    .replace(/[\x00-\x1f\x7f-\xff]/g, (m) => `\\'${toHex(m.charCodeAt(0), 2)}`)
    .replace(/\ufeff/g, '')
    .replace(/[\u0100-\uffff]/g, (m) => `\\u${toSigned16(m.charCodeAt(0))}?`);
}
/*
 * This filter turns a stream of paragraph objects into an rtf document.
 *
 * The document header is emitted right before the first paragraph and the
 * closing brace when the stream ends; a stream without any paragraphs
 * produces no output at all.
 */
class RtfGenerator extends Stream {
  constructor() {
    super();
    this.didWriteAnything = false;
    this.writable = true;
  }

  // Emits the rtf markup for a single paragraph object as one 'data' chunk.
  write(paragraph) {
    const { li, level, lines, inLicenseBlock } = paragraph;

    if (!this.didWriteAnything) {
      this.emitHeader();
      this.didWriteAnything = true;
    }

    // List items are indented one level deeper than plain paragraphs.
    let depth = level;
    if (li)
      depth++;
    const indent = depth * 240;

    const pieces = ['\\pard\\sa150\\sl300\\slmult1'];
    if (depth > 0)
      pieces.push(`\\li${indent}`);
    if (li)
      pieces.push(`\\tx${indent}\\fi-240`);
    // Paragraphs inside a license block get a right indent, everything else
    // is set in bold.
    pieces.push(inLicenseBlock ? '\\ri240' : '\\b');
    if (li)
      pieces.push(` ${li}\\tab`);
    pieces.push(` ${lines.map(rtfEscape).join('\\line ')}`);
    if (!inLicenseBlock)
      pieces.push('\\b0');
    pieces.push('\\par\n');

    this.emit('data', pieces.join(''));
  }

  // Writes an optional last paragraph, closes the document if one was
  // started, and signals 'end'.
  end(data) {
    if (data)
      this.write(data);
    if (this.didWriteAnything)
      this.emitFooter();
    this.emit('end');
  }

  // Emits the document prologue: character set, font table and generator.
  emitHeader() {
    const header = [
      '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033',
      '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n',
      '{\\*\\generator txt2rtf 0.0.1;}\n',
    ].join('');
    this.emit('data', header);
  }

  // Emits the brace that closes the group opened by the header.
  emitFooter() {
    this.emit('data', '}');
  }
}
// Assemble the conversion pipeline:
//   stdin -> lines -> paragraphs -> unwrapped paragraphs -> rtf -> stdout
const stdin = process.stdin;
const stdout = process.stdout;

const lineSplitter = new LineSplitter();
const paragraphParser = new ParagraphParser();
const unwrapper = new Unwrapper();
const rtfGenerator = new RtfGenerator();

// Read the input as text rather than raw bytes and start it flowing.
stdin.setEncoding('utf-8');
stdin.resume();

// pipe() returns its destination, so the stages can be chained.
stdin
  .pipe(lineSplitter)
  .pipe(paragraphParser)
  .pipe(unwrapper)
  .pipe(rtfGenerator)
  .pipe(stdout);