2012-04-05 00:06:00 +08:00
|
|
|
|
|
|
|
var assert = require('assert'),
|
|
|
|
Stream = require('stream'),
|
|
|
|
inherits = require('util').inherits;
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * This filter consumes a stream of characters and emits one string per line.
 *
 * Recognized line terminators are "\r\n", "\n\r", "\n" and "\r". The
 * terminator itself is never part of an emitted line. Input may arrive in
 * arbitrary chunks; a two-character terminator that is split across two
 * chunks is still treated as a single terminator, so the emitted lines are
 * the same as if the whole input had been written at once.
 *
 * Events: 'data' (one string per line), 'end'.
 */
function LineSplitter() {
  var self = this,
      // Text of the current line that has not been terminated yet.
      buffer = '',
      // A lone "\r" or "\n" that was the very last character of the previous
      // chunk. The line before it has already been emitted; we only remember
      // it so that a complementary character at the start of the next chunk
      // can be recognized as the second half of the same terminator.
      pending = '';

  Stream.call(this);
  this.writable = true;

  /*
   * Accepts a chunk of text (coerced to a string) and emits every line that
   * is completed by it. Always returns true (no back-pressure).
   */
  this.write = function(data) {
    var text = String(data),
        terminator = /\r\n|\n\r|\n|\r/g,
        start = 0,
        match;

    if (pending && text) {
      // Swallow the second half of a terminator split across chunks.
      if ((pending === '\r' && text.charAt(0) === '\n') ||
          (pending === '\n' && text.charAt(0) === '\r')) {
        text = text.slice(1);
      }
      pending = '';
    }

    text = buffer + text;

    while ((match = terminator.exec(text)) !== null) {
      self.emit('data', text.slice(start, match.index));
      start = terminator.lastIndex;
      // A single-character terminator at the very end of the chunk might be
      // completed by the next chunk.
      if (start === text.length && match[0].length === 1) {
        pending = match[0];
      }
    }

    buffer = text.slice(start);
    return true;
  };

  /*
   * Optionally writes a final chunk, emits the last unterminated line (if it
   * is non-empty) and then emits 'end'.
   */
  this.end = function(data) {
    self.write(data || '');
    if (buffer) {
      self.emit('data', buffer);
    }
    buffer = '';
    pending = '';
    self.emit('end');
  };
}
inherits(LineSplitter, Stream);
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * This filter consumes lines and emits paragraph objects.
 *
 * Input: one string per write() call, each holding a single line of text.
 *
 * Output ('data' events): objects of the form
 *   {
 *     li: list indicator ("1.", "*", "-") that started the paragraph, or '',
 *     in_license_block: whether the paragraph was found inside a license
 *                       block,
 *     level: indentation level (leading whitespace of the first line / 2),
 *     lines: the paragraph's lines with indentation, list indicator and
 *            comment markers stripped
 *   }
 *
 * A line made of three or more `=` or `"` characters is a block separator; it
 * toggles between "license block" and "not license block", starting with the
 * latter. Inside license blocks, C-style, C++-style and `#` comment markers
 * are stripped from the start of each line.
 *
 * A paragraph ends at a blank line, at a separator line, at a line starting
 * with a list indicator, or when the indentation jumps.
 */
function ParagraphParser() {
  var self = this,
      block_is_license_block = false,
      block_has_c_style_comment,
      paragraph_line_indent,
      paragraph;

  Stream.call(this);
  this.writable = true;

  resetBlock(false);

  /*
   * Parses a single line (coerced to a string). Always returns true.
   */
  this.write = function(data) {
    parseLine(data + '');
    return true;
  };

  /*
   * Optionally parses a final line, emits the paragraph that is still being
   * collected (if it has content) and then emits 'end'.
   */
  this.end = function(data) {
    if (data) {
      parseLine(data + '');
    }
    flushParagraph();
    self.emit('end');
  };

  // Starts a fresh, empty paragraph. `level` is deliberately left unset; it
  // is filled in by the first line that lands in the paragraph.
  function resetParagraph() {
    // -1 means "indentation of this paragraph is not known yet".
    paragraph_line_indent = -1;

    paragraph = {
      li: '',
      in_license_block: block_is_license_block,
      lines: []
    };
  }

  // Starts a new block and, with it, a new paragraph.
  function resetBlock(is_license_block) {
    block_is_license_block = is_license_block;
    block_has_c_style_comment = false;
    resetParagraph();
  }

  // Emits the current paragraph if it has any content, then starts a new one.
  function flushParagraph() {
    if (paragraph.lines.length || paragraph.li) {
      self.emit('data', paragraph);
    }
    resetParagraph();
  }

  function parseLine(line) {
    // Strip trailing whitespace
    line = line.replace(/\s*$/, '');

    // Detect block separator
    if (/^\s*(=|"){3,}\s*$/.test(line)) {
      flushParagraph();
      resetBlock(!block_is_license_block);
      return;
    }

    // Strip comments around block
    if (block_is_license_block) {
      if (!block_has_c_style_comment) {
        block_has_c_style_comment = /^\s*(\/\*)/.test(line);
      }
      if (block_has_c_style_comment) {
        var prev = line;
        // Remove a leading "/* ", " * " or " */" marker.
        line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
        // No marker on this line: drop the two columns the marker would have
        // occupied so the text stays aligned with its neighbours.
        if (prev === line) {
          line = line.replace(/^\s{2}/, '');
        }
        // The comment ends on this line.
        if (/\*\//.test(prev)) {
          block_has_c_style_comment = false;
        }
      } else {
        // Strip C++ and perl style comments.
        line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
      }
    }

    // Detect blank line (paragraph separator)
    if (!/\S/.test(line)) {
      flushParagraph();
      return;
    }

    // Detect separator "lines" within a block. These mark a paragraph break
    // and are stripped from the output.
    if (/^\s*[=*\-]{5,}\s*$/.test(line)) {
      flushParagraph();
      return;
    }

    // Find out indentation level and the start of a bulleted or numbered
    // list. Every part of the pattern is optional, so it always matches.
    var result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
    assert.ok(result);
    // The number of characters that will be stripped from the beginning of
    // the line.
    var line_strip_length = result[0].length;
    // The indentation size that will be used to detect indentation jumps.
    // Fudge by 1 space.
    var line_indent = Math.floor(result[0].length / 2) * 2;
    // The indentation level that will be exported
    var level = Math.floor(result[1].length / 2);
    // The list indicator that precedes the actual content, if any.
    var line_li = result[2];

    // Flush the paragraph when there is a li or an indentation jump
    if (line_li || (line_indent !== paragraph_line_indent &&
                    paragraph_line_indent !== -1)) {
      flushParagraph();
      paragraph.li = line_li || '';
    }

    // Set the paragraph indent that we use to detect indentation jumps. When
    // we just detected a list indicator, wait
    // for the next line to arrive before setting this.
    //
    // The indent is recorded only while it is still unknown (-1). The
    // previous guard (`!= -1`) could never be true, because the value starts
    // at -1 and is only ever reset to -1, so indentation jumps were never
    // detected.
    if (!line_li && paragraph_line_indent === -1) {
      paragraph_line_indent = line_indent;
    }

    // Set the output indent level if it has not been set yet.
    if (paragraph.level === undefined) {
      paragraph.level = level;
    }

    // Strip leading whitespace and li.
    line = line.slice(line_strip_length);

    if (line) {
      paragraph.lines.push(line);
    }
  }
}
inherits(ParagraphParser, Stream);
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * This filter consumes paragraph objects and emits modified paragraph objects.
 * The lines within the paragraph are unwrapped where appropriate. It also
 * replaces multiple consecutive whitespace characters by a single one.
 *
 * The paragraph object and its `lines` array are modified in place and the
 * same object is re-emitted as a 'data' event.
 */
function Unwrapper() {
  var self = this;

  Stream.call(this);
  this.writable = true;

  /*
   * Unwraps `paragraph.lines` and emits the paragraph. Always returns true.
   */
  this.write = function(paragraph) {
    var lines = paragraph.lines,
        unwrapped = [],
        current,
        keep_break,
        next_first_word_length,
        i;

    if (lines.length) {
      current = lines[0];

      // Decide for every original line whether the break after it is kept,
      // and join or split right away. Working on the original indices (rather
      // than splicing `lines` while consulting a separate index-based table)
      // makes sure a break in the middle of a paragraph is not lost when
      // earlier lines have been joined.
      for (i = 0; i < lines.length - 1; i++) {
        keep_break = false;

        // When a line is really short, the line was probably kept separate
        // for a reason.
        if (lines[i].length < 50) {
          // If the first word on the next line really didn't fit after the
          // line, it probably was just ordinary wrapping after all.
          next_first_word_length = lines[i + 1].replace(/\s.*$/, '').length;
          keep_break = lines[i].length + next_first_word_length < 60;
        }

        if (keep_break) {
          unwrapped.push(current);
          current = lines[i + 1];
        } else {
          current += ' ' + lines[i + 1];
        }
      }

      unwrapped.push(current);
    }

    // Write the result back into the original array so that anyone holding a
    // reference to `paragraph.lines` sees the unwrapped lines.
    lines.length = 0;
    for (i = 0; i < unwrapped.length; i++) {
      // Replace multiple whitespace characters by a single one, and strip
      // trailing whitespace.
      lines.push(unwrapped[i].replace(/\s+/g, ' ').replace(/\s+$/, ''));
    }

    self.emit('data', paragraph);
    return true;
  };

  /*
   * Optionally processes a final paragraph, then emits 'end'.
   */
  this.end = function(data) {
    if (data) {
      self.write(data);
    }
    self.emit('end');
  };
}
inherits(Unwrapper, Stream);
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * This filter generates an rtf document from a stream of paragraph objects.
 *
 * Input: paragraph objects with the properties `li` (list indicator or a
 * falsy value), `level` (indentation level), `in_license_block` (boolean)
 * and `lines` (array of strings).
 *
 * Output ('data' events): strings. The rtf header is emitted right before the
 * first paragraph and the closing brace on end(); when no paragraph was ever
 * written, nothing but 'end' is emitted.
 */
function RtfGenerator() {
  var self = this,
      did_write_anything = false;

  Stream.call(this);
  this.writable = true;

  /*
   * Emits the rtf markup for one paragraph. Always returns true.
   */
  this.write = function(paragraph) {
    if (!did_write_anything) {
      emitHeader();
      did_write_anything = true;
    }

    var li = paragraph.li,
        // List items are indented one extra level; the indicator itself is
        // pulled back out by the negative first-line indent below.
        level = paragraph.level + (li ? 1 : 0),
        lic = paragraph.in_license_block;

    var rtf = '\\pard';
    rtf += '\\sa150\\sl300\\slmult1';
    if (level > 0) {
      rtf += '\\li' + (level * 240);
    }
    if (li) {
      rtf += '\\tx' + (level * 240);
      rtf += '\\fi-240';
    }
    if (lic) {
      rtf += '\\ri240';
    }
    // Everything outside a license block is rendered bold.
    if (!lic) {
      rtf += '\\b';
    }
    if (li) {
      rtf += ' ' + li + '\\tab';
    }
    rtf += ' ';
    rtf += paragraph.lines.map(rtfEscape).join('\\line ');
    if (!lic) {
      rtf += '\\b0';
    }
    rtf += '\\par\n';

    self.emit('data', rtf);
    return true;
  };

  /*
   * Optionally writes a final paragraph, closes the document if anything was
   * written, and emits 'end'.
   */
  this.end = function(data) {
    if (data) {
      self.write(data);
    }
    if (did_write_anything) {
      emitFooter();
    }
    self.emit('end');
  };

  // Formats the integer part of `number` as hex, left-padded with zeros to at
  // least `length` digits.
  function toHex(number, length) {
    var hex = (~~number).toString(16);
    while (hex.length < length) {
      hex = '0' + hex;
    }
    return hex;
  }

  // Escapes a string for use as rtf text.
  function rtfEscape(string) {
    return string
      // Characters with a syntactic meaning in rtf.
      .replace(/[\\\{\}]/g, function(m) {
        return '\\' + m;
      })
      .replace(/\t/g, function() {
        return '\\tab ';
      })
      // Remaining 8-bit characters are written as \'hh.
      .replace(/[\x00-\x1f\x7f-\xff]/g, function(m) {
        return '\\\'' + toHex(m.charCodeAt(0), 2);
      })
      // Drop byte order marks.
      .replace(/\ufeff/g, '')
      // Everything else is written as \uN?, where N has to be a *decimal*
      // signed 16-bit number: code units above 32767 are written as negative
      // values. The '?' is the single fallback character announced by \uc1
      // in the header.
      .replace(/[\u0100-\uffff]/g, function(m) {
        var code = m.charCodeAt(0);
        if (code > 0x7fff) {
          code -= 0x10000;
        }
        return '\\u' + code + '?';
      });
  }

  function emitHeader() {
    self.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +
                      '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +
                      '{\\*\\generator txt2rtf 0.0.1;}\n');
  }

  function emitFooter() {
    self.emit('data', '}');
  }
}
inherits(RtfGenerator, Stream);
|
|
|
|
|
|
|
|
|
|
|
|
// Build the conversion pipeline:
//   stdin -> lines -> paragraphs -> unwrapped paragraphs -> rtf -> stdout
var stdin = process.stdin;
var stdout = process.stdout;
var line_splitter = new LineSplitter();
var paragraph_parser = new ParagraphParser();
var unwrapper = new Unwrapper();
var rtf_generator = new RtfGenerator();

// Read the input as utf-8 text and start the flow of data.
stdin.setEncoding('utf-8');
stdin.resume();

// pipe() hands back its destination, so the stages can be chained.
stdin
  .pipe(line_splitter)
  .pipe(paragraph_parser)
  .pipe(unwrapper)
  .pipe(rtf_generator)
  .pipe(stdout);
|