readline: handle wide characters properly

Treat wide characters (such as あ, 谢, 고) as two columns wide so that the
cursor moves properly.

Closes #555.
Closes #4994.
pull/24507/merge
Nao Iizuka 2013-03-15 16:18:30 -10:00 committed by Nathan Rajlich
parent c0721bcd66
commit 94284e7d2e
3 changed files with 164 additions and 17 deletions

View File

@ -86,7 +86,7 @@ a `"resize"` event on the `output` if/when the columns ever change
The class that represents a readline interface with an input and output
stream.
### rl.setPrompt(prompt, length)
### rl.setPrompt(prompt)
Sets the prompt, for example when you run `node` on the command line, you see
`> `, which is node's prompt.

View File

@ -148,15 +148,8 @@ Interface.prototype.__defineGetter__('columns', function() {
return this.output.columns || Infinity;
});
Interface.prototype.setPrompt = function(prompt, length) {
Interface.prototype.setPrompt = function(prompt) {
this._prompt = prompt;
if (length) {
this._promptLength = length;
} else {
var lines = prompt.split(/[\r\n]/);
var lastLine = lines[lines.length - 1];
this._promptLength = lastLine.length;
}
};
@ -224,9 +217,9 @@ Interface.prototype._refreshLine = function() {
// line length
var line = this._prompt + this.line;
var lineLength = line.length;
var lineCols = lineLength % columns;
var lineRows = (lineLength - lineCols) / columns;
var dispPos = this._getDisplayPos(line);
var lineCols = dispPos.cols;
var lineRows = dispPos.rows;
// cursor position
var cursorPos = this._getCursorPos();
@ -559,12 +552,46 @@ Interface.prototype._historyPrev = function() {
};
// Computes where the last character of `str` lands on screen and
// returns it as {cols, rows}, wrapping at `this.columns`.
Interface.prototype._getDisplayPos = function(str) {
  var width = this.columns;
  var pos = 0;
  var idx = 0;
  while (idx < str.length) {
    var cp = codePointAt(str, idx);
    // A code point above the BMP takes two UTF-16 units in the string
    idx += cp >= 0x10000 ? 2 : 1;
    if (!isFullWidthCodePoint(cp)) {
      pos += 1;
      continue;
    }
    // Only one cell is left on this row: skip it so the two-column
    // character is counted from the start of the next row.
    if ((pos + 1) % width === 0) {
      pos += 1;
    }
    pos += 2;
  }
  var remainder = pos % width;
  return {cols: remainder, rows: (pos - remainder) / width};
};
// Returns current cursor's position and line
// as an object of the form {cols: <column>, rows: <row>}.
Interface.prototype._getCursorPos = function() {
var columns = this.columns;
// NOTE(review): the next three statements derive cols/rows from plain
// character counts (this.cursor + this._promptLength). Both variables are
// re-declared and overwritten a few lines below, so these values never reach
// the return statement. Presumably they are the pre-change lines of this diff
// shown without -/+ markers -- confirm before relying on them.
var cursorPos = this.cursor + this._promptLength;
var cols = cursorPos % columns;
var rows = (cursorPos - cols) / columns;
// Measure the prompt plus the part of the line left of the cursor
// with _getDisplayPos.
var strBeforeCursor = this._prompt + this.line.substring(0, this.cursor);
var dispPos = this._getDisplayPos(strBeforeCursor);
// These re-declarations (legal with `var`) replace the values computed above.
var cols = dispPos.cols;
var rows = dispPos.rows;
// If the cursor is on a full-width character which steps over the line,
// move the cursor to the beginning of the next line.
// The check applies only when the cursor is in the last column, is not at
// the end of the line, and the character at the cursor is full-width.
if (cols + 1 === columns &&
this.cursor < this.line.length &&
isFullWidthCodePoint(codePointAt(this.line, this.cursor))) {
rows++;
cols = 0;
}
return {cols: cols, rows: rows};
};
@ -578,13 +605,24 @@ Interface.prototype._moveCursor = function(dx) {
// bounds check
if (this.cursor < 0) this.cursor = 0;
if (this.cursor > this.line.length) this.cursor = this.line.length;
else if (this.cursor > this.line.length) this.cursor = this.line.length;
var newPos = this._getCursorPos();
// check if cursors are in the same line
if (oldPos.rows === newPos.rows) {
exports.moveCursor(this.output, this.cursor - oldcursor, 0);
var diffCursor = this.cursor - oldcursor;
var diffWidth;
if (diffCursor < 0) {
diffWidth = -getStringWidth(
this.line.substring(this.cursor, oldcursor)
);
} else if (diffCursor > 0) {
diffWidth = getStringWidth(
this.line.substring(this.cursor, oldcursor)
);
}
exports.moveCursor(this.output, diffWidth, 0);
this.prevRows = newPos.rows;
} else {
this._refreshLine();
@ -1161,3 +1199,93 @@ function clearScreenDown(stream) {
stream.write('\x1b[0J');
}
exports.clearScreenDown = clearScreenDown;
/**
 * Counts how many terminal columns the given string occupies:
 * two for every full-width code point, one for anything else.
 */
function getStringWidth(str) {
  var total = 0;
  var idx = 0;
  var end = str.length;
  while (idx < end) {
    var cp = codePointAt(str, idx);
    // A code point above the BMP takes two UTF-16 units in the string
    idx += cp >= 0x10000 ? 2 : 1;
    total += isFullWidthCodePoint(cp) ? 2 : 1;
  }
  return total;
}
exports.getStringWidth = getStringWidth;
/**
 * Tells whether the character for the given Unicode code point
 * is full-width. Returns true if it is, false otherwise
 * (including when `code` is NaN).
 */
function isFullWidthCodePoint(code) {
  // NaN, and anything below the first Hangul Jamo, is never full-width
  if (isNaN(code) || !(code >= 0x1100)) {
    return false;
  }

  // LEFT-POINTING / RIGHT-POINTING ANGLE BRACKET are isolated entries
  if (code === 0x2329 || code === 0x232a) {
    return true;
  }

  // 0x303f is the single hole inside the 0x2e80..0x3247 range below
  if (code === 0x303f) {
    return false;
  }

  // Inclusive [first, last] ranges, derived from:
  // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
  var wideRanges = [
    [0x1100, 0x115f],   // Hangul Jamo
    [0x2e80, 0x3247],   // CJK Radicals Supplement .. Enclosed CJK Letters and Months
    [0x3250, 0x4dbf],   // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
    [0x4e00, 0xa4c6],   // CJK Unified Ideographs .. Yi Radicals
    [0xa960, 0xa97c],   // Hangul Jamo Extended-A
    [0xac00, 0xd7a3],   // Hangul Syllables
    [0xf900, 0xfaff],   // CJK Compatibility Ideographs
    [0xfe10, 0xfe19],   // Vertical Forms
    [0xfe30, 0xfe6b],   // CJK Compatibility Forms .. Small Form Variants
    [0xff01, 0xff60],   // Halfwidth and Fullwidth Forms
    [0xffe0, 0xffe6],   // Halfwidth and Fullwidth Forms
    [0x1b000, 0x1b001], // Kana Supplement
    [0x1f200, 0x1f251], // Enclosed Ideographic Supplement
    [0x20000, 0x3fffd]  // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
  ];

  for (var k = 0; k < wideRanges.length; k++) {
    if (wideRanges[k][0] <= code && code <= wideRanges[k][1]) {
      return true;
    }
  }
  return false;
}
exports.isFullWidthCodePoint = isFullWidthCodePoint;
/**
 * Returns the Unicode code point for the character at the
 * given index in the given string. Similar to String.charCodeAt(),
 * but this function handles surrogates (code point >= 0x10000).
 *
 * A high surrogate (0xd800..0xdbff) is combined with the next code unit
 * only when that unit is a low surrogate (0xdc00..0xdfff). An unpaired
 * high surrogate is returned unchanged, exactly as charCodeAt() reports it.
 *
 * @param {string} str The string to read from.
 * @param {number} index Index of the UTF-16 code unit to start at.
 * @return {number} The code point, or NaN when `index` is out of range.
 */
function codePointAt(str, index) {
  var code = str.charCodeAt(index);
  if (0xd800 <= code && code <= 0xdbff) { // High surrogate
    var low = str.charCodeAt(index + 1);
    // Combine only with a genuine low surrogate. Any other unit (or NaN at
    // the end of the string) would produce a bogus value that can exceed
    // 0xffff and make callers skip the following character.
    if (0xdc00 <= low && low <= 0xdfff) {
      code = 0x10000 + (code - 0xd800) * 0x400 + (low - 0xdc00);
    }
  }
  return code;
}
exports.codePointAt = codePointAt;

View File

@ -173,6 +173,25 @@ FakeInput.prototype.end = function() {};
assert.equal(callCount, 1);
rli.close();
// wide characters should be treated as two columns.
assert.equal(readline.isFullWidthCodePoint('a'.charCodeAt(0)), false);
assert.equal(readline.isFullWidthCodePoint('あ'.charCodeAt(0)), true);
assert.equal(readline.isFullWidthCodePoint('谢'.charCodeAt(0)), true);
assert.equal(readline.isFullWidthCodePoint('고'.charCodeAt(0)), true);
assert.equal(readline.isFullWidthCodePoint(0x1f251), true); // surrogate
assert.equal(readline.codePointAt('ABC', 0), 0x41);
assert.equal(readline.codePointAt('あいう', 1), 0x3044);
assert.equal(readline.codePointAt('\ud800\udc00', 0), // surrogate
0x10000);
assert.equal(readline.codePointAt('\ud800\udc00A', 2), // surrogate
0x41);
assert.equal(readline.getStringWidth('abcde'), 5);
assert.equal(readline.getStringWidth('古池や'), 6);
assert.equal(readline.getStringWidth('ノード.js'), 9);
assert.equal(readline.getStringWidth('你好'), 4);
assert.equal(readline.getStringWidth('안녕하세요'), 10);
assert.equal(readline.getStringWidth('A\ud83c\ude00BC'), 5); // surrogate
assert.deepEqual(fi.listeners('end'), []);
assert.deepEqual(fi.listeners(terminal ? 'keypress' : 'data'), []);
});