mirror of https://github.com/nodejs/node.git
buffer: strip high bits when converting to ascii
Consider the following example: console.log(Buffer('ú').toString('ascii')); Before this commit, the contents of the buffer were used as-is and hence it prints 'ú'. Now, it prints 'C:'. Perhaps not much of an improvement but it conforms to what the documentation says it does: strip off the high bits. Fixes #4371. pull/24507/merge
parent
632b7d8750
commit
96a314b68b
|
@ -19,9 +19,10 @@ encoding method. Here are the different string encodings.
|
|||
|
||||
* `'ascii'` - for 7 bit ASCII data only. This encoding method is very fast, and
|
||||
will strip the high bit if set.
|
||||
Note that this encoding converts a null character (`'\0'` or `'\u0000'`) into
|
||||
`0x20` (character code of a space). If you want to convert a null character
|
||||
into `0x00`, you should use `'utf8'`.
|
||||
|
||||
Note that when converting from string to buffer, this encoding converts a null
|
||||
character (`'\0'` or `'\u0000'`) into `0x20` (character code of a space). If
|
||||
you want to convert a null character into `0x00`, you should use `'utf8'`.
|
||||
|
||||
* `'utf8'` - Multibyte encoded Unicode characters. Many web pages and other
|
||||
document formats use UTF-8.
|
||||
|
|
|
@ -247,15 +247,38 @@ Handle<Value> Buffer::BinarySlice(const Arguments &args) {
|
|||
}
|
||||
|
||||
|
||||
// Returns true if any of the first `len` bytes of `buf` has its high bit
// (0x80) set, i.e. lies outside the 7-bit ASCII range. Returns false
// otherwise, including when `len` is 0 (the loop body never runs).
static bool contains_non_ascii(const char* buf, size_t len) {
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
// Stop at the first byte with the high bit set; no need to scan the rest.
if (buf[i] & 0x80) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Copies `len` bytes from `src` to `dst`, clearing the high bit of every
// byte (mask 0x7f) so each output byte is in the 7-bit ASCII range.
// `dst` is written at indices 0..len-1, so it must have room for `len` bytes.
static void force_ascii(const char* src, char* dst, size_t len) {
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
// Masking only: bytes are truncated to 7 bits, not transliterated.
dst[i] = src[i] & 0x7f;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Builds a string from a byte range of this buffer for the 'ascii' encoding.
// If any byte in the range has its high bit set, the bytes are first copied
// into a temporary array with the high bits masked off (see force_ascii) and
// the string is created from that copy; otherwise the string is created
// directly from the buffer's data.
Handle<Value> Buffer::AsciiSlice(const Arguments &args) {
|
||||
HandleScope scope;
|
||||
Buffer *parent = ObjectWrap::Unwrap<Buffer>(args.This());
|
||||
// NOTE(review): SLICE_ARGS is defined elsewhere; presumably it validates the
// two arguments and declares `start` and `end` used below -- confirm.
SLICE_ARGS(args[0], args[1])
|
||||
|
||||
char* data = parent->data_ + start;
|
||||
// NOTE(review): this declaration and the `return scope.Close(string);` a few
// lines below look like the pre-change lines of the diff rendered alongside
// the new ones. Taken literally, that return would make everything after it
// unreachable -- confirm against the actual file.
Local<String> string = String::New(data, end - start);
|
||||
size_t len = end - start;
|
||||
|
||||
return scope.Close(string);
|
||||
// Only allocate and copy when at least one byte actually needs masking.
if (contains_non_ascii(data, len)) {
|
||||
char* out = new char[len];
|
||||
force_ascii(data, out, len);
|
||||
Local<String> rc = String::New(out, len);
|
||||
// The temporary copy is released as soon as the string has been created.
delete[] out;
|
||||
return scope.Close(rc);
|
||||
}
|
||||
|
||||
// All bytes are already 7-bit: build the string straight from the buffer.
return scope.Close(String::New(data, len));
|
||||
}
|
||||
|
||||
|
||||
|
@ -268,6 +291,7 @@ Handle<Value> Buffer::Utf8Slice(const Arguments &args) {
|
|||
return scope.Close(string);
|
||||
}
|
||||
|
||||
|
||||
Handle<Value> Buffer::Ucs2Slice(const Arguments &args) {
|
||||
HandleScope scope;
|
||||
Buffer *parent = ObjectWrap::Unwrap<Buffer>(args.This());
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright Joyent, Inc. and other Node contributors.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a
|
||||
// copy of this software and associated documentation files (the
|
||||
// "Software"), to deal in the Software without restriction, including
|
||||
// without limitation the rights to use, copy, modify, merge, publish,
|
||||
// distribute, sublicense, and/or sell copies of the Software, and to permit
|
||||
// persons to whom the Software is furnished to do so, subject to the
|
||||
// following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
|
||||
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
// USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
var common = require('../common');
|
||||
var assert = require('assert');
|
||||
|
||||
// ASCII conversion in node.js simply masks off the high bits,
|
||||
// it doesn't do transliteration.
|
||||
// Each 'é' is stored as the two UTF-8 bytes 0xC3 0xA9; clearing the high bit
// of each gives 0x43 0x29, i.e. the two characters 'C)'.
assert.equal(Buffer('hérité').toString('ascii'), 'hC)ritC)');
|
Loading…
Reference in New Issue