buffer: strip high bits when converting to ascii

Consider the following example:

  console.log(Buffer('ú').toString('ascii'));

Before this commit, the contents of the buffer were used as-is and hence it
printed 'ú'.

Now, it prints 'C:'. Perhaps not much of an improvement but it conforms to what
the documentation says it does: strip off the high bits.

Fixes #4371.
pull/24507/merge
Ben Noordhuis 2012-12-06 05:13:14 +01:00 committed by isaacs
parent 632b7d8750
commit 96a314b68b
3 changed files with 57 additions and 5 deletions

View File

@ -19,9 +19,10 @@ encoding method. Here are the different string encodings.
* `'ascii'` - for 7 bit ASCII data only. This encoding method is very fast, and
will strip the high bit if set.
Note that this encoding converts a null character (`'\0'` or `'\u0000'`) into
`0x20` (character code of a space). If you want to convert a null character
into `0x00`, you should use `'utf8'`.
Note that when converting from string to buffer, this encoding converts a null
character (`'\0'` or `'\u0000'`) into `0x20` (character code of a space). If
you want to convert a null character into `0x00`, you should use `'utf8'`.
* `'utf8'` - Multibyte encoded Unicode characters. Many web pages and other
document formats use UTF-8.

View File

@ -247,15 +247,38 @@ Handle<Value> Buffer::BinarySlice(const Arguments &args) {
}
// Reports whether any of the first `len` bytes at `buf` has its high bit
// (0x80) set, i.e. lies outside the 7-bit ASCII range. Returns false when
// `len` is zero.
static bool contains_non_ascii(const char* buf, size_t len) {
  const char* cursor = buf;
  size_t remaining = len;
  while (remaining > 0) {
    // Stop at the first byte carrying the high bit.
    if ((*cursor & 0x80) != 0) {
      return true;
    }
    ++cursor;
    --remaining;
  }
  return false;
}
// Copies `len` bytes from `src` to `dst`, clearing the high bit of each byte
// (masking with 0x7f) so every output byte is in the 7-bit range.
// `dst` must have room for at least `len` bytes.
static void force_ascii(const char* src, char* dst, size_t len) {
  size_t pos = 0;
  while (pos < len) {
    dst[pos] = static_cast<char>(src[pos] & 0x7f);
    ++pos;
  }
}
// Returns the requested slice of the buffer as a string, with the high bit
// of every byte stripped so the result only contains 7-bit characters.
//
// args[0] / args[1] are handed to SLICE_ARGS, which presumably validates them
// and defines the `start` and `end` offsets used below (macro is defined
// elsewhere -- confirm).
Handle<Value> Buffer::AsciiSlice(const Arguments &args) {
  HandleScope scope;
  Buffer *parent = ObjectWrap::Unwrap<Buffer>(args.This());
  SLICE_ARGS(args[0], args[1])

  char* data = parent->data_ + start;
  size_t len = end - start;

  // Only take the copying path when at least one byte has the high bit set;
  // otherwise the buffer contents can be handed to String::New directly.
  if (contains_non_ascii(data, len)) {
    // Mask into a scratch copy so the buffer's own data is left unmodified.
    char* out = new char[len];
    force_ascii(data, out, len);
    Local<String> rc = String::New(out, len);
    // The scratch copy is released right after the string is created.
    delete[] out;
    return scope.Close(rc);
  }

  return scope.Close(String::New(data, len));
}
@ -268,6 +291,7 @@ Handle<Value> Buffer::Utf8Slice(const Arguments &args) {
return scope.Close(string);
}
Handle<Value> Buffer::Ucs2Slice(const Arguments &args) {
HandleScope scope;
Buffer *parent = ObjectWrap::Unwrap<Buffer>(args.This());

View File

@ -0,0 +1,27 @@
// Copyright Joyent, Inc. and other Node contributors.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit
// persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
// Regression test: converting a buffer to an 'ascii' string must strip the
// high bit of every byte.
var common = require('../common');
var assert = require('assert');
// ASCII conversion in node.js simply masks off the high bits of each byte;
// it does not transliterate (e.g. it will not turn an accented 'é' into 'e').
// Each 'é' below is therefore expected to come out as the two characters 'C)'
// (presumably its UTF-8 bytes 0xC3 0xA9 masked with 0x7f -- confirm the
// default Buffer encoding).
assert.equal(Buffer('hérité').toString('ascii'), 'hC)ritC)');