diff --git a/doc/api/buffer.markdown b/doc/api/buffer.markdown index fb118a954b4..fa52ae48c3c 100644 --- a/doc/api/buffer.markdown +++ b/doc/api/buffer.markdown @@ -19,9 +19,10 @@ encoding method. Here are the different string encodings. * `'ascii'` - for 7 bit ASCII data only. This encoding method is very fast, and will strip the high bit if set. - Note that this encoding converts a null character (`'\0'` or `'\u0000'`) into - `0x20` (character code of a space). If you want to convert a null character - into `0x00`, you should use `'utf8'`. + + Note that when converting from string to buffer, this encoding converts a null + character (`'\0'` or `'\u0000'`) into `0x20` (character code of a space). If + you want to convert a null character into `0x00`, you should use `'utf8'`. * `'utf8'` - Multibyte encoded Unicode characters. Many web pages and other document formats use UTF-8. diff --git a/src/node_buffer.cc b/src/node_buffer.cc index aeedf310485..af53c98efbd 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -247,15 +247,38 @@ Handle Buffer::BinarySlice(const Arguments &args) { } +static bool contains_non_ascii(const char* buf, size_t len) { + for (size_t i = 0; i < len; ++i) { + if (buf[i] & 0x80) return true; + } + return false; +} + + +static void force_ascii(const char* src, char* dst, size_t len) { + for (size_t i = 0; i < len; ++i) { + dst[i] = src[i] & 0x7f; + } +} + + Handle Buffer::AsciiSlice(const Arguments &args) { HandleScope scope; Buffer *parent = ObjectWrap::Unwrap(args.This()); SLICE_ARGS(args[0], args[1]) char* data = parent->data_ + start; - Local string = String::New(data, end - start); + size_t len = end - start; - return scope.Close(string); + if (contains_non_ascii(data, len)) { + char* out = new char[len]; + force_ascii(data, out, len); + Local rc = String::New(out, len); + delete[] out; + return scope.Close(rc); + } + + return scope.Close(String::New(data, len)); } @@ -268,6 +291,7 @@ Handle Buffer::Utf8Slice(const Arguments 
&args) { return scope.Close(string); } + Handle Buffer::Ucs2Slice(const Arguments &args) { HandleScope scope; Buffer *parent = ObjectWrap::Unwrap(args.This()); diff --git a/test/simple/test-buffer-ascii.js b/test/simple/test-buffer-ascii.js new file mode 100644 index 00000000000..a741a3db1e5 --- /dev/null +++ b/test/simple/test-buffer-ascii.js @@ -0,0 +1,27 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + +var common = require('../common'); +var assert = require('assert'); + +// ASCII conversion in node.js simply masks off the high bit of each byte; +// it doesn't do transliteration. +assert.equal(Buffer('hérité').toString('ascii'), 'hC)ritC)');