From 96a314b68b19277cc71ccac06b6517956b8f8a22 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Thu, 6 Dec 2012 05:13:14 +0100 Subject: [PATCH] buffer: strip high bits when converting to ascii MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider the following example: console.log(Buffer('ú').toString('ascii')); Before this commit, the contents of the buffer was used as-is and hence it prints 'ú'. Now, it prints 'C:'. Perhaps not much of an improvement but it conforms to what the documentation says it does: strip off the high bits. Fixes #4371. --- doc/api/buffer.markdown | 7 ++++--- src/node_buffer.cc | 28 ++++++++++++++++++++++++++-- test/simple/test-buffer-ascii.js | 27 +++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 test/simple/test-buffer-ascii.js diff --git a/doc/api/buffer.markdown b/doc/api/buffer.markdown index fb118a954b4..fa52ae48c3c 100644 --- a/doc/api/buffer.markdown +++ b/doc/api/buffer.markdown @@ -19,9 +19,10 @@ encoding method. Here are the different string encodings. * `'ascii'` - for 7 bit ASCII data only. This encoding method is very fast, and will strip the high bit if set. - Note that this encoding converts a null character (`'\0'` or `'\u0000'`) into - `0x20` (character code of a space). If you want to convert a null character - into `0x00`, you should use `'utf8'`. + + Note that when converting from string to buffer, this encoding converts a null + character (`'\0'` or `'\u0000'`) into `0x20` (character code of a space). If + you want to convert a null character into `0x00`, you should use `'utf8'`. * `'utf8'` - Multibyte encoded Unicode characters. Many web pages and other document formats use UTF-8.
diff --git a/src/node_buffer.cc b/src/node_buffer.cc index aeedf310485..af53c98efbd 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -247,15 +247,38 @@ Handle Buffer::BinarySlice(const Arguments &args) { } +static bool contains_non_ascii(const char* buf, size_t len) { /* Returns true if any of the first len bytes of buf has bit 0x80 set. */ + for (size_t i = 0; i < len; ++i) { + if (buf[i] & 0x80) return true; + } + return false; +} + + +static void force_ascii(const char* src, char* dst, size_t len) { /* Writes each of the first len bytes of src to dst with bit 0x80 cleared. */ + for (size_t i = 0; i < len; ++i) { + dst[i] = src[i] & 0x7f; + } +} + + Handle Buffer::AsciiSlice(const Arguments &args) { HandleScope scope; Buffer *parent = ObjectWrap::Unwrap(args.This()); SLICE_ARGS(args[0], args[1]) char* data = parent->data_ + start; - Local string = String::New(data, end - start); + size_t len = end - start; - return scope.Close(string); + if (contains_non_ascii(data, len)) { /* A byte with the high bit set was found: build the string from a masked temporary copy. */ + char* out = new char[len]; + force_ascii(data, out, len); + Local rc = String::New(out, len); + delete[] out; /* The temporary is released right after String::New(out, len) returns. */ + return scope.Close(rc); + } + + return scope.Close(String::New(data, len)); /* No high bits set: build the string directly from the slice. */ } @@ -268,6 +291,7 @@ Handle Buffer::Utf8Slice(const Arguments &args) { return scope.Close(string); } + Handle Buffer::Ucs2Slice(const Arguments &args) { HandleScope scope; Buffer *parent = ObjectWrap::Unwrap(args.This()); diff --git a/test/simple/test-buffer-ascii.js b/test/simple/test-buffer-ascii.js new file mode 100644 index 00000000000..a741a3db1e5 --- /dev/null +++ b/test/simple/test-buffer-ascii.js @@ -0,0 +1,27 @@ +// Copyright Joyent, Inc. and other Node contributors.
+// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + +var common = require('../common'); +var assert = require('assert'); + +// ASCII conversion in node.js simply masks off the high bits, +// it doesn't do transliteration. +assert.equal(Buffer('hérité').toString('ascii'), 'hC)ritC)');