zlib: detect gzip files when using unzip*

Detect whether a gzip file is being passed to `unzip*` by
checking whether the input starts with the gzip magic bytes,
and set the decompression mode to `GUNZIP` or `INFLATE`
according to the result.

This enables gzip-only features like multi-member support
to be used together with the `unzip*` autodetection support
and thereby makes `gunzip*` and `unzip*` return identical
results for gzip input again.

Add a simple test for checking that features specific to
`zlib.gunzip`, notably support for multiple members, also work
when using `zlib.unzip`.

PR-URL: https://github.com/nodejs/node/pull/5884
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: James M Snell <jasnell@gmail.com>
pull/6065/head
Anna Henningsen 2016-03-24 02:13:09 +01:00 committed by Ben Noordhuis
parent 0d41463d1f
commit 2d7e31614d
3 changed files with 91 additions and 1 deletions

View File

@ -68,7 +68,8 @@ class ZCtx : public AsyncWrap {
windowBits_(0),
write_in_progress_(false),
pending_close_(false),
refs_(0) {
refs_(0),
gzip_id_bytes_read_(0) {
MakeWeak<ZCtx>(this);
}
@ -225,6 +226,8 @@ class ZCtx : public AsyncWrap {
static void Process(uv_work_t* work_req) {
ZCtx *ctx = ContainerOf(&ZCtx::work_req_, work_req);
const Bytef* next_expected_header_byte = nullptr;
// If the avail_out is left at 0, then it means that it ran out
// of room. If there was avail_out left over, then it means
// that all of the input was consumed.
@ -235,6 +238,50 @@ class ZCtx : public AsyncWrap {
ctx->err_ = deflate(&ctx->strm_, ctx->flush_);
break;
case UNZIP:
if (ctx->strm_.avail_in > 0) {
next_expected_header_byte = ctx->strm_.next_in;
}
switch (ctx->gzip_id_bytes_read_) {
case 0:
if (next_expected_header_byte == nullptr) {
break;
}
if (*next_expected_header_byte == GZIP_HEADER_ID1) {
ctx->gzip_id_bytes_read_ = 1;
next_expected_header_byte++;
if (ctx->strm_.avail_in == 1) {
// The only available byte was already read.
break;
}
} else {
ctx->mode_ = INFLATE;
break;
}
// fallthrough
case 1:
if (next_expected_header_byte == nullptr) {
break;
}
if (*next_expected_header_byte == GZIP_HEADER_ID2) {
ctx->gzip_id_bytes_read_ = 2;
ctx->mode_ = GUNZIP;
} else {
// There is no actual difference between INFLATE and INFLATERAW
// (after initialization).
ctx->mode_ = INFLATE;
}
break;
default:
CHECK(0 && "invalid number of gzip magic number bytes read");
}
// fallthrough
case INFLATE:
case GUNZIP:
case INFLATERAW:
@ -591,6 +638,7 @@ class ZCtx : public AsyncWrap {
bool write_in_progress_;
bool pending_close_;
unsigned int refs_;
unsigned int gzip_id_bytes_read_;
};

View File

@ -22,6 +22,20 @@ zlib.gunzip(data, common.mustCall((err, result) => {
assert.equal(result, 'abcdef', 'result should match original string');
}));
// `unzip` is given the same `data` as the `gunzip` call above and is
// expected to produce the same 'abcdef' output.
// NOTE(review): `data` is defined outside this hunk — presumably it is a
// concatenation of several gzip members; confirm against the full file.
zlib.unzip(data, common.mustCall((err, result) => {
assert.ifError(err);
assert.equal(result, 'abcdef', 'result should match original string');
}));
// Multi-member support does not apply to zlib inflate/deflate.
// Two deflate streams are concatenated here; the call must succeed and
// return only the contents of the first one ('abc').
zlib.unzip(Buffer.concat([
zlib.deflateSync('abc'),
zlib.deflateSync('def')
]), common.mustCall((err, result) => {
assert.ifError(err);
assert.equal(result, 'abc', 'result should match contents of first "member"');
}));
// files that have the "right" magic bytes for starting a new gzip member
// in the middle of themselves, even if they are part of a single
// regularly compressed member

View File

@ -0,0 +1,28 @@
'use strict';
// Checks that an Unzip stream decodes concatenated gzip members correctly
// when the compressed input is written one byte per chunk, so that the two
// gzip magic bytes of a member never arrive in the same write.
const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

// Two separately compressed gzip members, placed back to back.
const gzipMembers = [zlib.gzipSync('abc'), zlib.gzipSync('def')];
const data = Buffer.concat(gzipMembers);

// Decompressed chunks, collected in arrival order.
const resultBuffers = [];

const unzip = zlib.createUnzip();

// Any error reported by the stream fails the test.
unzip.on('error', (err) => {
  assert.ifError(err);
});

unzip.on('data', (chunk) => {
  resultBuffers.push(chunk);
});

// Once the stream has finished, the collected output must be the
// concatenation of both members' contents.
unzip.on('finish', common.mustCall(() => {
  const decoded = Buffer.concat(resultBuffers).toString();
  assert.deepStrictEqual(decoded, 'abcdef',
                         'result should match original string');
}));

// Write each single byte individually.
for (const byte of data) {
  unzip.write(Buffer.from([byte]));
}

unzip.end();