// Mirror of https://github.com/nodejs/node.git
// multipart parser (JavaScript, 436 lines, 14 KiB)
// Module dependencies.
var sys = require("sys");
var events = require("events");

// Leading whitespace that marks a folded (continued) header line.
var wrapExpression = /^[ \t]+/;

// Content-Type subtypes that are treated as multipart containers.
var multipartSubtypes = [
  "mixed", "rfc822", "message", "digest", "alternative",
  "related", "report", "signed", "encrypted", "form-data",
  "x-mixed-replace", "byteranges"
];
var multipartExpression = new RegExp(
  "^multipart/(" + multipartSubtypes.join("|") + ")", "i");

// Captures the boundary parameter of a Content-Type value.
var boundaryExpression = /boundary=([^;]+)/i;

var CR = "\r";
var LF = "\n";
var CRLF = CR + LF;

// Upper bound on how much unparsed data is buffered while waiting for a
// boundary or for the end of a header block.
var MAX_BUFFER_LENGTH = 16 * 1024;

// parser states.
var S_NEW_PART = 0;
var S_HEADER = 1;
var S_BODY = 2;
// Public API of this module.
exports.Stream = Stream;
exports.parse = parse;
exports.cat = cat;
// Parse a streaming message into a multipart Stream.
// The message is handed straight to the Stream constructor, which decides
// how to consume it: as an event emitter, as a "body" that emits or can be
// iterated, or as a plain string body that is written in one go.
function parse (message) {
  var stream = new Stream(message);
  return stream;
}
// WARNING: DON'T EVER USE THE CAT FUNCTION IN PRODUCTION WEBSITES!!
// It is a convenient test helper, but it buffers the entire message in
// memory. Pointing it at requests from a live web site lets any client
// drive memory usage as high as they like, which is the wrong thing to do
// in almost every case.
//
// Returns a promise that succeeds with the stream once the "complete" event
// fires, or fails with whatever the stream reports through "error".
// While parsing, parts are indexed on stream.files (by filename) and
// stream.fields (by name), and every body chunk is appended to the current
// part's "body" property.
function cat (message) {
  var promise = new events.Promise(),
    stream = parse(message);

  stream.files = {};
  stream.fields = {};

  function indexPart (part) {
    if (part.filename) stream.files[part.filename] = part;
    if (part.name) stream.fields[part.name] = part;
  }

  function appendBody (chunk) {
    var current = stream.part;
    current.body = (current.body || "") + chunk;
  }

  function fail (err) {
    promise.emitError(err);
  }

  function succeed () {
    promise.emitSuccess(stream);
  }

  stream.addListener("partBegin", indexPart);
  stream.addListener("body", appendBody);
  stream.addListener("error", fail);
  stream.addListener("complete", succeed);
  return promise;
}
// events:
// "partBegin", "partEnd", "body", "complete", "error"
// everything emits on the Stream directly.
// the stream's "parts" object is a nested collection of the header objects
// check the stream's "part" member to know what it's currently chewing on.
// this.part.parent refers to that part's containing message (which may be
// the stream itself)
// A non-multipart message looks just like a multipart message with a
// single part.
//
// The message can be supplied in one of these shapes:
//  - an emitter (has addListener): its "body" and "complete" events are used;
//  - { body: emitter }: the body's "data" and "end" events are used;
//  - { body: iterable } (has forEach): every item is written as a chunk; if
//    forEach returns something with addCallback, the end is deferred to it;
//  - { body: anything else }: written as a single chunk, then ended.
// If the source offers both pause and resume, they are exposed through the
// stream's own pause()/resume().
function Stream (message) {
  var isMultiPart = multipartHeaders(message, this),
    w = isMultiPart ? writer(this) : simpleWriter(this),
    e = ender(this),
    body;

  if (message.addListener) {
    message.addListener("body", w);
    message.addListener("complete", e);
    if (message.pause && message.resume) {
      this._pause = message;
    }
    return;
  }

  body = message.body;
  if (!body) return;

  if (body.pause && body.resume) {
    this._pause = body;
  }

  if (body.addListener) {
    // An emitting body delivers its chunks later. It must not fall through
    // to the branches below, which would write the emitter object itself as
    // a chunk and end the stream before any data arrived.
    body.addListener("data", w);
    body.addListener("end", e);
  } else if (body.forEach) {
    var p = body.forEach(w);
    if (p && p.addCallback) p.addCallback(e);
    else e();
  } else {
    // just write a string.
    w(body);
    e();
  }
}
// Stream instances inherit from EventEmitter and add error reporting plus
// pause/resume pass-through to the underlying source.
Stream.prototype = Object.create(events.EventEmitter.prototype);

// Record the failure on the stream (the writer and ender check _error and
// stop doing work once it is set) and notify "error" listeners.
Stream.prototype.error = function (ex) {
  this._error = ex;
  this.emit("error", ex);
};

// Pause the underlying source; throws if the source cannot be paused.
Stream.prototype.pause = function () {
  if (!this._pause) throw new Error("Unsupported");
  return this._pause.pause();
};

// Resume the underlying source; throws if the source cannot be resumed.
Stream.prototype.resume = function () {
  if (!this._pause) throw new Error("Unsupported");
  return this._pause.resume();
};
// Check the headers of the message. Returns true when the Content-Type is a
// recognised multipart type that carries a boundary, false otherwise.
//
// If a stream is supplied, it receives a fresh "headers" object holding the
// message's headers under lower-cased names, and the derived properties
// (name, filename, type, boundary, isMultiPart) are set on the stream.
// If no stream is supplied, the derived properties are set on the message
// itself. This divergence is so that we can avoid modifying the original
// message when we want a wrapper, but still have the info available when
// it's one of our own part objects.
//
// Header values may be strings (possibly comma-joined repeats) or arrays of
// repeated values; in both cases the last value wins.
function multipartHeaders (message, stream) {
  var field, val, contentType, contentDisposition = "";
  if (stream) stream.headers = {};
  for (var h in message.headers) if (message.headers.hasOwnProperty(h)) {
    val = message.headers[h];
    field = h.toLowerCase();
    if (stream) stream.headers[field] = val;
    if (field === "content-type") {
      contentType = val;
    } else if (field === "content-disposition") {
      contentDisposition = val;
    }
  }

  if (!Array.isArray(contentDisposition)) {
    contentDisposition = contentDisposition.split(",");
  }
  contentDisposition = contentDisposition[contentDisposition.length - 1];

  var mutate = (stream || message);

  // An array-valued Content-Type has no split(), so pick its last entry
  // before scanning for parameters. A string value is scanned whole, as
  // before, so that quoted parameter values containing commas survive.
  var contentTypeParams = Array.isArray(contentType)
    ? contentType[contentType.length - 1]
    : contentType;

  // Name and filename can come along with either content-disposition
  // or content-type. Well-behaved agents use CD rather than CT,
  // but sadly not all agents are well-behaved.
  [contentDisposition, contentTypeParams].forEach(function (h) {
    if (!h || typeof h !== "string") return;
    var cd = h.split(/; */);
    cd.shift();
    for (var i = 0, l = cd.length; i < l; i ++) {
      var bit = cd[i].split("="),
        name = bit.shift(),
        val = stripQuotes(bit.join("="));
      if (name === "filename" || name === "name") {
        mutate[name] = val;
      }
    }
  });

  if (!contentType) {
    return false;
  }

  // legacy
  // TODO: Update this when/if jsgi-style headers are supported.
  // this will keep working, but is less efficient than it could be.
  if (!Array.isArray(contentType)) {
    contentType = contentType.split(",");
  }
  contentType = contentType[contentType.length - 1];
  if (typeof contentType !== "string") {
    return false;
  }
  // A comma-joined value leaves a space in front of the last entry, which
  // would defeat the anchored multipart expression.
  contentType = contentType.replace(/^\s+/, "");

  // make sure it's actually multipart.
  var mpType = multipartExpression.exec(contentType);
  if (!mpType) {
    return false;
  }

  // make sure we have a boundary.
  var boundary = boundaryExpression.exec(contentType);
  if (!boundary) {
    return false;
  }
  // The boundary parameter may be sent as a quoted string; the quotes are
  // not part of the delimiter that appears in the body.
  var boundaryValue = boundary[1]
    .replace(/^\s+|\s+$/g, "")
    .replace(/^"(.*)"$/, "$1");
  if (!boundaryValue) {
    return false;
  }

  mutate.type = mpType[1];
  mutate.boundary = "--" + boundaryValue;
  mutate.isMultiPart = true;

  return true;
}
// Build the chunk handler for a message that is not multipart.
// The stream acts as its own single part: stream.part points at the stream
// and stream.type is false. The returned function emits "partBegin" (with
// the stream) once, right before the first chunk, and then re-emits every
// chunk as a "body" event.
function simpleWriter (stream) {
  var announced = false;
  stream.part = stream;
  stream.type = false;

  function write (chunk) {
    if (announced === false) {
      stream.emit("partBegin", stream);
      announced = true;
    }
    stream.emit("body", chunk);
  }

  return write;
}
// Build the chunk handler for a multipart message.
//
// The returned function appends each chunk to an internal buffer and runs a
// three-state machine over it:
//   S_NEW_PART - expects the current container's boundary at the start of
//                the buffer; "--" after it closes the container, anything
//                else opens a child part.
//   S_HEADER   - collects everything up to the blank line, parses it into
//                part.headers, and emits "partBegin".
//   S_BODY     - emits "body" chunks until the parent's boundary shows up,
//                then emits "partEnd".
// Malformed input is reported through stream.error(); once stream._error is
// set, further chunks are ignored.
function writer (stream) {
  var buffer = "",
    state = S_NEW_PART,
    part = stream.part = stream;
  stream.parts = [];
  // the top-level message is its own parent, so walking "up" from it is safe.
  stream.parent = stream;

  return function (chunk) {
    var boundary, len, offset, headerEnd, headerString, emittable, emit;

    if (stream._error) return;
    // write to the buffer, and then process the buffer.
    buffer += chunk;
    while (buffer.length > 0) {
      // Blank lines between boundaries and headers are noise and get
      // skipped. Inside a body they are payload: stripping them there would
      // drop data whenever a chunk boundary happened to fall in front of a
      // CRLF.
      if (state !== S_BODY) {
        while (buffer.substr(0, 2) === CRLF) buffer = buffer.substr(2);
      }
      switch (state) {
        case S_NEW_PART:
          // part is a multipart message.
          // we're either going to start reading a new part, or we're going to
          // end the current part, depending on whether the boundary has -- at
          // the end. either way, we expect --boundary right away.
          boundary = part.boundary;
          len = boundary.length;
          offset = buffer.indexOf(boundary);
          if (offset === -1) {
            if (buffer.length > MAX_BUFFER_LENGTH) {
              return stream.error(new Error(
                "Malformed: boundary not found at start of message"));
            }
            // keep waiting for it.
            return;
          }
          if (offset > 0) {
            return stream.error(new Error(
              "Malformed: data before the boundary"));
          }
          if (buffer.length < (len + 2)) {
            // we'll need to see either -- or CRLF after the boundary.
            // get it on the next pass.
            return;
          }
          if (buffer.substr(len, 2) === "--") {
            // this message is done.
            // chomp off the boundary and crlf and move up
            if (part !== stream) {
              // wait to see the crlf, unless this is the top-level message.
              if (buffer.length < (len + 4)) {
                return;
              }
              if (buffer.substr(len + 2, 2) !== CRLF) {
                return stream.error(new Error(
                  "Malformed: CRLF not found after boundary"));
              }
            }
            buffer = buffer.substr(len + 4);
            stream.emit("partEnd", part);
            stream.part = part = part.parent;
            state = S_NEW_PART;
            continue;
          }
          if (part !== stream) {
            // nested containers must have a crlf right after the boundary;
            // the top-level message is not checked.
            if (buffer.substr(len, 2) !== CRLF) {
              return stream.error(new Error(
                "Malformed: CRLF not found after boundary"));
            }
          }
          // walk past the crlf
          buffer = buffer.substr(len + 2);
          // mint a new child part, and start parsing headers.
          stream.part = part = startPart(part);
          state = S_HEADER;
          continue;

        case S_HEADER:
          // just grab everything to the double crlf.
          headerEnd = buffer.indexOf(CRLF + CRLF);
          if (headerEnd === -1) {
            if (buffer.length > MAX_BUFFER_LENGTH) {
              return stream.error(new Error(
                "Malformed: header unreasonably long."));
            }
            return;
          }
          headerString = buffer.substr(0, headerEnd);
          // chomp off the header and the empty line.
          buffer = buffer.substr(headerEnd + 4);
          try {
            parseHeaderString(part.headers, headerString);
          } catch (ex) {
            return stream.error(ex);
          }
          multipartHeaders(part);

          // let the world know
          stream.emit("partBegin", part);

          if (part.isMultiPart) {
            // it has a boundary and we're ready to grab parts out.
            state = S_NEW_PART;
          } else {
            // it doesn't have a boundary, and is about to
            // start spitting out body bits.
            state = S_BODY;
          }
          continue;

        case S_BODY:
          // look for part.parent.boundary
          boundary = part.parent.boundary;
          offset = buffer.indexOf(boundary);
          if (offset === -1) {
            // emit and wait for more data, but be careful, because
            // we might only have part of the delimiter so far. The delimiter
            // is CRLF + boundary, and the longest incomplete prefix of it is
            // boundary.length + 1 characters, so hold back that many.
            emittable = buffer.length - (boundary.length + 1);

            if (emittable > 0) {
              stream.emit("body", buffer.substr(0, emittable));
              buffer = buffer.substr(emittable);
            }
            // haven't seen the boundary, so wait for more bytes.
            return;
          }
          if (offset > 0) {
            emit = buffer.substr(0, offset);
            // the crlf in front of the boundary belongs to the delimiter.
            if (emit.substr(-2) === CRLF) emit = emit.substr(0, emit.length - 2);
            if (emit) stream.emit("body", emit);
            buffer = buffer.substr(offset);
          }

          // let em know we're done.
          stream.emit("partEnd", part);

          // now buffer starts with boundary.
          if (buffer.substr(boundary.length, 2) === "--") {
            // message end.
            // parent ends, look for a new part in the grandparent.
            stream.part = part = part.parent;
            stream.emit("partEnd", part);
            stream.part = part = part.parent;
            state = S_NEW_PART;
            buffer = buffer.substr(boundary.length + 4);
          } else {
            // another part coming for the parent message.
            stream.part = part = part.parent;
            state = S_NEW_PART;
          }
          continue;
      }
    }
  };
}
// Parse a CRLF-separated header block and add every field to `headers`
// through affixHeader. Field names are lower-cased; everything after the
// first ":" is the value, with leading whitespace removed. A line that
// starts with a space or tab continues the previous field's value, with the
// leading whitespace collapsed to a single space.
// Throws if the very first line is a continuation, or if a line has an
// empty field name.
function parseHeaderString (headers, string) {
  var rows = string.split(CRLF),
    total = rows.length,
    field, value;

  for (var n = 0; n < total; n += 1) {
    var row = rows[n];

    if (wrapExpression.test(row)) {
      if (!field) {
        throw new Error("Malformed. First header starts with whitespace.");
      }
      value += row.replace(wrapExpression, " ");
      continue;
    }

    // now that we know the previous field isn't wrapping, store it.
    if (field) {
      affixHeader(headers, field, value);
    }

    var colon = row.indexOf(":"),
      rawName = colon === -1 ? row : row.substr(0, colon),
      rawValue = colon === -1 ? "" : row.substr(colon + 1);

    field = rawName.toLowerCase();
    if (!field) {
      throw new Error("Malformed: improper field name.");
    }
    value = rawValue.replace(/^\s+/, "");
  }

  // the last field is still pending.
  affixHeader(headers, field, value);
}
// Store a header value on `headers`. The first value for a field is stored
// as-is; a second value turns the entry into a two-element array, and later
// values are appended to that array.
function affixHeader (headers, field, value) {
  if (!headers.hasOwnProperty(field)) {
    headers[field] = value;
    return;
  }
  var existing = headers[field];
  if (Array.isArray(existing)) {
    existing.push(value);
    return;
  }
  headers[field] = [existing, value];
}
// Create a new child part with empty headers, link it to its parent, and
// register it in the parent's "parts" list (creating the list if needed).
// Returns the new part.
function startPart (parent) {
  var child = {};
  child.headers = {};
  child.parent = parent;

  parent.parts = parent.parts || [];
  parent.parts.push(child);
  return child;
}
// Build the end-of-input handler for a stream. When called, it does nothing
// if the stream has already recorded an error. Otherwise it emits "partEnd"
// for the stream itself when the stream is not multipart, and then emits
// "complete".
function ender (stream) {
  function finish () {
    if (stream._error) return;
    if (!stream.isMultiPart) {
      stream.emit("partEnd", stream);
    }
    stream.emit("complete");
  }
  return finish;
}
// Remove backslash escapes from a string: "\\" becomes "\", "\0" becomes a
// NUL character, a backslash that has no character after it on the same
// line is dropped, and any other escaped character is kept without its
// backslash. The argument is converted to a string first.
//
// +   original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
// +   improved by: Ates Goral (http://magnetiq.com)
// +      fixed by: Mick@el
// +   improved by: marrtins
// +   bugfixed by: Onno Marsman
// +   improved by: rezna
// +   input by: Rick Waldron
// +   reimplemented by: Brett Zamir (http://brett-zamir.me)
// *     example 1: stripslashes("Kevin\'s code");
// *     returns 1: "Kevin's code"
// *     example 2: stripslashes("Kevin\\\'s code");
// *     returns 2: "Kevin\'s code"
function stripslashes (str) {
  var unescapeOne = function (match, escaped) {
    if (escaped === "0") {
      return "\0";
    }
    // covers "\\" -> "\", "" -> "" and every other character as-is.
    return escaped;
  };
  return ("" + str).replace(/\\(.?)/g, unescapeOne);
}
// Turn a header parameter value into its plain text: remove one pair of
// matching surrounding quotes (double or single) if present, then resolve
// backslash escapes with stripslashes. Values that are not quoted are
// returned intact apart from escape handling, rather than losing their
// first and last characters.
function stripQuotes (str) {
  str = str + "";
  var first = str.charAt(0),
    last = str.charAt(str.length - 1),
    quoted = str.length >= 2 && first === last &&
      (first === "\"" || first === "'");
  if (quoted) {
    str = str.substr(1, str.length - 2);
  }
  return stripslashes(str);
}