node/lib/multipart.js

438 lines
14 KiB
JavaScript

var sys = require("sys"),
events = require("events"),
wrapExpression = /^[ \t]+/,
multipartExpression = new RegExp(
"^multipart\/(" +
"mixed|rfc822|message|digest|alternative|" +
"related|report|signed|encrypted|form-data|" +
"x-mixed-replace|byteranges)", "i"),
boundaryExpression = /boundary=([^;]+)/i,
CR = "\r",
LF = "\n",
CRLF = CR+LF,
MAX_BUFFER_LENGTH = 16 * 1024,
// parser states.
s = 0,
S_NEW_PART = s++,
S_HEADER = s++,
S_BODY = s++;
exports.parse = parse;
exports.cat = cat;
exports.Stream = Stream;
// Parse a streaming message to a stream.
// If the message has a "body" and no "addListener", then
// just take it in and write() the body.
function parse (message) {
return new Stream(message);
};
// WARNING: DONT EVER USE THE CAT FUNCTION IN PRODUCTION WEBSITES!!
// It works pretty great, and it's a nice test function. But if
// you use this function to parse an HTTP request from a live web
// site, then you're essentially giving the world permission to
// rack up as much memory usage as they can manage. This function
// buffers the whole message, which is very convenient, but also
// very much the wrong thing to do in most cases.
function cat (message, callback) {
var stream = parse(message);
stream.files = {};
stream.fields = {};
stream.addListener("partBegin", function (part) {
if (part.filename) stream.files[part.filename] = part;
if (part.name) stream.fields[part.name] = part;
});
stream.addListener("body", function (chunk) {
stream.part.body = (stream.part.body || "") + chunk;
});
stream.addListener("error", function (e) { p.emitError(e)
if (callback) callback(e);
});
stream.addListener("complete", function () {
if (callback) callback(null, stream);
});
};
// events:
// "partBegin", "partEnd", "body", "complete"
// everything emits on the Stream directly.
// the stream's "parts" object is a nested collection of the header objects
// check the stream's "part" member to know what it's currently chewin on.
// this.part.parent refers to that part's containing message (which may be
// the stream itself)
// child messages inherit their parent's headers
// A non-multipart message looks just like a multipart message with a
// single part.
function Stream (message) {
var isMultiPart = multipartHeaders(message, this),
w = isMultiPart ? writer(this) : simpleWriter(this),
e = ender(this);
if (message.addListener) {
message.addListener("data", w);
message.addListener("end", e);
if (message.pause && message.resume) {
this._pause = message;
}
} else if (message.body) {
var self = this;
if (message.body.pause && message.body.resume) {
this._pause = message.body;
}
if (message.body.addListener) {
message.body.addListener("data", w);
message.body.addListener("end", e);
} if (message.body.forEach) {
var p = message.body.forEach(w);
if (p && p.addCallback) p.addCallback(e);
else e();
} else {
// just write a string.
w(message.body);
e();
}
}
};
Stream.prototype = {
__proto__ : events.EventEmitter.prototype,
error : function (ex) {
this._error = ex;
this.emit("error", ex);
},
pause : function () {
if (this._pause) return this._pause.pause();
throw new Error("Unsupported");
},
resume : function () {
if (this._pause) return this._pause.resume();
throw new Error("Unsupported");
}
};
// check the headers of the message. If it wants to be multipart,
// then we'll be returning true. Regardless, if supplied, then
// stream will get a headers object that inherits from message's.
// If no stream object is supplied, then this function just inspects
// the message's headers for multipartness, and modifies the message
// directly. This divergence is so that we can avoid modifying
// the original message when we want a wrapper, but still have the
// info available when it's one of our own objects.
function multipartHeaders (message, stream) {
var field, val, contentType, contentDisposition = "";
if (stream) stream.headers = {};
for (var h in message.headers) if (message.headers.hasOwnProperty(h)) {
val = message.headers[h];
field = h.toLowerCase();
if (stream) stream.headers[field] = val;
if (field === "content-type") {
contentType = val;
} else if (field === "content-disposition") {
contentDisposition = val;
}
}
if (!Array.isArray(contentDisposition)) {
contentDisposition = contentDisposition.split(",");
}
contentDisposition = contentDisposition[contentDisposition.length - 1];
var mutate = (stream || message);
// Name and filename can come along with either content-disposition
// or content-type. Well-behaved agents use CD rather than CT,
// but sadly not all agents are well-behaved.
[contentDisposition, contentType].forEach(function (h) {
if (!h) return;
var cd = h.split(/; */);
cd.shift();
for (var i = 0, l = cd.length; i < l; i ++) {
var bit = cd[i].split("="),
name = bit.shift(),
val = stripQuotes(bit.join("="));
if (name === "filename" || name === "name") {
mutate[name] = val;
}
}
});
if (!contentType) {
return false;
}
// legacy
// TODO: Update this when/if jsgi-style headers are supported.
// this will keep working, but is less efficient than it could be.
if (!Array.isArray(contentType)) {
contentType = contentType.split(",");
}
contentType = contentType[contentType.length-1];
// make sure it's actually multipart.
var mpType = multipartExpression.exec(contentType);
if (!mpType) {
return false;
}
// make sure we have a boundary.
var boundary = boundaryExpression.exec(contentType);
if (!boundary) {
return false;
}
mutate.type = mpType[1];
mutate.boundary = "--" + boundary[1];
mutate.isMultiPart = true;
return true;
};
function simpleWriter (stream) {
stream.part = stream;
stream.type = false;
var started = false;
return function (chunk) {
if (!started) {
stream.emit("partBegin", stream);
started = true;
}
stream.emit("body", chunk);
};
}
function writer (stream) {
var buffer = "",
state = S_NEW_PART,
part = stream.part = stream;
stream.parts = [];
stream.parent = stream;
return function (chunk) {
if (stream._error) return;
// write to the buffer, and then process the buffer.
buffer += chunk;
while (buffer.length > 0) {
while (buffer.substr(0, 2) === CRLF) buffer = buffer.substr(2);
switch (state) {
case S_NEW_PART:
// part is a multipart message.
// we're either going to start reading a new part, or we're going to
// end the current part, depending on whether the boundary has -- at
// the end. either way, we expect --boundary right away.
var boundary = part.boundary,
len = boundary.length,
offset = buffer.indexOf(boundary);
if (offset === -1) {
if (buffer.length > MAX_BUFFER_LENGTH) {
return stream.error(new Error(
"Malformed: boundary not found at start of message"));
}
// keep waiting for it.
return;
}
if (offset > 0) {
return stream.error(Error("Malformed: data before the boundary"));
}
if (buffer.length < (len + 2)) {
// we'll need to see either -- or CRLF after the boundary.
// get it on the next pass.
return;
}
if (buffer.substr(len, 2) === "--") {
// this message is done.
// chomp off the boundary and crlf and move up
if (part !== stream) {
// wait to see the crlf, unless this is the top-level message.
if (buffer.length < (len + 4)) {
return;
}
if (buffer.substr(len+2, 2) !== CRLF) {
return stream.error(new Error(
"Malformed: CRLF not found after boundary"));
}
}
buffer = buffer.substr(len + 4);
stream.emit("partEnd", part);
stream.part = part = part.parent;
state = S_NEW_PART;
continue;
}
if (part !== stream) {
// wait to see the crlf, unless this is the top-level message.
if (buffer.length < (len + 2)) {
return;
}
if (buffer.substr(len, 2) !== CRLF) {
return stream.error(new Error(
"Malformed: CRLF not found after boundary"));
}
}
// walk past the crlf
buffer = buffer.substr(len + 2);
// mint a new child part, and start parsing headers.
stream.part = part = startPart(part);
state = S_HEADER;
continue;
case S_HEADER:
// just grab everything to the double crlf.
var headerEnd = buffer.indexOf(CRLF+CRLF);
if (headerEnd === -1) {
if (buffer.length > MAX_BUFFER_LENGTH) {
return stream.error(new Error(
"Malformed: header unreasonably long."));
}
return;
}
var headerString = buffer.substr(0, headerEnd);
// chomp off the header and the empty line.
buffer = buffer.substr(headerEnd + 4);
try {
parseHeaderString(part.headers, headerString);
} catch (ex) {
return stream.error(ex);
}
multipartHeaders(part);
// let the world know
stream.emit("partBegin", part);
if (part.isMultiPart) {
// it has a boundary and we're ready to grab parts out.
state = S_NEW_PART;
} else {
// it doesn't have a boundary, and is about to
// start spitting out body bits.
state = S_BODY;
}
continue;
case S_BODY:
// look for part.parent.boundary
var boundary = part.parent.boundary,
offset = buffer.indexOf(boundary);
if (offset === -1) {
// emit and wait for more data, but be careful, because
// we might only have half of the boundary so far.
// make sure to leave behind the boundary's length, so that we'll
// definitely get it next time if it's on its way.
var emittable = buffer.length - boundary.length;
if (buffer.substr(-1) === CR) emittable -= 1;
if (buffer.substr(-2) === CRLF) emittable -= 2;
if (emittable > 0) {
stream.emit("body", buffer.substr(0, emittable));
buffer = buffer.substr(emittable);
}
// haven't seen the boundary, so wait for more bytes.
return;
}
if (offset > 0) {
var emit = buffer.substr(0, offset);
if (emit.substr(-2) === CRLF) emit = emit.substr(0, emit.length-2);
if (emit) stream.emit("body", emit);
buffer = buffer.substr(offset);
}
// let em know we're done.
stream.emit("partEnd", part);
// now buffer starts with boundary.
if (buffer.substr(boundary.length, 2) === "--") {
// message end.
// parent ends, look for a new part in the grandparent.
stream.part = part = part.parent;
stream.emit("partEnd", part);
stream.part = part = part.parent;
state = S_NEW_PART;
buffer = buffer.substr(boundary.length + 4);
} else {
// another part coming for the parent message.
stream.part = part = part.parent;
state = S_NEW_PART;
}
continue;
}
}
};
};
function parseHeaderString (headers, string) {
var lines = string.split(CRLF),
field, value, line;
for (var i = 0, l = lines.length; i < l; i ++) {
line = lines[i];
if (line.match(wrapExpression)) {
if (!field) {
throw new Error("Malformed. First header starts with whitespace.");
}
value += line.replace(wrapExpression, " ");
continue;
} else if (field) {
// now that we know it's not wrapping, put it on the headers obj.
affixHeader(headers, field, value);
}
line = line.split(":");
field = line.shift().toLowerCase();
if (!field) {
throw new Error("Malformed: improper field name.");
}
value = line.join(":").replace(/^\s+/, "");
}
// now affix the last field.
affixHeader(headers, field, value);
};
function affixHeader (headers, field, value) {
if (!headers.hasOwnProperty(field)) {
headers[field] = value;
} else if (Array.isArray(headers[field])) {
headers[field].push(value);
} else {
headers[field] = [headers[field], value];
}
};
function startPart (parent) {
var part = {
headers : {},
parent : parent
};
parent.parts = parent.parts || [];
parent.parts.push(part);
return part;
};
function ender (stream) { return function () {
if (stream._error) return;
if (!stream.isMultiPart) stream.emit("partEnd", stream);
stream.emit("complete");
}};
function stripslashes(str) {
// + original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
// + improved by: Ates Goral (http://magnetiq.com)
// + fixed by: Mick@el
// + improved by: marrtins
// + bugfixed by: Onno Marsman
// + improved by: rezna
// + input by: Rick Waldron
// + reimplemented by: Brett Zamir (http://brett-zamir.me)
// * example 1: stripslashes("Kevin\'s code");
// * returns 1: "Kevin's code"
// * example 2: stripslashes("Kevin\\\'s code");
// * returns 2: "Kevin\'s code"
return (str+"").replace(/\\(.?)/g, function (s, n1) {
switch(n1) {
case "\\":
return "\\";
case "0":
return "\0";
case "":
return "";
default:
return n1;
}
});
};
function stripQuotes (str) {
str = stripslashes(str);
return str.substr(1, str.length - 2);
};