Skip to content

Commit 733aff5

Browse files
authored
Parse trailing LWSP after multipart boundary delimiter (#17)
2 parents 1070819 + b9dc218 commit 733aff5

File tree

3 files changed

+132
-16
lines changed

3 files changed

+132
-16
lines changed

src/Component.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export class Component implements Part {
2222
* @param data Component byte representation to parse
2323
*/
2424
public static parse(data: Uint8Array): Component {
25-
const hasHeaders = Multipart.findSequenceIndex(data, Multipart.CRLF) !== 0;
25+
const hasHeaders = !(data[0] === Multipart.CR && data[1] === Multipart.LF);
2626
const headersEndIndex = hasHeaders ? Multipart.findSequenceIndex(data, Multipart.combineArrays([Multipart.CRLF, Multipart.CRLF])) + 2 : 0;
2727

2828
const headersBuffer = data.slice(0, headersEndIndex);

src/Multipart.ts

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -181,23 +181,23 @@ export class Multipart implements Part {
181181
console.warn("Invalid boundary:", new TextDecoder().decode(boundary), "\nMust be 1 to 70 characters long, not end with space, and may only contain: A-Z a-z 0-9 '()+_,-./:=? and space");
182182

183183
const parts: Uint8Array[] = [];
184-
const fullBoundarySequence = new Uint8Array(Multipart.combineArrays([Multipart.DOUBLE_DASH, boundary, Multipart.CRLF]));
185-
const endBoundarySequence = new Uint8Array(Multipart.combineArrays([Multipart.DOUBLE_DASH, boundary, Multipart.DOUBLE_DASH, Multipart.CRLF]));
186184

187-
let start = 0;
188-
while (true) {
189-
const boundaryIndex = Multipart.findSequenceIndex(data, fullBoundarySequence, start);
190-
if (boundaryIndex === -1) break;
191-
192-
const partStart = boundaryIndex + fullBoundarySequence.length;
193-
const nextBoundaryIndex = Multipart.findSequenceIndex(data, fullBoundarySequence, partStart);
194-
const endBoundaryIndex = Multipart.findSequenceIndex(data, endBoundarySequence, partStart);
185+
// add artificial CRLF at the start of the data
186+
const paddedData = Multipart.combineArrays([Multipart.CRLF, data]);
187+
const closingBoundaryDelimiter = Multipart.combineArrays([boundary, Multipart.DOUBLE_DASH]);
195188

196-
// -2 to ignore the mandatory CRLF at the end of the body
197-
const partEnd = nextBoundaryIndex === -1 ? (endBoundaryIndex === -1 ? data.length : endBoundaryIndex - 2) : nextBoundaryIndex - 2;
198-
199-
if (partStart < partEnd) parts.push(data.slice(partStart, partEnd));
200-
start = partEnd;
189+
let start = 0;
190+
while (start < paddedData.length) {
191+
const boundaryIndices = Multipart.findBoundaryBounds(paddedData, boundary, start);
192+
if (boundaryIndices === null) break;
193+
const [, boundaryEnd] = boundaryIndices;
194+
const nextBoundaryIndices =
195+
Multipart.findBoundaryBounds(paddedData, boundary, boundaryEnd + 1)
196+
?? Multipart.findBoundaryBounds(paddedData, closingBoundaryDelimiter, boundaryEnd + 1);
197+
if (nextBoundaryIndices === null) break;
198+
const [nextBoundaryStart] = nextBoundaryIndices;
199+
parts.push(paddedData.slice(boundaryEnd, nextBoundaryStart));
200+
start = nextBoundaryStart;
201201
}
202202

203203
const parsedParts = parts.map(Component.parse);
@@ -267,6 +267,35 @@ export class Multipart implements Part {
267267
return -1;
268268
}
269269

270+
/**
 * Find the start and end index of the next boundary delimiter line.
 *
 * A delimiter line is: CRLF, `--`, the boundary bytes, optional trailing linear whitespace
 * (SP / HTAB), and then either a terminating CRLF or the end of the data.
 * A candidate that is followed by any other byte (e.g. `--boundary this is fake`) is not a
 * delimiter; it is skipped and the search continues behind it.
 *
 * @param data Multipart body bytes
 * @param boundary The multipart boundary bytes
 * @param [start] The index to start the search at (i.e. the number of bytes to skip/ignore at the beginning of the byte array). Defaults to 0.
 * @returns `[delimiterStart, delimiterEnd]`, where `delimiterStart` is the index of the delimiter's leading CRLF and
 * `delimiterEnd` is the index just past its terminating CRLF (or `data.length` when the delimiter runs to the end of the data),
 * or `null` if no boundary delimiter can be found
 * @internal
 */
private static findBoundaryBounds(data: Uint8Array, boundary: Uint8Array, start = 0): [number, number] | null {
    const HTAB = 0x09;
    // Built once up front instead of once per rejected candidate.
    const delimiter = Multipart.combineArrays([Multipart.CRLF, Multipart.DOUBLE_DASH, boundary]);

    // Iterative on purpose: retrying through recursion costs one stack frame per look-alike
    // boundary, so a body with many such lines could exhaust the call stack.
    let searchFrom = start;
    while (searchFrom < data.length) {
        const delimiterStart = Multipart.findSequenceIndex(data, delimiter, searchFrom);
        if (delimiterStart === -1) return null;

        // Skip the optional linear whitespace that may trail the boundary.
        let cursor = delimiterStart + delimiter.length;
        while (cursor < data.length && (data[cursor] === Multipart.SP || data[cursor] === HTAB)) cursor++;

        // The delimiter runs to the end of the data. The CRLF after a close-delimiter is optional,
        // so treat end-of-data as a valid terminator instead of losing the final part.
        if (cursor >= data.length) return [delimiterStart, data.length];

        if (data[cursor] === Multipart.CR && data[cursor + 1] === Multipart.LF)
            return [delimiterStart, cursor + 2];

        // Some other byte follows the boundary, so this line is not a delimiter.
        // Resume the search just past the candidate's leading CRLF.
        searchFrom = delimiterStart + 2;
    }

    return null;
}
298+
270299
/**
271300
* Parse header params in the format `key=value;foo = "bar"; baz`
272301
*/

test/Multipart.test.js

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,93 @@ describe("Multipart", function () {
128128
expect(parsedMultipart).to.be.an.instanceof(Multipart);
129129
expect(parsedMultipart.parts).to.be.empty;
130130
});
131+
132+
it("should parse Multipart from empty Component bytes", function () {
    // Round-trip: serialise a multipart wrapping a single component built from `{}`, then parse it back.
    const serialized = new Multipart([new Component({})]).bytes();
    const reparsed = Multipart.parse(serialized);

    expect(reparsed).to.be.an.instanceof(Multipart);
    expect(reparsed.parts.length).to.equal(1);

    // The only part is expected to serialise to a bare CRLF, with no headers and no body.
    const onlyPart = reparsed.parts[0];
    expect(onlyPart.bytes()).to.deep.equal(Multipart.CRLF);
    expect(onlyPart.headers).to.be.empty;
    expect(onlyPart.body).to.be.empty;
});
143+
144+
it("should handle parsing of empty parts in multipart MIME string", function () {
    // A multipart message whose single part consists of nothing but the header/body separator.
    const message = [
        "Content-type: multipart/mixed; boundary=\"simple boundary\"\r\n\r\n",
        "--simple boundary\r\n",
        "\r\n",
        "\r\n",
        "--simple boundary--\r\n",
    ].join("");

    // Parse the raw message, serialise the result, and parse that output once more.
    const firstPass = Multipart.parse(new TextEncoder().encode(message));
    const reparsed = Multipart.parse(firstPass.bytes());

    expect(reparsed).to.be.an.instanceof(Multipart);
    expect(reparsed.parts.length).to.equal(1);

    const onlyPart = reparsed.parts[0];
    expect(onlyPart.bytes()).to.deep.equal(Multipart.CRLF);
    expect(onlyPart.headers).to.be.empty;
    expect(onlyPart.body).to.be.empty;
});
160+
161+
it("should ignore linear whitespace after boundary delimiter", function () {
    // Every delimiter line below carries trailing spaces and/or tabs before its CRLF.
    const rawBody = [
        '--simple boundary \r\n',
        'X-Foo: Bar\r\n',
        '\r\n',
        'The boundary delimiter of this part has trailing SP.\r\n',
        '--simple boundary\t\t\r\n',
        'X-Foo: Baz\r\n',
        '\r\n',
        'The boundary delimiter of this part has trailing tab.\r\n',
        '--simple boundary \t\t\ \r\n',
        'X-Foo: Foo\r\n',
        '\r\n',
        'The boundary delimiter of this part has trailing SP and tab.\r\n',
        '--simple boundary--\t \t\r\n',
    ].join('');

    const encoder = new TextEncoder();
    const parsedMultipart = Multipart.parseBody(encoder.encode(rawBody), encoder.encode("simple boundary"));

    expect(parsedMultipart).to.be.an.instanceof(Multipart);
    expect(parsedMultipart.parts.length).to.equal(3);

    // Expected `x-foo` header value and decoded body for each part, in order.
    const expectedParts = [
        ["Bar", "The boundary delimiter of this part has trailing SP."],
        ["Baz", "The boundary delimiter of this part has trailing tab."],
        ["Foo", "The boundary delimiter of this part has trailing SP and tab."],
    ];
    expectedParts.forEach(function ([headerValue, bodyText], index) {
        const part = parsedMultipart.parts[index];
        expect(part.headers.get("x-foo")).to.equal(headerValue);
        expect(new TextDecoder().decode(part.body)).to.equal(bodyText);
    });
});
191+
192+
it("should handle strings that look like part boundary", function () {
    // The first part's body contains a line that starts like a delimiter but continues with
    // other text, so it must stay part of the body instead of splitting the part.
    const rawBody = [
        '--simple boundary\r\n',
        'X-Foo: Bar\r\n',
        '\r\n',
        'Can this handle\r\n',
        '--simple boundary this is fake\r\n',
        '\r\n',
        'not new part\r\n',
        '--simple boundary\r\n',
        'X-Foo: Baz\r\n',
        '\r\n',
        'Final part\r\n',
        '--simple boundary--\r\n',
    ].join('');

    const encoder = new TextEncoder();
    const parsedMultipart = Multipart.parseBody(encoder.encode(rawBody), encoder.encode("simple boundary"));

    expect(parsedMultipart).to.be.an.instanceof(Multipart);
    expect(parsedMultipart.parts.length).to.equal(2);

    // Expected `x-foo` header value and decoded body for each part, in order.
    const expectedParts = [
        ["Bar", "Can this handle\r\n--simple boundary this is fake\r\n\r\nnot new part"],
        ["Baz", "Final part"],
    ];
    expectedParts.forEach(function ([headerValue, bodyText], index) {
        const part = parsedMultipart.parts[index];
        expect(part.headers.get("x-foo")).to.equal(headerValue);
        expect(new TextDecoder().decode(part.body)).to.equal(bodyText);
    });
});
131218
});
132219

133220
describe("formData", function () {

0 commit comments

Comments
 (0)