Skip to content

Commit f036177

Browse files
committed
feat: better parseMultipart
1 parent f356a2b commit f036177

File tree

2 files changed

+285
-30
lines changed

2 files changed

+285
-30
lines changed
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
import {parseMultipart} from '../parseMultipart';
2+
3+
describe('parseMultipart', () => {
4+
const boundary = 'test-boundary';
5+
const CRLF = '\r\n';
6+
7+
it('should parse a complete chunk with JSON content', () => {
8+
const chunk = {
9+
type: 'metrics',
10+
data: {value: 42},
11+
};
12+
const responseText = [
13+
`--${boundary}${CRLF}`,
14+
'Content-Type: application/json\r\n',
15+
`Content-Length: ${JSON.stringify(chunk).length}\r\n`,
16+
'\r\n',
17+
JSON.stringify(chunk),
18+
].join('');
19+
20+
const result = parseMultipart({
21+
responseText,
22+
lastProcessedLength: 0,
23+
boundary,
24+
});
25+
26+
expect(result.chunks).toHaveLength(1);
27+
expect(result.chunks[0]).toEqual(chunk);
28+
expect(result.lastProcessedLength).toBe(responseText.length);
29+
});
30+
31+
it('headers can be in any order', () => {
32+
const chunk = {
33+
type: 'metrics',
34+
data: {value: 42},
35+
};
36+
const responseText = [
37+
`--${boundary}${CRLF}`,
38+
`Content-Length: ${JSON.stringify(chunk).length}\r\n`,
39+
'Content-Type: application/json\r\n',
40+
'\r\n',
41+
JSON.stringify(chunk),
42+
].join('');
43+
44+
const result = parseMultipart({
45+
responseText,
46+
lastProcessedLength: 0,
47+
boundary,
48+
});
49+
50+
expect(result.chunks).toHaveLength(1);
51+
expect(result.chunks[0]).toEqual(chunk);
52+
expect(result.lastProcessedLength).toBe(responseText.length);
53+
});
54+
55+
it('should parse multiple complete chunks', () => {
56+
const chunk1 = {type: 'metrics', data: {value: 1}};
57+
const chunk2 = {type: 'metrics', data: {value: 2}};
58+
59+
const createChunkText = (chunk: any) =>
60+
[
61+
`--${boundary}${CRLF}`,
62+
'Content-Type: application/json\r\n',
63+
`Content-Length: ${JSON.stringify(chunk).length}\r\n`,
64+
'\r\n',
65+
JSON.stringify(chunk),
66+
].join('');
67+
68+
const responseText = createChunkText(chunk1) + createChunkText(chunk2);
69+
70+
const result = parseMultipart({
71+
responseText,
72+
lastProcessedLength: 0,
73+
boundary,
74+
});
75+
76+
expect(result.chunks).toHaveLength(2);
77+
expect(result.chunks[0]).toEqual(chunk1);
78+
expect(result.chunks[1]).toEqual(chunk2);
79+
expect(result.lastProcessedLength).toBe(responseText.length);
80+
});
81+
82+
it('should handle incomplete headers', () => {
83+
const responseText = [
84+
`--${boundary}${CRLF}`,
85+
'Content-Type: application/json\r\n',
86+
// Missing Content-Length and content
87+
].join('');
88+
89+
const result = parseMultipart({
90+
responseText,
91+
lastProcessedLength: 0,
92+
boundary,
93+
});
94+
95+
expect(result.chunks).toHaveLength(0);
96+
expect(result.lastProcessedLength).toBe(0);
97+
});
98+
99+
it('should handle incomplete content', () => {
100+
const chunk = {type: 'metrics', data: {value: 42}};
101+
const fullContent = JSON.stringify(chunk);
102+
const partialContent = fullContent.slice(0, 5); // Incomplete JSON
103+
104+
const responseText = [
105+
`--${boundary}${CRLF}`,
106+
'Content-Type: application/json\r\n',
107+
`Content-Length: ${fullContent.length}\r\n`,
108+
'\r\n',
109+
partialContent,
110+
].join('');
111+
112+
const result = parseMultipart({
113+
responseText,
114+
lastProcessedLength: 0,
115+
boundary,
116+
});
117+
118+
expect(result.chunks).toHaveLength(0);
119+
expect(result.lastProcessedLength).toBe(0);
120+
});
121+
122+
it('should handle invalid JSON content', () => {
123+
const invalidJson = '{invalid:json}';
124+
const responseText = [
125+
`--${boundary}${CRLF}`,
126+
'Content-Type: application/json\r\n',
127+
`Content-Length: ${invalidJson.length}\r\n`,
128+
'\r\n',
129+
invalidJson,
130+
].join('');
131+
132+
const result = parseMultipart({
133+
responseText,
134+
lastProcessedLength: 0,
135+
boundary,
136+
});
137+
138+
expect(result.chunks).toHaveLength(0);
139+
expect(result.lastProcessedLength).toBe(responseText.length);
140+
});
141+
142+
it('should continue parsing from lastProcessedLength', () => {
143+
const chunk1 = {type: 'metrics', data: {value: 1}};
144+
const chunk2 = {type: 'metrics', data: {value: 2}};
145+
146+
const createChunkText = (chunk: any) =>
147+
[
148+
`--${boundary}${CRLF}`,
149+
'Content-Type: application/json\r\n',
150+
`Content-Length: ${JSON.stringify(chunk).length}\r\n`,
151+
'\r\n',
152+
JSON.stringify(chunk),
153+
].join('');
154+
155+
const chunk1Text = createChunkText(chunk1);
156+
const chunk2Text = createChunkText(chunk2);
157+
const responseText = chunk1Text + chunk2Text;
158+
159+
// First parse
160+
const result1 = parseMultipart({
161+
responseText,
162+
lastProcessedLength: 0,
163+
boundary,
164+
});
165+
166+
expect(result1.chunks).toHaveLength(2);
167+
expect(result1.lastProcessedLength).toBe(responseText.length);
168+
169+
// Parse with existing lastProcessedLength
170+
const result2 = parseMultipart({
171+
responseText,
172+
lastProcessedLength: chunk1Text.length,
173+
boundary,
174+
});
175+
176+
expect(result2.chunks).toHaveLength(1);
177+
expect(result2.chunks[0]).toEqual(chunk2);
178+
expect(result2.lastProcessedLength).toBe(responseText.length);
179+
});
180+
181+
it('should ignore non-json content type', () => {
182+
const content = 'plain text content';
183+
const responseText = [
184+
`--${boundary}${CRLF}`,
185+
'Content-Type: text/plain\r\n',
186+
`Content-Length: ${content.length}\r\n`,
187+
'\r\n',
188+
content,
189+
].join('');
190+
191+
const result = parseMultipart({
192+
responseText,
193+
lastProcessedLength: 0,
194+
boundary,
195+
});
196+
197+
expect(result.chunks).toHaveLength(0);
198+
expect(result.lastProcessedLength).toBe(responseText.length);
199+
});
200+
});

src/services/parsers/parseMultipart.ts

Lines changed: 85 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,52 @@ interface MultipartResult {
77
}
88

99
const CRLF = '\r\n';
10+
const HEADER_VALUE_DELIMITER = ': ';
11+
12+
interface ParsedHeaders {
13+
contentType?: string;
14+
contentLength?: number;
15+
}
16+
17+
function parseHeaders(data: string, startPos: number): [ParsedHeaders, number] {
18+
const headers: ParsedHeaders = {};
19+
let pos = startPos;
20+
21+
while (pos < data.length) {
22+
// Check for end of headers
23+
if (data.startsWith(CRLF, pos)) {
24+
return [headers, pos + CRLF.length];
25+
}
26+
27+
const nextCRLF = data.indexOf(CRLF, pos);
28+
if (nextCRLF === -1) {
29+
// Headers are incomplete
30+
return [headers, startPos];
31+
}
32+
33+
const line = data.slice(pos, nextCRLF);
34+
const colonIndex = line.indexOf(HEADER_VALUE_DELIMITER);
35+
36+
if (colonIndex !== -1) {
37+
const header = line.slice(0, colonIndex).toLowerCase();
38+
const value = line.slice(colonIndex + HEADER_VALUE_DELIMITER.length, nextCRLF);
39+
40+
if (header.toLowerCase() === 'content-type') {
41+
headers.contentType = value;
42+
} else if (header.toLowerCase() === 'content-length') {
43+
const length = parseInt(value, 10);
44+
if (!isNaN(length)) {
45+
headers.contentLength = length;
46+
}
47+
}
48+
}
49+
50+
pos = nextCRLF + CRLF.length;
51+
}
52+
53+
// Headers are incomplete
54+
return [headers, startPos];
55+
}
1056

1157
export function parseMultipart({
1258
responseText,
@@ -17,47 +63,56 @@ export function parseMultipart({
1763
lastProcessedLength: number;
1864
boundary?: string;
1965
}): MultipartResult {
20-
const newData = responseText.slice(lastProcessedLength);
21-
22-
if (!newData) {
23-
return {chunks: [], lastProcessedLength};
24-
}
25-
26-
// Split on boundary with double dashes and CRLF
66+
const data = responseText;
2767
const boundaryStr = `--${boundary}${CRLF}`;
28-
const parts = newData.split(boundaryStr);
29-
30-
let currentPosition = lastProcessedLength;
68+
let pos = lastProcessedLength;
3169
const chunks: StreamingChunk[] = [];
3270

33-
for (let i = 0; i < parts.length; i++) {
34-
const part = parts[i];
35-
const isLastPart = i === parts.length - 1;
71+
while (pos < data.length) {
72+
// Look for boundary
73+
const boundaryPos = data.indexOf(boundaryStr, pos);
74+
if (boundaryPos === -1) {
75+
break;
76+
}
3677

37-
const lines = part.split(CRLF);
78+
// Move position past boundary
79+
pos = boundaryPos + boundaryStr.length;
3880

39-
const emptyLineIndex = lines.findIndex((line) => line === '');
40-
if (emptyLineIndex === -1 || !lines[emptyLineIndex + 1]) {
41-
if (isLastPart) {
42-
break;
43-
}
44-
continue;
81+
// Parse headers
82+
const [headers, contentStart] = parseHeaders(data, pos);
83+
if (contentStart === pos || !headers.contentLength) {
84+
// Headers were incomplete or invalid
85+
pos = lastProcessedLength;
86+
break;
4587
}
4688

47-
const jsonContent = lines[emptyLineIndex + 1];
89+
// Check if we have enough data for the content
90+
const contentEnd = contentStart + headers.contentLength;
91+
if (contentEnd > data.length) {
92+
// Content is incomplete
93+
pos = lastProcessedLength;
94+
break;
95+
}
4896

49-
let parsedChunk: StreamingChunk | null = null;
50-
try {
51-
parsedChunk = JSON.parse(jsonContent) as StreamingChunk;
52-
} catch {}
97+
// Extract content
98+
const content = data.slice(contentStart, contentEnd);
5399

54-
if (!parsedChunk) {
55-
break;
100+
// Try to parse JSON content
101+
try {
102+
if (headers.contentType === 'application/json') {
103+
const parsedChunk = JSON.parse(content) as StreamingChunk;
104+
chunks.push(parsedChunk);
105+
}
106+
} catch {
107+
// Invalid JSON, skip this chunk
56108
}
57109

58-
chunks.push(parsedChunk);
59-
currentPosition += boundaryStr.length + part.length;
110+
// Move position to end of content
111+
pos = contentEnd;
60112
}
61113

62-
return {chunks, lastProcessedLength: currentPosition};
114+
return {
115+
chunks,
116+
lastProcessedLength: pos,
117+
};
63118
}

0 commit comments

Comments (0)