-
Notifications
You must be signed in to change notification settings - Fork 9
Refactor log data processing in dataLoader.ts for improved memory efficiency and error handling #73
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
+74
−30
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -293,48 +293,83 @@ function isGzipFile(buffer: ArrayBuffer): boolean { | |
} | ||
|
||
/** | ||
* Decompresses a gzip file using CompressionStream API | ||
* @param buffer - ArrayBuffer containing the gzip data | ||
* @returns Promise resolving to decompressed text | ||
* Parses log data from a stream, handling line-by-line NDJSON parsing. | ||
* This is memory-efficient and suitable for very large files. | ||
* @param stream - A ReadableStream of Uint8Array (e.g., from a decompressed file) | ||
* @returns A promise that resolves to an array of LogEntry objects | ||
*/ | ||
async function decompressGzip(buffer: ArrayBuffer): Promise<string> { | ||
try { | ||
// Check if CompressionStream is supported | ||
if (!('DecompressionStream' in window)) { | ||
throw new Error('DecompressionStream API is not supported in this browser'); | ||
async function parseLogDataFromStream(stream: ReadableStream<Uint8Array>): Promise<LogEntry[]> { | ||
const reader = stream.pipeThrough(new TextDecoderStream()).getReader(); | ||
let buffer = ''; | ||
const entries: LogEntry[] = []; | ||
|
||
while (true) { | ||
const { done, value } = await reader.read(); | ||
if (done) { | ||
if (buffer.trim()) { | ||
try { | ||
const parsedLine: LogEntry = JSON.parse(buffer); | ||
if (parsedLine && typeof parsedLine === 'object') { | ||
entries.push(parsedLine); | ||
} | ||
} catch (e) { | ||
console.warn(`Failed to parse final line as JSON: ${buffer.substring(0, 100)}...`); | ||
} | ||
} | ||
break; | ||
} | ||
|
||
// Create a decompression stream | ||
const ds = new DecompressionStream('gzip'); | ||
const decompressedStream = new Blob([buffer]).stream().pipeThrough(ds); | ||
const decompressedBlob = await new Response(decompressedStream).blob(); | ||
return await decompressedBlob.text(); | ||
} catch (error) { | ||
console.error('Error decompressing gzip file:', error); | ||
throw new Error(`Failed to decompress gzip file: ${error instanceof Error ? error.message : String(error)}`); | ||
buffer += value; | ||
const lines = buffer.split('\n'); | ||
buffer = lines.pop() || ''; | ||
|
||
for (const line of lines) { | ||
if (line.trim() === '') continue; | ||
try { | ||
const parsedLine: LogEntry = JSON.parse(line); | ||
if (parsedLine && typeof parsedLine === 'object') { | ||
entries.push(parsedLine); | ||
} | ||
} catch (e) { | ||
console.warn(`Failed to parse line as JSON: ${line.substring(0, 100)}...`); | ||
} | ||
} | ||
} | ||
|
||
if (entries.length === 0) { | ||
console.error("No valid JSON entries found in stream data"); | ||
throw new Error("No valid JSON entries found in stream data"); | ||
} | ||
|
||
return entries; | ||
} | ||
|
||
|
||
/** | ||
* Processes ArrayBuffer data, handling gzip decompression if needed | ||
* Processes ArrayBuffer data, handling gzip decompression and parsing if needed | ||
* @param buffer - ArrayBuffer containing the data | ||
* @returns Promise resolving to text string | ||
* @returns Promise resolving to an array of LogEntry objects | ||
*/ | ||
async function processArrayBuffer(buffer: ArrayBuffer): Promise<string> { | ||
async function processArrayBuffer(buffer: ArrayBuffer): Promise<LogEntry[]> { | ||
// Check if file is gzip compressed | ||
if (isGzipFile(buffer)) { | ||
try { | ||
const textData = await decompressGzip(buffer); | ||
return textData; | ||
if (!('DecompressionStream' in window)) { | ||
throw new Error('DecompressionStream API is not supported in this browser'); | ||
} | ||
const ds = new DecompressionStream('gzip'); | ||
const decompressedStream = new Blob([buffer]).stream().pipeThrough(ds); | ||
return await parseLogDataFromStream(decompressedStream); | ||
} catch (error) { | ||
console.error("Error decompressing gzip data:", error); | ||
throw new Error(`Failed to decompress gzip data: ${error instanceof Error ? error.message : String(error)}`); | ||
console.error('Error decompressing or parsing gzip stream:', error); | ||
const message = error instanceof Error ? error.message : String(error); | ||
throw new Error(`Failed to process gzip stream: ${message}`); | ||
} | ||
} else { | ||
// Convert ArrayBuffer to string if it's not compressed | ||
// For non-gzipped files that are small enough to fit in memory | ||
const decoder = new TextDecoder(); | ||
const textData = decoder.decode(buffer); | ||
return textData; | ||
return parseLogData(textData); | ||
} | ||
} | ||
|
||
|
@@ -351,9 +386,7 @@ export async function loadLogData(url: string): Promise<LogEntry[]> { | |
} | ||
|
||
const buffer = await response.arrayBuffer(); | ||
const textData = await processArrayBuffer(buffer); | ||
|
||
return parseLogData(textData); | ||
return await processArrayBuffer(buffer); | ||
} catch (error) { | ||
console.error("Error loading log data:", error); | ||
throw error; | ||
|
@@ -367,6 +400,18 @@ export async function loadLogData(url: string): Promise<LogEntry[]> { | |
* @returns Promise resolving to an array of LogEntry objects | ||
*/ | ||
export function loadLogDataFromFile(file: File): Promise<LogEntry[]> { | ||
// For large files, we should use streaming to avoid memory issues | ||
const LARGE_FILE_THRESHOLD = 100 * 1024 * 1024; // 100 MB | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you share the constant with CodeViewer.tsx? Edit: nvm, I realize it's a different limit |
||
if (file.size > LARGE_FILE_THRESHOLD) { | ||
console.log(`File size (${file.size} bytes) exceeds threshold, using streaming.`); | ||
// Note: This does not handle gzipped files selected locally, as we can't | ||
// easily detect gzip from a stream without reading parts of it first. | ||
// The assumption is that very large local files are not gzipped or | ||
// have already been decompressed. | ||
return parseLogDataFromStream(file.stream() as ReadableStream<Uint8Array>); | ||
} | ||
|
||
// For smaller files, reading into memory is faster and simpler. | ||
return new Promise((resolve, reject) => { | ||
const reader = new FileReader(); | ||
|
||
|
@@ -381,8 +426,7 @@ export function loadLogDataFromFile(file: File): Promise<LogEntry[]> { | |
throw new Error("Expected ArrayBuffer from FileReader"); | ||
} | ||
|
||
const textData = await processArrayBuffer(result); | ||
resolve(parseLogData(textData)); | ||
resolve(await processArrayBuffer(result)); | ||
} catch (error) { | ||
console.error("Error parsing data from file:", error); | ||
reject(error); | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
very minor nit - you can de-duplicate this with the other place (above) where you handle this logic
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missed the comments. will update here in next PR