
Commit c0eebb7

fix: Resolve Web Worker error by handling File objects correctly and enhance performance with chunked processing
1 parent: 6e5bfc6

File tree

4 files changed (+129 / -68 lines)

PERFORMANCE_OPTIMIZATIONS.md

Lines changed: 17 additions & 3 deletions
````diff
@@ -3,48 +3,56 @@
 ## Issue Resolution
 
 ### Problem Identified
+
 The error "Cannot read properties of undefined (reading 'split')" was caused by the Web Worker expecting a string `fileContent` parameter, but receiving a `File` object instead.
 
 ### Root Cause
+
 The FileUpload component was passing a `File` object directly to the Web Worker, but the worker was trying to call `.split()` on `undefined` because it expected the file content as a string.
 
 ### Solution Implemented
+
 1. **Updated Web Worker**: Modified `csvWorker.js` to properly handle `File` objects by using the `file.text()` method to read file content asynchronously.
 2. **Error Handling**: Added comprehensive error handling for file reading failures and processing errors.
 3. **Proper Async Flow**: Implemented proper promise-based file reading with `.then()` and `.catch()` handlers.
 
 ## Performance Improvements Implemented
 
 ### 1. Web Worker Integration ✅
+
 - **Non-blocking CSV processing**: Large files no longer freeze the UI during upload and processing
 - **Progress tracking**: Real-time progress updates showing rows processed vs total rows
 - **Chunked processing**: Processes data in 10,000-row chunks to maintain responsiveness
 - **Memory efficient**: Processes data incrementally rather than loading everything into memory at once
 
 ### 2. DataProcessor Utility Class ✅
+
 - **Memory-efficient aggregation**: Optimized data structures for large datasets
 - **Intelligent sampling**: Automatically samples large datasets while preserving trends
 - **Efficient filtering**: Early termination and optimized filtering logic
 - **Performance-aware operations**: Limits data points and uses chunked processing
 
 ### 3. Component Optimizations ✅
+
 - **Memoized calculations**: Uses `useMemo` for expensive computations like repository aggregation
 - **Callback optimization**: Uses `useCallback` to prevent unnecessary re-renders
 - **Efficient data structures**: Pre-compiled regex patterns and optimized lookup operations
 
 ### 4. UI/UX Improvements ✅
+
 - **Progress indicators**: Visual progress bar with row count display
 - **Error recovery**: Graceful error handling with user-friendly messages
 - **Background processing**: Non-blocking file uploads maintain UI responsiveness
 
 ## Technical Implementation Details
 
 ### Web Worker Architecture
+
 ```javascript
 // File object handling
-file.text().then(fileContent => {
+file.text().then((fileContent) => {
   processCSVContent(fileContent, chunkSize);
-})
+});
 
 // Chunked processing
 function processChunk(startIndex) {
````
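For context, here is a minimal sketch of the worker flow this hunk describes. `file.text()`, `processCSVContent`, and `processChunk` are named in the document itself; the message shapes (`progress` / `complete` / `error`) and the naive CSV row splitting are assumptions, and the sketch is written in TypeScript for illustration even though the real `csvWorker.js` is plain JavaScript.

```typescript
/// <reference lib="webworker" />
declare const self: DedicatedWorkerGlobalScope;

self.onmessage = (event: MessageEvent) => {
  const { file, chunkSize = 10000 } = event.data as {
    file: File;
    chunkSize?: number;
  };

  // The fix: the worker receives a File object, so read it as text first
  // instead of calling .split() on a parameter that was never a string.
  file
    .text()
    .then((fileContent) => processCSVContent(fileContent, chunkSize))
    .catch((error) =>
      self.postMessage({ type: "error", message: String(error) })
    );
};

function processCSVContent(fileContent: string, chunkSize: number) {
  const rows = fileContent.split("\n"); // safe: fileContent is now a string
  const parsed: string[][] = [];

  function processChunk(startIndex: number) {
    const end = Math.min(startIndex + chunkSize, rows.length);
    for (let i = startIndex; i < end; i++) {
      parsed.push(rows[i].split(",")); // naive CSV split, for illustration only
    }
    // Progress message drives the "rows processed vs total rows" UI
    self.postMessage({ type: "progress", processed: end, total: rows.length });

    if (end < rows.length) {
      setTimeout(() => processChunk(end), 0); // yield between chunks
    } else {
      self.postMessage({ type: "complete", data: parsed });
    }
  }

  processChunk(0);
}
```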
````diff
@@ -54,6 +62,7 @@ function processChunk(startIndex) {
 ```
 
 ### DataProcessor Optimizations
+
 ```typescript
 // Memory-efficient repository aggregation
 static aggregateByRepository(data, topN = 10, breakdown = "quantity") {
````
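On the posting side, a main-thread counterpart to the worker sketch above might look like the following. This wiring is hypothetical; only the `File`-object posting, the chunk size, and the progress/complete/error flow are described by this commit, and the message shapes mirror the assumed ones above.

```typescript
// Hypothetical main-thread wiring for the worker; the actual FileUpload
// internals are not shown in this commit.
function startCsvParse(
  file: File,
  onProgress: (processed: number, total: number) => void,
  onComplete: (rows: string[][]) => void,
  onError: (message: string) => void
): Worker {
  const worker = new Worker(new URL("./csvWorker.js", import.meta.url));

  worker.onmessage = (event: MessageEvent) => {
    const msg = event.data;
    if (msg.type === "progress") onProgress(msg.processed, msg.total);
    else if (msg.type === "complete") onComplete(msg.data);
    else if (msg.type === "error") onError(msg.message);
  };

  // Post the File object itself; the worker reads it via file.text(),
  // which is the core of the bug fix in this commit.
  worker.postMessage({ file, chunkSize: 10000 });
  return worker;
}
```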
````diff
@@ -69,9 +78,10 @@ private static sampleData(data, targetSize) {
 ```
 
 ### Component Optimizations
+
 ```typescript
 // Memoized expensive calculations
-const { topRepos, repoTotals, dailyData } = useMemo(() => 
+const { topRepos, repoTotals, dailyData } = useMemo(() =>
   DataProcessor.aggregateByRepository(data, 10, breakdown),
   [data, breakdown]
 );
````
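The hunk above shows the memoized aggregation; the earlier "Callback optimization" bullet has no snippet of its own. A minimal sketch of the pattern, with hypothetical component and handler names and assumed import paths:

```typescript
import { useCallback, useMemo, useState } from "react";
import { DataProcessor } from "@/lib/dataProcessor"; // import path assumed
import type { ServiceData } from "@/lib/types"; // type location assumed

// Hypothetical wrapper component; names are illustrative, not from the source.
function RepositoryChartContainer({ data }: { data: ServiceData[] }) {
  const [breakdown, setBreakdown] = useState<"cost" | "quantity">("cost");

  // Recompute the aggregation only when its inputs actually change
  const { topRepos, dailyData } = useMemo(
    () => DataProcessor.aggregateByRepository(data, 10, breakdown),
    [data, breakdown]
  );

  // Stable function identity, so children memoized with React.memo
  // are not re-rendered just because the parent rendered
  const handleBreakdownChange = useCallback(
    (next: "cost" | "quantity") => setBreakdown(next),
    []
  );

  // ...render the chart from dailyData / topRepos and pass
  // handleBreakdownChange to the breakdown selector...
  return null; // placeholder for the actual JSX
}
```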
````diff
@@ -89,26 +99,30 @@ static filterData(data, filters) {
 ## Performance Benefits
 
 ### Before Optimizations
+
 - UI freezing during large file uploads
 - Slow rendering with large datasets
 - Memory issues with extensive data
 - Poor user experience during processing
 
 ### After Optimizations
+
 - ✅ Non-blocking file processing with progress tracking
 - ✅ Responsive UI even with large datasets (1000+ data points)
 - ✅ Memory-efficient processing with chunked operations
 - ✅ Optimized rendering with memoized calculations
 - ✅ Graceful error handling and recovery
 
 ## Testing Results
+
 - ✅ Build compilation successful with no errors
 - ✅ Development server running on localhost:3001
 - ✅ Web Worker properly handles File objects
 - ✅ Progress tracking functional during file processing
 - ✅ All existing functionality preserved
 
 ## Privacy-First Approach Maintained
+
 - ✅ All processing remains client-side
 - ✅ No data sent to external servers
 - ✅ Web Workers run in browser context
````

src/components/charts/ServiceChart.tsx

Lines changed: 9 additions & 2 deletions
```diff
@@ -995,14 +995,21 @@ function RepositoryBasedChart({
         <AreaChart data={orgChartData}>
           <CartesianGrid strokeDasharray="3 3" stroke="#374151" />
           <XAxis dataKey="date" stroke="#9ca3af" fontSize={12} />
-          <YAxis stroke="#9ca3af" fontSize={12} tickFormatter={getFormatter()} />
+          <YAxis
+            stroke="#9ca3af"
+            fontSize={12}
+            tickFormatter={getFormatter()}
+          />
           <Tooltip
             contentStyle={{
               backgroundColor: "#1f2937",
               border: "1px solid #374151",
               borderRadius: "8px",
             }}
-            formatter={(value: number) => [getFormatter()(value), getBreakdownLabel()]}
+            formatter={(value: number) => [
+              getFormatter()(value),
+              getBreakdownLabel(),
+            ]}
             labelStyle={{ color: "#d1d5db" }}
           />
           {orgsToShow.map((org: string, index: number) => (
```
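The multi-line layout makes the double invocation easier to see: `getFormatter()` returns a `(value: number) => string`, which Recharts' `tickFormatter` and `formatter` props then apply to each value. The helper itself is outside this diff; a hypothetical standalone version for illustration:

```typescript
// Hypothetical version of the helper; in ServiceChart.tsx it presumably
// takes no arguments and closes over the current breakdown state instead.
function getFormatter(
  breakdown: "cost" | "quantity"
): (value: number) => string {
  return breakdown === "cost"
    ? (value) => `$${value.toFixed(2)}` // cost axis: currency
    : (value) => value.toLocaleString(); // quantity axis: plain number
}

// Hence getFormatter()(value) at the call site: the first call obtains the
// formatting function, the second applies it to a tick or tooltip value.
```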

src/lib/dataProcessor.ts

Lines changed: 79 additions & 51 deletions
```diff
@@ -20,23 +20,27 @@ export class DataProcessor {
     data: ServiceData[],
     options: ProcessingOptions = {}
   ): Record<string, any> {
-    const { maxDataPoints = this.DEFAULT_MAX_DATA_POINTS, chunkSize = this.DEFAULT_CHUNK_SIZE } = options;
-
+    const {
+      maxDataPoints = this.DEFAULT_MAX_DATA_POINTS,
+      chunkSize = this.DEFAULT_CHUNK_SIZE,
+    } = options;
+
     // Sort data by date to enable efficient sampling
     const sortedData = [...data].sort((a, b) => a.date.localeCompare(b.date));
-
+
     // If data is too large, sample it intelligently
-    const sampledData = sortedData.length > maxDataPoints
-      ? this.sampleData(sortedData, maxDataPoints)
-      : sortedData;
+    const sampledData =
+      sortedData.length > maxDataPoints
+        ? this.sampleData(sortedData, maxDataPoints)
+        : sortedData;
 
     const aggregated: Record<string, any> = {};
-
+
     // Process in chunks to avoid blocking the main thread
     for (let i = 0; i < sampledData.length; i += chunkSize) {
       const chunk = sampledData.slice(i, i + chunkSize);
-
-      chunk.forEach(item => {
+
+      chunk.forEach((item) => {
         const date = item.date;
         if (!aggregated[date]) {
           aggregated[date] = {
@@ -45,15 +49,16 @@ export class DataProcessor {
             quantity: 0,
             repositories: new Set<string>(),
             organizations: new Set<string>(),
-            skus: new Set<string>()
+            skus: new Set<string>(),
           };
         }
-
+
         aggregated[date].cost += item.cost;
         aggregated[date].quantity += item.quantity;
-
+
         if (item.repository) aggregated[date].repositories.add(item.repository);
-        if (item.organization) aggregated[date].organizations.add(item.organization);
+        if (item.organization)
+          aggregated[date].organizations.add(item.organization);
         aggregated[date].skus.add(item.sku);
       });
     }
@@ -74,16 +79,19 @@ export class DataProcessor {
   /**
    * Intelligently sample large datasets while preserving trends
    */
-  private static sampleData(data: ServiceData[], targetSize: number): ServiceData[] {
+  private static sampleData(
+    data: ServiceData[],
+    targetSize: number
+  ): ServiceData[] {
     if (data.length <= targetSize) return data;
-
+
     const step = Math.ceil(data.length / targetSize);
     const sampled: ServiceData[] = [];
-
+
     for (let i = 0; i < data.length; i += step) {
       sampled.push(data[i]);
     }
-
+
     return sampled;
   }
 
@@ -114,37 +122,48 @@ export class DataProcessor {
       costCenter?: string;
     }
   ): ServiceData[] {
-    const { startDate, endDate, organization, repository, costCenter } = filters;
-
-    return data.filter(item => {
+    const { startDate, endDate, organization, repository, costCenter } =
+      filters;
+
+    return data.filter((item) => {
       // Date filtering (most selective first)
       if (startDate && item.date < startDate) return false;
       if (endDate && item.date > endDate) return false;
-
+
       // String filtering with exact matches for performance
-      if (organization && organization !== "all" && item.organization !== organization) return false;
-      if (repository && repository !== "all" && item.repository !== repository) return false;
-      if (costCenter && costCenter !== "all" && item.costCenter !== costCenter) return false;
-
+      if (
+        organization &&
+        organization !== "all" &&
+        item.organization !== organization
+      )
+        return false;
+      if (repository && repository !== "all" && item.repository !== repository)
+        return false;
+      if (costCenter && costCenter !== "all" && item.costCenter !== costCenter)
+        return false;
+
       return true;
     });
   }
 
   /**
    * Memory-efficient unique value extraction
    */
-  static getUniqueValues(data: ServiceData[], field: keyof ServiceData): string[] {
+  static getUniqueValues(
+    data: ServiceData[],
+    field: keyof ServiceData
+  ): string[] {
     const seen = new Set<string>();
     const result: string[] = [];
-
+
     for (const item of data) {
       const value = item[field] as string;
       if (value && !seen.has(value)) {
         seen.add(value);
         result.push(value);
       }
     }
-
+
     return result.sort();
   }
 
@@ -163,22 +182,28 @@ export class DataProcessor {
       actionsStorage: [] as ServiceData[],
       packages: [] as ServiceData[],
      copilot: [] as ServiceData[],
-      codespaces: [] as ServiceData[]
+      codespaces: [] as ServiceData[],
     };
 
-    data.forEach(item => {
+    data.forEach((item) => {
       const sku = item.sku.toLowerCase();
-
+
       // Simple pattern matching for basic categorization
-      if (sku.includes('storage')) {
+      if (sku.includes("storage")) {
         categories.actionsStorage.push(item);
-      } else if (sku.includes('action') || sku.includes('minute') || sku.includes('linux') || sku.includes('windows') || sku.includes('macos')) {
+      } else if (
+        sku.includes("action") ||
+        sku.includes("minute") ||
+        sku.includes("linux") ||
+        sku.includes("windows") ||
+        sku.includes("macos")
+      ) {
         categories.actionsMinutes.push(item);
-      } else if (sku.includes('package')) {
+      } else if (sku.includes("package")) {
         categories.packages.push(item);
-      } else if (sku.includes('copilot')) {
+      } else if (sku.includes("copilot")) {
         categories.copilot.push(item);
-      } else if (sku.includes('codespace')) {
+      } else if (sku.includes("codespace")) {
         categories.codespaces.push(item);
       }
     });
@@ -200,9 +225,9 @@ export class DataProcessor {
   } {
     // First pass: calculate repository totals
     const repoTotals: Record<string, { cost: number; quantity: number }> = {};
-
-    data.forEach(item => {
-      const repo = item.repository || 'Unknown';
+
+    data.forEach((item) => {
+      const repo = item.repository || "Unknown";
       if (!repoTotals[repo]) {
         repoTotals[repo] = { cost: 0, quantity: 0 };
       }
@@ -212,31 +237,34 @@ export class DataProcessor {
 
     // Get top repositories
     const topRepos = Object.entries(repoTotals)
-      .sort(([, a], [, b]) => breakdown === "cost" ? b.cost - a.cost : b.quantity - a.quantity)
+      .sort(([, a], [, b]) =>
+        breakdown === "cost" ? b.cost - a.cost : b.quantity - a.quantity
+      )
      .slice(0, topN)
       .map(([repo]) => repo);
 
     // Second pass: aggregate daily data
     const dailyData: Record<string, any> = {};
-
-    data.forEach(item => {
+
+    data.forEach((item) => {
       const date = item.date;
-      const repo = topRepos.includes(item.repository || 'Unknown')
-        ? (item.repository || 'Unknown')
-        : 'Others';
-
+      const repo = topRepos.includes(item.repository || "Unknown")
+        ? item.repository || "Unknown"
+        : "Others";
+
       if (!dailyData[date]) {
         dailyData[date] = { date, total: 0, totalQuantity: 0 };
-        topRepos.forEach(r => {
+        topRepos.forEach((r) => {
           dailyData[date][r] = 0;
           dailyData[date][`${r}_quantity`] = 0;
         });
-        dailyData[date]['Others'] = 0;
-        dailyData[date]['Others_quantity'] = 0;
+        dailyData[date]["Others"] = 0;
+        dailyData[date]["Others_quantity"] = 0;
       }
-
+
       dailyData[date][repo] = (dailyData[date][repo] || 0) + item.cost;
-      dailyData[date][`${repo}_quantity`] = (dailyData[date][`${repo}_quantity`] || 0) + item.quantity;
+      dailyData[date][`${repo}_quantity`] =
+        (dailyData[date][`${repo}_quantity`] || 0) + item.quantity;
       dailyData[date].total += item.cost;
       dailyData[date].totalQuantity += item.quantity;
     });
```
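Read together, the reformatted utility supports a filter-then-aggregate pipeline: narrow the rows first with the early-exit checks in `filterData`, then build the chart series with `aggregateByRepository`. A usage sketch — the signatures are taken from the diff above, while the import paths, `rows` variable, and filter values are illustrative:

```typescript
import { DataProcessor } from "@/lib/dataProcessor"; // import path assumed
import type { ServiceData } from "@/lib/types"; // type location assumed

declare const rows: ServiceData[]; // e.g. the parsed CSV rows from the worker

// Narrow the dataset first; per-row checks bail out on the first miss
const filtered = DataProcessor.filterData(rows, {
  startDate: "2025-01-01",
  endDate: "2025-03-31",
  organization: "all", // "all" is treated as "no filter"
});

// Aggregate the top 10 repositories by cost for the chart
const { topRepos, dailyData } = DataProcessor.aggregateByRepository(
  filtered,
  10,
  "cost"
);

// Populate a filter dropdown in one pass plus a sort
const organizations = DataProcessor.getUniqueValues(filtered, "organization");
```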
