diff --git a/.gitignore b/.gitignore index 0783c46..4356c6d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules .vercel build/ -.DS_Store \ No newline at end of file +.DS_Store +package-lock.json \ No newline at end of file diff --git a/src/api/XcodeProject.ts b/src/api/XcodeProject.ts index 6467c1e..fcb3f0e 100644 --- a/src/api/XcodeProject.ts +++ b/src/api/XcodeProject.ts @@ -3,7 +3,7 @@ import { readFileSync } from "fs"; import path from "path"; import crypto from "crypto"; -import { parse } from "../json"; +import { parse, parseOptimized } from "../json"; import * as json from "../json/types"; import { AbstractObject } from "./AbstractObject"; @@ -212,10 +212,61 @@ export class XcodeProject extends Map { return new XcodeProject(filePath, json); } - constructor(public filePath: string, props: Partial) { + /** + * Optimized open method for large projects + * @param filePath -- path to a `pbxproj` file + * @param options -- optimization options + */ + static openLazy(filePath: string, options: { + skipFullInflation?: boolean; + progressCallback?: (message: string) => void; + } = {}) { + const { skipFullInflation = true, progressCallback } = options; + + progressCallback?.('Reading file...'); + console.time('šŸ“ File read'); + const contents = readFileSync(filePath, "utf8"); + console.timeEnd('šŸ“ File read'); + + progressCallback?.('Parsing JSON...'); + console.time('šŸ” JSON parsing'); + const fileSizeMB = contents.length / 1024 / 1024; + let json; + + if (fileSizeMB > 5) { + // Use optimized parser for large files + json = parseOptimized(contents, { + progressCallback: (processed, total, stage, memoryMB) => { + progressCallback?.(`${stage}: ${processed}/${total}${memoryMB ? 
` (${memoryMB}MB)` : ''}`); + } + }); + } else { + json = parse(contents); + } + console.timeEnd('šŸ” JSON parsing'); + + const objectCount = Object.keys(json.objects || {}).length; + console.log(`šŸ“Š Found ${objectCount.toLocaleString()} objects`); + + progressCallback?.('Creating project...'); + console.time('šŸ—ļø Project creation'); + const project = new XcodeProject(filePath, json, { skipFullInflation }); + console.timeEnd('šŸ—ļø Project creation'); + + return project; + } + + constructor( + public filePath: string, + props: Partial, + options: { skipFullInflation?: boolean } = {} + ) { super(); - const json = JSON.parse(JSON.stringify(props)); + const { skipFullInflation = false } = options; + + // Optimize: avoid deep clone for large projects + const json = skipFullInflation ? props : JSON.parse(JSON.stringify(props)); assert(json.objects, "objects is required"); assert(json.rootObject, "rootObject is required"); @@ -228,9 +279,19 @@ export class XcodeProject extends Map { assertRootObject(json.rootObject, json.objects?.[json.rootObject]); // Inflate the root object. + console.time('🌱 Root object inflation'); this.rootObject = this.getObject(json.rootObject); - // This should never be needed in a compliant project. - this.ensureAllObjectsInflated(); + console.timeEnd('🌱 Root object inflation'); + + // Skip full inflation for large projects + if (!skipFullInflation) { + console.time('🌳 Full object inflation'); + this.ensureAllObjectsInflated(); + console.timeEnd('🌳 Full object inflation'); + } else { + const remainingCount = Object.keys(this.internalJsonObjects).length; + console.log(`ā­ļø Skipping full inflation of ${remainingCount.toLocaleString()} objects (lazy mode)`); + } } /** The directory containing the `*.xcodeproj/project.pbxproj` file, e.g. `/ios/` in React Native. 
*/ @@ -285,14 +346,71 @@ export class XcodeProject extends Map { // This method exists for sanity if (Object.keys(this.internalJsonObjects).length === 0) return; - debug( - "inflating unreferenced objects: %o", - Object.keys(this.internalJsonObjects) - ); + const remaining = Object.keys(this.internalJsonObjects).length; + debug("inflating unreferenced objects: %o", Object.keys(this.internalJsonObjects)); + + let processed = 0; + while (Object.keys(this.internalJsonObjects).length > 0) { + const uuid = Object.keys(this.internalJsonObjects)[0]; + this.getObject(uuid); + processed++; + + // Progress for large batches + if (remaining > 1000 && processed % 500 === 0) { + console.log(` āš™ļø Inflated ${processed}/${remaining} objects...`); + } + } + } + + /** + * Manually trigger full inflation of all objects (for lazy-loaded projects) + */ + forceFullInflation(progressCallback?: (processed: number, total: number) => void) { + const remaining = Object.keys(this.internalJsonObjects).length; + if (remaining === 0) { + console.log('āœ… All objects already inflated'); + return; + } + + console.log(`šŸ”„ Force inflating ${remaining.toLocaleString()} remaining objects...`); + console.time('🌳 Full inflation'); + + let processed = 0; while (Object.keys(this.internalJsonObjects).length > 0) { const uuid = Object.keys(this.internalJsonObjects)[0]; this.getObject(uuid); + processed++; + + if (progressCallback && processed % 100 === 0) { + progressCallback(processed, remaining); + } } + + console.timeEnd('🌳 Full inflation'); + console.log('āœ… Full inflation completed'); + } + + /** + * Get project statistics without full inflation + */ + getQuickStats() { + const totalObjects = this.size + Object.keys(this.internalJsonObjects).length; + const inflatedObjects = this.size; + const uninflatedObjects = Object.keys(this.internalJsonObjects).length; + + return { + totalObjects, + inflatedObjects, + uninflatedObjects, + inflationPercentage: ((inflatedObjects / totalObjects) * 
/**
 * Tests for optimized XcodeProject functionality
 *
 * These tests verify that the enhanced XcodeProject class:
 * - Supports lazy loading with openLazy()
 * - Provides performance statistics via getQuickStats()
 * - Maintains backward compatibility with original open()
 * - Handles large projects efficiently
 */

import path from 'path';
import { XcodeProject } from '../XcodeProject';

const FIXTURES_DIR = path.join(__dirname, '../../json/__tests__/fixtures');
const SMALL_FIXTURE = path.join(FIXTURES_DIR, 'project.pbxproj');
const MEDIUM_FIXTURE = path.join(FIXTURES_DIR, 'AFNetworking.pbxproj');

/** Opens a fixture in lazy mode (full inflation skipped). */
const openLazily = (fixture: string) =>
  XcodeProject.openLazy(fixture, { skipFullInflation: true });

describe('Optimized XcodeProject', () => {
  describe('openLazy', () => {
    it('should open project with lazy loading', () => {
      const project = openLazily(SMALL_FIXTURE);

      expect(project).toBeDefined();
      expect(project.rootObject).toBeDefined();
      expect(project.filePath).toBe(SMALL_FIXTURE);
    });

    it('should handle progress callbacks', () => {
      const progressCallback = jest.fn();

      const project = XcodeProject.openLazy(SMALL_FIXTURE, {
        skipFullInflation: true,
        progressCallback,
      });

      expect(project).toBeDefined();
      expect(progressCallback).toHaveBeenCalled();
    });

    it('should work with larger files', () => {
      const project = openLazily(MEDIUM_FIXTURE);

      expect(project).toBeDefined();
      expect(project.rootObject).toBeDefined();

      const stats = project.getQuickStats();
      expect(stats.totalObjects).toBeGreaterThan(0);
    });
  });

  describe('getQuickStats', () => {
    it('should provide project statistics', () => {
      const stats = openLazily(SMALL_FIXTURE).getQuickStats();

      expect(stats.totalObjects).toBeGreaterThan(0);
      expect(stats.inflatedObjects).toBeGreaterThanOrEqual(0);
      expect(stats.uninflatedObjects).toBeGreaterThanOrEqual(0);
      expect(stats.inflationPercentage).toBeDefined();

      const percentage = parseFloat(stats.inflationPercentage);
      expect(percentage).toBeGreaterThanOrEqual(0);
      expect(percentage).toBeLessThanOrEqual(100);
    });
  });

  describe('getUninflatedObjects', () => {
    it('should provide access to uninflated objects', () => {
      const uninflated = openLazily(SMALL_FIXTURE).getUninflatedObjects();

      expect(uninflated).toBeDefined();
      expect(typeof uninflated).toBe('object');
    });
  });

  describe('forceFullInflation', () => {
    it('should inflate remaining objects', () => {
      const project = openLazily(MEDIUM_FIXTURE);

      // Assert unconditionally: the post-condition must hold whether or not
      // anything was left to inflate, so the test can never pass vacuously.
      project.forceFullInflation();

      const statsAfter = project.getQuickStats();
      expect(statsAfter.uninflatedObjects).toBe(0);
      expect(statsAfter.inflationPercentage).toBe('100.0');
    });

    it('should handle progress callback during inflation', () => {
      const project = openLazily(MEDIUM_FIXTURE);
      const progressCallback = jest.fn();

      project.forceFullInflation(progressCallback);

      // The callback may or may not fire depending on how many objects were
      // left, but every invocation must carry a sane (processed, total) pair.
      // `toHaveBeenCalledTimes` only accepts an integer, not an asymmetric matcher.
      for (const [processed, total] of progressCallback.mock.calls) {
        expect(typeof processed).toBe('number');
        expect(typeof total).toBe('number');
        expect(processed).toBeLessThanOrEqual(total);
      }
      expect(project.getQuickStats().uninflatedObjects).toBe(0);
    });
  });

  describe('Integration with Enhanced Parsing', () => {
    it('should work with optimized parsing for medium files', () => {
      // openLazy only switches to parseOptimized above its size threshold;
      // this fixture exercises whichever parser openLazy selects for it.
      const project = openLazily(MEDIUM_FIXTURE);

      expect(project).toBeDefined();
      expect(project.rootObject).toBeDefined();

      // Should have loaded the main structure
      const mainGroup = project.rootObject.props.mainGroup;
      expect(mainGroup).toBeDefined();
      expect(mainGroup.getDisplayName()).toBeDefined();
    });

    it('should preserve all core functionality', () => {
      const project = openLazily(SMALL_FIXTURE);

      // Test core API still works
      expect(project.archiveVersion).toBeDefined();
      expect(project.objectVersion).toBeDefined();
      expect(project.rootObject).toBeDefined();
      expect(project.getProjectRoot()).toBeDefined();

      // Test that we can still access objects
      const rootObject = project.rootObject;
      expect(rootObject.props).toBeDefined();
      expect(rootObject.props.mainGroup).toBeDefined();
    });
  });

  describe('Backward Compatibility', () => {
    it('original open method should still work', () => {
      const project = XcodeProject.open(SMALL_FIXTURE);

      expect(project).toBeDefined();
      expect(project.rootObject).toBeDefined();
      expect(project.size).toBeGreaterThan(0);
    });

    it('openLazy should be compatible with original open results', () => {
      const originalProject = XcodeProject.open(SMALL_FIXTURE);
      const lazyProject = XcodeProject.openLazy(SMALL_FIXTURE, {
        skipFullInflation: false, // Full inflation for comparison
      });

      expect(lazyProject.archiveVersion).toBe(originalProject.archiveVersion);
      expect(lazyProject.objectVersion).toBe(originalProject.objectVersion);
      expect(lazyProject.rootObject.uuid).toBe(originalProject.rootObject.uuid);
    });
  });

  describe('Memory Management', () => {
    it('lazy loading should use less initial memory', () => {
      // This test is informational - memory usage can vary
      if (global.gc) global.gc();
      const startMemory = process.memoryUsage().heapUsed;

      const project = openLazily(MEDIUM_FIXTURE);

      const endMemory = process.memoryUsage().heapUsed;
      const memoryIncrease = (endMemory - startMemory) / 1024 / 1024; // MB

      // Should use reasonable memory (this is more of a smoke test)
      expect(memoryIncrease).toBeLessThan(100); // Less than 100MB
      expect(project).toBeDefined();
    });
  });
});
import * as parser from "./parser/parser";
import { XcodeProject } from "./types";
import { JsonVisitor } from "./visitor/JsonVisitor";
import { OptimizedJsonVisitor } from "./visitor/OptimizedJsonVisitor";
import { StreamingJsonVisitor } from "./visitor/StreamingJsonVisitor";
import { MemoryEfficientVisitor } from "./visitor/MemoryEfficientVisitor";
import { Writer } from "./writer";

export interface ParseOptions {
  /** Receives `(processed, total, stage, memoryMB?)` updates from the active visitor. */
  progressCallback?: (processed: number, total: number, stage: string, memoryMB?: number) => void;
  /** Forwarded to `StreamingJsonVisitor` (default 1000). */
  chunkSize?: number;
  /** Forwarded to `StreamingJsonVisitor` (default 1024). */
  maxMemoryMB?: number;
  /** Always use the streaming visitor; wins over `forceOptimized`. */
  forceStreaming?: boolean;
  /** Always use the optimized visitor unless `forceStreaming` is set. */
  forceOptimized?: boolean;
  /** Count `isa =` occurrences up front to give visitors a progress total (default true). */
  estimateObjects?: boolean;
}

type ParseStrategy = "original" | "optimized" | "streaming";

/** Inputs above this size (MB of decoded text) use `OptimizedJsonVisitor`. */
const OPTIMIZED_THRESHOLD_MB = 5;
/** Inputs above this size (MB of decoded text) use `StreamingJsonVisitor`. */
const STREAMING_THRESHOLD_MB = 50;

// Timer labels are shared between `console.time` and `console.timeEnd`.
const TOTAL_TIMER = "⚡ Total parsing time";
const CST_TIMER = "🔍 CST generation";

/** Picks a strategy from the force flags first, then from the input size. */
function chooseStrategy(fileSizeMB: number, options: ParseOptions): ParseStrategy {
  if (options.forceStreaming) return "streaming";
  if (options.forceOptimized) return "optimized";
  if (fileSizeMB > STREAMING_THRESHOLD_MB) return "streaming";
  if (fileSizeMB > OPTIMIZED_THRESHOLD_MB) return "optimized";
  return "original";
}

/** Builds the CST, making sure the CST timer is closed even when parsing throws. */
function generateCst(text: string) {
  console.time(CST_TIMER);
  try {
    return parser.parse(text);
  } catch (error) {
    console.error("❌ CST parsing failed:", error);
    throw error;
  } finally {
    console.timeEnd(CST_TIMER);
  }
}

/** Instantiates the visitor that implements `strategy`. */
function createVisitor(
  strategy: ParseStrategy,
  options: ParseOptions,
  estimatedObjects: number
): JsonVisitor | OptimizedJsonVisitor | StreamingJsonVisitor {
  const { progressCallback, chunkSize = 1000, maxMemoryMB = 1024 } = options;

  switch (strategy) {
    case "streaming":
      console.log("🌊 Using StreamingJsonVisitor for large file");
      return new StreamingJsonVisitor({
        chunkSize,
        maxMemoryMB,
        progressCallback: (processed, total, memoryMB) => {
          progressCallback?.(processed, total, "streaming", memoryMB);
        },
        estimatedObjects,
      });

    case "optimized":
      console.log("⚡ Using OptimizedJsonVisitor for medium file");
      return new OptimizedJsonVisitor({
        progressCallback: (processed, total, stage) => {
          progressCallback?.(processed, total, stage);
        },
        estimatedObjects,
      });

    default:
      console.log("📝 Using original JsonVisitor for small file");
      return new JsonVisitor();
  }
}

/** Runs one parse with an already-decided strategy; shared by both public entry points. */
function parseUsing(
  text: string,
  strategy: ParseStrategy,
  options: ParseOptions
): Partial<XcodeProject> {
  const fileSizeMB = text.length / 1024 / 1024;
  const { estimateObjects = true } = options;

  console.log(`📊 Input analysis:`);
  console.log(`   File size: ${fileSizeMB.toFixed(2)} MB`);
  console.log(`   Characters: ${text.length.toLocaleString()}`);

  // Estimate object count for progress reporting
  let estimatedObjects = 0;
  if (estimateObjects) {
    // Quick estimation based on pattern matching
    const objectMatches = text.match(/isa\s*=/g);
    estimatedObjects = objectMatches ? objectMatches.length : Math.floor(text.length / 1000);
    console.log(`   Estimated objects: ${estimatedObjects.toLocaleString()}`);
  }

  console.log(`   Strategy: ${strategy}`);
  console.log();

  console.time(TOTAL_TIMER);
  try {
    const cst = generateCst(text);

    let result: Partial<XcodeProject>;
    try {
      const visitor = createVisitor(strategy, options, estimatedObjects);
      visitor.visit(cst);
      result = visitor.context;

      // Show performance stats
      if ("getStats" in visitor) {
        const stats = visitor.getStats();
        console.log("📊 Parsing statistics:", stats);
      }
    } catch (error) {
      console.error("❌ Visitor processing failed:", error);
      throw error;
    }

    return result;
  } finally {
    // Close the timer on failure too; a dangling label makes the next call
    // emit a "label already exists" warning.
    console.timeEnd(TOTAL_TIMER);
    const finalMemoryMB = Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
    console.log(`   Final memory usage: ${finalMemoryMB}MB`);
  }
}

/**
 * Auto-selects the best parsing strategy based on input size and options
 * - Small files (<=5MB): Original JsonVisitor
 * - Medium files (>5MB, <=50MB): OptimizedJsonVisitor
 * - Large files (>50MB): StreamingJsonVisitor
 *
 * `forceStreaming` and `forceOptimized` override the size-based choice.
 *
 * @param text -- contents of a `pbxproj` file
 * @param options -- strategy overrides, visitor tuning and progress reporting
 * @returns the parsed project (the selected visitor's `context`)
 * @throws whatever the CST parser or the visitor throws, after logging it
 */
export function parseOptimized(
  text: string,
  options: ParseOptions = {}
): Partial<XcodeProject> {
  const fileSizeMB = text.length / 1024 / 1024;
  return parseUsing(text, chooseStrategy(fileSizeMB, options), options);
}

/**
 * Parse with explicit strategy selection.
 *
 * The requested strategy is always honoured -- including `'original'` for
 * large inputs -- and takes precedence over any `forceStreaming` /
 * `forceOptimized` flags in `options`.
 *
 * @param text -- contents of a `pbxproj` file
 * @param strategy -- which visitor to use
 * @param options -- visitor tuning and progress reporting
 */
export function parseWithStrategy(
  text: string,
  strategy: 'original' | 'optimized' | 'streaming',
  options: ParseOptions = {}
): Partial<XcodeProject> {
  return parseUsing(text, strategy, options);
}
/**
 * Performance benchmark comparing all parsing strategies.
 *
 * Each strategy gets `warmupRuns` untimed runs followed by `testRuns` timed
 * runs. Failed runs are logged and excluded from the averages; a strategy
 * with no successful run reports `Infinity` for both metrics. Memory is the
 * `heapUsed` delta around a run, so it is only indicative (and can be
 * negative if a collection happens mid-run).
 *
 * @param text -- contents of a `pbxproj` file
 * @param options.warmupRuns -- untimed runs per strategy (default 1)
 * @param options.testRuns -- timed runs per strategy (default 3)
 * @returns average time (ms) and memory delta (MB) per strategy
 */
export async function benchmarkParsing(
  text: string,
  options: { warmupRuns?: number; testRuns?: number } = {}
): Promise<{
  original: { time: number; memory: number };
  optimized: { time: number; memory: number };
  streaming: { time: number; memory: number };
}> {
  type Strategy = "original" | "optimized" | "streaming";
  type Measurement = { time: number; memory: number };

  const { warmupRuns = 1, testRuns = 3 } = options;
  const fileSizeMB = text.length / 1024 / 1024;
  const strategies: readonly Strategy[] = ["original", "optimized", "streaming"];

  /** Arithmetic mean, or `Infinity` when there are no successful samples. */
  const average = (samples: readonly number[]): number =>
    samples.length > 0
      ? samples.reduce((sum, sample) => sum + sample, 0) / samples.length
      : Infinity;

  /** Runs the warmup and timed passes for one strategy. */
  const measure = (strategy: Strategy): Measurement => {
    console.log(`\n🔄 Testing ${strategy} strategy...`);

    // Warmup runs
    for (let i = 0; i < warmupRuns; i++) {
      try {
        parseWithStrategy(text, strategy);
        if (global.gc) global.gc(); // Force cleanup between runs
      } catch (error) {
        console.warn(`   Warmup ${i + 1} failed for ${strategy}:`, error);
      }
    }

    // Test runs
    const times: number[] = [];
    const memories: number[] = [];

    for (let i = 0; i < testRuns; i++) {
      if (global.gc) global.gc(); // Start with clean memory

      const startMemory = process.memoryUsage().heapUsed;
      const startTime = process.hrtime();

      try {
        parseWithStrategy(text, strategy, {
          progressCallback: undefined, // Disable progress for cleaner benchmarks
        });

        const [seconds, nanoseconds] = process.hrtime(startTime);
        const time = seconds * 1000 + nanoseconds / 1000000; // Convert to ms
        const endMemory = process.memoryUsage().heapUsed;
        const memoryIncrease = Math.round((endMemory - startMemory) / 1024 / 1024);

        times.push(time);
        memories.push(memoryIncrease);

        console.log(`   Run ${i + 1}: ${time.toFixed(2)}ms, +${memoryIncrease}MB`);
      } catch (error) {
        // Failed runs simply contribute no sample.
        console.error(`   Run ${i + 1} failed for ${strategy}:`, error);
      }
    }

    return { time: average(times), memory: average(memories) };
  };

  console.log(`🏁 Benchmarking parsing strategies (${fileSizeMB.toFixed(2)}MB file)`);
  console.log(`   Warmup runs: ${warmupRuns}, Test runs: ${testRuns}`);

  // Property initializers evaluate in source order, so strategies still run
  // one after another.
  const results: Record<Strategy, Measurement> = {
    original: measure("original"),
    optimized: measure("optimized"),
    streaming: measure("streaming"),
  };

  // Display summary
  console.log('\n📊 Benchmark Results Summary:');
  console.log('Strategy     | Avg Time (ms) | Avg Memory (MB)');
  console.log('-------------|---------------|----------------');

  const cell = (value: number): string =>
    (value === Infinity ? 'FAILED' : value.toFixed(2)).padStart(13);

  for (const strategy of strategies) {
    const { time, memory } = results[strategy];
    console.log(`${strategy.padEnd(12)} | ${cell(time)} | ${cell(memory)}`);
  }

  // Find best strategy (earlier strategy wins ties)
  const succeeded = strategies.filter((strategy) => results[strategy].time !== Infinity);
  if (succeeded.length > 0) {
    const fastest = succeeded.reduce((best, current) =>
      results[current].time < results[best].time ? current : best
    );
    const leanest = succeeded.reduce((best, current) =>
      results[current].memory < results[best].memory ? current : best
    );

    console.log(`\n🏆 Fastest: ${fastest} (${results[fastest].time.toFixed(2)}ms)`);
    console.log(
      `🏆 Most memory efficient: ${leanest} (+${results[leanest].memory.toFixed(2)}MB)`
    );
  }

  return results;
}
/**
 * Memory-efficient analysis that doesn't build the full object tree.
 * Intended for very large files where only metadata/statistics are needed.
 *
 * @param text -- contents of a `pbxproj` file
 * @param options.progressCallback -- forwarded to `MemoryEfficientVisitor`
 * @returns the visitor's analysis plus `parsingTimeMs`, the elapsed time for
 *   CST generation and the visit, measured with the monotonic high-resolution
 *   clock (so it is fractional and never rounds down to 0)
 * @throws whatever the CST parser or the visitor throws
 */
export function analyzeProjectMetadata(
  text: string,
  options: { progressCallback?: (processed: number, total: number, stage: string) => void } = {}
): {
  archiveVersion?: number;
  objectVersion?: number;
  rootObject?: string;
  objectCount: number;
  fileTypeCount: Record<string, number>;
  objectTypeCount: Record<string, number>;
  targetNames: string[];
  estimatedMemoryMB: number;
  topFileTypes: [string, number][];
  topObjectTypes: [string, number][];
  parsingTimeMs: number;
} {
  const fileSizeMB = text.length / 1024 / 1024;
  const { progressCallback } = options;
  const cstTimer = '🔍 CST generation';

  console.log(`🔬 Memory-efficient analysis of ${fileSizeMB.toFixed(2)}MB file`);
  console.log(`   Strategy: Extract metadata only, avoid building full object tree`);

  const startedAt = process.hrtime();

  // Parse CST; close the timer even if the parser throws.
  console.time(cstTimer);
  let cst;
  try {
    cst = parser.parse(text);
  } finally {
    console.timeEnd(cstTimer);
  }

  // Use memory-efficient visitor
  const visitor = new MemoryEfficientVisitor({ progressCallback });
  visitor.visit(cst);

  const analysisResult = visitor.getAnalysis();
  const [seconds, nanoseconds] = process.hrtime(startedAt);

  return {
    ...analysisResult,
    parsingTimeMs: seconds * 1000 + nanoseconds / 1000000,
  };
}

// Re-export for backward compatibility
// NOTE(review): "./index" also re-exports from this module, so the two files
// import each other -- confirm the bundler/transpile target tolerates the cycle.
export { parse } from "./index";

/** Serializes a project back to `pbxproj` text via `Writer`. */
export function build(project: Partial<XcodeProject>): string {
  return new Writer(project).getResults();
}
/**
 * Tests for optimized parser functionality
 *
 * These tests verify that the parser optimizations provide:
 * - Equivalent functionality to the original parser
 * - Better performance for large files
 * - Memory-efficient analysis capabilities
 * - Automatic strategy selection based on file size
 */

import fs from 'fs';
import path from 'path';
import {
  parseOptimized,
  parseWithStrategy,
  analyzeProjectMetadata
} from '../OptimizedParser';
import { parse as parseOriginal } from '../index';
import { XcodeProject } from '../../api/XcodeProject';

const FIXTURES_DIR = path.join(__dirname, 'fixtures');
const SMALL_FIXTURE = path.join(FIXTURES_DIR, 'project.pbxproj');
const MEDIUM_FIXTURE = path.join(FIXTURES_DIR, 'AFNetworking.pbxproj');

/** Reads a fixture as UTF-8 text. */
const readFixture = (fixture: string): string => fs.readFileSync(fixture, 'utf8');

/** Number of entries in a parsed project's `objects` map. */
const countObjects = (project: { objects?: object }): number =>
  Object.keys(project.objects || {}).length;

describe('OptimizedParser', () => {
  describe('parseOptimized', () => {
    it('should parse small files correctly', () => {
      const result = parseOptimized(readFixture(SMALL_FIXTURE));

      expect(result).toBeDefined();
      expect(result.objects).toBeDefined();
      expect(result.rootObject).toBeDefined();
      expect(countObjects(result)).toBeGreaterThan(0);
    });

    it('should produce equivalent results to original parser', () => {
      const content = readFixture(SMALL_FIXTURE);

      const optimizedResult = parseOptimized(content);
      const originalResult = parseOriginal(content);

      // Core structure should match
      expect(optimizedResult.archiveVersion).toBe(originalResult.archiveVersion);
      expect(optimizedResult.objectVersion).toBe(originalResult.objectVersion);
      expect(optimizedResult.rootObject).toBe(originalResult.rootObject);
      expect(Object.keys(optimizedResult.objects || {})).toEqual(
        Object.keys(originalResult.objects || {})
      );
    });

    it('should handle medium files with AFNetworking fixture', () => {
      const result = parseOptimized(readFixture(MEDIUM_FIXTURE));

      expect(result).toBeDefined();
      expect(result.objects).toBeDefined();
      expect(countObjects(result)).toBeGreaterThan(100); // AFNetworking has many objects
    });

    it('should respect forceOptimized option', () => {
      const result = parseOptimized(readFixture(SMALL_FIXTURE), { forceOptimized: true });

      expect(result).toBeDefined();
      expect(result.objects).toBeDefined();
    });

    it('should respect forceStreaming option', () => {
      const result = parseOptimized(readFixture(SMALL_FIXTURE), { forceStreaming: true });

      expect(result).toBeDefined();
      expect(result.objects).toBeDefined();
    });
  });

  describe('parseWithStrategy', () => {
    it('should parse with original strategy', () => {
      const content = readFixture(SMALL_FIXTURE);

      const result = parseWithStrategy(content, 'original');
      const originalResult = parseOriginal(content);

      expect(result).toEqual(originalResult);
    });

    it('should parse with optimized strategy', () => {
      const result = parseWithStrategy(readFixture(SMALL_FIXTURE), 'optimized');

      expect(result).toBeDefined();
      expect(result.objects).toBeDefined();
    });

    it('should parse with streaming strategy', () => {
      const result = parseWithStrategy(readFixture(SMALL_FIXTURE), 'streaming');

      expect(result).toBeDefined();
      expect(result.objects).toBeDefined();
    });
  });

  describe('analyzeProjectMetadata', () => {
    it('should extract metadata without building full object tree', () => {
      const metadata = analyzeProjectMetadata(readFixture(MEDIUM_FIXTURE));

      expect(metadata.objectCount).toBeGreaterThan(0);
      expect(metadata.fileTypeCount).toBeDefined();
      expect(metadata.objectTypeCount).toBeDefined();
      expect(metadata.topFileTypes).toBeInstanceOf(Array);
      expect(metadata.topObjectTypes).toBeInstanceOf(Array);
      // A fast run can legitimately measure 0ms on a coarse clock.
      expect(metadata.parsingTimeMs).toBeGreaterThanOrEqual(0);
    });

    it('should identify object types', () => {
      const metadata = analyzeProjectMetadata(readFixture(SMALL_FIXTURE));

      const objectTypes = Object.keys(metadata.objectTypeCount);
      expect(objectTypes.length).toBeGreaterThan(0);

      // Should find some known Xcode object types
      const knownTypes = ['PBXProject', 'PBXFileReference', 'PBXGroup', 'PBXNativeTarget', 'PBXBuildFile'];
      const hasKnownType = objectTypes.some(type => knownTypes.includes(type));
      expect(hasKnownType).toBe(true);
    });

    it('should identify file types when present', () => {
      const metadata = analyzeProjectMetadata(readFixture(MEDIUM_FIXTURE));

      const fileTypes = Object.keys(metadata.fileTypeCount);
      // AFNetworking should have various file types
      expect(fileTypes.length).toBeGreaterThan(0);
    });

    it('should extract target names', () => {
      const metadata = analyzeProjectMetadata(readFixture(MEDIUM_FIXTURE));

      expect(metadata.targetNames).toBeInstanceOf(Array);
      // AFNetworking should have at least one target
      expect(metadata.targetNames.length).toBeGreaterThan(0);
    });

    it('should provide performance timing', () => {
      const metadata = analyzeProjectMetadata(readFixture(SMALL_FIXTURE));

      expect(Number.isFinite(metadata.parsingTimeMs)).toBe(true);
      expect(metadata.parsingTimeMs).toBeGreaterThanOrEqual(0);
      expect(metadata.parsingTimeMs).toBeLessThan(10000); // Should be under 10 seconds
    });
  });

  describe('Performance Comparison', () => {
    it('should handle parsing without crashes', () => {
      const content = readFixture(MEDIUM_FIXTURE);

      // All strategies should work without crashing
      expect(() => parseWithStrategy(content, 'original')).not.toThrow();
      expect(() => parseWithStrategy(content, 'optimized')).not.toThrow();
      expect(() => parseWithStrategy(content, 'streaming')).not.toThrow();
    });

    it('should produce consistent object counts across strategies', () => {
      const content = readFixture(SMALL_FIXTURE);

      const originalCount = countObjects(parseWithStrategy(content, 'original'));
      const optimizedCount = countObjects(parseWithStrategy(content, 'optimized'));
      const streamingCount = countObjects(parseWithStrategy(content, 'streaming'));

      expect(optimizedCount).toBe(originalCount);
      expect(streamingCount).toBe(originalCount);
    });
  });

  describe('Error Handling', () => {
    it('should handle malformed input gracefully', () => {
      const malformedContent = '{ invalid pbxproj content }';

      expect(() => parseOptimized(malformedContent)).toThrow();
    });

    it('should handle empty input', () => {
      expect(() => parseOptimized('')).toThrow();
    });
  });

  describe('Configuration Options', () => {
    it('should accept custom configuration without crashing', () => {
      const content = readFixture(SMALL_FIXTURE);

      expect(() => parseOptimized(content, {
        chunkSize: 100,
        maxMemoryMB: 256,
        estimateObjects: false
      })).not.toThrow();
    });
  });

  describe('Integration with existing API', () => {
    it('should work with XcodeProject.openLazy', () => {
      const project = XcodeProject.openLazy(SMALL_FIXTURE, {
        skipFullInflation: true
      });

      expect(project).toBeDefined();
      expect(project.rootObject).toBeDefined();

      const stats = project.getQuickStats();
      expect(stats.totalObjects).toBeGreaterThan(0);
    });
  });
});
import { BaseVisitor } from "../parser/parser";
import { XcodeProject } from "../types";

/** Scalar properties collected for the dictionary currently being visited. */
type ObjectFrame = Record<string, string | number>;

/** Approximate per-object cost used for the (very rough) memory estimate. */
const ESTIMATED_BYTES_PER_OBJECT = 100;

/**
 * Ultra-memory efficient visitor that doesn't build the complete object in memory.
 * Only extracts essential metadata and counts, avoiding full object construction.
 *
 * Every dictionary gets its own short-lived frame of scalar properties, kept
 * on a stack so that a nested dictionary (e.g. `buildSettings`) cannot wipe
 * the properties of the object that contains it. Nested dictionaries and
 * arrays evaluate to `null` and are never stored.
 */
export class MemoryEfficientVisitor extends BaseVisitor {
  // Instead of building full context, just extract key metadata
  metadata: {
    archiveVersion?: number;
    objectVersion?: number;
    rootObject?: string;
    /** Number of dictionaries that carry an `isa` key. */
    objectCount: number;
    fileTypeCount: Record<string, number>;
    objectTypeCount: Record<string, number>;
    targetNames: string[];
    estimatedMemoryMB: number;
  } = {
    objectCount: 0,
    fileTypeCount: {},
    objectTypeCount: {},
    targetNames: [],
    estimatedMemoryMB: 0
  };

  /** One frame per dictionary currently being visited, innermost last. */
  private readonly frames: ObjectFrame[] = [];
  private progressCallback?: (processed: number, total: number, stage: string) => void;

  constructor(options: {
    progressCallback?: (processed: number, total: number, stage: string) => void;
  } = {}) {
    super();
    // Validate before the callback is attached, matching the original order.
    this.validateVisitor();
    this.progressCallback = options.progressCallback;
  }

  /** Entry rule: walks the document and finalizes the memory estimate. */
  head(ctx: any) {
    console.log('🔬 Memory-efficient analysis starting...');
    console.time('🧠 Memory-efficient parsing');

    try {
      if (ctx.array) {
        this.visit(ctx.array);
      } else if (ctx.object) {
        this.visit(ctx.object);
      }
    } finally {
      console.timeEnd('🧠 Memory-efficient parsing');
    }

    // Estimate memory usage (very rough), computed once from the final count
    // so that small inputs are not stuck at a stale value.
    this.metadata.estimatedMemoryMB = Math.round(
      (this.metadata.objectCount * ESTIMATED_BYTES_PER_OBJECT) / 1024 / 1024
    );

    console.log(`📊 Analysis complete:`);
    console.log(`   Objects processed: ${this.metadata.objectCount.toLocaleString()}`);
    console.log(`   Memory footprint: ${this.metadata.estimatedMemoryMB}MB (estimated)`);
    console.log(`   Object types: ${Object.keys(this.metadata.objectTypeCount).length}`);
    console.log(`   File types: ${Object.keys(this.metadata.fileTypeCount).length}`);
  }

  // Override to avoid building full objects
  object(ctx: any) {
    if (!ctx.objectItem) {
      return null; // Return null instead of empty object
    }

    const frame: ObjectFrame = {};
    this.frames.push(frame);
    try {
      // Process items to extract metadata only
      for (const item of ctx.objectItem) {
        this.visit(item);
      }
    } finally {
      this.frames.pop();
    }

    // The outermost dictionary is the document itself.
    this.analyzeCurrentObject(frame, this.frames.length === 0);

    // Don't return the full object - save memory
    return null;
  }

  // Override to avoid building arrays
  array(ctx: any) {
    if (!ctx.value) {
      return null;
    }

    // Just process for analysis without building array
    for (const value of ctx.value) {
      this.visit(value);
    }

    return null; // Don't build the actual array
  }

  // Track scalar object properties for analysis
  objectItem(ctx: any) {
    const key = this.visit(ctx.identifier);
    const value = this.visit(ctx.value);

    // Store in the innermost frame; `!= null` keeps falsy keys such as 0 or "".
    const frame = this.frames[this.frames.length - 1];
    if (frame && key != null && value != null) {
      frame[key] = value;
    }

    return null; // Don't build the actual object item
  }

  /**
   * Folds one finished dictionary into `metadata`.
   *
   * @param frame -- scalar properties collected for the dictionary
   * @param isDocumentRoot -- true for the outermost dictionary, which is where
   *   `archiveVersion`, `objectVersion` and `rootObject` are read from
   */
  private analyzeCurrentObject(frame: ObjectFrame, isDocumentRoot: boolean) {
    if (isDocumentRoot) {
      if (typeof frame.archiveVersion === 'number') {
        this.metadata.archiveVersion = frame.archiveVersion;
      }
      if (typeof frame.objectVersion === 'number') {
        this.metadata.objectVersion = frame.objectVersion;
      }
      if (frame.rootObject != null) {
        this.metadata.rootObject = String(frame.rootObject);
      }
    }

    const { isa, name, lastKnownFileType, explicitFileType } = frame;
    if (typeof isa !== 'string') {
      return; // Plain dictionary (document root, `objects` map, build settings, ...)
    }

    this.metadata.objectCount++;
    this.metadata.objectTypeCount[isa] = (this.metadata.objectTypeCount[isa] || 0) + 1;

    // Extract special metadata
    switch (isa) {
      case 'PBXNativeTarget':
      case 'PBXAggregateTarget':
      case 'PBXLegacyTarget':
        if (name && typeof name === 'string') {
          this.metadata.targetNames.push(name);
        }
        break;

      case 'PBXFileReference': {
        const fileType = String(lastKnownFileType || explicitFileType || 'unknown');
        this.metadata.fileTypeCount[fileType] = (this.metadata.fileTypeCount[fileType] || 0) + 1;
        break;
      }
    }

    // Progress reporting; the total is not known up front, so the running
    // count is passed for both arguments.
    if (this.metadata.objectCount % 1000 === 0) {
      this.progressCallback?.(
        this.metadata.objectCount,
        this.metadata.objectCount,
        'analyzing objects'
      );
    }
  }

  // Optimized identifier processing
  identifier(ctx: any) {
    if (ctx.QuotedString) {
      return ctx.QuotedString[0].payload ?? ctx.QuotedString[0].image;
    } else if (ctx.StringLiteral) {
      const literal = ctx.StringLiteral[0].payload ?? ctx.StringLiteral[0].image;
      return parseType(literal);
    }
    throw new Error("unhandled identifier: " + JSON.stringify(ctx));
  }

  // Optimized value processing
  value(ctx: any) {
    if (ctx.identifier) {
      return this.visit(ctx.identifier);
    } else if (ctx.DataLiteral) {
      return ctx.DataLiteral[0].payload ?? ctx.DataLiteral[0].image;
    } else if (ctx.object) {
      return this.visit(ctx.object);
    } else if (ctx.array) {
      return this.visit(ctx.array);
    }
    throw new Error("unhandled value: " + JSON.stringify(ctx));
  }

  /** Analysis results (metadata plus top-10 rankings) without the full parsed object. */
  getAnalysis() {
    const topTen = (counts: Record<string, number>): [string, number][] =>
      Object.entries(counts)
        .sort(([, a], [, b]) => b - a)
        .slice(0, 10);

    return {
      ...this.metadata,
      topFileTypes: topTen(this.metadata.fileTypeCount),
      topObjectTypes: topTen(this.metadata.objectTypeCount)
    };
  }

  /** Minimal XcodeProject structure for compatibility; `objects` is always empty. */
  getMinimalProject(): Partial<XcodeProject> {
    return {
      archiveVersion: this.metadata.archiveVersion ?? 1,
      objectVersion: this.metadata.objectVersion ?? 55,
      objects: {}, // Empty - we didn't build the full object tree
      rootObject: this.metadata.rootObject ?? '',
      classes: {}
    };
  }
}
/** Matches literals made up solely of ASCII decimal digits. */
const UNSIGNED_INTEGER_PATTERN = /^\d+$/;

/**
 * Convert an unquoted literal into a number when the conversion is lossless.
 *
 * Only unsigned decimal integers are converted; every other literal is
 * returned unchanged. Two kinds of digit-only literals are deliberately kept
 * as strings, because `parseInt` would silently change what they say:
 * - literals with a leading zero (e.g. `0755`), whose zeros would be dropped;
 * - literals beyond `Number.MAX_SAFE_INTEGER` (e.g. a 24-character identifier
 *   that happens to contain only digits), which would be rounded.
 *
 * @param literal - Raw token text.
 * @returns The integer value, or `literal` itself when it is not a safely
 *   representable unsigned integer.
 */
function parseType(literal: string): number | string {
  // Anything that is not a pure digit run (including '') is a plain string.
  if (!UNSIGNED_INTEGER_PATTERN.test(literal)) {
    return literal;
  }

  // "0" is a number, but "007" must keep its leading zeros.
  if (literal.length > 1 && literal.startsWith('0')) {
    return literal;
  }

  // Refuse conversions that would not round-trip back to the same digits.
  const parsed = parseInt(literal, 10);
  return Number.isSafeInteger(parsed) ? parsed : literal;
}
b/src/json/visitor/OptimizedJsonVisitor.ts new file mode 100644 index 0000000..0818a86 --- /dev/null +++ b/src/json/visitor/OptimizedJsonVisitor.ts @@ -0,0 +1,225 @@ +import { BaseVisitor } from "../parser/parser"; +import { XcodeProject } from "../types"; + +/** + * Performance-optimized JsonVisitor for large pbxproj files + * Key optimizations: + * - Eliminates expensive object spread operations + * - Adds progress reporting for large objects + * - Pre-allocates objects when possible + * - Reduces memory allocations + */ +export class OptimizedJsonVisitor extends BaseVisitor { + context: Partial = {}; + + // Progress tracking for large files + private itemsProcessed = 0; + private totalEstimate = 0; + private lastProgressReport = Date.now(); + private progressCallback?: (processed: number, total: number, stage: string) => void; + + // Performance counters + private objectCount = 0; + private arrayCount = 0; + + constructor(options: { + progressCallback?: (processed: number, total: number, stage: string) => void; + estimatedObjects?: number; + } = {}) { + super(); + this.validateVisitor(); + this.progressCallback = options.progressCallback; + this.totalEstimate = options.estimatedObjects || 0; + } + + private reportProgress(stage: string) { + const now = Date.now(); + // Report every 100ms or every 1000 items to avoid overwhelming the callback + if (now - this.lastProgressReport > 100 || this.itemsProcessed % 1000 === 0) { + this.progressCallback?.(this.itemsProcessed, this.totalEstimate, stage); + this.lastProgressReport = now; + } + } + + head(ctx: any) { + console.time('šŸ” JSON visitor processing'); + if (ctx.array) { + this.context = this.visit(ctx.array); + } else if (ctx.object) { + this.context = this.visit(ctx.object); + } + console.timeEnd('šŸ” JSON visitor processing'); + console.log(` Objects processed: ${this.objectCount.toLocaleString()}`); + console.log(` Arrays processed: ${this.arrayCount.toLocaleString()}`); + } + + // MAJOR OPTIMIZATION: 
Replace expensive object spread with direct assignment + object(ctx: any) { + if (!ctx.objectItem) { + return {}; + } + + this.objectCount++; + this.itemsProcessed++; + + // Pre-allocate object instead of using spread operator + const result: any = {}; + const items = ctx.objectItem; + + // Direct property assignment is much faster than object spread + for (let i = 0; i < items.length; i++) { + const item = this.visit(items[i]); + // Object.assign or direct assignment is faster than spread + if (item && typeof item === 'object') { + const keys = Object.keys(item); + for (let j = 0; j < keys.length; j++) { + const key = keys[j]; + result[key] = item[key]; + } + } + } + + // Progress reporting for large objects + if (this.objectCount % 500 === 0) { + this.reportProgress('parsing objects'); + } + + return result; + } + + // Optimized array processing + array(ctx: any) { + if (!ctx.value) { + return []; + } + + this.arrayCount++; + this.itemsProcessed++; + + const values = ctx.value; + // Pre-allocate array with known length + const result = new Array(values.length); + + // Use for loop instead of map for better performance + for (let i = 0; i < values.length; i++) { + result[i] = this.visit(values[i]); + } + + if (this.arrayCount % 100 === 0) { + this.reportProgress('parsing arrays'); + } + + return result; + } + + // Optimized object item processing + objectItem(ctx: any) { + const key = this.visit(ctx.identifier); + const value = this.visit(ctx.value); + + // Create object directly instead of using computed property + const result: any = {}; + result[key] = value; + return result; + } + + // Optimized identifier processing with caching for common values + private static readonly COMMON_IDENTIFIERS: Record = { + 'isa': 'isa', + 'children': 'children', + 'name': 'name', + 'path': 'path', + 'sourceTree': 'sourceTree', + 'fileRef': 'fileRef', + 'files': 'files', + 'buildPhases': 'buildPhases', + 'buildSettings': 'buildSettings', + 'targets': 'targets', + 'objects': 
// Number-shaped literals: an optional sign followed by digits with an optional
// fraction ("12", "12.", "12.5"), or a bare fraction (".5").
const numberRegex = /^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)$/;
// Digit runs that start with a zero, e.g. "0755".
const octalRegex = /^0\d+$/;

/**
 * Convert an unquoted literal into a number when the conversion is lossless.
 *
 * Literals are returned unchanged (as strings) when they
 * - are not number-shaped at all,
 * - are digit runs with a leading zero (`0755`),
 * - contain a decimal point and end in `0` (`1.0`, `2.50`), so the trailing
 *   zero survives, or
 * - denote an integer outside the safe integer range, which `parseFloat`
 *   would round to a different value (e.g. an all-digit 24-character ID).
 *
 * The previous separate integer branch was unreachable (every integer already
 * matches `numberRegex`), so integers are handled by the single numeric path.
 *
 * @param literal - Raw token text.
 * @returns The numeric value, or `literal` itself when it must stay a string.
 */
function parseType(literal: string): number | string {
  // Cheap reject before any regex runs: a number-shaped literal can only
  // start with a digit, '+', '-' or '.'. charCodeAt(0) is NaN for '', which
  // fails every comparison below and is therefore rejected as well.
  const first = literal.charCodeAt(0);
  const startsLikeNumber =
    (first >= 48 && first <= 57) || first === 43 || first === 45 || first === 46;
  if (!startsLikeNumber) {
    return literal;
  }

  if (octalRegex.test(literal) || !numberRegex.test(literal)) {
    return literal;
  }

  // Preserve a trailing zero after the decimal point.
  if (literal.includes('.') && literal.endsWith('0')) {
    return literal;
  }

  const num = parseFloat(literal);
  if (!Number.isFinite(num)) {
    return literal;
  }
  // An integer-valued result outside the safe range means digits were lost.
  if (Number.isInteger(num) && !Number.isSafeInteger(num)) {
    return literal;
  }
  return num;
}
process.memoryUsage(); + return Math.round(usage.heapUsed / 1024 / 1024); + } + + private checkMemoryPressure(): boolean { + const currentMemory = this.getMemoryUsageMB(); + const memoryIncrease = currentMemory - this.startMemory; + + if (memoryIncrease > this.maxMemoryMB) { + console.log(`āš ļø Memory pressure detected: ${currentMemory}MB (+${memoryIncrease}MB)`); + return true; + } + + return false; + } + + private reportProgress() { + const currentMemory = this.getMemoryUsageMB(); + this.progressCallback?.(this.itemsProcessed, this.totalEstimate, currentMemory); + } + + head(ctx: any) { + console.time('🌊 Streaming JSON parsing'); + + if (ctx.array) { + this.context = this.visit(ctx.array); + } else if (ctx.object) { + this.context = this.visit(ctx.object); + } + + // Process any remaining deferred objects + this.flushDeferredObjects(); + + console.timeEnd('🌊 Streaming JSON parsing'); + console.log(` Chunks processed: ${this.processedChunks}`); + console.log(` Items processed: ${this.itemsProcessed.toLocaleString()}`); + console.log(` Final memory: ${this.getMemoryUsageMB()}MB`); + } + + // Streaming object processing with memory management + object(ctx: any) { + if (!ctx.objectItem) { + return {}; + } + + const items = ctx.objectItem; + + // For very large objects, process in streaming fashion + if (items.length > this.chunkSize) { + return this.processLargeObject(items); + } + + // Regular processing for smaller objects + return this.processRegularObject(items); + } + + private processLargeObject(items: any[]): any { + console.log(`šŸ”„ Processing large object with ${items.length.toLocaleString()} items`); + + const result: any = {}; + let processedItems = 0; + + // Process items in chunks + for (let i = 0; i < items.length; i += this.chunkSize) { + const chunk = items.slice(i, i + this.chunkSize); + + // Process chunk + for (const item of chunk) { + const processedItem = this.visit(item); + if (processedItem && typeof processedItem === 'object') { + 
Object.assign(result, processedItem); + } + processedItems++; + this.itemsProcessed++; + } + + this.processedChunks++; + + // Memory pressure check + if (this.checkMemoryPressure()) { + // Force garbage collection hint + if (global.gc) { + global.gc(); + console.log(` Triggered garbage collection at chunk ${this.processedChunks}`); + } + } + + // Progress reporting + if (this.processedChunks % 10 === 0) { + console.log(` Processed ${processedItems.toLocaleString()}/${items.length.toLocaleString()} items (${((processedItems/items.length)*100).toFixed(1)}%)`); + this.reportProgress(); + } + } + + return result; + } + + private processRegularObject(items: any[]): any { + const result: any = {}; + + for (const item of items) { + const processedItem = this.visit(item); + if (processedItem && typeof processedItem === 'object') { + Object.assign(result, processedItem); + } + } + + this.itemsProcessed += items.length; + return result; + } + + // Optimized array processing with streaming + array(ctx: any) { + if (!ctx.value) { + return []; + } + + const values = ctx.value; + + // For large arrays, use streaming approach + if (values.length > this.chunkSize) { + return this.processLargeArray(values); + } + + // Regular processing for smaller arrays + const result = new Array(values.length); + for (let i = 0; i < values.length; i++) { + result[i] = this.visit(values[i]); + } + + this.itemsProcessed += values.length; + return result; + } + + private processLargeArray(values: any[]): any[] { + console.log(`šŸ”„ Processing large array with ${values.length.toLocaleString()} items`); + + const result: any[] = []; + + for (let i = 0; i < values.length; i += this.chunkSize) { + const chunk = values.slice(i, i + this.chunkSize); + + // Process chunk + const processedChunk = chunk.map(value => this.visit(value)); + result.push(...processedChunk); + + this.itemsProcessed += chunk.length; + this.processedChunks++; + + // Memory and progress management + if (this.processedChunks % 5 === 
0) { + this.reportProgress(); + + if (this.checkMemoryPressure() && global.gc) { + global.gc(); + } + } + } + + return result; + } + + // Deferred object processing for memory management + private addDeferredObject(obj: any) { + this.deferredObjects.push(obj); + + // Process deferred objects in batches + if (this.deferredObjects.length >= this.chunkSize) { + this.flushDeferredObjects(); + } + } + + private flushDeferredObjects() { + if (this.deferredObjects.length === 0) return; + + console.log(`šŸ”„ Processing ${this.deferredObjects.length} deferred objects`); + + // Process all deferred objects + for (const obj of this.deferredObjects) { + // Process deferred object + this.visit(obj); + } + + // Clear deferred objects + this.deferredObjects = []; + + // Trigger cleanup + if (global.gc) { + global.gc(); + } + } + + // Same optimized methods as OptimizedJsonVisitor + objectItem(ctx: any) { + const key = this.visit(ctx.identifier); + const value = this.visit(ctx.value); + + const result: any = {}; + result[key] = value; + return result; + } + + identifier(ctx: any) { + if (ctx.QuotedString) { + return ctx.QuotedString[0].payload ?? ctx.QuotedString[0].image; + } else if (ctx.StringLiteral) { + const literal = ctx.StringLiteral[0].payload ?? ctx.StringLiteral[0].image; + return parseType(literal); + } + throw new Error("unhandled identifier: " + JSON.stringify(ctx)); + } + + value(ctx: any) { + if (ctx.identifier) { + return this.visit(ctx.identifier); + } else if (ctx.DataLiteral) { + return ctx.DataLiteral[0].payload ?? 
/** Digit runs that start with a zero, e.g. "0755". */
const LEADING_ZERO_PATTERN = /^0\d+$/;
/**
 * Number-shaped literals: an optional sign followed by digits with an optional
 * fraction ("12", "12.", "12.5"), or a bare fraction (".5").
 */
const NUMBER_PATTERN = /^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)$/;

/**
 * Convert an unquoted literal into a number when the conversion is lossless.
 *
 * Literals are returned unchanged (as strings) when they
 * - are not number-shaped at all,
 * - are digit runs with a leading zero (`0755`),
 * - contain a decimal point and end in `0` (`1.0`), so the trailing zero
 *   survives, or
 * - denote an integer outside the safe integer range, which `parseFloat`
 *   would round to a different value.
 *
 * The trailing-zero rule applies only to literals that contain a decimal
 * point. Applying it to every number-shaped literal kept ordinary integers
 * such as `50` or `1430` as strings.
 *
 * @param literal - Raw token text.
 * @returns The numeric value, or `literal` itself when it must stay a string.
 */
function parseType(literal: string): number | string {
  if (LEADING_ZERO_PATTERN.test(literal) || !NUMBER_PATTERN.test(literal)) {
    return literal;
  }

  // Preserve a trailing zero only when it follows a decimal point.
  const hasDecimalPoint = literal.includes('.');
  if (hasDecimalPoint && literal.endsWith('0')) {
    return literal;
  }

  const value = parseFloat(literal);
  if (!Number.isFinite(value)) {
    return literal;
  }
  // An integer-valued result outside the safe range means digits were lost.
  if (Number.isInteger(value) && !Number.isSafeInteger(value)) {
    return literal;
  }
  return value;
}
'../JsonVisitor'; +import { OptimizedJsonVisitor } from '../OptimizedJsonVisitor'; +import { StreamingJsonVisitor } from '../StreamingJsonVisitor'; +import { MemoryEfficientVisitor } from '../MemoryEfficientVisitor'; + +const FIXTURES_DIR = path.join(__dirname, '../../__tests__/fixtures'); +const SMALL_FIXTURE = path.join(FIXTURES_DIR, 'project.pbxproj'); +const MEDIUM_FIXTURE = path.join(FIXTURES_DIR, 'AFNetworking.pbxproj'); + +describe('Optimized Visitors', () => { + describe('OptimizedJsonVisitor', () => { + it('should produce equivalent results to original JsonVisitor', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 'utf8'); + const cst = parseCST(content); + + // Parse with original visitor + const originalVisitor = new JsonVisitor(); + originalVisitor.visit(cst); + const originalResult = originalVisitor.context; + + // Parse with optimized visitor + const optimizedVisitor = new OptimizedJsonVisitor(); + optimizedVisitor.visit(cst); + const optimizedResult = optimizedVisitor.context; + + // Core properties should match + expect(optimizedResult.archiveVersion).toBe(originalResult.archiveVersion); + expect(optimizedResult.objectVersion).toBe(originalResult.objectVersion); + expect(optimizedResult.rootObject).toBe(originalResult.rootObject); + expect(Object.keys(optimizedResult.objects || {})).toEqual( + Object.keys(originalResult.objects || {}) + ); + }); + + it('should provide processing statistics', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 'utf8'); + const cst = parseCST(content); + + const visitor = new OptimizedJsonVisitor(); + visitor.visit(cst); + + const stats = visitor.getStats(); + expect(stats).toBeDefined(); + expect(stats.objectCount).toBeGreaterThan(0); + expect(stats.arrayCount).toBeGreaterThanOrEqual(0); + expect(stats.itemsProcessed).toBeGreaterThan(0); + }); + }); + + describe('StreamingJsonVisitor', () => { + it('should parse files correctly with streaming', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 
'utf8'); + const cst = parseCST(content); + + const visitor = new StreamingJsonVisitor({ + chunkSize: 10 // Small chunks to test chunking logic + }); + visitor.visit(cst); + + const result = visitor.context; + expect(result).toBeDefined(); + expect(result.objects).toBeDefined(); + expect(Object.keys(result.objects || {}).length).toBeGreaterThan(0); + }); + + it('should provide streaming statistics', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 'utf8'); + const cst = parseCST(content); + + const visitor = new StreamingJsonVisitor({ + chunkSize: 50, + maxMemoryMB: 100 + }); + visitor.visit(cst); + + const stats = visitor.getStats(); + expect(stats).toBeDefined(); + expect(stats.itemsProcessed).toBeGreaterThan(0); + expect(stats.currentMemoryMB).toBeGreaterThan(0); + }); + + it('should handle custom configuration', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 'utf8'); + const cst = parseCST(content); + + const visitor = new StreamingJsonVisitor({ + chunkSize: 5, + maxMemoryMB: 50 + }); + + expect(() => visitor.visit(cst)).not.toThrow(); + }); + }); + + describe('MemoryEfficientVisitor', () => { + it('should extract metadata without full object construction', () => { + const content = fs.readFileSync(MEDIUM_FIXTURE, 'utf8'); + const cst = parseCST(content); + + const visitor = new MemoryEfficientVisitor(); + visitor.visit(cst); + + const analysis = visitor.getAnalysis(); + + expect(analysis.objectCount).toBeGreaterThan(0); + expect(analysis.fileTypeCount).toBeDefined(); + expect(analysis.objectTypeCount).toBeDefined(); + expect(analysis.topFileTypes).toBeInstanceOf(Array); + expect(analysis.topObjectTypes).toBeInstanceOf(Array); + }); + + it('should extract target names when present', () => { + const content = fs.readFileSync(MEDIUM_FIXTURE, 'utf8'); + const cst = parseCST(content); + + const visitor = new MemoryEfficientVisitor(); + visitor.visit(cst); + + const analysis = visitor.getAnalysis(); + + 
expect(analysis.targetNames).toBeInstanceOf(Array); + expect(analysis.targetNames.length).toBeGreaterThan(0); + }); + + it('should return minimal project structure', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 'utf8'); + const cst = parseCST(content); + + const visitor = new MemoryEfficientVisitor(); + visitor.visit(cst); + + const minimalProject = visitor.getMinimalProject(); + + expect(minimalProject.archiveVersion).toBeDefined(); + expect(minimalProject.objectVersion).toBeDefined(); + expect(minimalProject.objects).toEqual({}); + expect(minimalProject.classes).toEqual({}); + }); + }); + + describe('Cross-Visitor Consistency', () => { + it('all visitors should extract same object count from same input', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 'utf8'); + const cst = parseCST(content); + + // Original visitor + const originalVisitor = new JsonVisitor(); + originalVisitor.visit(cst); + const originalCount = Object.keys(originalVisitor.context.objects || {}).length; + + // Optimized visitor + const optimizedVisitor = new OptimizedJsonVisitor(); + optimizedVisitor.visit(cst); + const optimizedCount = Object.keys(optimizedVisitor.context.objects || {}).length; + + // Streaming visitor + const streamingVisitor = new StreamingJsonVisitor(); + streamingVisitor.visit(cst); + const streamingCount = Object.keys(streamingVisitor.context.objects || {}).length; + + // Memory-efficient visitor + const memoryEfficientVisitor = new MemoryEfficientVisitor(); + memoryEfficientVisitor.visit(cst); + const memoryEfficientCount = memoryEfficientVisitor.getAnalysis().objectCount; + + // All should extract the same number of objects + expect(optimizedCount).toBe(originalCount); + expect(streamingCount).toBe(originalCount); + expect(memoryEfficientCount).toBe(originalCount); + }); + + it('optimized strategies should preserve data structure integrity', () => { + const content = fs.readFileSync(SMALL_FIXTURE, 'utf8'); + const cst = parseCST(content); + + // 
Parse with original + const originalVisitor = new JsonVisitor(); + originalVisitor.visit(cst); + const original = originalVisitor.context; + + // Parse with optimized + const optimizedVisitor = new OptimizedJsonVisitor(); + optimizedVisitor.visit(cst); + const optimized = optimizedVisitor.context; + + // Key structural elements should match + expect(optimized.archiveVersion).toBe(original.archiveVersion); + expect(optimized.objectVersion).toBe(original.objectVersion); + expect(optimized.rootObject).toBe(original.rootObject); + + // Verify a few sample objects maintain their isa property + const originalObjects = original.objects || {}; + const optimizedObjects = optimized.objects || {}; + + for (const [uuid, originalObj] of Object.entries(originalObjects).slice(0, 5)) { + const optimizedObj = optimizedObjects[uuid]; + expect(optimizedObj).toBeDefined(); + expect((optimizedObj as any).isa).toBe((originalObj as any).isa); + } + }); + }); + + describe('Error Handling', () => { + it('visitors should handle parsing errors gracefully', () => { + // Test with minimal valid content + const simpleContent = '{ archiveVersion = 1; objects = {}; rootObject = ""; }'; + + expect(() => { + const cst = parseCST(simpleContent); + const visitor = new OptimizedJsonVisitor(); + visitor.visit(cst); + }).not.toThrow(); + }); + }); + + describe('Real-world Compatibility', () => { + const availableFixtures = [ + 'project.pbxproj', + 'project-multitarget.pbxproj', + 'AFNetworking.pbxproj' + ].filter(fixture => { + const fixturePath = path.join(FIXTURES_DIR, fixture); + return fs.existsSync(fixturePath); + }); + + availableFixtures.forEach(fixture => { + it(`should handle ${fixture} with all strategies`, () => { + const fixturePath = path.join(FIXTURES_DIR, fixture); + const content = fs.readFileSync(fixturePath, 'utf8'); + const cst = parseCST(content); + + // Test main visitors + const visitors = [ + new JsonVisitor(), + new OptimizedJsonVisitor(), + new StreamingJsonVisitor({ chunkSize: 
20 }) + ]; + + const results = visitors.map(visitor => { + try { + visitor.visit(cst); + return { + success: true, + objectCount: Object.keys(visitor.context.objects || {}).length + }; + } catch (error) { + return { success: false, error }; + } + }); + + // All visitors should succeed + results.forEach((result, index) => { + expect(result.success).toBe(true); + }); + + // All should extract same number of objects + const objectCounts = results.map(r => r.objectCount); + const firstCount = objectCounts[0]; + objectCounts.forEach(count => { + expect(count).toBe(firstCount); + }); + }); + }); + }); +}); \ No newline at end of file