diff --git a/src/analyze/index.js b/src/analyze/index.js index 4a3247d..31dce09 100644 --- a/src/analyze/index.js +++ b/src/analyze/index.js @@ -8,32 +8,59 @@ const path = require('path'); const { parseCustomFunctionSignature } = require('./utils/customFunctionParser'); const { getAllFiles } = require('../utils/fileProcessor'); const { analyzeJsFile } = require('./javascript'); -const { analyzeTsFile } = require('./typescript'); +const { analyzeTsFiles } = require('./typescript'); const { analyzePythonFile } = require('./python'); const { analyzeRubyFile } = require('./ruby'); const { analyzeGoFile } = require('./go'); -async function analyzeDirectory(dirPath, customFunctions) { - const allEvents = {}; - - const customFunctionSignatures = (customFunctions && customFunctions?.length > 0) ? customFunctions.map(parseCustomFunctionSignature) : null; +/** + * Adds an event to the events collection, merging properties if event already exists + * @param {Object} allEvents - Collection of all events + * @param {Object} event - Event to add + * @param {string} baseDir - Base directory for relative path calculation + */ +function addEventToCollection(allEvents, event, baseDir) { + const relativeFilePath = path.relative(baseDir, event.filePath); + + const implementation = { + path: relativeFilePath, + line: event.line, + function: event.functionName, + destination: event.source + }; - const files = getAllFiles(dirPath); + if (!allEvents[event.eventName]) { + allEvents[event.eventName] = { + implementations: [implementation], + properties: event.properties, + }; + } else { + allEvents[event.eventName].implementations.push(implementation); + allEvents[event.eventName].properties = { + ...allEvents[event.eventName].properties, + ...event.properties, + }; + } +} +/** + * Processes all files that are not TypeScript files + * @param {Array} files - Array of file paths + * @param {Object} allEvents - Collection to add events to + * @param {string} baseDir - Base directory for relative paths + * @param {Array} customFunctionSignatures - Custom function signatures to detect + */ +async function processFiles(files, allEvents, baseDir, customFunctionSignatures) { for (const file of files) { let events = []; const isJsFile = /\.(jsx?)$/.test(file); - const isTsFile = /\.(tsx?)$/.test(file); const isPythonFile = /\.(py)$/.test(file); const isRubyFile = /\.(rb)$/.test(file); const isGoFile = /\.(go)$/.test(file); if (isJsFile) { events = analyzeJsFile(file, customFunctionSignatures); - } else if (isTsFile) { - // Pass null program so analyzeTsFile will create a per-file program using the file's nearest tsconfig.json - events = analyzeTsFile(file, null, customFunctionSignatures); } else if (isPythonFile) { events = await analyzePythonFile(file, customFunctionSignatures); } else if (isRubyFile) { @@ -41,36 +68,42 @@ async function analyzeDirectory(dirPath, customFunctions) { } else if (isGoFile) { events = await analyzeGoFile(file, customFunctionSignatures); } else { - continue; + continue; // Skip unsupported file types } - events.forEach((event) => { - const relativeFilePath = path.relative(dirPath, event.filePath); + events.forEach(event => addEventToCollection(allEvents, event, baseDir)); + } +} + +async function analyzeDirectory(dirPath, customFunctions) { + const allEvents = {}; + + const customFunctionSignatures = (customFunctions?.length > 0) + ? customFunctions.map(parseCustomFunctionSignature) + : null; + + const files = getAllFiles(dirPath); + + // Separate TypeScript files from others for optimized processing + const tsFiles = []; + const otherFiles = []; + + for (const file of files) { + const isTsFile = /\.(tsx?)$/.test(file); + if (isTsFile) { + tsFiles.push(file); + } else { + otherFiles.push(file); + } + } - if (!allEvents[event.eventName]) { - allEvents[event.eventName] = { - implementations: [{ - path: relativeFilePath, - line: event.line, - function: event.functionName, - destination: event.source - }], - properties: event.properties, - }; - } else { - allEvents[event.eventName].implementations.push({ - path: relativeFilePath, - line: event.line, - function: event.functionName, - destination: event.source - }); + // First process non-TypeScript files + await processFiles(otherFiles, allEvents, dirPath, customFunctionSignatures); - allEvents[event.eventName].properties = { - ...allEvents[event.eventName].properties, - ...event.properties, - }; - } - }); + // Process TypeScript files with optimized batch processing + if (tsFiles.length > 0) { + const tsEvents = analyzeTsFiles(tsFiles, customFunctionSignatures); + tsEvents.forEach(event => addEventToCollection(allEvents, event, dirPath)); } return allEvents; diff --git a/src/analyze/typescript/index.js b/src/analyze/typescript/index.js index 03de75d..4711f07 100644 --- a/src/analyze/typescript/index.js +++ b/src/analyze/typescript/index.js @@ -3,40 +3,130 @@ * @module analyze/typescript */ -const { getProgram, findTrackingEvents, ProgramError, SourceFileError } = require('./parser'); +const { getProgram, findTrackingEvents, ProgramError, SourceFileError, DEFAULT_COMPILER_OPTIONS } = require('./parser'); +const ts = require('typescript'); +const path = require('path'); + +/** + * Creates a standalone TypeScript program for a single file + * This is used as a fallback when the main program can't resolve the file + * @param {string} filePath - Path to the TypeScript file + * @returns {Object} TypeScript program + */ +function createStandaloneProgram(filePath) { + const compilerOptions = { + ...DEFAULT_COMPILER_OPTIONS, + // We intentionally allow module resolution here so that imported constants + // (e.g. event name strings defined in a sibling file) can be followed by the + // TypeScript compiler. + isolatedModules: true + }; + + return ts.createProgram([filePath], compilerOptions); +} + +/** + * Deduplicates events based on source, eventName, line, and functionName + * @param {Array} events - Array of events to deduplicate + * @returns {Array} Deduplicated events + */ +function deduplicateEvents(events) { + const uniqueEvents = new Map(); + + for (const event of events) { + const key = `${event.source}|${event.eventName}|${event.line}|${event.functionName}`; + if (!uniqueEvents.has(key)) { + uniqueEvents.set(key, event); + } + } + + return Array.from(uniqueEvents.values()); +} + +/** + * Attempts to analyze a file using a standalone program as fallback + * @param {string} filePath - Path to the TypeScript file + * @param {Array} customFunctionSignatures - Custom function signatures to detect + * @returns {Array} Array of events or empty array if failed + */ +function tryStandaloneAnalysis(filePath, customFunctionSignatures) { + try { + console.warn(`Unable to resolve ${filePath} in main program. Attempting standalone analysis.`); + + const standaloneProgram = createStandaloneProgram(filePath); + const sourceFile = standaloneProgram.getSourceFile(filePath); + + if (!sourceFile) { + console.warn(`Standalone analysis failed: could not get source file for ${filePath}`); + return []; + } + + const checker = standaloneProgram.getTypeChecker(); + const events = findTrackingEvents(sourceFile, checker, filePath, customFunctionSignatures || []); + + return deduplicateEvents(events); + } catch (standaloneError) { + console.warn(`Standalone analysis failed for ${filePath}: ${standaloneError.message}`); + return []; + } +} + +/** + * Gets or creates a cached TypeScript program for efficient reuse + * @param {string} filePath - Path to the TypeScript file + * @param {Map} programCache - Map of tsconfig paths to programs + * @returns {Object} TypeScript program + */ +function getCachedTsProgram(filePath, programCache) { + // Locate nearest tsconfig.json (may be undefined) + const searchPath = path.dirname(filePath); + const configPath = ts.findConfigFile(searchPath, ts.sys.fileExists, 'tsconfig.json'); + + // We only cache when a tsconfig.json exists because the resulting program + // represents an entire project. If no config is present we build a + // stand-alone program that should not be reused for other files – otherwise + // later files would be missing from the program (which is precisely what + // caused the regression we are fixing). + const shouldCache = Boolean(configPath); + const cacheKey = configPath; // undefined when shouldCache is false + + if (shouldCache && programCache.has(cacheKey)) { + return programCache.get(cacheKey); + } + + const program = getProgram(filePath, null); + + if (shouldCache) { + programCache.set(cacheKey, program); + } + + return program; +} /** * Analyzes a TypeScript file for analytics tracking calls * @param {string} filePath - Path to the TypeScript file to analyze * @param {Object} [program] - Optional existing TypeScript program to reuse - * @param {string} [customFunctionSignature] - Optional custom function signature to detect + * @param {Array} [customFunctionSignatures] - Optional custom function signatures to detect * @returns {Array} Array of tracking events found in the file */ function analyzeTsFile(filePath, program = null, customFunctionSignatures = null) { try { - // Get or create TypeScript program (only once) + // Get or create TypeScript program const tsProgram = getProgram(filePath, program); // Get source file from program const sourceFile = tsProgram.getSourceFile(filePath); if (!sourceFile) { - throw new SourceFileError(filePath); + // Try standalone analysis as fallback + return tryStandaloneAnalysis(filePath, customFunctionSignatures); } - // Get type checker + // Get type checker and find tracking events const checker = tsProgram.getTypeChecker(); - - // Single-pass collection covering built-in + all custom configs const events = findTrackingEvents(sourceFile, checker, filePath, customFunctionSignatures || []); - // Deduplicate events - const unique = new Map(); - for (const evt of events) { - const key = `${evt.source}|${evt.eventName}|${evt.line}|${evt.functionName}`; - if (!unique.has(key)) unique.set(key, evt); - } - - return Array.from(unique.values()); + return deduplicateEvents(events); } catch (error) { if (error instanceof ProgramError) { @@ -46,9 +136,37 @@ function analyzeTsFile(filePath, program = null, customFunctionSignatures = null } else { console.error(`Error analyzing TypeScript file ${filePath}: ${error.message}`); } + + return []; } +} - return []; +/** + * Analyzes multiple TypeScript files with program reuse for better performance + * @param {Array} tsFiles - Array of TypeScript file paths + * @param {Array} customFunctionSignatures - Custom function signatures to detect + * @returns {Array} Array of all tracking events found across all files + */ +function analyzeTsFiles(tsFiles, customFunctionSignatures) { + const allEvents = []; + const tsProgramCache = new Map(); // tsconfig path -> program + + for (const file of tsFiles) { + try { + // Use cached program or create new one + const program = getCachedTsProgram(file, tsProgramCache); + const events = analyzeTsFile(file, program, customFunctionSignatures); + + allEvents.push(...events); + } catch (error) { + console.warn(`Error processing TypeScript file ${file}: ${error.message}`); + } + } + + return allEvents; } -module.exports = { analyzeTsFile }; +module.exports = { + analyzeTsFile, + analyzeTsFiles +}; diff --git a/src/analyze/typescript/parser.js b/src/analyze/typescript/parser.js index 8038276..ceef6ec 100644 --- a/src/analyze/typescript/parser.js +++ b/src/analyze/typescript/parser.js @@ -32,6 +32,103 @@ class SourceFileError extends Error { } } +/** + * Default TypeScript compiler options for analysis + */ +const DEFAULT_COMPILER_OPTIONS = { + target: ts.ScriptTarget.Latest, + module: ts.ModuleKind.CommonJS, + allowJs: true, + checkJs: false, + noEmit: true, + jsx: ts.JsxEmit.Preserve, + moduleResolution: ts.ModuleResolutionKind.NodeJs, + allowSyntheticDefaultImports: true, + esModuleInterop: true, + skipLibCheck: true +}; + +/** + * Maximum number of files to include in TypeScript program for performance + */ +const MAX_FILES_THRESHOLD = 10000; + +/** + * Attempts to parse tsconfig.json and extract compiler options and file names + * @param {string} configPath - Path to tsconfig.json + * @returns {Object|null} Parsed config with options and fileNames, or null if failed + */ +function parseTsConfig(configPath) { + try { + const readResult = ts.readConfigFile(configPath, ts.sys.readFile); + if (readResult.error || !readResult.config) { + return null; + } + + const parseResult = ts.parseJsonConfigFileContent( + readResult.config, + ts.sys, + path.dirname(configPath) + ); + + if (parseResult.errors && parseResult.errors.length > 0) { + return null; + } + + return { + options: parseResult.options, + fileNames: parseResult.fileNames + }; + } catch (error) { + console.warn(`Failed to parse tsconfig.json at ${configPath}. Error: ${error.message}`); + return null; + } +} + +/** + * Determines the appropriate files to include in the TypeScript program + * @param {string} filePath - Target file path + * @param {string|null} configPath - Path to tsconfig.json if found + * @returns {Object} Configuration with compilerOptions and rootNames + */ +function getProgramConfiguration(filePath, configPath) { + let compilerOptions = { ...DEFAULT_COMPILER_OPTIONS }; + let rootNames = [filePath]; + + if (!configPath) { + return { compilerOptions, rootNames }; + } + + const config = parseTsConfig(configPath); + if (!config) { + console.warn(`Failed to parse tsconfig.json at ${configPath}. Analyzing ${filePath} in isolation.`); + return { compilerOptions, rootNames }; + } + + // Inherit compiler options from tsconfig + compilerOptions = { ...compilerOptions, ...config.options }; + + // Determine file inclusion strategy based on project size + const projectFileCount = config.fileNames.length; + + if (projectFileCount > 0 && projectFileCount <= MAX_FILES_THRESHOLD) { + // Small to medium project: include all files for better type checking + rootNames = [...config.fileNames]; + if (!rootNames.includes(filePath)) { + rootNames.push(filePath); + } + } else if (projectFileCount > MAX_FILES_THRESHOLD) { + // Large project: only include the target file to avoid performance issues + console.warn( + `Large TypeScript project detected (${projectFileCount} files). ` + + `Analyzing ${filePath} in isolation for performance.` + ); + rootNames = [filePath]; + } + + return { compilerOptions, rootNames }; +} + /** * Gets or creates a TypeScript program for analysis * @param {string} filePath - Path to the TypeScript file @@ -45,38 +142,15 @@ function getProgram(filePath, existingProgram) { } try { - // Try to locate a tsconfig.json nearest to the file to inherit compiler options (important for path aliases) + // Find the nearest tsconfig.json const searchPath = path.dirname(filePath); const configPath = ts.findConfigFile(searchPath, ts.sys.fileExists, 'tsconfig.json'); - let compilerOptions = { - target: ts.ScriptTarget.Latest, - module: ts.ModuleKind.CommonJS, - allowJs: true, - checkJs: false, - noEmit: true, - jsx: ts.JsxEmit.Preserve - }; - let rootNames = [filePath]; - - if (configPath) { - // Read and parse the tsconfig.json - const readResult = ts.readConfigFile(configPath, ts.sys.readFile); - if (!readResult.error && readResult.config) { - const parseResult = ts.parseJsonConfigFileContent( - readResult.config, - ts.sys, - path.dirname(configPath) - ); - if (!parseResult.errors || parseResult.errors.length === 0) { - compilerOptions = { ...compilerOptions, ...parseResult.options }; - rootNames = parseResult.fileNames.length > 0 ? parseResult.fileNames : rootNames; - } - } - } + // Get program configuration + const { compilerOptions, rootNames } = getProgramConfiguration(filePath, configPath); - const program = ts.createProgram(rootNames, compilerOptions); - return program; + // Create and return the TypeScript program + return ts.createProgram(rootNames, compilerOptions); } catch (error) { throw new ProgramError(filePath, error); } @@ -94,27 +168,37 @@ function findTrackingEvents(sourceFile, checker, filePath, customConfigs = []) { const events = []; /** - * Helper to test if a CallExpression matches a custom function name. - * We simply rely on node.expression.getText() which preserves the fully qualified name. + * Tests if a CallExpression matches a custom function name + * @param {Object} callNode - The call expression node + * @param {string} functionName - Function name to match + * @returns {boolean} True if matches */ - const matchesCustomFn = (callNode, fnName) => { - if (!fnName) return false; + function matchesCustomFunction(callNode, functionName) { + if (!functionName || !callNode.expression) { + return false; + } + try { - return callNode.expression && callNode.expression.getText() === fnName; + return callNode.expression.getText() === functionName; } catch { return false; } - }; + } + /** + * Recursively visits AST nodes to find tracking calls + * @param {Object} node - Current AST node + */ function visit(node) { try { if (ts.isCallExpression(node)) { - let matchedCustom = null; + let matchedCustomConfig = null; + // Check for custom function matches if (Array.isArray(customConfigs) && customConfigs.length > 0) { - for (const cfg of customConfigs) { - if (cfg && matchesCustomFn(node, cfg.functionName)) { - matchedCustom = cfg; + for (const config of customConfigs) { + if (config && matchesCustomFunction(node, config.functionName)) { + matchedCustomConfig = config; break; } } @@ -125,9 +209,12 @@ function findTrackingEvents(sourceFile, checker, filePath, customConfigs = []) { sourceFile, checker, filePath, - matchedCustom /* may be null */ + matchedCustomConfig ); - if (event) events.push(event); + + if (event) { + events.push(event); + } } ts.forEachChild(node, visit); @@ -137,7 +224,6 @@ function findTrackingEvents(sourceFile, checker, filePath, customConfigs = []) { } ts.forEachChild(sourceFile, visit); - return events; } @@ -172,5 +258,6 @@ module.exports = { getProgram, findTrackingEvents, ProgramError, - SourceFileError + SourceFileError, + DEFAULT_COMPILER_OPTIONS }; diff --git a/tests/cli.test.js b/tests/cli.test.js index 6fe73c6..9ade855 100644 --- a/tests/cli.test.js +++ b/tests/cli.test.js @@ -56,6 +56,24 @@ function compareYAMLFiles(actualPath, expectedPath) { assert.ok(actual.source); assert.ok(actual.source.repository); + // Helper to sort implementations deterministically + const sortImpls = (impls = []) => + impls.slice().sort((a, b) => { + if (a.path !== b.path) return a.path.localeCompare(b.path); + if (a.line !== b.line) return a.line - b.line; + if ((a.destination || '') !== (b.destination || '')) return (a.destination || '').localeCompare(b.destination || ''); + return (a.function || '').localeCompare(b.function || ''); + }); + + // Normalise events so that order of implementations does not matter + const normaliseEvent = (evt) => { + if (!evt) return evt; + return { + ...evt, + implementations: sortImpls(evt.implementations) + }; + }; + // Compare events using deep equality (order-insensitive) const diff = {}; for (const eventName in expected.events) { @@ -64,14 +82,17 @@ function compareYAMLFiles(actualPath, expectedPath) { continue; } - const actualEvent = actual.events[eventName]; - const expectedEvent = expected.events[eventName]; + const actualEvent = normaliseEvent(actual.events[eventName]); + const expectedEvent = normaliseEvent(expected.events[eventName]); if (!_.isEqual(actualEvent, expectedEvent)) { diff[eventName] = { properties: { missing: Object.keys(expectedEvent.properties || {}).filter(p => !actualEvent.properties?.[p]), - unexpected: Object.keys(actualEvent.properties || {}).filter(p => !expectedEvent.properties?.[p]) + unexpected: Object.keys(actualEvent.properties || {}).filter(p => !expectedEvent.properties?.[p]), + changed: Object.keys(expectedEvent.properties || {}).filter(p => + actualEvent.properties?.[p] && !_.isEqual(actualEvent.properties[p], expectedEvent.properties[p]) + ) }, implementations: { missing: (expectedEvent.implementations || []).filter(impl => diff --git a/tests/fixtures/tracking-schema-all.yaml b/tests/fixtures/tracking-schema-all.yaml index b160385..7b2b856 100644 --- a/tests/fixtures/tracking-schema-all.yaml +++ b/tests/fixtures/tracking-schema-all.yaml @@ -989,7 +989,7 @@ events: destination: segment properties: documentId: - type: any + type: number documentType: type: string cart_viewed: