Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 87 additions & 24 deletions src/analyze/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,36 @@
*/

const path = require('path');
const { execSync } = require('child_process');

const { parseCustomFunctionSignature } = require('./utils/customFunctionParser');
const { getAllFiles } = require('../utils/fileProcessor');
const { analyzeJsFile } = require('./javascript');
const { analyzeTsFiles } = require('./typescript');
const { analyzePythonFile } = require('./python');
const { analyzeRubyFile } = require('./ruby');
const { analyzeRubyFile, prebuildConstantMaps } = require('./ruby');
const { analyzeGoFile } = require('./go');

/**
 * Routes one file to the analyzer that matches its extension.
 *
 * Recognized extensions are `.js`, `.jsx`, `.py`, `.rb` and `.go`; any other
 * file resolves to an empty array. TypeScript files are not handled here
 * because analyzeTsFiles processes them as a batch.
 *
 * @param {string} file - Path to the file to analyze
 * @param {Array<string>} customFunctionSignatures - Custom function signatures to detect
 * @returns {Promise<Array<Object>>} Events found in the file (empty for unsupported types)
 */
async function analyzeFile(file, customFunctionSignatures) {
  // One anchored match decides the language; the capture group names it.
  const matched = /\.(jsx?|py|rb|go)$/.exec(file);
  const extension = matched ? matched[1] : null;

  switch (extension) {
    case 'js':
    case 'jsx':
      return analyzeJsFile(file, customFunctionSignatures);
    case 'py':
      return analyzePythonFile(file, customFunctionSignatures);
    case 'rb':
      return analyzeRubyFile(file, customFunctionSignatures);
    case 'go':
      return analyzeGoFile(file, customFunctionSignatures);
    default:
      return [];
  }
}

/**
* Adds an event to the events collection, merging properties if event already exists
*
* @param {Object} allEvents - Collection of all events
* @param {Object} event - Event to add
* @param {string} baseDir - Base directory for relative path calculation
Expand Down Expand Up @@ -44,37 +63,72 @@ function addEventToCollection(allEvents, event, baseDir) {
}

/**
 * Processes all files that are not TypeScript files, in parallel, using a
 * bounded pool of concurrent analyses.
 *
 * The pool size defaults to 64. When `ulimit -n` reports a numeric soft file
 * descriptor limit, 80% of that limit is used instead (never fewer than 4) so
 * that head-room remains for other descriptors. Up to that many analyses are
 * started at once; each time one settles, the next file is launched. Events
 * returned by an analysis are merged into `allEvents` through
 * addEventToCollection.
 *
 * If any analysis rejects, the returned promise rejects with that error.
 *
 * @param {Array<string>} files - Array of file paths
 * @param {Object} allEvents - Collection to add events to
 * @param {string} baseDir - Base directory for relative paths
 * @param {Array} customFunctionSignatures - Custom function signatures to detect
 * @returns {Promise<void>} Resolves once every file has been analyzed
 */
async function processFiles(files, allEvents, baseDir, customFunctionSignatures) {
  // Nothing to analyze: skip the shell probe entirely.
  if (files.length === 0) return;

  // Default concurrency limit
  let concurrencyLimit = 64;

  // Detect soft file descriptor limit from the system using `ulimit -n` (POSIX shells)
  try {
    const stdout = execSync('sh -c "ulimit -n"', {
      encoding: 'utf8',
      // Keep the probe silent: without this, a failing shell writes to our stderr.
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    if (stdout !== 'unlimited') {
      const limit = Number.parseInt(stdout, 10);
      if (!Number.isNaN(limit) && limit > 0) {
        // Use 80% of the limit to keep head-room for other descriptors
        concurrencyLimit = Math.max(4, Math.floor(limit * 0.8));
      }
    }
  } catch (_) {
    // Best effort only: when `sh` is unavailable or the probe fails,
    // keep the default limit.
  }

  let next = 0; // index of the next file to start
  const inFlight = new Set(); // promises currently running

  // helper: launch one analysis and wire bookkeeping
  const launch = (file) => {
    const p = analyzeFile(file, customFunctionSignatures)
      .then((events) => {
        if (events) events.forEach((e) => addEventToCollection(allEvents, e, baseDir));
      })
      .finally(() => inFlight.delete(p));
    inFlight.add(p);
  };

  // prime the pool
  const initialBatch = Math.min(concurrencyLimit, files.length);
  while (next < initialBatch) {
    launch(files[next++]);
  }

  // whenever a slot frees up, start the next file
  while (next < files.length) {
    await Promise.race(inFlight); // wait for one to finish
    launch(files[next++]); // and immediately fill the slot
  }

  // wait for the remaining work
  await Promise.all(inFlight);
}

/**
* Analyze a directory recursively for analytics tracking calls
*
* This function scans all supported files in a directory tree and identifies analytics tracking calls,
* handling different file types appropriately.
*
* @param {string} dirPath - Path to the directory to analyze
* @param {Array<string>} [customFunctions=null] - Array of custom tracking function signatures to detect
* @returns {Promise<Object>} Object mapping event names to their tracking implementations
*/
async function analyzeDirectory(dirPath, customFunctions) {
const allEvents = {};

Expand All @@ -86,19 +140,28 @@ async function analyzeDirectory(dirPath, customFunctions) {

// Separate TypeScript files from others for optimized processing
const tsFiles = [];
const otherFiles = [];
const nonTsFiles = [];
const rubyFiles = [];

for (const file of files) {
const isTsFile = /\.(tsx?)$/.test(file);
if (isTsFile) {
tsFiles.push(file);
} else {
otherFiles.push(file);
nonTsFiles.push(file);
if (/\.rb$/.test(file)) {
rubyFiles.push(file);
}
}
}

// Prebuild constant maps for all Ruby directories to ensure constant resolution across files
if (rubyFiles.length > 0) {
await prebuildConstantMaps(rubyFiles);
}

// First process non-TypeScript files
await processFiles(otherFiles, allEvents, dirPath, customFunctionSignatures);
await processFiles(nonTsFiles, allEvents, dirPath, customFunctionSignatures);

// Process TypeScript files with optimized batch processing
if (tsFiles.length > 0) {
Expand Down
11 changes: 9 additions & 2 deletions src/analyze/javascript/extractors/event-extractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,18 @@ function extractCustomEvent(node, constantMap, customConfig) {
function processEventData(eventData, source, filePath, line, functionName, customConfig) {
const { eventName, propertiesNode } = eventData;

if (!eventName || !propertiesNode || propertiesNode.type !== NODE_TYPES.OBJECT_EXPRESSION) {
// Must at least have an event name – properties are optional.
if (!eventName) {
return null;
}

let properties = extractProperties(propertiesNode);
// Default to empty properties when none are supplied.
let properties = {};

// Only attempt extraction when we have a literal object expression.
if (propertiesNode && propertiesNode.type === NODE_TYPES.OBJECT_EXPRESSION) {
properties = extractProperties(propertiesNode);
}

// Handle custom extra params
if (source === 'custom' && customConfig && eventData.extraArgs) {
Expand Down
75 changes: 70 additions & 5 deletions src/analyze/javascript/utils/function-finder.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,74 @@ const { NODE_TYPES } = require('../constants');
* @returns {string} The function name or 'global' if not in a function
*/
function findWrappingFunction(node, ancestors) {
  // Result naming:
  //   hook + enclosing function -> "<function>.<hook>"
  //   hook only                 -> the hook name/signature as found
  //   function only             -> the closest function name
  //   neither                   -> 'global'
  const REACT_HOOKS = new Set([
    'useEffect',
    'useLayoutEffect',
    'useInsertionEffect',
    'useCallback',
    'useMemo',
    'useReducer',
    'useState',
    'useImperativeHandle',
    'useDeferredValue',
    'useTransition'
  ]);

  let hookName = null; // closest hook seen, e.g. "useEffect" or "useEffect()"
  let componentName = null; // closest non-hook function name

  // Traverse ancestors from closest to furthest
  for (let i = ancestors.length - 1; i >= 0; i--) {
    const current = ancestors[i];

    // Detect React hook call (CallExpression with Identifier callee)
    if (
      !hookName &&
      current.type === NODE_TYPES.CALL_EXPRESSION &&
      current.callee &&
      current.callee.type === NODE_TYPES.IDENTIFIER &&
      REACT_HOOKS.has(current.callee.name)
    ) {
      hookName = current.callee.name;
    }

    const fnName = extractFunctionName(current, node, ancestors[i - 1]);
    if (!fnName) continue;

    // A name whose first dotted segment is a hook (optionally followed by "()")
    // is recorded as the hook, not as the enclosing function.
    // NOTE(review): only a trailing "()" is stripped before the lookup, so a name
    // such as "useCallback(handleFoo)" is not treated as a hook here — confirm
    // against what extractFunctionName actually returns.
    if (REACT_HOOKS.has(stripParens(fnName.split('.')[0]))) {
      if (!hookName) hookName = fnName;
      continue;
    }

    // First non-hook function up the tree is treated as component/container name
    if (!componentName) componentName = fnName;

    // Early exit when we already have both pieces
    if (hookName) break;
  }

  // Hook + component: compose them, dropping an empty "()" from the hook part
  if (hookName && componentName) {
    return `${componentName}.${stripParens(hookName)}`;
  }

  // Only a hook was found (no component): return it as found
  if (hookName) return hookName;

  // Only a function was found: previous behaviour, closest function name
  if (componentName) return componentName;

  return 'global';
}

Expand Down Expand Up @@ -118,6 +176,13 @@ function isFunctionNode(node) {
);
}

/**
 * Removes one trailing "()" from a name; any other name is returned unchanged.
 *
 * @param {string} name - Name or simple hook signature, e.g. "useEffect()"
 * @returns {string} The name without a trailing empty parameter list
 */
function stripParens(name) {
  const EMPTY_CALL = '()';
  if (!name.endsWith(EMPTY_CALL)) {
    return name;
  }
  return name.slice(0, name.length - EMPTY_CALL.length);
}

module.exports = {
findWrappingFunction
};
71 changes: 45 additions & 26 deletions src/analyze/python/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@ const path = require('path');

// Singleton instance of Pyodide
let pyodide = null;
// Cache indicator to ensure we load pythonTrackingAnalyzer.py only once per process
let pythonAnalyzerLoaded = false;

// Tail of the queue of work submitted to the single Pyodide interpreter.
// It always fulfills (never rejects), so a failed callback cannot block later ones.
let pyodideLock = Promise.resolve();

/**
 * Runs `callback` only after every previously queued callback has settled,
 * so calls into the shared interpreter never overlap.
 *
 * @param {Function} callback - Work to run; may return a value or a promise
 * @returns {Promise<*>} Settles with the callback's own result or error
 */
async function withPyodide(callback) {
  const ignoreOutcome = () => {};

  // Queue behind the current tail; run whether the tail fulfilled or rejected.
  const outcome = pyodideLock.then(callback, callback);

  // New tail: settles when this callback is done, with success/failure erased.
  pyodideLock = outcome.then(ignoreOutcome, ignoreOutcome);

  return outcome;
}

/**
* Initialize Pyodide runtime lazily
Expand Down Expand Up @@ -69,36 +82,42 @@ async function analyzePythonFile(filePath, customFunctionSignatures = null) {
// Read the Python file only once
const code = fs.readFileSync(filePath, 'utf8');

// Initialize Pyodide if not already done
const py = await initPyodide();
// All interaction with Pyodide must be serialized to avoid race conditions
const events = await withPyodide(async () => {
// Initialize Pyodide if not already done
const py = await initPyodide();

// Load the Python analyzer code (idempotent – redefining functions is fine)
const analyzerPath = path.join(__dirname, 'pythonTrackingAnalyzer.py');
if (!fs.existsSync(analyzerPath)) {
throw new Error(`Python analyzer not found at: ${analyzerPath}`);
}
const analyzerCode = fs.readFileSync(analyzerPath, 'utf8');
// Prevent the analyzer from executing any __main__ blocks that expect CLI usage
py.globals.set('__name__', null);
py.runPython(analyzerCode);
// Load the analyzer definitions into the Pyodide runtime once
if (!pythonAnalyzerLoaded) {
const analyzerPath = path.join(__dirname, 'pythonTrackingAnalyzer.py');
if (!fs.existsSync(analyzerPath)) {
throw new Error(`Python analyzer not found at: ${analyzerPath}`);
}
const analyzerCode = fs.readFileSync(analyzerPath, 'utf8');
// Prevent the analyzer from executing any __main__ blocks that expect CLI usage
py.globals.set('__name__', null);
py.runPython(analyzerCode);
pythonAnalyzerLoaded = true;
}

// Helper to run analysis with a given custom config (can be null)
const runAnalysis = (customConfig) => {
py.globals.set('code', code);
py.globals.set('filepath', filePath);
py.globals.set('custom_config_json', customConfig ? JSON.stringify(customConfig) : null);
py.runPython('import json');
py.runPython('custom_config = None if custom_config_json == None else json.loads(custom_config_json)');
const result = py.runPython('analyze_python_code(code, filepath, custom_config)');
return JSON.parse(result);
};
// Helper to run analysis with a given custom config (can be null)
const runAnalysis = (customConfig) => {
py.globals.set('code', code);
py.globals.set('filepath', filePath);
py.globals.set('custom_config_json', customConfig ? JSON.stringify(customConfig) : null);
py.runPython('import json');
py.runPython('custom_config = None if custom_config_json == None else json.loads(custom_config_json)');
const result = py.runPython('analyze_python_code(code, filepath, custom_config)');
return JSON.parse(result);
};

// Prepare config argument (array or null)
const configArg = Array.isArray(customFunctionSignatures) && customFunctionSignatures.length > 0
? customFunctionSignatures
: null;
// Prepare config argument (array or null)
const configArg = Array.isArray(customFunctionSignatures) && customFunctionSignatures.length > 0
? customFunctionSignatures
: null;

const events = runAnalysis(configArg);
return runAnalysis(configArg);
});

return events;
} catch (error) {
Expand Down
16 changes: 15 additions & 1 deletion src/analyze/ruby/extractors.js
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ async function extractEventName(node, source, customConfig = null, constantMap =
* @param {Object} customConfig - Custom configuration for custom functions
* @returns {Object|null} - The extracted properties or null
*/
async function extractProperties(node, source, customConfig = null) {
async function extractProperties(node, source, customConfig = null, variableMap = null) {
const { HashNode, ArrayNode } = await prismPromise;

if (source === 'segment' || source === 'rudderstack') {
Expand Down Expand Up @@ -248,6 +248,20 @@ async function extractProperties(node, source, customConfig = null) {
if (propsArg instanceof HashNode) {
const hashProps = await extractHashProperties(propsArg);
Object.assign(properties, hashProps);
} else {
// Attempt to resolve variable references (e.g., a local variable containing a hash)
const prism = await prismPromise;
const LocalVariableReadNode = prism.LocalVariableReadNode;

if (variableMap && LocalVariableReadNode && propsArg instanceof LocalVariableReadNode) {
const varName = propsArg.name;
if (variableMap[varName]) {
Object.assign(properties, variableMap[varName]);
}
} else if (variableMap && propsArg && typeof propsArg.name === 'string' && variableMap[propsArg.name]) {
// Fallback: match by variable name when node type isn't LocalVariableReadNode
Object.assign(properties, variableMap[propsArg.name]);
}
}

return Object.keys(properties).length > 0 ? properties : null;
Expand Down
Loading