diff --git a/packages/cli/src/cli.test.ts b/packages/cli/src/cli.test.ts index 4c4b398..42da986 100644 --- a/packages/cli/src/cli.test.ts +++ b/packages/cli/src/cli.test.ts @@ -58,11 +58,16 @@ describe('CLI Structure', () => { expect(jsonOption).toBeDefined(); }); - it('stats command should have json option', () => { - const options = statsCommand.options; - const jsonOption = options.find((opt) => opt.long === '--json'); + it('stats command should have show subcommand with json option', () => { + const subcommands = statsCommand.commands; + const showCommand = subcommands.find((cmd) => cmd.name() === 'show'); - expect(jsonOption).toBeDefined(); + expect(showCommand).toBeDefined(); + + if (showCommand) { + const jsonOption = showCommand.options.find((opt) => opt.long === '--json'); + expect(jsonOption).toBeDefined(); + } }); it('clean command should have force option', () => { diff --git a/packages/cli/src/commands/stats.ts b/packages/cli/src/commands/stats.ts index 249c52c..733938b 100644 --- a/packages/cli/src/commands/stats.ts +++ b/packages/cli/src/commands/stats.ts @@ -1,8 +1,13 @@ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import { + compareStats, type DetailedIndexStats, ensureStorageDirectory, + exportLanguageStatsAsMarkdown, + exportPackageStatsAsMarkdown, + exportStatsAsCsv, + exportStatsAsJson, getStorageFilePaths, getStoragePath, RepositoryIndexer, @@ -22,74 +27,99 @@ import { output, } from '../utils/output.js'; -export const statsCommand = new Command('stats') - .description('Show indexing statistics') - .option('--json', 'Output stats as JSON', false) - .option('-v, --verbose', 'Show detailed breakdown with tables', false) - .action(async (options) => { - const spinner = ora('Loading statistics...').start(); +/** + * Format duration in human-readable format + */ +function formatDuration(ms: number): string { + if (ms < 1000) return `${ms}ms`; + const seconds = ms / 1000; + if (seconds < 60) return 
`${seconds.toFixed(1)}s`; + const minutes = seconds / 60; + if (minutes < 60) return `${minutes.toFixed(1)}min`; + const hours = minutes / 60; + if (hours < 24) return `${hours.toFixed(1)}h`; + const days = hours / 24; + return `${days.toFixed(1)}d`; +} + +/** + * Helper function to load current stats + */ +async function loadCurrentStats(): Promise<{ + stats: DetailedIndexStats | null; + githubStats: unknown | null; + repositoryPath: string; +}> { + // Load config + const config = await loadConfig(); + if (!config) { + throw new Error('No config found. Run "dev init" first to initialize dev-agent'); + } + + // Resolve repository path + const repositoryPath = config.repository?.path || config.repositoryPath || process.cwd(); + const resolvedRepoPath = path.resolve(repositoryPath); + + // Get centralized storage paths + const storagePath = await getStoragePath(resolvedRepoPath); + await ensureStorageDirectory(storagePath); + const filePaths = getStorageFilePaths(storagePath); + + const indexer = new RepositoryIndexer({ + repositoryPath: resolvedRepoPath, + vectorStorePath: filePaths.vectors, + statePath: filePaths.indexerState, + excludePatterns: config.repository?.excludePatterns || config.excludePatterns, + languages: config.repository?.languages || config.languages, + }); + + await indexer.initialize(); + + const stats = (await indexer.getStats()) as DetailedIndexStats | null; + // Try to load GitHub stats + let githubStats = null; + try { + // Try to load repository from state file + let repository: string | undefined; try { - // Load config - const config = await loadConfig(); - if (!config) { - spinner.fail('No config found'); - logger.error('Run "dev init" first to initialize dev-agent'); - process.exit(1); - return; // TypeScript needs this - } + const stateContent = await fs.readFile(filePaths.githubState, 'utf-8'); + const state = JSON.parse(stateContent); + repository = state.repository; + } catch { + // State file doesn't exist + } - // Resolve 
repository path - const repositoryPath = config.repository?.path || config.repositoryPath || process.cwd(); - const resolvedRepoPath = path.resolve(repositoryPath); - - // Get centralized storage paths - const storagePath = await getStoragePath(resolvedRepoPath); - await ensureStorageDirectory(storagePath); - const filePaths = getStorageFilePaths(storagePath); - - const indexer = new RepositoryIndexer({ - repositoryPath: resolvedRepoPath, - vectorStorePath: filePaths.vectors, - statePath: filePaths.indexerState, - excludePatterns: config.repository?.excludePatterns || config.excludePatterns, - languages: config.repository?.languages || config.languages, - }); - - await indexer.initialize(); - - const stats = (await indexer.getStats()) as DetailedIndexStats | null; - - // Try to load GitHub stats - let githubStats = null; - try { - // Try to load repository from state file - let repository: string | undefined; - try { - const stateContent = await fs.readFile(filePaths.githubState, 'utf-8'); - const state = JSON.parse(stateContent); - repository = state.repository; - } catch { - // State file doesn't exist - } + const githubIndexer = new GitHubIndexer( + { + vectorStorePath: `${filePaths.vectors}-github`, + statePath: filePaths.githubState, + autoUpdate: false, + }, + repository + ); + await githubIndexer.initialize(); + githubStats = githubIndexer.getStats(); + await githubIndexer.close(); + } catch { + // GitHub not indexed, ignore + } - const githubIndexer = new GitHubIndexer( - { - vectorStorePath: `${filePaths.vectors}-github`, - statePath: filePaths.githubState, - autoUpdate: false, - }, - repository - ); - await githubIndexer.initialize(); - githubStats = githubIndexer.getStats(); - await githubIndexer.close(); - } catch { - // GitHub not indexed, ignore - } + await indexer.close(); - await indexer.close(); + return { stats, githubStats, repositoryPath: resolvedRepoPath }; +} +// Main stats command (show current stats) +const showStatsCommand = new 
Command('show') + .description('Show current indexing statistics (default)') + .option('--json', 'Output stats as JSON', false) + .option('-v, --verbose', 'Show detailed breakdown with tables', false) + .action(async (options) => { + const spinner = ora('Loading statistics...').start(); + + try { + const { stats, githubStats, repositoryPath: resolvedRepoPath } = await loadCurrentStats(); spinner.stop(); if (!stats) { @@ -142,8 +172,18 @@ export const statsCommand = new Command('stats') } // GitHub stats (compact) - if (githubStats) { - output.log(formatGitHubSummary(githubStats)); + if (githubStats && typeof githubStats === 'object' && 'repository' in githubStats) { + output.log( + formatGitHubSummary( + githubStats as { + repository: string; + totalDocuments: number; + byType: { issue?: number; pull_request?: number }; + byState: { open?: number; closed?: number; merged?: number }; + lastIndexed: string; + } + ) + ); } else { output.log(`🔗 ${chalk.gray('GitHub not indexed. Run')} ${chalk.cyan('dev gh index')}`); } @@ -155,3 +195,207 @@ export const statsCommand = new Command('stats') process.exit(1); } }); + +// Compare command - compare two stat snapshots +const compareCommand = new Command('compare') + .description('Compare two stat snapshots to see changes over time') + .argument('', 'Path to "before" stats JSON file') + .argument('', 'Path to "after" stats JSON file') + .option('--json', 'Output comparison as JSON', false) + .action(async (beforePath: string, afterPath: string, options) => { + const spinner = ora('Loading stat snapshots...').start(); + + try { + // Load both stat files + const beforeContent = await fs.readFile(beforePath, 'utf-8'); + const afterContent = await fs.readFile(afterPath, 'utf-8'); + + const beforeStats: DetailedIndexStats = JSON.parse(beforeContent); + const afterStats: DetailedIndexStats = JSON.parse(afterContent); + + spinner.text = 'Comparing statistics...'; + + // Calculate diff + const diff = compareStats(beforeStats, 
afterStats); + + spinner.stop(); + + if (options.json) { + console.log(JSON.stringify(diff, null, 2)); + return; + } + + // Pretty print comparison + output.log(''); + output.log(chalk.bold.cyan('📊 Stats Comparison')); + output.log(''); + + // Summary + output.log(chalk.bold('Summary:')); + output.log( + ` Trend: ${diff.summary.overallTrend === 'growing' ? chalk.green('Growing') : diff.summary.overallTrend === 'shrinking' ? chalk.red('Shrinking') : chalk.gray('Stable')}` + ); + if (diff.summary.languagesAdded.length > 0) { + output.log(` Languages added: ${chalk.green(diff.summary.languagesAdded.join(', '))}`); + } + if (diff.summary.languagesRemoved.length > 0) { + output.log(` Languages removed: ${chalk.red(diff.summary.languagesRemoved.join(', '))}`); + } + output.log(''); + + // Overall changes + output.log(chalk.bold('Overall Changes:')); + const fileChange = diff.files.absolute; + const fileSymbol = fileChange > 0 ? '↑' : fileChange < 0 ? '↓' : '•'; + const fileColor = fileChange > 0 ? chalk.green : fileChange < 0 ? chalk.red : chalk.gray; + const filePercent = diff.files.percent; + output.log( + ` Files: ${fileColor(`${fileSymbol} ${fileChange >= 0 ? '+' : ''}${fileChange} (${filePercent >= 0 ? '+' : ''}${filePercent.toFixed(1)}%)`)} [${diff.files.before} → ${diff.files.after}]` + ); + + const docChange = diff.documents.absolute; + const docSymbol = docChange > 0 ? '↑' : docChange < 0 ? '↓' : '•'; + const docColor = docChange > 0 ? chalk.green : docChange < 0 ? chalk.red : chalk.gray; + const docPercent = diff.documents.percent; + output.log( + ` Documents: ${docColor(`${docSymbol} ${docChange >= 0 ? '+' : ''}${docChange} (${docPercent >= 0 ? '+' : ''}${docPercent.toFixed(1)}%)`)} [${diff.documents.before} → ${diff.documents.after}]` + ); + + const vecChange = diff.vectors.absolute; + const vecSymbol = vecChange > 0 ? '↑' : vecChange < 0 ? '↓' : '•'; + const vecColor = vecChange > 0 ? chalk.green : vecChange < 0 ? 
chalk.red : chalk.gray; + const vecPercent = diff.vectors.percent; + output.log( + ` Vectors: ${vecColor(`${vecSymbol} ${vecChange >= 0 ? '+' : ''}${vecChange} (${vecPercent >= 0 ? '+' : ''}${vecPercent.toFixed(1)}%)`)} [${diff.vectors.before} → ${diff.vectors.after}]` + ); + + output.log(` Time between snapshots: ${chalk.gray(formatDuration(diff.timeDelta))}`); + output.log(''); + + // Language changes + if (diff.languages && Object.keys(diff.languages).length > 0) { + output.log(chalk.bold('By Language (top changes):')); + const langChanges = Object.entries(diff.languages) + .map(([lang, langDiff]) => ({ lang, diff: langDiff })) + .filter((item) => item.diff.files.absolute !== 0) + .sort((a, b) => Math.abs(b.diff.files.absolute) - Math.abs(a.diff.files.absolute)) + .slice(0, 5); + + for (const { lang, diff: langDiff } of langChanges) { + const filesDiff = langDiff.files.absolute; + const symbol = filesDiff > 0 ? '↑' : '↓'; + const color = filesDiff > 0 ? chalk.green : chalk.red; + output.log( + ` ${chalk.cyan(lang)}: ${color(`${symbol} ${filesDiff >= 0 ? '+' : ''}${filesDiff} files (${langDiff.files.percent.toFixed(1)}%)`)} [${langDiff.files.before} → ${langDiff.files.after}]` + ); + } + output.log(''); + } + + // Component type changes + if (diff.componentTypes && Object.keys(diff.componentTypes).length > 0) { + output.log(chalk.bold('By Component Type (top changes):')); + const changedTypes = Object.entries(diff.componentTypes) + .filter(([_, countDiff]) => countDiff.absolute !== 0) + .sort((a, b) => Math.abs(b[1].absolute) - Math.abs(a[1].absolute)) + .slice(0, 5); + + for (const [type, countDiff] of changedTypes) { + const change = countDiff.absolute; + const symbol = change > 0 ? '↑' : '↓'; + const color = change > 0 ? chalk.green : chalk.red; + output.log( + ` ${type}: ${color(`${symbol} ${change >= 0 ? 
'+' : ''}${change} (${countDiff.percent.toFixed(1)}%)`)} [${countDiff.before} → ${countDiff.after}]` + ); + } + output.log(''); + } + } catch (error) { + spinner.fail('Failed to compare statistics'); + logger.error(error instanceof Error ? error.message : String(error)); + process.exit(1); + } + }); + +// Export command - export current stats in various formats +const exportCommand = new Command('export') + .description('Export current statistics in various formats') + .option('-f, --format ', 'Output format (json, markdown)', 'json') + .option('-o, --output ', 'Output file (default: stdout)') + .action(async (options) => { + const spinner = ora('Loading statistics...').start(); + + try { + const { stats } = await loadCurrentStats(); + + if (!stats) { + spinner.fail('No statistics available'); + output.warn('Run "dev index" to index your repository first'); + process.exit(1); + } + + spinner.text = `Exporting as ${options.format}...`; + + let outputContent: string; + + switch (options.format.toLowerCase()) { + case 'json': + outputContent = exportStatsAsJson(stats); + break; + case 'csv': + outputContent = exportStatsAsCsv(stats); + break; + case 'markdown': + case 'md': { + // Build markdown with language and package tables + const lines: string[] = []; + lines.push('# Repository Statistics'); + lines.push(''); + lines.push(`**Repository:** ${stats.repositoryPath}`); + lines.push(`**Files Scanned:** ${stats.filesScanned}`); + lines.push(`**Documents Indexed:** ${stats.documentsIndexed}`); + lines.push(`**Vectors Stored:** ${stats.vectorsStored}`); + lines.push(`**Duration:** ${stats.duration}ms`); + lines.push(''); + + if (stats.byLanguage && Object.keys(stats.byLanguage).length > 0) { + lines.push(exportLanguageStatsAsMarkdown(stats.byLanguage)); + lines.push(''); + } + + if (stats.byPackage && Object.keys(stats.byPackage).length > 0) { + lines.push(exportPackageStatsAsMarkdown(stats.byPackage)); + lines.push(''); + } + + outputContent = lines.join('\n'); + 
break; + } + default: + spinner.fail(`Unknown format: ${options.format}`); + logger.error('Supported formats: json, csv, markdown'); + process.exit(1); + } + + spinner.stop(); + + // Output to file or stdout + if (options.output) { + await fs.writeFile(options.output, outputContent, 'utf-8'); + output.success(`Statistics exported to ${chalk.cyan(options.output)}`); + } else { + console.log(outputContent); + } + } catch (error) { + spinner.fail('Failed to export statistics'); + logger.error(error instanceof Error ? error.message : String(error)); + process.exit(1); + } + }); + +// Main stats command with subcommands +export const statsCommand = new Command('stats') + .description('Manage and view indexing statistics') + .addCommand(showStatsCommand, { isDefault: true }) + .addCommand(compareCommand) + .addCommand(exportCommand); diff --git a/packages/core/src/events/__tests__/event-bus.test.ts b/packages/core/src/events/__tests__/event-bus.test.ts index d707af1..cb9d793 100644 --- a/packages/core/src/events/__tests__/event-bus.test.ts +++ b/packages/core/src/events/__tests__/event-bus.test.ts @@ -237,6 +237,9 @@ describe('AsyncEventBus', () => { describe('error handling', () => { it('should not crash on handler error', async () => { + // Mock console.error to suppress expected error logs in test output + const consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const errorHandler = vi.fn().mockRejectedValue(new Error('Handler error')); const goodHandler = vi.fn(); @@ -249,6 +252,95 @@ describe('AsyncEventBus', () => { expect(errorHandler).toHaveBeenCalled(); expect(goodHandler).toHaveBeenCalled(); + + // Restore console.error + consoleErrorSpy.mockRestore(); + }); + }); + + describe('logger integration', () => { + it('should use kero logger when provided', async () => { + const mockLogger = { + trace: vi.fn(), + debug: vi.fn(), + info: vi.fn(), + success: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + fatal: vi.fn(), + child: vi.fn(), + 
startTimer: vi.fn(() => vi.fn()), + isLevelEnabled: vi.fn(() => true), + level: 'debug' as const, + }; + + const busWithLogger = new AsyncEventBus({ debug: true, logger: mockLogger }); + + // Test debug logging for subscription + busWithLogger.on('test.event', vi.fn()); + expect(mockLogger.debug).toHaveBeenCalledWith('Subscribed to "test.event" (priority: 0)'); + + // Test debug logging for emit + await busWithLogger.emit('test.event', { data: 'test' }); + expect(mockLogger.debug).toHaveBeenCalledWith( + expect.objectContaining({ payload: { data: 'test' } }), + 'Emitting "test.event"' + ); + + busWithLogger.removeAllListeners(); + }); + + it('should use kero logger for error handling', async () => { + const mockLogger = { + trace: vi.fn(), + debug: vi.fn(), + info: vi.fn(), + success: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + fatal: vi.fn(), + child: vi.fn(), + startTimer: vi.fn(() => vi.fn()), + isLevelEnabled: vi.fn(() => true), + level: 'debug' as const, + }; + + const busWithLogger = new AsyncEventBus({ logger: mockLogger }); + + const error = new Error('Test error'); + const errorHandler = vi.fn().mockRejectedValue(error); + busWithLogger.on('error.event', errorHandler); + + await busWithLogger.emit('error.event', {}); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(mockLogger.error).toHaveBeenCalledWith(error, 'Handler error for "error.event"'); + + busWithLogger.removeAllListeners(); + }); + + it('should fallback to console when no logger provided', async () => { + const consoleDebugSpy = vi.spyOn(console, 'debug').mockImplementation(() => {}); + const consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + + const busWithoutLogger = new AsyncEventBus({ debug: true }); + + busWithoutLogger.on('test.event', vi.fn()); + expect(consoleDebugSpy).toHaveBeenCalledWith( + '[EventBus] Subscribed to "test.event" (priority: 0)' + ); + + const errorHandler = vi.fn().mockRejectedValue(new Error('Test error')); + 
busWithoutLogger.on('error.event', errorHandler); + + await busWithoutLogger.emit('error.event', {}); + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(consoleErrorSpy).toHaveBeenCalled(); + + consoleDebugSpy.mockRestore(); + consoleErrorSpy.mockRestore(); + busWithoutLogger.removeAllListeners(); }); }); }); diff --git a/packages/core/src/events/event-bus.ts b/packages/core/src/events/event-bus.ts index 9a528d1..0c14696 100644 --- a/packages/core/src/events/event-bus.ts +++ b/packages/core/src/events/event-bus.ts @@ -7,6 +7,7 @@ import { randomUUID } from 'node:crypto'; import { EventEmitter } from 'node:events'; +import type { Logger } from '@lytics/kero'; import type { EmitOptions, EventBus, @@ -37,6 +38,8 @@ export interface AsyncEventBusOptions { source?: string; /** Enable debug logging */ debug?: boolean; + /** Optional kero logger for structured logging */ + logger?: Logger; } /** @@ -52,7 +55,8 @@ export interface AsyncEventBusOptions { export class AsyncEventBus implements EventBus { private emitter: EventEmitter; private handlers: Map = new Map(); - private options: Required; + private options: Required>; + private logger?: Logger; constructor(options: AsyncEventBusOptions = {}) { this.options = { @@ -61,6 +65,7 @@ export class AsyncEventBus implements EventBus { source: options.source ?? 'event-bus', debug: options.debug ?? 
false, }; + this.logger = options.logger; this.emitter = new EventEmitter(); this.emitter.setMaxListeners(this.options.maxListeners); @@ -103,7 +108,11 @@ export class AsyncEventBus implements EventBus { (entry as HandlerEntry & { _wrapped: typeof wrappedHandler })._wrapped = wrappedHandler; if (this.options.debug) { - console.debug(`[EventBus] Subscribed to "${eventName}" (priority: ${entry.priority})`); + if (this.logger) { + this.logger.debug(`Subscribed to "${eventName}" (priority: ${entry.priority})`); + } else { + console.debug(`[EventBus] Subscribed to "${eventName}" (priority: ${entry.priority})`); + } } // Return unsubscribe function @@ -135,7 +144,11 @@ export class AsyncEventBus implements EventBus { handlerList.splice(index, 1); if (this.options.debug) { - console.debug(`[EventBus] Unsubscribed from "${eventName}"`); + if (this.logger) { + this.logger.debug(`Unsubscribed from "${eventName}"`); + } else { + console.debug(`[EventBus] Unsubscribed from "${eventName}"`); + } } } } @@ -151,7 +164,11 @@ export class AsyncEventBus implements EventBus { }; if (this.options.debug) { - console.debug(`[EventBus] Emitting "${eventName}"`, { payload, meta }); + if (this.logger) { + this.logger.debug({ payload, meta }, `Emitting "${eventName}"`); + } else { + console.debug(`[EventBus] Emitting "${eventName}"`, { payload, meta }); + } } if (options.waitForHandlers) { @@ -211,7 +228,12 @@ export class AsyncEventBus implements EventBus { } if (this.options.debug) { - console.debug(`[EventBus] Removed all listeners${eventName ? ` for "${eventName}"` : ''}`); + const message = `Removed all listeners${eventName ? 
` for "${eventName}"` : ''}`; + if (this.logger) { + this.logger.debug(message); + } else { + console.debug(`[EventBus] ${message}`); + } } } @@ -233,7 +255,15 @@ export class AsyncEventBus implements EventBus { // Handle promise rejection if (result instanceof Promise) { result.catch((error) => { - console.error(`[EventBus] Handler error for "${eventName}":`, error); + if (this.logger) { + if (error instanceof Error) { + this.logger.error(error, `Handler error for "${eventName}"`); + } else { + this.logger.error({ error }, `Handler error for "${eventName}"`); + } + } else { + console.error(`[EventBus] Handler error for "${eventName}":`, error); + } }); } }; @@ -260,7 +290,15 @@ export class AsyncEventBus implements EventBus { try { await entry.handler(payload); } catch (error) { - console.error(`[EventBus] Handler error for "${eventName}":`, error); + if (this.logger) { + if (error instanceof Error) { + this.logger.error(error, `Handler error for "${eventName}"`); + } else { + this.logger.error({ error }, `Handler error for "${eventName}"`); + } + } else { + console.error(`[EventBus] Handler error for "${eventName}":`, error); + } } }); diff --git a/packages/core/src/indexer/index.ts b/packages/core/src/indexer/index.ts index c43f8e7..64e9eb2 100644 --- a/packages/core/src/indexer/index.ts +++ b/packages/core/src/indexer/index.ts @@ -10,6 +10,7 @@ import type { Document } from '../scanner/types'; import { getCurrentSystemResources, getOptimalConcurrency } from '../utils/concurrency'; import { VectorStorage } from '../vector'; import type { EmbeddingDocument, SearchOptions, SearchResult } from '../vector/types'; +import { validateDetailedIndexStats, validateIndexerState } from './schemas/validation.js'; import { StatsAggregator } from './stats-aggregator'; import { mergeStats } from './stats-merger'; import type { @@ -20,10 +21,13 @@ import type { IndexerState, IndexOptions, IndexStats, + LanguageStats, + PackageStats, SupportedLanguage, UpdateOptions, } from 
'./types'; import { getExtensionForLanguage, prepareDocumentsForEmbedding } from './utils'; +import { aggregateChangeFrequency, calculateChangeFrequency } from './utils/change-frequency.js'; const INDEXER_VERSION = '1.0.0'; const DEFAULT_STATE_PATH = '.dev-agent/indexer-state.json'; @@ -450,7 +454,15 @@ export class RepositoryIndexer { const incrementalUpdatesSince = this.state.incrementalUpdatesSince || 0; const warning = this.getStatsWarning(incrementalUpdatesSince); - return { + // Enrich stats with change frequency (optional, non-blocking) + const enrichedByLanguage = await this.enrichLanguageStatsWithChangeFrequency( + this.state.stats.byLanguage + ); + const enrichedByPackage = await this.enrichPackageStatsWithChangeFrequency( + this.state.stats.byPackage + ); + + const stats = { filesScanned: this.state.stats.totalFiles, documentsExtracted: this.state.stats.totalDocuments, documentsIndexed: this.state.stats.totalDocuments, @@ -460,9 +472,9 @@ export class RepositoryIndexer { startTime: this.state.lastIndexTime, endTime: this.state.lastIndexTime, repositoryPath: this.state.repositoryPath, - byLanguage: this.state.stats.byLanguage, + byLanguage: enrichedByLanguage, byComponentType: this.state.stats.byComponentType, - byPackage: this.state.stats.byPackage, + byPackage: enrichedByPackage, statsMetadata: { isIncremental: false, // getStats returns full picture lastFullIndex, @@ -471,6 +483,123 @@ export class RepositoryIndexer { warning, }, }; + + // Validate stats before returning (ensures API contract) + const validation = validateDetailedIndexStats(stats); + if (!validation.success) { + console.warn(`Invalid stats detected: ${validation.error}`); + return null; + } + + return validation.data; + } + + /** + * Enrich language stats with change frequency data + * Non-blocking: returns original stats if git analysis fails + */ + private async enrichLanguageStatsWithChangeFrequency( + byLanguage?: Partial> + ): Promise> | undefined> { + if (!byLanguage) return 
byLanguage; + + try { + // Calculate change frequency for repository + const changeFreq = await calculateChangeFrequency({ + repositoryPath: this.config.repositoryPath, + maxCommits: 1000, + }); + + // Enrich each language with aggregate stats + const enriched: Partial> = {}; + + for (const [lang, langStats] of Object.entries(byLanguage) as Array< + [SupportedLanguage, LanguageStats] + >) { + // Filter change frequency by file extension for this language + const langExtensions = this.getExtensionsForLanguage(lang); + const langFiles = new Map( + [...changeFreq.entries()].filter(([filePath]) => + langExtensions.some((ext) => filePath.endsWith(ext)) + ) + ); + + const aggregate = aggregateChangeFrequency(langFiles); + + enriched[lang] = { + ...langStats, + avgCommitsPerFile: aggregate.avgCommitsPerFile, + lastModified: aggregate.lastModified ?? undefined, + }; + } + + return enriched; + } catch (error) { + // Git not available or analysis failed - return original stats without change frequency + const errorMessage = error instanceof Error ? 
error.message : String(error); + console.warn( + `[indexer] Unable to calculate change frequency for language stats: ${errorMessage}` + ); + return byLanguage; + } + } + + /** + * Enrich package stats with change frequency data + * Non-blocking: returns original stats if git analysis fails + */ + private async enrichPackageStatsWithChangeFrequency( + byPackage?: Record + ): Promise | undefined> { + if (!byPackage) return byPackage; + + try { + // Calculate change frequency for repository + const changeFreq = await calculateChangeFrequency({ + repositoryPath: this.config.repositoryPath, + maxCommits: 1000, + }); + + // Enrich each package with aggregate stats + const enriched: Record = {}; + + for (const [pkgPath, pkgStats] of Object.entries(byPackage)) { + // Filter change frequency by package path + const pkgFiles = new Map( + [...changeFreq.entries()].filter(([filePath]) => filePath.startsWith(pkgPath)) + ); + + const aggregate = aggregateChangeFrequency(pkgFiles); + + enriched[pkgPath] = { + ...pkgStats, + totalCommits: aggregate.totalCommits, + lastModified: aggregate.lastModified ?? undefined, + }; + } + + return enriched; + } catch (error) { + // Git not available or analysis failed - return original stats without change frequency + const errorMessage = error instanceof Error ? 
error.message : String(error); + console.warn( + `[indexer] Unable to calculate change frequency for package stats: ${errorMessage}` + ); + return byPackage; + } + } + + /** + * Get file extensions for a language + */ + private getExtensionsForLanguage(language: SupportedLanguage): string[] { + const extensionMap: Record = { + typescript: ['.ts', '.tsx'], + javascript: ['.js', '.jsx', '.mjs', '.cjs'], + go: ['.go'], + markdown: ['.md', '.markdown'], + }; + return extensionMap[language] || []; } /** @@ -548,10 +677,20 @@ export class RepositoryIndexer { private async loadState(): Promise { try { const stateContent = await fs.readFile(this.config.statePath, 'utf-8'); - this.state = JSON.parse(stateContent); + const data = JSON.parse(stateContent); + + // Validate state with Zod schema + const validation = validateIndexerState(data); + if (!validation.success) { + console.warn(`Invalid indexer state (will start fresh): ${validation.error}`); + this.state = null; + return; + } + + this.state = validation.data; // Validate state compatibility - if (this.state && this.state.version !== INDEXER_VERSION) { + if (this.state.version !== INDEXER_VERSION) { console.warn( `Indexer state version mismatch: ${this.state.version} vs ${INDEXER_VERSION}. 
May need re-indexing.` ); @@ -570,6 +709,13 @@ export class RepositoryIndexer { return; } + // Validate state before saving (defensive check) + const validation = validateIndexerState(this.state); + if (!validation.success) { + // Log warning but don't block saving - state was valid when created + console.warn(`Indexer state validation warning: ${validation.error}`); + } + // Ensure directory exists await fs.mkdir(path.dirname(this.config.statePath), { recursive: true }); diff --git a/packages/core/src/indexer/schemas/__tests__/stats.test.ts b/packages/core/src/indexer/schemas/__tests__/stats.test.ts new file mode 100644 index 0000000..4f21d1f --- /dev/null +++ b/packages/core/src/indexer/schemas/__tests__/stats.test.ts @@ -0,0 +1,559 @@ +/** + * Tests for indexer statistics schemas + */ + +import { describe, expect, it } from 'vitest'; +import { + DetailedIndexStatsSchema, + FileMetadataSchema, + IndexErrorSchema, + IndexerStateSchema, + IndexStatsSchema, + LanguageStatsSchema, + PackageStatsSchema, + StatsMetadataSchema, +} from '../stats'; + +describe('LanguageStatsSchema', () => { + it('should validate valid language stats', () => { + const valid = { + files: 10, + components: 100, + lines: 5000, + }; + + const result = LanguageStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual(valid); + } + }); + + it('should validate language stats with change frequency', () => { + const valid = { + files: 10, + components: 100, + lines: 5000, + avgCommitsPerFile: 5.5, + lastModified: new Date('2024-01-01'), + }; + + const result = LanguageStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.avgCommitsPerFile).toBe(5.5); + expect(result.data.lastModified).toBeInstanceOf(Date); + } + }); + + it('should reject negative numbers', () => { + const invalid = { + files: -1, + components: 100, + lines: 5000, + }; + + const result = 
LanguageStatsSchema.safeParse(invalid); + expect(result.success).toBe(false); + }); + + it('should reject decimal numbers', () => { + const invalid = { + files: 10.5, + components: 100, + lines: 5000, + }; + + const result = LanguageStatsSchema.safeParse(invalid); + expect(result.success).toBe(false); + }); + + it('should reject missing fields', () => { + const invalid = { + files: 10, + components: 100, + // missing lines + }; + + const result = LanguageStatsSchema.safeParse(invalid); + expect(result.success).toBe(false); + }); +}); + +describe('PackageStatsSchema', () => { + it('should validate valid package stats', () => { + const valid = { + name: '@my/package', + path: 'packages/my-package', + files: 50, + components: 200, + languages: { + typescript: 180, + javascript: 20, + }, + }; + + const result = PackageStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.name).toBe('@my/package'); + expect(result.data.languages?.typescript).toBe(180); + } + }); + + it('should validate package stats with change frequency', () => { + const valid = { + name: '@my/package', + path: 'packages/my-package', + files: 50, + components: 200, + languages: {}, + totalCommits: 125, + lastModified: new Date('2024-01-15'), + }; + + const result = PackageStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.totalCommits).toBe(125); + expect(result.data.lastModified).toBeInstanceOf(Date); + } + }); + + it('should require languages field (can be empty)', () => { + const valid = { + name: '@my/package', + path: 'packages/my-package', + files: 50, + components: 200, + languages: {}, + }; + + const result = PackageStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + }); + + it('should reject empty name', () => { + const invalid = { + name: '', + path: 'packages/my-package', + files: 50, + components: 200, + }; + + const result = 
PackageStatsSchema.safeParse(invalid); + expect(result.success).toBe(false); + }); +}); + +describe('StatsMetadataSchema', () => { + it('should validate valid stats metadata', () => { + const valid = { + isIncremental: false, + lastFullIndex: new Date('2024-01-01'), + lastUpdate: new Date('2024-01-02'), + incrementalUpdatesSince: 0, + }; + + const result = StatsMetadataSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.isIncremental).toBe(false); + expect(result.data.lastFullIndex).toBeInstanceOf(Date); + } + }); + + it('should coerce date strings to Date objects', () => { + const valid = { + isIncremental: true, + lastFullIndex: '2024-01-01T00:00:00Z', + lastUpdate: '2024-01-02T00:00:00Z', + incrementalUpdatesSince: 3, + affectedLanguages: ['typescript', 'javascript'], + }; + + const result = StatsMetadataSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.lastFullIndex).toBeInstanceOf(Date); + expect(result.data.lastUpdate).toBeInstanceOf(Date); + expect(result.data.affectedLanguages).toEqual(['typescript', 'javascript']); + } + }); + + it('should allow optional warning field', () => { + const valid = { + isIncremental: false, + lastFullIndex: new Date(), + lastUpdate: new Date(), + incrementalUpdatesSince: 0, + warning: 'Stats may be stale', + }; + + const result = StatsMetadataSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.warning).toBe('Stats may be stale'); + } + }); +}); + +describe('IndexErrorSchema', () => { + it('should validate valid index error', () => { + const valid = { + type: 'scanner', + file: 'src/test.ts', + message: 'Parse error', + timestamp: new Date(), + }; + + const result = IndexErrorSchema.safeParse(valid); + expect(result.success).toBe(true); + }); + + it('should validate all error types', () => { + const types = ['scanner', 'embedder', 'storage', 'filesystem'] as const; 
+ + for (const type of types) { + const error = { + type, + message: 'Error occurred', + timestamp: new Date(), + }; + + const result = IndexErrorSchema.safeParse(error); + expect(result.success).toBe(true); + } + }); + + it('should reject invalid error types', () => { + const invalid = { + type: 'unknown', + message: 'Error occurred', + timestamp: new Date(), + }; + + const result = IndexErrorSchema.safeParse(invalid); + expect(result.success).toBe(false); + }); + + it('should allow optional file field', () => { + const valid = { + type: 'storage', + message: 'Storage error', + timestamp: new Date(), + }; + + const result = IndexErrorSchema.safeParse(valid); + expect(result.success).toBe(true); + }); +}); + +describe('IndexStatsSchema', () => { + it('should validate valid index stats', () => { + const valid = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01T00:00:00Z'), + endTime: new Date('2024-01-01T00:01:00Z'), + repositoryPath: '/path/to/repo', + }; + + const result = IndexStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.filesScanned).toBe(100); + expect(result.data.duration).toBe(5000); + } + }); + + it('should validate with stats metadata', () => { + const valid = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + statsMetadata: { + isIncremental: false, + lastFullIndex: new Date(), + lastUpdate: new Date(), + incrementalUpdatesSince: 0, + }, + }; + + const result = IndexStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.statsMetadata).toBeDefined(); + expect(result.data.statsMetadata?.isIncremental).toBe(false); + } + }); + + it('should validate with errors', () 
=> { + const valid = { + filesScanned: 100, + documentsExtracted: 498, + documentsIndexed: 498, + vectorsStored: 498, + duration: 5000, + errors: [ + { + type: 'scanner', + file: 'bad.ts', + message: 'Parse error', + timestamp: new Date(), + }, + { + type: 'embedder', + message: 'Embedding failed', + timestamp: new Date(), + }, + ], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + }; + + const result = IndexStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.errors).toHaveLength(2); + expect(result.data.errors[0].type).toBe('scanner'); + } + }); +}); + +describe('DetailedIndexStatsSchema', () => { + it('should validate detailed stats with all breakdowns', () => { + const valid = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + byLanguage: { + typescript: { files: 80, components: 400, lines: 10000 }, + javascript: { files: 20, components: 100, lines: 2000 }, + }, + byComponentType: { + function: 200, + class: 50, + interface: 100, + }, + byPackage: { + 'packages/core': { + name: '@my/core', + path: 'packages/core', + files: 50, + components: 250, + languages: { + typescript: 250, + }, + }, + }, + }; + + const result = DetailedIndexStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.byLanguage?.typescript.files).toBe(80); + expect(result.data.byComponentType?.function).toBe(200); + expect(result.data.byPackage?.['packages/core'].name).toBe('@my/core'); + } + }); + + it('should allow optional detail fields', () => { + const valid = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + }; + + 
const result = DetailedIndexStatsSchema.safeParse(valid); + expect(result.success).toBe(true); + }); +}); + +describe('FileMetadataSchema', () => { + it('should validate valid file metadata', () => { + const valid = { + path: 'src/index.ts', + hash: 'abc123', + lastModified: new Date('2024-01-01'), + lastIndexed: new Date('2024-01-02'), + documentIds: ['doc1', 'doc2'], + size: 1024, + language: 'typescript', + }; + + const result = FileMetadataSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.path).toBe('src/index.ts'); + expect(result.data.documentIds).toHaveLength(2); + } + }); + + it('should coerce date strings', () => { + const valid = { + path: 'src/index.ts', + hash: 'abc123', + lastModified: '2024-01-01T00:00:00Z', + lastIndexed: '2024-01-02T00:00:00Z', + documentIds: [], + size: 1024, + language: 'typescript', + }; + + const result = FileMetadataSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.lastModified).toBeInstanceOf(Date); + expect(result.data.lastIndexed).toBeInstanceOf(Date); + } + }); + + it('should reject negative size', () => { + const invalid = { + path: 'src/index.ts', + hash: 'abc123', + lastModified: new Date(), + lastIndexed: new Date(), + documentIds: [], + size: -1, + language: 'typescript', + }; + + const result = FileMetadataSchema.safeParse(invalid); + expect(result.success).toBe(false); + }); +}); + +describe('IndexerStateSchema', () => { + it('should validate valid indexer state', () => { + const valid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 384, + repositoryPath: '/path/to/repo', + lastIndexTime: new Date('2024-01-01'), + files: { + 'src/index.ts': { + path: 'src/index.ts', + hash: 'abc123', + lastModified: new Date('2024-01-01'), + lastIndexed: new Date('2024-01-01'), + documentIds: ['doc1'], + size: 1024, + language: 'typescript', + }, + }, + stats: { + totalFiles: 1, + 
totalDocuments: 1, + totalVectors: 1, + }, + }; + + const result = IndexerStateSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.version).toBe('1.0.0'); + expect(result.data.embeddingDimension).toBe(384); + expect(Object.keys(result.data.files)).toHaveLength(1); + } + }); + + it('should validate with detailed stats', () => { + const valid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 384, + repositoryPath: '/path/to/repo', + lastIndexTime: new Date(), + files: {}, + stats: { + totalFiles: 10, + totalDocuments: 100, + totalVectors: 100, + byLanguage: { + typescript: { files: 10, components: 100, lines: 5000 }, + }, + byComponentType: { + function: 50, + class: 25, + }, + }, + }; + + const result = IndexerStateSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.stats.byLanguage?.typescript.files).toBe(10); + expect(result.data.stats.byComponentType?.function).toBe(50); + } + }); + + it('should validate with incremental updates', () => { + const valid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 384, + repositoryPath: '/path/to/repo', + lastIndexTime: new Date('2024-01-01'), + lastUpdate: new Date('2024-01-02'), + incrementalUpdatesSince: 3, + files: {}, + stats: { + totalFiles: 10, + totalDocuments: 100, + totalVectors: 100, + }, + }; + + const result = IndexerStateSchema.safeParse(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.incrementalUpdatesSince).toBe(3); + expect(result.data.lastUpdate).toBeInstanceOf(Date); + } + }); + + it('should reject invalid embedding dimension', () => { + const invalid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 0, // Must be positive + repositoryPath: '/path/to/repo', + lastIndexTime: new Date(), + files: {}, + stats: { + totalFiles: 0, + totalDocuments: 0, + 
totalVectors: 0, + }, + }; + + const result = IndexerStateSchema.safeParse(invalid); + expect(result.success).toBe(false); + }); +}); diff --git a/packages/core/src/indexer/schemas/__tests__/validation.test.ts b/packages/core/src/indexer/schemas/__tests__/validation.test.ts new file mode 100644 index 0000000..fc996c3 --- /dev/null +++ b/packages/core/src/indexer/schemas/__tests__/validation.test.ts @@ -0,0 +1,367 @@ +/** + * Tests for indexer validation utilities + */ + +import { describe, expect, it } from 'vitest'; +import { + assertDetailedIndexStats, + assertIndexerState, + validateDetailedIndexStats, + validateFileMetadata, + validateIndexerState, + validateIndexStats, + validateLanguageStats, + validatePackageStats, + validateStatsMetadata, +} from '../validation'; + +describe('validateLanguageStats', () => { + it('should return success for valid stats', () => { + const valid = { + files: 10, + components: 100, + lines: 5000, + }; + + const result = validateLanguageStats(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual(valid); + } + }); + + it('should return error for invalid stats', () => { + const invalid = { + files: -1, + components: 100, + lines: 5000, + }; + + const result = validateLanguageStats(invalid); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toContain('Invalid language stats'); + expect(result.details).toBeDefined(); + } + }); +}); + +describe('validatePackageStats', () => { + it('should return success for valid stats', () => { + const valid = { + name: '@my/package', + path: 'packages/my-package', + files: 50, + components: 200, + languages: {}, + }; + + const result = validatePackageStats(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.name).toBe('@my/package'); + } + }); + + it('should return error for empty name', () => { + const invalid = { + name: '', + path: 'packages/my-package', + files: 50, + components: 
200, + }; + + const result = validatePackageStats(invalid); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toContain('Invalid package stats'); + } + }); +}); + +describe('validateStatsMetadata', () => { + it('should return success for valid metadata', () => { + const valid = { + isIncremental: false, + lastFullIndex: new Date('2024-01-01'), + lastUpdate: new Date('2024-01-02'), + incrementalUpdatesSince: 0, + }; + + const result = validateStatsMetadata(valid); + expect(result.success).toBe(true); + }); + + it('should return error for missing fields', () => { + const invalid = { + isIncremental: false, + lastFullIndex: new Date(), + // missing lastUpdate + incrementalUpdatesSince: 0, + }; + + const result = validateStatsMetadata(invalid); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toContain('Invalid stats metadata'); + } + }); +}); + +describe('validateIndexStats', () => { + it('should return success for valid stats', () => { + const valid = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + }; + + const result = validateIndexStats(valid); + expect(result.success).toBe(true); + }); + + it('should return error for negative numbers', () => { + const invalid = { + filesScanned: -1, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + }; + + const result = validateIndexStats(invalid); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toContain('Invalid index stats'); + expect(result.error).toContain('filesScanned'); + } + }); +}); + +describe('validateDetailedIndexStats', () => { + it('should return success for valid detailed stats', () => { + const valid = { 
+ filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + byLanguage: { + typescript: { files: 80, components: 400, lines: 10000 }, + }, + byComponentType: { + function: 200, + }, + }; + + const result = validateDetailedIndexStats(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.byLanguage?.typescript.files).toBe(80); + } + }); + + it('should return error for invalid language stats', () => { + const invalid = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + byLanguage: { + typescript: { files: -1, components: 400, lines: 10000 }, + }, + }; + + const result = validateDetailedIndexStats(invalid); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toContain('Invalid detailed index stats'); + } + }); +}); + +describe('validateFileMetadata', () => { + it('should return success for valid metadata', () => { + const valid = { + path: 'src/index.ts', + hash: 'abc123', + lastModified: new Date(), + lastIndexed: new Date(), + documentIds: ['doc1'], + size: 1024, + language: 'typescript', + }; + + const result = validateFileMetadata(valid); + expect(result.success).toBe(true); + }); + + it('should coerce date strings', () => { + const valid = { + path: 'src/index.ts', + hash: 'abc123', + lastModified: '2024-01-01T00:00:00Z', + lastIndexed: '2024-01-02T00:00:00Z', + documentIds: [], + size: 1024, + language: 'typescript', + }; + + const result = validateFileMetadata(valid); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.lastModified).toBeInstanceOf(Date); + } + }); + + it('should return error for negative size', () => { + const invalid = { + path: 
'src/index.ts', + hash: 'abc123', + lastModified: new Date(), + lastIndexed: new Date(), + documentIds: [], + size: -1, + language: 'typescript', + }; + + const result = validateFileMetadata(invalid); + expect(result.success).toBe(false); + }); +}); + +describe('validateIndexerState', () => { + it('should return success for valid state', () => { + const valid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 384, + repositoryPath: '/path/to/repo', + lastIndexTime: new Date(), + files: {}, + stats: { + totalFiles: 0, + totalDocuments: 0, + totalVectors: 0, + }, + }; + + const result = validateIndexerState(valid); + expect(result.success).toBe(true); + }); + + it('should return error for invalid dimension', () => { + const invalid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 0, + repositoryPath: '/path/to/repo', + lastIndexTime: new Date(), + files: {}, + stats: { + totalFiles: 0, + totalDocuments: 0, + totalVectors: 0, + }, + }; + + const result = validateIndexerState(invalid); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toContain('Invalid indexer state'); + } + }); +}); + +describe('assertDetailedIndexStats', () => { + it('should return data for valid stats', () => { + const valid = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + }; + + const result = assertDetailedIndexStats(valid); + expect(result.filesScanned).toBe(100); + }); + + it('should throw for invalid stats', () => { + const invalid = { + filesScanned: -1, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date(), + endTime: new Date(), + repositoryPath: '/path/to/repo', + }; + + expect(() => assertDetailedIndexStats(invalid)).toThrow(); + }); +}); + 
+describe('assertIndexerState', () => { + it('should return data for valid state', () => { + const valid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 384, + repositoryPath: '/path/to/repo', + lastIndexTime: new Date(), + files: {}, + stats: { + totalFiles: 0, + totalDocuments: 0, + totalVectors: 0, + }, + }; + + const result = assertIndexerState(valid); + expect(result.version).toBe('1.0.0'); + }); + + it('should throw for invalid state', () => { + const invalid = { + version: '1.0.0', + embeddingModel: 'all-MiniLM-L6-v2', + embeddingDimension: 0, // Invalid + repositoryPath: '/path/to/repo', + lastIndexTime: new Date(), + files: {}, + stats: { + totalFiles: 0, + totalDocuments: 0, + totalVectors: 0, + }, + }; + + expect(() => assertIndexerState(invalid)).toThrow(); + }); +}); diff --git a/packages/core/src/indexer/schemas/index.ts b/packages/core/src/indexer/schemas/index.ts new file mode 100644 index 0000000..7d8dcdf --- /dev/null +++ b/packages/core/src/indexer/schemas/index.ts @@ -0,0 +1,7 @@ +/** + * Indexer schemas and validation + * Centralized exports for all stats-related schemas and validators + */ + +export * from './stats.js'; +export * from './validation.js'; diff --git a/packages/core/src/indexer/schemas/stats.ts b/packages/core/src/indexer/schemas/stats.ts new file mode 100644 index 0000000..a2805bf --- /dev/null +++ b/packages/core/src/indexer/schemas/stats.ts @@ -0,0 +1,222 @@ +/** + * Zod schemas for indexer statistics + * Provides runtime validation and type inference for all stats types + */ + +import { z } from 'zod'; + +/** + * Supported languages for detailed statistics + */ +export const SupportedLanguageSchema = z.enum(['typescript', 'javascript', 'go', 'markdown']); + +/** + * Statistics for a specific language + */ +export const LanguageStatsSchema = z.object({ + /** Number of files in this language */ + files: z.number().int().nonnegative(), + + /** Number of components extracted from this language 
*/ + components: z.number().int().nonnegative(), + + /** Total lines of code (approximate from component ranges) */ + lines: z.number().int().nonnegative(), + + /** Average commits per file (change frequency) */ + avgCommitsPerFile: z.number().nonnegative().optional(), + + /** Most recently modified file timestamp */ + lastModified: z.coerce.date().optional(), +}); + +/** + * Statistics for a package/module in a monorepo + */ +export const PackageStatsSchema = z.object({ + /** Package name (from package.json or go.mod) */ + name: z.string().min(1), + + /** Package path relative to repository root */ + path: z.string().min(1), + + /** Number of files in this package */ + files: z.number().int().nonnegative(), + + /** Number of components in this package */ + components: z.number().int().nonnegative(), + + /** Language breakdown within this package - Partial allows any subset of languages */ + languages: z.record(z.string(), z.number().int().nonnegative()), + + /** Total commits affecting this package */ + totalCommits: z.number().int().nonnegative().optional(), + + /** Most recently modified file timestamp */ + lastModified: z.coerce.date().optional(), +}); + +/** + * Metadata about the freshness and source of statistics + */ +export const StatsMetadataSchema = z.object({ + /** Whether this is from an incremental update (vs full index) */ + isIncremental: z.boolean(), + + /** Timestamp of the last full index */ + lastFullIndex: z.coerce.date(), + + /** Timestamp of the last update (full or incremental) */ + lastUpdate: z.coerce.date(), + + /** Number of incremental updates since last full index */ + incrementalUpdatesSince: z.number().int().nonnegative(), + + /** Languages affected by this update (only set for incremental updates) */ + affectedLanguages: z.array(SupportedLanguageSchema).optional(), + + /** Warning message if stats may be stale */ + warning: z.string().optional(), +}); + +/** + * Error during indexing + */ +export const IndexErrorSchema = z.object({ 
+ /** Type of error */ + type: z.enum(['scanner', 'embedder', 'storage', 'filesystem']), + + /** File that caused the error (if applicable) */ + file: z.string().optional(), + + /** Error message */ + message: z.string(), + + /** Timestamp when error occurred */ + timestamp: z.coerce.date(), +}); + +/** + * Base statistics from an indexing operation + */ +export const IndexStatsSchema = z.object({ + /** Number of files scanned */ + filesScanned: z.number().int().nonnegative(), + + /** Number of documents extracted */ + documentsExtracted: z.number().int().nonnegative(), + + /** Number of documents indexed (embedded + stored) */ + documentsIndexed: z.number().int().nonnegative(), + + /** Number of vectors stored */ + vectorsStored: z.number().int().nonnegative(), + + /** Duration in milliseconds */ + duration: z.number().nonnegative(), + + /** Errors encountered during indexing */ + errors: z.array(IndexErrorSchema), + + /** Timestamp when indexing started */ + startTime: z.coerce.date(), + + /** Timestamp when indexing completed */ + endTime: z.coerce.date(), + + /** Repository path that was indexed */ + repositoryPath: z.string().min(1), + + /** Metadata about stats freshness and source */ + statsMetadata: StatsMetadataSchema.optional(), +}); + +/** + * Detailed statistics with language, component type, and package breakdowns + */ +export const DetailedIndexStatsSchema = IndexStatsSchema.extend({ + /** Statistics broken down by language - partial record allows any supported language */ + byLanguage: z.record(z.string(), LanguageStatsSchema).optional(), + + /** Statistics broken down by component type */ + byComponentType: z.record(z.string(), z.number().int().nonnegative()).optional(), + + /** Statistics broken down by package (for monorepos) */ + byPackage: z.record(z.string(), PackageStatsSchema).optional(), +}); + +/** + * Metadata tracked for each indexed file + */ +export const FileMetadataSchema = z.object({ + /** File path relative to repository root */ + 
path: z.string().min(1), + + /** Content hash (for change detection) */ + hash: z.string().min(1), + + /** Last modified timestamp */ + lastModified: z.coerce.date(), + + /** Last indexed timestamp */ + lastIndexed: z.coerce.date(), + + /** Document IDs extracted from this file */ + documentIds: z.array(z.string()), + + /** File size in bytes */ + size: z.number().int().nonnegative(), + + /** Language detected */ + language: z.string().min(1), +}); + +/** + * Indexer state persisted to disk + */ +export const IndexerStateSchema = z.object({ + /** Version of the indexer (for compatibility) */ + version: z.string().min(1), + + /** Embedding model used */ + embeddingModel: z.string().min(1), + + /** Embedding dimension */ + embeddingDimension: z.number().int().positive(), + + /** Repository path */ + repositoryPath: z.string().min(1), + + /** Last full index timestamp */ + lastIndexTime: z.coerce.date(), + + /** Last update timestamp (full or incremental) */ + lastUpdate: z.coerce.date().optional(), + + /** Number of incremental updates since last full index */ + incrementalUpdatesSince: z.number().int().nonnegative().optional(), + + /** File metadata map (path -> metadata) */ + files: z.record(z.string(), FileMetadataSchema), + + /** Total statistics */ + stats: z.object({ + totalFiles: z.number().int().nonnegative(), + totalDocuments: z.number().int().nonnegative(), + totalVectors: z.number().int().nonnegative(), + byLanguage: z.record(z.string(), LanguageStatsSchema).optional(), + byComponentType: z.record(z.string(), z.number().int().nonnegative()).optional(), + byPackage: z.record(z.string(), PackageStatsSchema).optional(), + }), +}); + +// Type inference from schemas +export type LanguageStats = z.infer; +export type PackageStats = z.infer; +export type StatsMetadata = z.infer; +export type IndexError = z.infer; +export type IndexStats = z.infer; +export type DetailedIndexStats = z.infer; +export type FileMetadata = z.infer; +export type IndexerState = z.infer; 
+export type SupportedLanguage = z.infer; diff --git a/packages/core/src/indexer/schemas/validation.ts b/packages/core/src/indexer/schemas/validation.ts new file mode 100644 index 0000000..986fe33 --- /dev/null +++ b/packages/core/src/indexer/schemas/validation.ts @@ -0,0 +1,160 @@ +/** + * Validation utilities for indexer statistics + * Provides safe, type-checked validation with helpful error messages + */ + +import type { ZodError } from 'zod'; +import type { + DetailedIndexStats, + FileMetadata, + IndexerState, + IndexStats, + LanguageStats, + PackageStats, + StatsMetadata, +} from './stats.js'; +import { + DetailedIndexStatsSchema, + FileMetadataSchema, + IndexerStateSchema, + IndexStatsSchema, + LanguageStatsSchema, + PackageStatsSchema, + StatsMetadataSchema, +} from './stats.js'; + +/** + * Validation result type + */ +export type ValidationResult = + | { success: true; data: T } + | { success: false; error: string; details: ZodError }; + +/** + * Validate LanguageStats + */ +export function validateLanguageStats(data: unknown): ValidationResult { + const result = LanguageStatsSchema.safeParse(data); + if (result.success) { + return { success: true, data: result.data }; + } + return { + success: false, + error: `Invalid language stats: ${result.error.message}`, + details: result.error, + }; +} + +/** + * Validate PackageStats + */ +export function validatePackageStats(data: unknown): ValidationResult { + const result = PackageStatsSchema.safeParse(data); + if (result.success) { + return { success: true, data: result.data }; + } + return { + success: false, + error: `Invalid package stats: ${result.error.message}`, + details: result.error, + }; +} + +/** + * Validate StatsMetadata + */ +export function validateStatsMetadata(data: unknown): ValidationResult { + const result = StatsMetadataSchema.safeParse(data); + if (result.success) { + return { success: true, data: result.data }; + } + return { + success: false, + error: `Invalid stats metadata: 
${result.error.message}`, + details: result.error, + }; +} + +/** + * Validate IndexStats + */ +export function validateIndexStats(data: unknown): ValidationResult { + const result = IndexStatsSchema.safeParse(data); + if (result.success) { + return { success: true, data: result.data }; + } + return { + success: false, + error: `Invalid index stats: ${result.error.message}`, + details: result.error, + }; +} + +/** + * Validate DetailedIndexStats + */ +export function validateDetailedIndexStats(data: unknown): ValidationResult { + const result = DetailedIndexStatsSchema.safeParse(data); + if (result.success) { + return { success: true, data: result.data }; + } + return { + success: false, + error: `Invalid detailed index stats: ${result.error.message}`, + details: result.error, + }; +} + +/** + * Validate FileMetadata + */ +export function validateFileMetadata(data: unknown): ValidationResult { + const result = FileMetadataSchema.safeParse(data); + if (result.success) { + return { success: true, data: result.data }; + } + return { + success: false, + error: `Invalid file metadata: ${result.error.message}`, + details: result.error, + }; +} + +/** + * Validate IndexerState + */ +export function validateIndexerState(data: unknown): ValidationResult { + const result = IndexerStateSchema.safeParse(data); + if (result.success) { + return { success: true, data: result.data }; + } + return { + success: false, + error: `Invalid indexer state: ${result.error.message}`, + details: result.error, + }; +} + +/** + * Validate and coerce unknown data to DetailedIndexStats + * Throws on validation failure (for use in trusted contexts) + */ +export function assertDetailedIndexStats(data: unknown): DetailedIndexStats { + const result = validateDetailedIndexStats(data); + if (!result.success) { + throw new Error(result.error); + } + return result.data; +} + +/** + * Validate and coerce unknown data to IndexerState + * Throws on validation failure (for use in trusted contexts) + */ 
+export function assertIndexerState(data: unknown): IndexerState { + const result = validateIndexerState(data); + if (!result.success) { + throw new Error(result.error); + } + return result.data; +} diff --git a/packages/core/src/indexer/types.ts b/packages/core/src/indexer/types.ts index 59fdb4d..6b049de 100644 --- a/packages/core/src/indexer/types.ts +++ b/packages/core/src/indexer/types.ts @@ -156,6 +156,12 @@ export interface LanguageStats { /** Total lines of code (approximate from component ranges) */ lines: number; + + /** Average commits per file (change frequency) */ + avgCommitsPerFile?: number; + + /** Most recently modified file timestamp */ + lastModified?: Date; } /** @@ -176,6 +182,12 @@ export interface PackageStats { /** Language breakdown within this package */ languages: Partial>; + + /** Total commits affecting this package */ + totalCommits?: number; + + /** Most recently modified file timestamp */ + lastModified?: Date; } /** diff --git a/packages/core/src/indexer/utils/__tests__/change-frequency.test.ts b/packages/core/src/indexer/utils/__tests__/change-frequency.test.ts new file mode 100644 index 0000000..d458ba5 --- /dev/null +++ b/packages/core/src/indexer/utils/__tests__/change-frequency.test.ts @@ -0,0 +1,124 @@ +/** + * Tests for change frequency tracking + */ + +import { describe, expect, it } from 'vitest'; +import type { FileChangeFrequency } from '../change-frequency'; +import { aggregateChangeFrequency } from '../change-frequency'; + +describe('aggregateChangeFrequency', () => { + it('should calculate total commits and average', () => { + const frequencies = new Map([ + [ + 'src/index.ts', + { + filePath: 'src/index.ts', + commitCount: 10, + lastModified: new Date('2024-01-10'), + authorCount: 3, + }, + ], + [ + 'src/utils.ts', + { + filePath: 'src/utils.ts', + commitCount: 20, + lastModified: new Date('2024-01-15'), + authorCount: 2, + }, + ], + ]); + + const result = aggregateChangeFrequency(frequencies); + + 
expect(result.totalCommits).toBe(30); + expect(result.avgCommitsPerFile).toBe(15); + expect(result.lastModified).toEqual(new Date('2024-01-15')); + }); + + it('should filter by path prefix', () => { + const frequencies = new Map([ + [ + 'packages/core/src/index.ts', + { + filePath: 'packages/core/src/index.ts', + commitCount: 10, + lastModified: new Date('2024-01-10'), + authorCount: 3, + }, + ], + [ + 'packages/cli/src/index.ts', + { + filePath: 'packages/cli/src/index.ts', + commitCount: 20, + lastModified: new Date('2024-01-15'), + authorCount: 2, + }, + ], + ]); + + const result = aggregateChangeFrequency(frequencies, 'packages/core/'); + + expect(result.totalCommits).toBe(10); + expect(result.avgCommitsPerFile).toBe(10); + expect(result.lastModified).toEqual(new Date('2024-01-10')); + }); + + it('should handle empty frequencies', () => { + const frequencies = new Map(); + + const result = aggregateChangeFrequency(frequencies); + + expect(result.totalCommits).toBe(0); + expect(result.avgCommitsPerFile).toBe(0); + expect(result.lastModified).toBeNull(); + }); + + it('should handle single file', () => { + const frequencies = new Map([ + [ + 'README.md', + { + filePath: 'README.md', + commitCount: 5, + lastModified: new Date('2024-01-01'), + authorCount: 1, + }, + ], + ]); + + const result = aggregateChangeFrequency(frequencies); + + expect(result.totalCommits).toBe(5); + expect(result.avgCommitsPerFile).toBe(5); + expect(result.lastModified).toEqual(new Date('2024-01-01')); + }); + + it('should find most recent modification across files', () => { + const frequencies = new Map([ + [ + 'old.ts', + { + filePath: 'old.ts', + commitCount: 50, + lastModified: new Date('2023-01-01'), + authorCount: 5, + }, + ], + [ + 'new.ts', + { + filePath: 'new.ts', + commitCount: 2, + lastModified: new Date('2024-12-01'), + authorCount: 1, + }, + ], + ]); + + const result = aggregateChangeFrequency(frequencies); + + expect(result.lastModified).toEqual(new Date('2024-12-01')); + }); 
+}); diff --git a/packages/core/src/indexer/utils/__tests__/comparison.test.ts b/packages/core/src/indexer/utils/__tests__/comparison.test.ts new file mode 100644 index 0000000..4efa93f --- /dev/null +++ b/packages/core/src/indexer/utils/__tests__/comparison.test.ts @@ -0,0 +1,411 @@ +/** + * Tests for stats comparison utilities + */ + +import { describe, expect, it } from 'vitest'; +import type { DetailedIndexStats } from '../../types'; +import { compareStats, formatDiffSummary } from '../comparison'; + +describe('compareStats', () => { + it('should calculate file count changes', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + }; + + const after: DetailedIndexStats = { + ...before, + filesScanned: 120, + endTime: new Date('2024-01-02'), + }; + + const diff = compareStats(before, after); + + expect(diff.files.before).toBe(100); + expect(diff.files.after).toBe(120); + expect(diff.files.absolute).toBe(20); + expect(diff.files.percent).toBe(20); + }); + + it('should calculate language changes', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byLanguage: { + typescript: { files: 80, components: 400, lines: 10000 }, + javascript: { files: 20, components: 100, lines: 2000 }, + }, + }; + + const after: DetailedIndexStats = { + ...before, + endTime: new Date('2024-01-02'), + byLanguage: { + typescript: { files: 90, components: 450, lines: 11000 }, + javascript: { files: 20, components: 100, lines: 2000 }, + go: { files: 10, components: 50, lines: 1500 }, + }, + }; + + const diff = compareStats(before, after); + + // TypeScript 
grew + expect(diff.languages.typescript.files.absolute).toBe(10); + expect(diff.languages.typescript.lines.absolute).toBe(1000); + + // JavaScript stayed the same + expect(diff.languages.javascript.files.absolute).toBe(0); + + // Go was added + expect(diff.languages.go.files.before).toBe(0); + expect(diff.languages.go.files.after).toBe(10); + expect(diff.summary.languagesAdded).toContain('go'); + }); + + it('should detect language removal', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byLanguage: { + typescript: { files: 80, components: 400, lines: 10000 }, + javascript: { files: 20, components: 100, lines: 2000 }, + }, + }; + + const after: DetailedIndexStats = { + ...before, + endTime: new Date('2024-01-02'), + byLanguage: { + typescript: { files: 100, components: 500, lines: 12000 }, + }, + }; + + const diff = compareStats(before, after); + + expect(diff.summary.languagesRemoved).toContain('javascript'); + expect(diff.languages.javascript.files.after).toBe(0); + }); + + it('should calculate package changes', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byPackage: { + 'packages/core': { + name: '@test/core', + path: 'packages/core', + files: 50, + components: 250, + languages: { typescript: 250 }, + }, + }, + }; + + const after: DetailedIndexStats = { + ...before, + endTime: new Date('2024-01-02'), + byPackage: { + 'packages/core': { + name: '@test/core', + path: 'packages/core', + files: 60, + components: 300, + languages: { typescript: 300 }, + }, + 'packages/cli': { + name: '@test/cli', + path: 
'packages/cli', + files: 30, + components: 150, + languages: { typescript: 150 }, + }, + }, + }; + + const diff = compareStats(before, after); + + // Core package grew + expect(diff.packages['packages/core'].files.absolute).toBe(10); + expect(diff.packages['packages/core'].components.absolute).toBe(50); + + // CLI package was added + expect(diff.packages['packages/cli'].files.before).toBe(0); + expect(diff.packages['packages/cli'].files.after).toBe(30); + expect(diff.summary.packagesAdded).toContain('packages/cli'); + }); + + it('should calculate component type changes', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byComponentType: { + function: 200, + class: 50, + interface: 100, + }, + }; + + const after: DetailedIndexStats = { + ...before, + endTime: new Date('2024-01-02'), + byComponentType: { + function: 220, + class: 60, + interface: 100, + type: 30, + }, + }; + + const diff = compareStats(before, after); + + expect(diff.componentTypes.function.absolute).toBe(20); + expect(diff.componentTypes.class.absolute).toBe(10); + expect(diff.componentTypes.interface.absolute).toBe(0); + expect(diff.componentTypes.type.before).toBe(0); + expect(diff.componentTypes.type.after).toBe(30); + }); + + it('should calculate negative changes (shrinking codebase)', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + }; + + const after: DetailedIndexStats = { + ...before, + filesScanned: 80, + documentsIndexed: 400, + endTime: new Date('2024-01-02'), + }; + + const diff = compareStats(before, after); + + 
expect(diff.files.absolute).toBe(-20); + expect(diff.files.percent).toBe(-20); + expect(diff.documents.absolute).toBe(-100); + expect(diff.summary.overallTrend).toBe('shrinking'); + }); + + it('should calculate time delta', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01T00:00:00Z'), + endTime: new Date('2024-01-01T00:00:00Z'), + repositoryPath: '/test', + }; + + const after: DetailedIndexStats = { + ...before, + endTime: new Date('2024-01-02T00:00:00Z'), + }; + + const diff = compareStats(before, after); + + expect(diff.timeDelta).toBe(86400000); // 24 hours in ms + }); + + it('should determine overall trend as growing', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + }; + + const after: DetailedIndexStats = { + ...before, + filesScanned: 130, + endTime: new Date('2024-01-02'), + }; + + const diff = compareStats(before, after); + + expect(diff.summary.overallTrend).toBe('growing'); + }); + + it('should determine overall trend as stable', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + }; + + const after: DetailedIndexStats = { + ...before, + filesScanned: 105, + endTime: new Date('2024-01-02'), + }; + + const diff = compareStats(before, after); + + expect(diff.summary.overallTrend).toBe('stable'); + }); + + it('should handle change frequency in language stats', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + 
documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byLanguage: { + typescript: { + files: 80, + components: 400, + lines: 10000, + avgCommitsPerFile: 5.5, + }, + }, + }; + + const after: DetailedIndexStats = { + ...before, + endTime: new Date('2024-01-02'), + byLanguage: { + typescript: { + files: 80, + components: 400, + lines: 10000, + avgCommitsPerFile: 6.2, + }, + }, + }; + + const diff = compareStats(before, after); + + expect(diff.languages.typescript.avgCommitsPerFile).toBeDefined(); + expect(diff.languages.typescript.avgCommitsPerFile?.absolute).toBeCloseTo(0.7, 1); + }); +}); + +describe('formatDiffSummary', () => { + it('should format a growing codebase summary', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byLanguage: { + typescript: { files: 100, components: 500, lines: 10000 }, + }, + }; + + const after: DetailedIndexStats = { + ...before, + filesScanned: 120, + endTime: new Date('2024-01-02'), + byLanguage: { + typescript: { files: 110, components: 550, lines: 11000 }, + go: { files: 10, components: 50, lines: 1500 }, + }, + }; + + const diff = compareStats(before, after); + const summary = formatDiffSummary(diff); + + expect(summary).toContain('added 20 files'); + expect(summary).toContain('added 2,500 lines'); + expect(summary).toContain('new languages: go'); + expect(summary).toContain('trend: growing'); + }); + + it('should format a shrinking codebase summary', () => { + const before: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new 
Date('2024-01-01'), + repositoryPath: '/test', + byLanguage: { + typescript: { files: 100, components: 500, lines: 10000 }, + }, + }; + + const after: DetailedIndexStats = { + ...before, + filesScanned: 70, + endTime: new Date('2024-01-02'), + byLanguage: { + typescript: { files: 70, components: 350, lines: 7000 }, + }, + }; + + const diff = compareStats(before, after); + const summary = formatDiffSummary(diff); + + expect(summary).toContain('removed 30 files'); + expect(summary).toContain('removed 3,000 lines'); + expect(summary).toContain('trend: shrinking'); + }); +}); diff --git a/packages/core/src/indexer/utils/__tests__/export.test.ts b/packages/core/src/indexer/utils/__tests__/export.test.ts new file mode 100644 index 0000000..f6f4482 --- /dev/null +++ b/packages/core/src/indexer/utils/__tests__/export.test.ts @@ -0,0 +1,244 @@ +/** + * Tests for stats export utilities + */ + +import { describe, expect, it } from 'vitest'; +import type { DetailedIndexStats } from '../../types'; +import { + exportLanguageStatsAsMarkdown, + exportPackageStatsAsMarkdown, + exportStatsAsCsv, + exportStatsAsJson, +} from '../export'; + +describe('exportStatsAsJson', () => { + it('should export basic stats as JSON', () => { + const stats: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + }; + + const json = exportStatsAsJson(stats); + const parsed = JSON.parse(json); + + expect(parsed.filesScanned).toBe(100); + expect(parsed.documentsIndexed).toBe(500); + expect(parsed.repositoryPath).toBe('/test'); + }); + + it('should include metadata when present', () => { + const stats: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new 
Date('2024-01-01'), + repositoryPath: '/test', + statsMetadata: { + isIncremental: false, + lastFullIndex: new Date('2024-01-01'), + lastUpdate: new Date('2024-01-01'), + incrementalUpdatesSince: 0, + }, + }; + + const json = exportStatsAsJson(stats); + const parsed = JSON.parse(json); + + expect(parsed.metadata).toBeDefined(); + expect(parsed.metadata.isIncremental).toBe(false); + }); + + it('should include details when present', () => { + const stats: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byLanguage: { + typescript: { files: 80, components: 400, lines: 10000 }, + }, + }; + + const json = exportStatsAsJson(stats); + const parsed = JSON.parse(json); + + expect(parsed.byLanguage).toBeDefined(); + expect(parsed.byLanguage.typescript.files).toBe(80); + }); + + it('should support pretty option', () => { + const stats: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + }; + + const compact = exportStatsAsJson(stats, { pretty: false }); + const pretty = exportStatsAsJson(stats, { pretty: true }); + + expect(compact.includes('\n')).toBe(false); + expect(pretty.includes('\n')).toBe(true); + }); +}); + +describe('exportStatsAsCsv', () => { + it('should export stats as CSV', () => { + const stats: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + }; + + const csv = exportStatsAsCsv(stats); + const lines = csv.split('\n'); + + 
expect(lines[0]).toBe('category,subcategory,metric,value'); + expect(lines).toContain('overview,files,total,100'); + expect(lines).toContain('overview,documents,total,500'); + }); + + it('should include language stats in CSV', () => { + const stats: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byLanguage: { + typescript: { + files: 80, + components: 400, + lines: 10000, + avgCommitsPerFile: 5.5, + }, + }, + }; + + const csv = exportStatsAsCsv(stats); + + expect(csv).toContain('language,typescript,files,80'); + expect(csv).toContain('language,typescript,lines,10000'); + expect(csv).toContain('language,typescript,avg_commits_per_file,5.5'); + }); + + it('should include package stats in CSV', () => { + const stats: DetailedIndexStats = { + filesScanned: 100, + documentsExtracted: 500, + documentsIndexed: 500, + vectorsStored: 500, + duration: 5000, + errors: [], + startTime: new Date('2024-01-01'), + endTime: new Date('2024-01-01'), + repositoryPath: '/test', + byPackage: { + 'packages/core': { + name: '@test/core', + path: 'packages/core', + files: 50, + components: 250, + languages: {}, + totalCommits: 125, + }, + }, + }; + + const csv = exportStatsAsCsv(stats); + + expect(csv).toContain('package,@test/core,files,50'); + expect(csv).toContain('package,@test/core,total_commits,125'); + }); +}); + +describe('exportLanguageStatsAsMarkdown', () => { + it('should export language stats as markdown table', () => { + const byLanguage = { + typescript: { + files: 80, + components: 400, + lines: 10000, + avgCommitsPerFile: 5.5, + }, + javascript: { + files: 20, + components: 100, + lines: 2000, + }, + }; + + const markdown = exportLanguageStatsAsMarkdown(byLanguage); + const lines = markdown.split('\n'); + + expect(lines[0]).toContain('| Language |'); + 
expect(lines[1]).toContain('|----------|'); + expect(lines[2]).toContain('| typescript |'); + expect(lines[2]).toContain('| 80 |'); + expect(lines[2]).toContain('| 5.5 |'); + expect(lines[3]).toContain('| javascript |'); + expect(lines[3]).toContain('| N/A |'); + }); +}); + +describe('exportPackageStatsAsMarkdown', () => { + it('should export package stats as markdown table', () => { + const byPackage = { + 'packages/core': { + name: '@test/core', + path: 'packages/core', + files: 50, + components: 250, + languages: {}, + totalCommits: 125, + }, + 'packages/cli': { + name: '@test/cli', + path: 'packages/cli', + files: 30, + components: 150, + languages: {}, + }, + }; + + const markdown = exportPackageStatsAsMarkdown(byPackage); + const lines = markdown.split('\n'); + + expect(lines[0]).toContain('| Package |'); + expect(lines[2]).toContain('| @test/core |'); + expect(lines[2]).toContain('| 125 |'); + expect(lines[3]).toContain('| @test/cli |'); + expect(lines[3]).toContain('| N/A |'); + }); +}); diff --git a/packages/core/src/indexer/utils/change-frequency.ts b/packages/core/src/indexer/utils/change-frequency.ts new file mode 100644 index 0000000..98b01ee --- /dev/null +++ b/packages/core/src/indexer/utils/change-frequency.ts @@ -0,0 +1,178 @@ +/** + * Change Frequency Tracker + * + * Calculates git commit frequency for files and packages to show + * which parts of the codebase change most often. 
/**
 * Change Frequency Tracker
 *
 * Calculates git commit frequency for files and packages to show
 * which parts of the codebase change most often.
 */

import { execFileSync } from 'node:child_process';

/**
 * File change frequency data
 */
export interface FileChangeFrequency {
  /** File path relative to repository root */
  filePath: string;

  /** Total commits touching this file */
  commitCount: number;

  /** Last modification timestamp */
  lastModified: Date;

  /** Number of authors who modified this file */
  authorCount: number;
}

/**
 * Options for calculating change frequency
 */
export interface ChangeFrequencyOptions {
  /** Repository path */
  repositoryPath: string;

  /** Only count commits since this date */
  since?: Date;

  /** Maximum number of commits to analyze (default: 1000) */
  maxCommits?: number;
}

/** Cap on captured git output; Node's 1 MiB default is easy to exceed with `--name-only` logs. */
const GIT_MAX_BUFFER = 64 * 1024 * 1024;

/**
 * Prefix of commit lines in the log output. The log format starts with `%x00`,
 * which git expands to a NUL byte; a file path can never contain NUL, so commit
 * lines cannot be confused with paths (unlike matching 40 hex characters).
 */
const COMMIT_LINE_PREFIX = '\0';

/**
 * Run git without a shell and return its stdout.
 *
 * Arguments are passed as an argv array, so file names containing quotes,
 * `$`, backticks or spaces are never interpreted by a shell.
 *
 * @throws when git is missing, exits non-zero, or `repositoryPath` does not exist
 */
function runGit(repositoryPath: string, args: readonly string[]): string {
  return execFileSync('git', [...args], {
    cwd: repositoryPath,
    encoding: 'utf-8',
    maxBuffer: GIT_MAX_BUFFER,
    stdio: ['pipe', 'pipe', 'ignore'], // Suppress stderr
  });
}

/**
 * Calculate change frequency for all tracked files in a repository
 *
 * Counts, per file, how many of the most recent `maxCommits` commits
 * (optionally limited to those after `since`) added, modified, copied or
 * renamed it. Best effort: if git fails, whatever was collected so far
 * (possibly nothing) is returned instead of throwing.
 *
 * @param options - Repository location and log window
 * @returns Map from repository-relative path to its frequency data
 */
export async function calculateChangeFrequency(
  options: ChangeFrequencyOptions
): Promise<Map<string, FileChangeFrequency>> {
  const { repositoryPath, since, maxCommits = 1000 } = options;

  const frequencies = new Map<string, FileChangeFrequency>();

  try {
    const args = [
      // Keep non-ASCII paths verbatim instead of octal-escaped and quoted.
      '-c',
      'core.quotePath=false',
      'log',
      `--max-count=${maxCommits}`,
      '--pretty=format:%x00%H',
      '--name-only',
      '--diff-filter=AMCR', // Added, Modified, Copied, Renamed
    ];

    if (since) {
      args.push(`--since=${since.toISOString()}`);
    }

    const output = runGit(repositoryPath, args);

    for (const rawLine of output.split('\n')) {
      const filePath = rawLine.replace(/\r$/, '');

      // Skip the blank separators between commits and the commit lines themselves.
      if (filePath.trim() === '' || filePath.startsWith(COMMIT_LINE_PREFIX)) {
        continue;
      }

      const existing = frequencies.get(filePath);
      if (existing) {
        existing.commitCount++;
      } else {
        // First sighting of this file: look up its metadata once.
        const metadata = await getFileMetadata(repositoryPath, filePath);
        frequencies.set(filePath, {
          filePath,
          commitCount: 1,
          lastModified: metadata.lastModified,
          authorCount: metadata.authorCount,
        });
      }
    }
  } catch {
    // Git command failed (repo not initialized, git missing, etc.).
    // Deliberately best-effort: fall through and return what we have.
  }

  return frequencies;
}

/**
 * Get metadata for a specific file
 *
 * Reads the file's whole history (not limited by `since`/`maxCommits`) in a
 * single git call: the first log entry is the most recent commit, and the
 * distinct author e-mails are counted in-process rather than via
 * `sort -u | wc -l`, which needs a POSIX shell.
 *
 * @returns Last author date and distinct author count; falls back to
 *          "now" and 1 when git fails or prints nothing usable
 */
async function getFileMetadata(
  repositoryPath: string,
  filePath: string
): Promise<{ lastModified: Date; authorCount: number }> {
  try {
    // %aI = author date, strict ISO 8601 (reliably parseable by `Date`); %ae = author e-mail.
    const output = runGit(repositoryPath, ['log', '--pretty=format:%aI%x09%ae', '--', filePath]);

    const entries = output
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line !== '')
      .map((line) => line.split('\t'));

    const authors = new Set(entries.map(([, email]) => email ?? ''));
    const newest = entries.length > 0 ? new Date(entries[0][0]) : new Date();

    return {
      lastModified: Number.isNaN(newest.getTime()) ? new Date() : newest,
      authorCount: authors.size || 1,
    };
  } catch {
    // If git command fails, return defaults
    return {
      lastModified: new Date(),
      authorCount: 1,
    };
  }
}

/**
 * Calculate aggregate stats from file frequencies
 *
 * @param frequencies - Per-file data, keyed by repository-relative path
 * @param filterPath - Optional plain string prefix; only paths starting with it
 *                     are counted. It is not segment-aware, so pass a trailing
 *                     slash (`packages/core/`) to avoid also matching
 *                     `packages/core-utils/...`.
 * @returns Commit total, mean commits per matching file (0 when none match)
 *          and the newest `lastModified` (null when none match)
 */
export function aggregateChangeFrequency(
  frequencies: Map<string, FileChangeFrequency>,
  filterPath?: string
): {
  totalCommits: number;
  avgCommitsPerFile: number;
  lastModified: Date | null;
} {
  let totalCommits = 0;
  let fileCount = 0;
  let mostRecent: Date | null = null;

  for (const [filePath, frequency] of frequencies) {
    // Apply filter if specified
    if (filterPath && !filePath.startsWith(filterPath)) {
      continue;
    }

    totalCommits += frequency.commitCount;
    fileCount++;

    if (!mostRecent || frequency.lastModified > mostRecent) {
      mostRecent = frequency.lastModified;
    }
  }

  return {
    totalCommits,
    avgCommitsPerFile: fileCount > 0 ? totalCommits / fileCount : 0,
    lastModified: mostRecent,
  };
}
totalCommits / fileCount : 0, + lastModified: mostRecent, + }; +} diff --git a/packages/core/src/indexer/utils/comparison.ts b/packages/core/src/indexer/utils/comparison.ts new file mode 100644 index 0000000..8d2cf25 --- /dev/null +++ b/packages/core/src/indexer/utils/comparison.ts @@ -0,0 +1,295 @@ +/** + * Stats Comparison Utilities + * + * Compare two stat snapshots to understand what changed between indexes. + * Useful for trend analysis and displaying deltas to users. + */ + +import type { + DetailedIndexStats, + LanguageStats, + PackageStats, + SupportedLanguage, +} from '../types.js'; + +/** + * Difference between two numeric values + */ +export interface NumericDiff { + before: number; + after: number; + absolute: number; // after - before (can be negative) + percent: number; // (after - before) / before * 100 +} + +/** + * High-level differences between two snapshots + */ +export interface StatsDiff { + /** File count changes */ + files: NumericDiff; + + /** Document count changes */ + documents: NumericDiff; + + /** Vector count changes */ + vectors: NumericDiff; + + /** Total lines changes (across all languages) */ + totalLines: NumericDiff; + + /** Per-language changes */ + languages: Record; + + /** Per-component-type changes */ + componentTypes: Record; + + /** Per-package changes */ + packages: Record; + + /** Time between snapshots (ms) */ + timeDelta: number; + + /** Summary */ + summary: { + languagesAdded: string[]; + languagesRemoved: string[]; + packagesAdded: string[]; + packagesRemoved: string[]; + overallTrend: 'growing' | 'shrinking' | 'stable'; + }; +} + +/** + * Language-specific differences + */ +export interface LanguageDiff { + files: NumericDiff; + components: NumericDiff; + lines: NumericDiff; + avgCommitsPerFile?: NumericDiff; +} + +/** + * Package-specific differences + */ +export interface PackageDiff { + files: NumericDiff; + components: NumericDiff; + totalCommits?: NumericDiff; +} + +/** + * Calculate numeric difference + */ 
function calculateNumericDiff(before: number, after: number): NumericDiff {
  const absolute = after - before;

  // Percent is relative to `before`. Without a positive baseline a ratio is
  // undefined, so a positive `after` is reported as +100% and anything else as 0%.
  let percent = 0;
  if (before > 0) {
    percent = (absolute / before) * 100;
  } else if (after > 0) {
    percent = 100;
  }

  return {
    before,
    after,
    absolute,
    // Round to two decimals but keep a number (25, not "25.00").
    percent: Number.parseFloat(percent.toFixed(2)),
  };
}

/**
 * Calculate language stats difference
 *
 * A missing side (language added or removed) is treated as all zeros.
 * `avgCommitsPerFile` is only reported when at least one side carries it.
 */
function calculateLanguageDiff(
  before: LanguageStats | undefined,
  after: LanguageStats | undefined
): LanguageDiff {
  const beforeStats = before ?? { files: 0, components: 0, lines: 0 };
  const afterStats = after ?? { files: 0, components: 0, lines: 0 };

  const diff: LanguageDiff = {
    files: calculateNumericDiff(beforeStats.files, afterStats.files),
    components: calculateNumericDiff(beforeStats.components, afterStats.components),
    lines: calculateNumericDiff(beforeStats.lines, afterStats.lines),
  };

  // Include change frequency if available
  if (beforeStats.avgCommitsPerFile !== undefined || afterStats.avgCommitsPerFile !== undefined) {
    diff.avgCommitsPerFile = calculateNumericDiff(
      beforeStats.avgCommitsPerFile ?? 0,
      afterStats.avgCommitsPerFile ?? 0
    );
  }

  return diff;
}

/**
 * Calculate package stats difference
 *
 * A missing side (package added or removed) is treated as all zeros.
 * `totalCommits` is only reported when at least one side carries it.
 */
function calculatePackageDiff(
  before: PackageStats | undefined,
  after: PackageStats | undefined
): PackageDiff {
  const beforeStats = before ?? { name: '', path: '', files: 0, components: 0, languages: {} };
  const afterStats = after ?? { name: '', path: '', files: 0, components: 0, languages: {} };

  const diff: PackageDiff = {
    files: calculateNumericDiff(beforeStats.files, afterStats.files),
    components: calculateNumericDiff(beforeStats.components, afterStats.components),
  };

  // Include commit count if available
  if (beforeStats.totalCommits !== undefined || afterStats.totalCommits !== undefined) {
    diff.totalCommits = calculateNumericDiff(
      beforeStats.totalCommits ?? 0,
      afterStats.totalCommits ?? 0
    );
  }

  return diff;
}

/**
 * Net file-count change (in files, not percent) beyond which a snapshot pair
 * is labelled growing/shrinking instead of stable.
 */
const TREND_FILE_THRESHOLD = 10;

/**
 * Milliseconds since epoch for a snapshot timestamp.
 *
 * The field is declared as `Date`, but a snapshot that went through
 * `JSON.parse` carries an ISO string at runtime, so both are accepted.
 * NOTE(review): confirm how callers load persisted snapshots.
 *
 * @returns NaN when the value is missing or unparseable
 */
function toTimestamp(value: Date | string | number | undefined): number {
  if (value instanceof Date) return value.getTime();
  return value === undefined ? Number.NaN : new Date(value).getTime();
}

/** Own keys of an optional record; an absent record has none. */
function keysOf(record: object | undefined): string[] {
  return Object.keys(record ?? {});
}

/** Total `lines` across every language entry of a snapshot. */
function sumLines(byLanguage: DetailedIndexStats['byLanguage']): number {
  return Object.values(byLanguage ?? {}).reduce<number>(
    (sum, lang) => sum + (lang?.lines ?? 0),
    0
  );
}

/** Entries of `source` that do not occur in `other`, in `source` order. */
function onlyIn(source: string[], other: string[]): string[] {
  const exclude = new Set(other);
  return source.filter((key) => !exclude.has(key));
}

/**
 * Compare two stat snapshots
 *
 * @param before - Earlier snapshot
 * @param after - Later snapshot
 * @returns Comprehensive diff showing all changes. `timeDelta` is the gap
 *          between the two `endTime`s in ms, or 0 when either is unusable.
 */
export function compareStats(before: DetailedIndexStats, after: DetailedIndexStats): StatsDiff {
  // Calculate time delta
  const elapsed = toTimestamp(after.endTime) - toTimestamp(before.endTime);
  const timeDelta = Number.isFinite(elapsed) ? elapsed : 0;

  // High-level diffs
  const files = calculateNumericDiff(before.filesScanned, after.filesScanned);
  const documents = calculateNumericDiff(before.documentsIndexed, after.documentsIndexed);
  const vectors = calculateNumericDiff(before.vectorsStored, after.vectorsStored);
  const totalLines = calculateNumericDiff(sumLines(before.byLanguage), sumLines(after.byLanguage));

  const beforeLanguages = keysOf(before.byLanguage);
  const afterLanguages = keysOf(after.byLanguage);
  const beforePackages = keysOf(before.byPackage);
  const afterPackages = keysOf(after.byPackage);

  // Language diffs (union of both sides so additions and removals both appear)
  const languages: Record<string, LanguageDiff> = {};
  for (const lang of new Set([...beforeLanguages, ...afterLanguages])) {
    languages[lang] = calculateLanguageDiff(
      before.byLanguage?.[lang as SupportedLanguage],
      after.byLanguage?.[lang as SupportedLanguage]
    );
  }

  // Component type diffs
  const componentTypes: Record<string, NumericDiff> = {};
  for (const type of new Set([
    ...keysOf(before.byComponentType),
    ...keysOf(after.byComponentType),
  ])) {
    componentTypes[type] = calculateNumericDiff(
      before.byComponentType?.[type] ?? 0,
      after.byComponentType?.[type] ?? 0
    );
  }

  // Package diffs
  const packages: Record<string, PackageDiff> = {};
  for (const pkg of new Set([...beforePackages, ...afterPackages])) {
    packages[pkg] = calculatePackageDiff(before.byPackage?.[pkg], after.byPackage?.[pkg]);
  }

  // Determine overall trend from the net file-count change alone
  let overallTrend: 'growing' | 'shrinking' | 'stable' = 'stable';
  if (files.absolute > TREND_FILE_THRESHOLD) {
    overallTrend = 'growing';
  } else if (files.absolute < -TREND_FILE_THRESHOLD) {
    overallTrend = 'shrinking';
  }

  return {
    files,
    documents,
    vectors,
    totalLines,
    languages,
    componentTypes,
    packages,
    timeDelta,
    summary: {
      languagesAdded: onlyIn(afterLanguages, beforeLanguages),
      languagesRemoved: onlyIn(beforeLanguages, afterLanguages),
      packagesAdded: onlyIn(afterPackages, beforePackages),
      packagesRemoved: onlyIn(beforePackages, afterPackages),
      overallTrend,
    },
  };
}

/**
 * Get a human-readable summary of the diff
 *
 * One line per non-zero change (files, lines, languages and packages added or
 * removed), always ending with the trend line. Counts use en-US digit grouping
 * so the text does not vary with the host locale.
 */
export function formatDiffSummary(diff: StatsDiff): string {
  const lines: string[] = [];

  // "added 2,500 lines (+25%)" / "removed 30 files (-30%)"; nothing when unchanged.
  const describeChange = (change: NumericDiff, unit: string): void => {
    if (change.absolute === 0) return;
    const direction = change.absolute > 0 ? 'added' : 'removed';
    const amount = Math.abs(change.absolute).toLocaleString('en-US');
    const sign = change.percent > 0 ? '+' : '';
    lines.push(`${direction} ${amount} ${unit} (${sign}${change.percent}%)`);
  };

  describeChange(diff.files, 'files');
  describeChange(diff.totalLines, 'lines');

  // Languages added/removed
  if (diff.summary.languagesAdded.length > 0) {
    lines.push(`new languages: ${diff.summary.languagesAdded.join(', ')}`);
  }
  if (diff.summary.languagesRemoved.length > 0) {
    lines.push(`removed languages: ${diff.summary.languagesRemoved.join(', ')}`);
  }

  // Packages added/removed
  if (diff.summary.packagesAdded.length > 0) {
    lines.push(`new packages: ${diff.summary.packagesAdded.join(', ')}`);
  }
  if (diff.summary.packagesRemoved.length > 0) {
    lines.push(`removed packages: ${diff.summary.packagesRemoved.join(', ')}`);
  }

  // Overall trend
  lines.push(`trend: ${diff.summary.overallTrend}`);

  return lines.join('\n');
}
/**
 * Stats Export Utilities
 *
 * Export stats in various formats for external analysis, dashboards, or reporting.
 */

import type { DetailedIndexStats, LanguageStats, PackageStats } from '../types.js';

/**
 * Export options
 */
export interface ExportOptions {
  /** Pretty print JSON (default: true) */
  pretty?: boolean;

  /** Include metadata (default: true) */
  includeMetadata?: boolean;

  /** Include detailed breakdowns (default: true) */
  includeDetails?: boolean;
}

/** Characters that force a CSV field to be quoted (RFC 4180). */
const CSV_NEEDS_QUOTING = /[",\r\n]/;

/**
 * ISO-8601 text for a timestamp field.
 *
 * Fields are declared as `Date`, but stats revived with `JSON.parse` carry
 * strings at runtime, so both are accepted.
 * NOTE(review): confirm how callers load persisted stats.
 */
function toIsoString(value: Date | string | number): string {
  return (value instanceof Date ? value : new Date(value)).toISOString();
}

/** Render one CSV field, quoting it and doubling embedded quotes only when required. */
function csvField(value: string | number): string {
  const text = String(value);
  return CSV_NEEDS_QUOTING.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}

/** Escape the column separator so a value cannot break out of its table cell. */
function markdownCell(value: string | number): string {
  return String(value).replace(/\|/g, '\\|');
}

/**
 * Export stats as JSON
 *
 * Always emits the overview counters; `metadata` and the per-language /
 * per-component-type / per-package breakdowns are added when present on
 * `stats` and enabled in `options`.
 *
 * @param stats - Stats to serialise
 * @param options - Output switches, all defaulting to true
 * @returns JSON text, indented by two spaces when `pretty` is set
 */
export function exportStatsAsJson(stats: DetailedIndexStats, options: ExportOptions = {}): string {
  const { pretty = true, includeMetadata = true, includeDetails = true } = options;

  const data: Record<string, unknown> = {
    filesScanned: stats.filesScanned,
    documentsIndexed: stats.documentsIndexed,
    vectorsStored: stats.vectorsStored,
    duration: stats.duration,
    repositoryPath: stats.repositoryPath,
  };

  if (includeMetadata && stats.statsMetadata) {
    data.metadata = {
      isIncremental: stats.statsMetadata.isIncremental,
      lastFullIndex: toIsoString(stats.statsMetadata.lastFullIndex),
      lastUpdate: toIsoString(stats.statsMetadata.lastUpdate),
      incrementalUpdatesSince: stats.statsMetadata.incrementalUpdatesSince,
      warning: stats.statsMetadata.warning,
    };
  }

  if (includeDetails) {
    if (stats.byLanguage) {
      data.byLanguage = stats.byLanguage;
    }
    if (stats.byComponentType) {
      data.byComponentType = stats.byComponentType;
    }
    if (stats.byPackage) {
      data.byPackage = stats.byPackage;
    }
  }

  return JSON.stringify(data, null, pretty ? 2 : undefined);
}

/**
 * Export stats as CSV
 * Flattens hierarchical data into rows for spreadsheet analysis
 *
 * Every row is `category,subcategory,metric,value`. Fields containing a comma,
 * double quote or line break are quoted per RFC 4180, so arbitrary language,
 * component-type and package names survive a round trip unchanged.
 */
export function exportStatsAsCsv(stats: DetailedIndexStats): string {
  const rows: string[] = [];
  const addRow = (...fields: Array<string | number>): void => {
    rows.push(fields.map(csvField).join(','));
  };

  // Header
  addRow('category', 'subcategory', 'metric', 'value');

  // Overview
  addRow('overview', 'files', 'total', stats.filesScanned);
  addRow('overview', 'documents', 'total', stats.documentsIndexed);
  addRow('overview', 'vectors', 'total', stats.vectorsStored);
  addRow('overview', 'duration', 'milliseconds', stats.duration);

  // By language
  if (stats.byLanguage) {
    for (const [lang, langStats] of Object.entries(stats.byLanguage)) {
      if (!langStats) continue;
      addRow('language', lang, 'files', langStats.files);
      addRow('language', lang, 'components', langStats.components);
      addRow('language', lang, 'lines', langStats.lines);
      if (langStats.avgCommitsPerFile !== undefined) {
        addRow('language', lang, 'avg_commits_per_file', langStats.avgCommitsPerFile);
      }
      if (langStats.lastModified) {
        addRow('language', lang, 'last_modified', toIsoString(langStats.lastModified));
      }
    }
  }

  // By component type
  if (stats.byComponentType) {
    for (const [type, count] of Object.entries(stats.byComponentType)) {
      addRow('component', type, 'count', count);
    }
  }

  // By package (rows are labelled with the package name, not its path key)
  if (stats.byPackage) {
    for (const pkgStats of Object.values(stats.byPackage)) {
      addRow('package', pkgStats.name, 'files', pkgStats.files);
      addRow('package', pkgStats.name, 'components', pkgStats.components);
      if (pkgStats.totalCommits !== undefined) {
        addRow('package', pkgStats.name, 'total_commits', pkgStats.totalCommits);
      }
      if (pkgStats.lastModified) {
        addRow('package', pkgStats.name, 'last_modified', toIsoString(pkgStats.lastModified));
      }
    }
  }

  return rows.join('\n');
}

/**
 * Export language stats as markdown table
 *
 * Line counts use en-US digit grouping so the output does not vary with the
 * host locale; a missing `avgCommitsPerFile` is shown as `N/A`.
 */
export function exportLanguageStatsAsMarkdown(
  byLanguage: Partial<Record<string, LanguageStats>>
): string {
  const lines: string[] = [];

  // Header
  lines.push('| Language | Files | Components | Lines | Avg Commits/File |');
  lines.push('|----------|-------|------------|-------|------------------|');

  // Rows
  for (const [lang, stats] of Object.entries(byLanguage)) {
    if (!stats) continue;
    const avgCommits = stats.avgCommitsPerFile?.toFixed(1) ?? 'N/A';
    lines.push(
      `| ${markdownCell(lang)} | ${stats.files} | ${stats.components} | ${stats.lines.toLocaleString('en-US')} | ${avgCommits} |`
    );
  }

  return lines.join('\n');
}

/**
 * Export package stats as markdown table
 *
 * Rows are labelled with each package's `name`; a missing `totalCommits` is
 * shown as `N/A`.
 */
export function exportPackageStatsAsMarkdown(byPackage: Record<string, PackageStats>): string {
  const lines: string[] = [];

  // Header
  lines.push('| Package | Files | Components | Total Commits |');
  lines.push('|---------|-------|------------|---------------|');

  // Rows
  for (const stats of Object.values(byPackage)) {
    const commits = stats.totalCommits ?? 'N/A';
    lines.push(
      `| ${markdownCell(stats.name)} | ${stats.files} | ${stats.components} | ${commits} |`
    );
  }

  return lines.join('\n');
}
'N/A'; + lines.push(`| ${stats.name} | ${stats.files} | ${stats.components} | ${commits} |`); + } + + return lines.join('\n'); +} diff --git a/packages/core/src/indexer/utils/index.ts b/packages/core/src/indexer/utils/index.ts index ca3c5d0..3bf1e0e 100644 --- a/packages/core/src/indexer/utils/index.ts +++ b/packages/core/src/indexer/utils/index.ts @@ -6,6 +6,23 @@ * @module indexer/utils */ +// Change frequency tracking +export { + aggregateChangeFrequency, + type ChangeFrequencyOptions, + calculateChangeFrequency, + type FileChangeFrequency, +} from './change-frequency'; + +// Stats comparison +export { + compareStats, + formatDiffSummary, + type LanguageDiff, + type NumericDiff, + type PackageDiff, + type StatsDiff, +} from './comparison'; // Document preparation export { filterDocumentsByExport, @@ -14,7 +31,14 @@ export { prepareDocumentForEmbedding, prepareDocumentsForEmbedding, } from './documents'; - +// Stats export +export { + type ExportOptions, + exportLanguageStatsAsMarkdown, + exportPackageStatsAsMarkdown, + exportStatsAsCsv, + exportStatsAsJson, +} from './export'; // Text formatting export { cleanDocumentText, @@ -22,7 +46,6 @@ export { formatDocumentTextWithSignature, truncateText, } from './formatting'; - // Language mapping export { getExtensionForLanguage, diff --git a/packages/core/src/storage/validation.ts b/packages/core/src/storage/validation.ts index de13656..7a82dd3 100644 --- a/packages/core/src/storage/validation.ts +++ b/packages/core/src/storage/validation.ts @@ -3,6 +3,9 @@ * * Following TypeScript Standards Rule #2: No Type Assertions Without Validation * State files (JSON) must be validated at runtime to handle corruption/format changes + * + * Note: IndexerState validation has been moved to packages/core/src/indexer/schemas/ + * This file now only contains RepositoryMetadata validation */ import { z } from 'zod'; @@ -11,19 +14,21 @@ import { z } from 'zod'; * Repository metadata schema */ export const RepositoryMetadataSchema = 
z.object({ - version: z.string(), + version: z.string().min(1), repository: z.object({ - path: z.string(), - remote: z.string().optional(), + path: z.string().min(1), + remote: z.string().optional(), // Can be URL or git remote name branch: z.string().optional(), lastCommit: z.string().optional(), + totalCommits: z.number().int().nonnegative().optional(), }), indexed: z .object({ - timestamp: z.string(), + timestamp: z.union([z.string(), z.coerce.date()]), // Support both string and Date files: z.number().int().nonnegative(), components: z.number().int().nonnegative(), size: z.number().int().nonnegative(), + languages: z.array(z.string()).optional(), }) .optional(), config: z @@ -34,8 +39,8 @@ export const RepositoryMetadataSchema = z.object({ .optional(), migrated: z .object({ - timestamp: z.string(), - from: z.string(), + timestamp: z.union([z.string(), z.coerce.date()]), // Support both string and Date + from: z.string().min(1), }) .optional(), });