diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index d6b31e9..8123bc0 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -11,6 +11,7 @@ import { gitCommand } from './commands/git.js'; import { indexCommand } from './commands/index.js'; import { initCommand } from './commands/init.js'; import { mcpCommand } from './commands/mcp.js'; +import { metricsCommand } from './commands/metrics.js'; import { planCommand } from './commands/plan.js'; import { searchCommand } from './commands/search.js'; import { statsCommand } from './commands/stats.js'; @@ -38,6 +39,7 @@ program.addCommand(ghCommand); program.addCommand(gitCommand); program.addCommand(updateCommand); program.addCommand(statsCommand); +program.addCommand(metricsCommand); program.addCommand(dashboardCommand); program.addCommand(compactCommand); program.addCommand(cleanCommand); diff --git a/packages/cli/src/commands/index.ts b/packages/cli/src/commands/index.ts index 0796355..4159c75 100644 --- a/packages/cli/src/commands/index.ts +++ b/packages/cli/src/commands/index.ts @@ -2,11 +2,14 @@ import { execSync } from 'node:child_process'; import { existsSync } from 'node:fs'; import { join } from 'node:path'; import { + AsyncEventBus, ensureStorageDirectory, GitIndexer, getStorageFilePaths, getStoragePath, + type IndexUpdatedEvent, LocalGitExtractor, + MetricsStore, RepositoryIndexer, updateIndexedStats, VectorStorage, @@ -116,16 +119,47 @@ export const indexCommand = new Command('index') const filePaths = getStorageFilePaths(storagePath); spinner.text = 'Initializing indexer...'; - const indexer = new RepositoryIndexer({ - repositoryPath: resolvedRepoPath, - vectorStorePath: filePaths.vectors, - statePath: filePaths.indexerState, - excludePatterns: config.repository?.excludePatterns || config.excludePatterns, - languages: config.repository?.languages || config.languages, - embeddingModel: config.embeddingModel, - embeddingDimension: config.dimension, + + // Create event bus for 
metrics (no logger in CLI to keep it simple) + const eventBus = new AsyncEventBus(); + + // Initialize metrics store (no logger in CLI to avoid noise) + const metricsDbPath = join(storagePath, 'metrics.db'); + const metricsStore = new MetricsStore(metricsDbPath); + + // Subscribe to index.updated events for automatic metrics persistence + eventBus.on('index.updated', async (event) => { + try { + const snapshotId = metricsStore.recordSnapshot( + event.stats, + event.isIncremental ? 'update' : 'index' + ); + + // Store code metadata if available + if (event.codeMetadata && event.codeMetadata.length > 0) { + metricsStore.appendCodeMetadata(snapshotId, event.codeMetadata); + } + } catch (error) { + // Log error but don't fail indexing - metrics are non-critical + logger.error( + `Failed to record metrics: ${error instanceof Error ? error.message : String(error)}` + ); + } }); + const indexer = new RepositoryIndexer( + { + repositoryPath: resolvedRepoPath, + vectorStorePath: filePaths.vectors, + statePath: filePaths.indexerState, + excludePatterns: config.repository?.excludePatterns || config.excludePatterns, + languages: config.repository?.languages || config.languages, + embeddingModel: config.embeddingModel, + embeddingDimension: config.dimension, + }, + eventBus + ); + await indexer.initialize(); spinner.text = 'Scanning repository...'; @@ -165,6 +199,7 @@ export const indexCommand = new Command('index') }); await indexer.close(); + metricsStore.close(); const codeDuration = (Date.now() - startTime) / 1000; diff --git a/packages/cli/src/commands/metrics.ts b/packages/cli/src/commands/metrics.ts new file mode 100644 index 0000000..baf77e6 --- /dev/null +++ b/packages/cli/src/commands/metrics.ts @@ -0,0 +1,260 @@ +/** + * Metrics commands - View repository metrics and file analytics + */ + +import * as path from 'node:path'; +import { + type FileMetrics, + getConcentratedOwnership, + getLargestFiles, + getMostActive, + getStoragePath, + MetricsStore, +} from 
'@lytics/dev-agent-core'; +import chalk from 'chalk'; +import { Command } from 'commander'; +import { loadConfig } from '../utils/config.js'; +import { logger } from '../utils/logger.js'; + +/** + * Create progress bar for visualization + */ +function createBar(value: number, max: number, width = 10): string { + const filled = Math.round((value / max) * width); + const empty = width - filled; + return '█'.repeat(filled) + '░'.repeat(empty); +} + +/** + * Get activity level label with color + */ +function getActivityLabel(activity: FileMetrics['activity']): string { + const labels = { + 'very-high': chalk.red.bold('Very High'), + high: chalk.red('High'), + medium: chalk.yellow('Medium'), + low: chalk.blue('Low'), + minimal: chalk.gray('Minimal'), + }; + return labels[activity]; +} + +/** + * Get size label with color + */ +function getSizeLabel(size: FileMetrics['size']): string { + const labels = { + 'very-large': chalk.red.bold('Very Large'), + large: chalk.red('Large'), + medium: chalk.yellow('Medium'), + small: chalk.blue('Small'), + tiny: chalk.gray('Tiny'), + }; + return labels[size]; +} + +/** + * Get ownership label with color + */ +function getOwnershipLabel(ownership: FileMetrics['ownership']): string { + const labels = { + single: chalk.red('Single'), + pair: chalk.yellow('Pair'), + 'small-team': chalk.blue('Small Team'), + shared: chalk.green('Shared'), + }; + return labels[ownership]; +} + +/** + * Format file metrics with visualization + */ +function formatFileMetrics(file: FileMetrics, maxCommits: number, maxLOC: number): string { + const activityBar = createBar(file.commitCount, maxCommits); + const sizeBar = createBar(file.linesOfCode, maxLOC); + const ownershipBar = createBar(10 - file.authorCount, 10); // Invert: fewer authors = more concentrated + + const lastModified = file.lastModified ? 
`📅 ${file.lastModified.toLocaleDateString()}` : ''; + + return ` +${chalk.bold(file.filePath)} + +📊 Activity: ${activityBar} ${getActivityLabel(file.activity)} (${file.commitCount} commits) +📏 Size: ${sizeBar} ${getSizeLabel(file.size)} (${file.linesOfCode} LOC, ${file.numFunctions} functions) +👥 Ownership: ${ownershipBar} ${getOwnershipLabel(file.ownership)} (${file.authorCount} ${file.authorCount === 1 ? 'author' : 'authors'}) +${lastModified} +`; +} + +/** + * Activity command - Show most active files + */ +const activityCommand = new Command('activity') + .description('Show most active files by commit frequency') + .option('-n, --limit ', 'Number of files to show', '10') + .action(async (options) => { + try { + const config = await loadConfig(); + if (!config) { + logger.error('No config found. Run "dev init" first.'); + process.exit(1); + } + + const repositoryPath = config.repository?.path || config.repositoryPath || process.cwd(); + const storagePath = await getStoragePath(path.resolve(repositoryPath)); + const metricsDbPath = path.join(storagePath, 'metrics.db'); + + const store = new MetricsStore(metricsDbPath); + const latestSnapshot = store.getLatestSnapshot(path.resolve(repositoryPath)); + + if (!latestSnapshot) { + logger.warn('No metrics found. 
Index your repository first with "dev index".'); + store.close(); + process.exit(0); + } + + const files = getMostActive(store, latestSnapshot.id, Number.parseInt(options.limit, 10)); + store.close(); + + if (files.length === 0) { + logger.warn('No file metrics available.'); + process.exit(0); + } + + // Calculate max values for scaling bars + const maxCommits = Math.max(...files.map((f) => f.commitCount)); + const maxLOC = Math.max(...files.map((f) => f.linesOfCode)); + + logger.log(''); + logger.log(chalk.bold.cyan(`📊 Most Active Files (by commits)`)); + logger.log(''); + + for (const file of files) { + logger.log(formatFileMetrics(file, maxCommits, maxLOC)); + } + } catch (error) { + logger.error( + `Failed to get activity metrics: ${error instanceof Error ? error.message : String(error)}` + ); + process.exit(1); + } + }); + +/** + * Size command - Show largest files + */ +const sizeCommand = new Command('size') + .description('Show largest files by lines of code') + .option('-n, --limit ', 'Number of files to show', '10') + .action(async (options) => { + try { + const config = await loadConfig(); + if (!config) { + logger.error('No config found. Run "dev init" first.'); + process.exit(1); + } + + const repositoryPath = config.repository?.path || config.repositoryPath || process.cwd(); + const storagePath = await getStoragePath(path.resolve(repositoryPath)); + const metricsDbPath = path.join(storagePath, 'metrics.db'); + + const store = new MetricsStore(metricsDbPath); + const latestSnapshot = store.getLatestSnapshot(path.resolve(repositoryPath)); + + if (!latestSnapshot) { + logger.warn('No metrics found. 
Index your repository first with "dev index".'); + store.close(); + process.exit(0); + } + + const files = getLargestFiles(store, latestSnapshot.id, Number.parseInt(options.limit, 10)); + store.close(); + + if (files.length === 0) { + logger.warn('No file metrics available.'); + process.exit(0); + } + + const maxCommits = Math.max(...files.map((f) => f.commitCount)); + const maxLOC = Math.max(...files.map((f) => f.linesOfCode)); + + logger.log(''); + logger.log(chalk.bold.cyan(`📏 Largest Files (by LOC)`)); + logger.log(''); + + for (const file of files) { + logger.log(formatFileMetrics(file, maxCommits, maxLOC)); + } + } catch (error) { + logger.error( + `Failed to get size metrics: ${error instanceof Error ? error.message : String(error)}` + ); + process.exit(1); + } + }); + +/** + * Ownership command - Show files with concentrated ownership + */ +const ownershipCommand = new Command('ownership') + .description('Show files with concentrated ownership (single/pair authors)') + .option('-n, --limit ', 'Number of files to show', '10') + .action(async (options) => { + try { + const config = await loadConfig(); + if (!config) { + logger.error('No config found. Run "dev init" first.'); + process.exit(1); + } + + const repositoryPath = config.repository?.path || config.repositoryPath || process.cwd(); + const storagePath = await getStoragePath(path.resolve(repositoryPath)); + const metricsDbPath = path.join(storagePath, 'metrics.db'); + + const store = new MetricsStore(metricsDbPath); + const latestSnapshot = store.getLatestSnapshot(path.resolve(repositoryPath)); + + if (!latestSnapshot) { + logger.warn('No metrics found. 
Index your repository first with "dev index".'); + store.close(); + process.exit(0); + } + + const files = getConcentratedOwnership( + store, + latestSnapshot.id, + Number.parseInt(options.limit, 10) + ); + store.close(); + + if (files.length === 0) { + logger.warn('No files with concentrated ownership found.'); + process.exit(0); + } + + const maxCommits = Math.max(...files.map((f) => f.commitCount)); + const maxLOC = Math.max(...files.map((f) => f.linesOfCode)); + + logger.log(''); + logger.log(chalk.bold.cyan(`👥 Concentrated Ownership (knowledge silos)`)); + logger.log(''); + + for (const file of files) { + logger.log(formatFileMetrics(file, maxCommits, maxLOC)); + } + } catch (error) { + logger.error( + `Failed to get ownership metrics: ${error instanceof Error ? error.message : String(error)}` + ); + process.exit(1); + } + }); + +/** + * Metrics parent command + */ +export const metricsCommand = new Command('metrics') + .description('View repository metrics and file analytics') + .addCommand(activityCommand) + .addCommand(sizeCommand) + .addCommand(ownershipCommand); diff --git a/packages/cli/src/commands/update.ts b/packages/cli/src/commands/update.ts index 2c7f475..8fa8398 100644 --- a/packages/cli/src/commands/update.ts +++ b/packages/cli/src/commands/update.ts @@ -1,8 +1,11 @@ import * as path from 'node:path'; import { + AsyncEventBus, ensureStorageDirectory, getStorageFilePaths, getStoragePath, + type IndexUpdatedEvent, + MetricsStore, RepositoryIndexer, } from '@lytics/dev-agent-core'; import chalk from 'chalk'; @@ -38,14 +41,45 @@ export const updateCommand = new Command('update') const filePaths = getStorageFilePaths(storagePath); spinner.text = 'Initializing indexer...'; - const indexer = new RepositoryIndexer({ - repositoryPath: resolvedRepoPath, - vectorStorePath: filePaths.vectors, - statePath: filePaths.indexerState, - excludePatterns: config.repository?.excludePatterns || config.excludePatterns, - languages: config.repository?.languages || 
config.languages, + + // Create event bus for metrics (no logger in CLI to keep it simple) + const eventBus = new AsyncEventBus(); + + // Initialize metrics store (no logger in CLI to avoid noise) + const metricsDbPath = path.join(storagePath, 'metrics.db'); + const metricsStore = new MetricsStore(metricsDbPath); + + // Subscribe to index.updated events for automatic metrics persistence + eventBus.on('index.updated', async (event) => { + try { + const snapshotId = metricsStore.recordSnapshot( + event.stats, + event.isIncremental ? 'update' : 'index' + ); + + // Store code metadata if available + if (event.codeMetadata && event.codeMetadata.length > 0) { + metricsStore.appendCodeMetadata(snapshotId, event.codeMetadata); + } + } catch (error) { + // Log error but don't fail update - metrics are non-critical + logger.error( + `Failed to record metrics: ${error instanceof Error ? error.message : String(error)}` + ); + } }); + const indexer = new RepositoryIndexer( + { + repositoryPath: resolvedRepoPath, + vectorStorePath: filePaths.vectors, + statePath: filePaths.indexerState, + excludePatterns: config.repository?.excludePatterns || config.excludePatterns, + languages: config.repository?.languages || config.languages, + }, + eventBus + ); + await indexer.initialize(); spinner.text = 'Detecting changed files...'; @@ -66,6 +100,7 @@ export const updateCommand = new Command('update') }); await indexer.close(); + metricsStore.close(); const duration = (Date.now() - startTime) / 1000; diff --git a/packages/core/package.json b/packages/core/package.json index c30baa8..810d2fd 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -30,6 +30,7 @@ "test:watch": "vitest" }, "devDependencies": { + "@types/better-sqlite3": "^7.6.13", "@types/mdast": "^4.0.4", "@types/node": "^24.10.1", "tree-sitter-wasms": "^0.1.13", @@ -39,6 +40,7 @@ "@lancedb/lancedb": "^0.22.3", "@lytics/kero": "workspace:*", "@xenova/transformers": "^2.17.2", + "better-sqlite3": "^12.5.0", 
"globby": "^16.0.0", "remark": "^15.0.1", "remark-parse": "^11.0.0", diff --git a/packages/core/src/events/__tests__/event-bus.test.ts b/packages/core/src/events/__tests__/event-bus.test.ts index cb9d793..26e45eb 100644 --- a/packages/core/src/events/__tests__/event-bus.test.ts +++ b/packages/core/src/events/__tests__/event-bus.test.ts @@ -3,6 +3,7 @@ */ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { createDetailedIndexStats } from '../../indexer/__tests__/test-factories'; import { AsyncEventBus, createTypedEventBus } from '../event-bus'; import type { SystemEventMap } from '../types'; @@ -352,11 +353,19 @@ describe('createTypedEventBus', () => { const handler = vi.fn(); bus.on('index.updated', handler); + const stats = createDetailedIndexStats({ + filesScanned: 100, + documentsIndexed: 100, + duration: 1000, + repositoryPath: '/test', + }); + await bus.emit('index.updated', { type: 'code', documentsCount: 100, duration: 1000, path: '/test', + stats, }); await new Promise((resolve) => setTimeout(resolve, 10)); @@ -365,6 +374,7 @@ describe('createTypedEventBus', () => { documentsCount: 100, duration: 1000, path: '/test', + stats, }); }); diff --git a/packages/core/src/events/types.ts b/packages/core/src/events/types.ts index ccae00d..eebe446 100644 --- a/packages/core/src/events/types.ts +++ b/packages/core/src/events/types.ts @@ -5,6 +5,9 @@ * Designed for Node.js async patterns. 
*/ +import type { DetailedIndexStats } from '../indexer/types.js'; +import type { CodeMetadata } from '../metrics/types.js'; + /** * Event handler function type * All handlers are async to support non-blocking operations @@ -141,6 +144,12 @@ export interface IndexUpdatedEvent { documentsCount: number; duration: number; path: string; + /** Full statistics snapshot */ + stats: DetailedIndexStats; + /** Whether this was an incremental update (vs full index) */ + isIncremental?: boolean; + /** Per-file code metadata for metrics storage */ + codeMetadata?: CodeMetadata[]; } export interface IndexErrorEvent { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2647a1b..bdb966f 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -7,6 +7,7 @@ export * from './git'; export * from './github'; export * from './indexer'; export * from './map'; +export * from './metrics'; export * from './observability'; export * from './scanner'; export * from './storage'; diff --git a/packages/core/src/indexer/index.ts b/packages/core/src/indexer/index.ts index 64e9eb2..81bb6b3 100644 --- a/packages/core/src/indexer/index.ts +++ b/packages/core/src/indexer/index.ts @@ -5,6 +5,10 @@ import * as crypto from 'node:crypto'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; +import type { Logger } from '@lytics/kero'; +import type { EventBus } from '../events/types.js'; +import { buildCodeMetadata } from '../metrics/collector.js'; +import type { CodeMetadata } from '../metrics/types.js'; import { scanRepository } from '../scanner'; import type { Document } from '../scanner/types'; import { getCurrentSystemResources, getOptimalConcurrency } from '../utils/concurrency'; @@ -37,11 +41,13 @@ const DEFAULT_STATE_PATH = '.dev-agent/indexer-state.json'; * Orchestrates repository scanning, embedding generation, and vector storage */ export class RepositoryIndexer { - private readonly config: Required; + private readonly config: 
Required> & Pick; private vectorStorage: VectorStorage; private state: IndexerState | null = null; + private eventBus?: EventBus; + private logger?: Logger; - constructor(config: IndexerConfig) { + constructor(config: IndexerConfig, eventBus?: EventBus) { this.config = { statePath: path.join(config.repositoryPath, DEFAULT_STATE_PATH), embeddingModel: 'Xenova/all-MiniLM-L6-v2', @@ -57,6 +63,9 @@ export class RepositoryIndexer { embeddingModel: this.config.embeddingModel, dimension: this.config.embeddingDimension, }); + + this.eventBus = eventBus; + this.logger = config.logger; } /** @@ -271,6 +280,34 @@ export class RepositoryIndexer { this.state.lastUpdate = endTime; } + // Build code metadata for metrics storage + let codeMetadata: CodeMetadata[] | undefined; + if (this.eventBus) { + try { + codeMetadata = await buildCodeMetadata(this.config.repositoryPath, scanResult.documents); + } catch (error) { + // Not critical if metadata collection fails + this.logger?.warn({ error }, 'Failed to collect code metadata for metrics'); + } + } + + // Emit index.updated event (fire-and-forget) + if (this.eventBus) { + void this.eventBus.emit( + 'index.updated', + { + type: 'code', + documentsCount: documentsIndexed, + duration: stats.duration, + path: this.config.repositoryPath, + stats, + isIncremental: false, + codeMetadata, + }, + { waitForHandlers: false } + ); + } + return stats; } catch (error) { errors.push({ @@ -358,6 +395,7 @@ export class RepositoryIndexer { let documentsIndexed = 0; let incrementalStats: ReturnType | null = null; const affectedLanguages = new Set(); + let scannedDocuments: Document[] = []; if (filesToReindex.length > 0) { const scanResult = await scanRepository({ @@ -367,6 +405,7 @@ export class RepositoryIndexer { logger: options.logger, }); + scannedDocuments = scanResult.documents; documentsExtracted = scanResult.documents.length; // Calculate stats for incremental changes @@ -410,7 +449,7 @@ export class RepositoryIndexer { const warning = 
this.getStatsWarning(incrementalUpdatesSince); // Return incremental stats (what changed) with metadata - return { + const stats: DetailedIndexStats = { filesScanned: filesToReindex.length, documentsExtracted, documentsIndexed, @@ -431,6 +470,36 @@ export class RepositoryIndexer { warning, }, }; + + // Build code metadata for metrics storage (only for updated files) + let codeMetadata: CodeMetadata[] | undefined; + if (this.eventBus && scannedDocuments.length > 0) { + try { + codeMetadata = await buildCodeMetadata(this.config.repositoryPath, scannedDocuments); + } catch (error) { + // Not critical if metadata collection fails + this.logger?.warn({ error }, 'Failed to collect code metadata for metrics during update'); + } + } + + // Emit index.updated event (fire-and-forget) + if (this.eventBus) { + void this.eventBus.emit( + 'index.updated', + { + type: 'code', + documentsCount: documentsIndexed, + duration: stats.duration, + path: this.config.repositoryPath, + stats, + isIncremental: true, + codeMetadata, + }, + { waitForHandlers: false } + ); + } + + return stats; } /** diff --git a/packages/core/src/indexer/types.ts b/packages/core/src/indexer/types.ts index 6b049de..3040567 100644 --- a/packages/core/src/indexer/types.ts +++ b/packages/core/src/indexer/types.ts @@ -295,6 +295,9 @@ export interface IndexerConfig { /** Glob patterns to exclude */ excludePatterns?: string[]; + /** Logger for warnings and errors */ + logger?: Logger; + /** Languages to index (default: all supported) */ languages?: string[]; } diff --git a/packages/core/src/metrics/__tests__/analytics.test.ts b/packages/core/src/metrics/__tests__/analytics.test.ts new file mode 100644 index 0000000..c0c7f18 --- /dev/null +++ b/packages/core/src/metrics/__tests__/analytics.test.ts @@ -0,0 +1,287 @@ +/** + * Tests for Metrics Analytics + */ + +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 
'vitest'; +import { createDetailedIndexStats } from '../../indexer/__tests__/test-factories.js'; +import { + getConcentratedOwnership, + getFileMetrics, + getFileTrend, + getLargestFiles, + getMostActive, + getSnapshotSummary, +} from '../analytics.js'; +import { MetricsStore } from '../store.js'; +import type { CodeMetadata } from '../types.js'; + +describe('Metrics Analytics', () => { + let tempDbPath: string; + let store: MetricsStore; + let snapshotId: string; + + beforeEach(() => { + // Create temp database + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'analytics-test-')); + tempDbPath = path.join(tempDir, 'test-metrics.db'); + store = new MetricsStore(tempDbPath); + + // Create a snapshot + const stats = createDetailedIndexStats({ + repositoryPath: '/test/repo', + filesScanned: 5, + documentsIndexed: 10, + }); + snapshotId = store.recordSnapshot(stats, 'index'); + + // Add code metadata for testing + const metadata: CodeMetadata[] = [ + { + filePath: 'src/very-active.ts', + commitCount: 100, + authorCount: 1, + linesOfCode: 2000, + numFunctions: 50, + numImports: 20, + }, + { + filePath: 'src/medium-active.ts', + commitCount: 30, + authorCount: 3, + linesOfCode: 500, + numFunctions: 15, + numImports: 10, + }, + { + filePath: 'src/low-active.ts', + commitCount: 5, + authorCount: 5, + linesOfCode: 100, + numFunctions: 5, + numImports: 3, + }, + ]; + + store.appendCodeMetadata(snapshotId, metadata); + }); + + afterEach(() => { + store.close(); + if (fs.existsSync(tempDbPath)) { + fs.unlinkSync(tempDbPath); + const tempDir = path.dirname(tempDbPath); + fs.rmSync(tempDir, { recursive: true }); + } + }); + + describe('getFileMetrics', () => { + it('should return files with classified metrics', () => { + const metrics = getFileMetrics(store, snapshotId); + + expect(metrics.length).toBe(3); + expect(metrics[0].filePath).toBeDefined(); + expect(metrics[0].activity).toBeDefined(); + expect(metrics[0].size).toBeDefined(); + 
expect(metrics[0].ownership).toBeDefined(); + }); + + it('should classify activity levels correctly', () => { + const metrics = getFileMetrics(store, snapshotId); + + // Find by file path + const veryActive = metrics.find((m) => m.filePath === 'src/very-active.ts'); + const mediumActive = metrics.find((m) => m.filePath === 'src/medium-active.ts'); + const lowActive = metrics.find((m) => m.filePath === 'src/low-active.ts'); + + // 100 commits = very-high + expect(veryActive?.activity).toBe('very-high'); + expect(veryActive?.commitCount).toBe(100); + + // 30 commits = medium + expect(mediumActive?.activity).toBe('medium'); + expect(mediumActive?.commitCount).toBe(30); + + // 5 commits = low + expect(lowActive?.activity).toBe('low'); + expect(lowActive?.commitCount).toBe(5); + }); + + it('should classify size correctly', () => { + const metrics = getFileMetrics(store, snapshotId); + + const veryActive = metrics.find((m) => m.filePath === 'src/very-active.ts'); + const mediumActive = metrics.find((m) => m.filePath === 'src/medium-active.ts'); + const lowActive = metrics.find((m) => m.filePath === 'src/low-active.ts'); + + // 2000 LOC = very-large + expect(veryActive?.size).toBe('very-large'); + + // 500 LOC = medium + expect(mediumActive?.size).toBe('medium'); + + // 100 LOC = small + expect(lowActive?.size).toBe('small'); + }); + + it('should classify ownership correctly', () => { + const metrics = getFileMetrics(store, snapshotId); + + const veryActive = metrics.find((m) => m.filePath === 'src/very-active.ts'); + const mediumActive = metrics.find((m) => m.filePath === 'src/medium-active.ts'); + const lowActive = metrics.find((m) => m.filePath === 'src/low-active.ts'); + + // 1 author = single + expect(veryActive?.ownership).toBe('single'); + expect(veryActive?.authorCount).toBe(1); + + // 3 authors = small-team + expect(mediumActive?.ownership).toBe('small-team'); + + // 5 authors = small-team + expect(lowActive?.ownership).toBe('small-team'); + }); + + it('should 
respect limit parameter', () => { + const metrics = getFileMetrics(store, snapshotId, { limit: 2 }); + expect(metrics.length).toBe(2); + }); + + it('should return empty array for non-existent snapshot', () => { + const metrics = getFileMetrics(store, 'non-existent-id'); + expect(metrics.length).toBe(0); + }); + }); + + describe('getMostActive', () => { + it('should return files sorted by activity', () => { + const active = getMostActive(store, snapshotId, 10); + + expect(active.length).toBe(3); + // Should be sorted by commit count descending + expect(active[0].commitCount).toBeGreaterThanOrEqual(active[1].commitCount); + expect(active[1].commitCount).toBeGreaterThanOrEqual(active[2].commitCount); + }); + }); + + describe('getLargestFiles', () => { + it('should return files sorted by size', () => { + const largest = getLargestFiles(store, snapshotId, 10); + + expect(largest.length).toBe(3); + // Should be sorted by LOC descending + expect(largest[0].linesOfCode).toBeGreaterThanOrEqual(largest[1].linesOfCode); + expect(largest[1].linesOfCode).toBeGreaterThanOrEqual(largest[2].linesOfCode); + }); + }); + + describe('getConcentratedOwnership', () => { + it('should return files with single or pair ownership', () => { + const concentrated = getConcentratedOwnership(store, snapshotId, 10); + + expect(concentrated.length).toBeGreaterThan(0); + // All should have single or pair ownership + for (const file of concentrated) { + expect(['single', 'pair']).toContain(file.ownership); + } + }); + }); + + describe('getFileTrend', () => { + it('should return file metadata across snapshots', async () => { + // Wait to ensure different timestamp + await new Promise((resolve) => setTimeout(resolve, 10)); + + // Create a second snapshot + const stats2 = createDetailedIndexStats({ + repositoryPath: '/test/repo', + filesScanned: 5, + }); + const snapshotId2 = store.recordSnapshot(stats2, 'update'); + + // Add updated metadata + const updatedMetadata: CodeMetadata[] = [ + { + filePath: 
'src/very-active.ts', + commitCount: 110, // Increased + authorCount: 2, // More authors + linesOfCode: 2100, // More LOC + numFunctions: 52, + numImports: 22, + }, + ]; + store.appendCodeMetadata(snapshotId2, updatedMetadata); + + const trend = getFileTrend(store, 'src/very-active.ts', 10); + + expect(trend.length).toBe(2); + // Most recent first + expect(trend[0].commitCount).toBe(110); + expect(trend[1].commitCount).toBe(100); + }); + + it('should return empty array for non-existent file', () => { + const trend = getFileTrend(store, 'src/non-existent.ts', 10); + expect(trend.length).toBe(0); + }); + }); + + describe('getSnapshotSummary', () => { + it('should calculate summary statistics', () => { + const summary = getSnapshotSummary(store, snapshotId); + + expect(summary).toBeDefined(); + expect(summary?.totalFiles).toBe(3); + expect(summary?.totalLOC).toBe(2600); // 2000 + 500 + 100 + expect(summary?.totalFunctions).toBe(70); // 50 + 15 + 5 + expect(summary?.avgLOC).toBe(867); // 2600 / 3, rounded + }); + + it('should categorize files by activity', () => { + const summary = getSnapshotSummary(store, snapshotId); + + expect(summary).toBeDefined(); + // Should have activity metrics + expect(summary?.veryActiveFiles).toBe(1); // very-active.ts has 100 commits + expect(summary?.highActivityFiles).toBe(1); // Only 1 file >= 50 commits + expect(summary?.veryActivePercent).toBeGreaterThan(0); + }); + + it('should categorize files by size', () => { + const summary = getSnapshotSummary(store, snapshotId); + + expect(summary).toBeDefined(); + // Should have size metrics + expect(summary?.veryLargeFiles).toBe(1); // very-active.ts has 2000 LOC + expect(summary?.largeFiles).toBe(1); // Only 1 file >= 1000 LOC + expect(summary?.veryLargePercent).toBeGreaterThan(0); + }); + + it('should categorize files by ownership', () => { + const summary = getSnapshotSummary(store, snapshotId); + + expect(summary).toBeDefined(); + // Should have ownership metrics + 
/**
 * Tests for MetricsStore
 *
 * Every test runs against a fresh SQLite file inside its own temp directory,
 * which is removed (with any SQLite sidecar files) after the test.
 */

import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { createDetailedIndexStats } from '../../indexer/__tests__/test-factories.js';
import { MetricsStore } from '../store.js';

describe('MetricsStore', () => {
  let tempDir: string;
  let tempDbPath: string;
  let store: MetricsStore;

  beforeEach(() => {
    // Create an isolated temp directory and database for this test
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'metrics-test-'));
    tempDbPath = path.join(tempDir, 'test-metrics.db');
    store = new MetricsStore(tempDbPath);
  });

  afterEach(() => {
    // Always remove the whole temp directory, even if the db file is missing,
    // so failed tests do not leave directories behind.
    store.close();
    fs.rmSync(tempDir, { recursive: true, force: true });
  });

  describe('recordSnapshot', () => {
    it('should record a snapshot successfully', () => {
      const stats = createDetailedIndexStats({
        repositoryPath: '/test/repo',
        filesScanned: 10,
        documentsIndexed: 20,
        vectorsStored: 20,
        duration: 1000,
      });

      const id = store.recordSnapshot(stats, 'index');

      expect(id).toBeTruthy();
      expect(typeof id).toBe('string');
    });

    it('should generate unique IDs for each snapshot', () => {
      const stats = createDetailedIndexStats();

      const id1 = store.recordSnapshot(stats, 'index');
      const id2 = store.recordSnapshot(stats, 'update');

      expect(id1).not.toBe(id2);
    });

    it('should store both index and update triggers', () => {
      const stats = createDetailedIndexStats();

      const indexId = store.recordSnapshot(stats, 'index');
      const updateId = store.recordSnapshot(stats, 'update');

      const indexSnapshot = store.getSnapshot(indexId);
      const updateSnapshot = store.getSnapshot(updateId);

      expect(indexSnapshot?.trigger).toBe('index');
      expect(updateSnapshot?.trigger).toBe('update');
    });
  });

  describe('getSnapshot', () => {
    it('should retrieve a snapshot by ID', () => {
      const stats = createDetailedIndexStats({
        repositoryPath: '/test/repo',
        filesScanned: 10,
        documentsIndexed: 20,
      });

      const id = store.recordSnapshot(stats, 'index');
      const snapshot = store.getSnapshot(id);

      expect(snapshot).toBeTruthy();
      expect(snapshot?.id).toBe(id);
      expect(snapshot?.repositoryPath).toBe('/test/repo');
      expect(snapshot?.stats.filesScanned).toBe(10);
      expect(snapshot?.stats.documentsIndexed).toBe(20);
      expect(snapshot?.trigger).toBe('index');
    });

    it('should return null for non-existent ID', () => {
      const snapshot = store.getSnapshot('non-existent-id');
      expect(snapshot).toBeNull();
    });
  });

  describe('getSnapshots', () => {
    beforeEach(() => {
      // Seed with multiple snapshots (all timestamped "now")
      const repo1 = '/test/repo1';
      const repo2 = '/test/repo2';

      store.recordSnapshot(createDetailedIndexStats({ repositoryPath: repo1 }), 'index');
      store.recordSnapshot(createDetailedIndexStats({ repositoryPath: repo1 }), 'update');
      store.recordSnapshot(createDetailedIndexStats({ repositoryPath: repo2 }), 'index');
    });

    it('should retrieve all snapshots with default limit', () => {
      const snapshots = store.getSnapshots({});
      expect(snapshots.length).toBe(3);
    });

    it('should filter by repository path', () => {
      const snapshots = store.getSnapshots({ repositoryPath: '/test/repo1' });
      expect(snapshots.length).toBe(2);
      expect(snapshots.every((s) => s.repositoryPath === '/test/repo1')).toBe(true);
    });

    it('should filter by trigger type', () => {
      const snapshots = store.getSnapshots({ trigger: 'index' });
      expect(snapshots.length).toBe(2);
      expect(snapshots.every((s) => s.trigger === 'index')).toBe(true);
    });

    it('should respect limit parameter', () => {
      const snapshots = store.getSnapshots({ limit: 2 });
      expect(snapshots.length).toBe(2);
    });

    it('should return snapshots in descending timestamp order', () => {
      const snapshots = store.getSnapshots({});
      expect(snapshots.length).toBeGreaterThan(1);

      for (let i = 1; i < snapshots.length; i++) {
        expect(snapshots[i - 1].timestamp.getTime()).toBeGreaterThanOrEqual(
          snapshots[i].timestamp.getTime()
        );
      }
    });

    it('should filter by since date', () => {
      const oneHourAgo = new Date(Date.now() - 3600000);
      const oneHourAhead = new Date(Date.now() + 3600000);

      // Everything seeded above is newer than one hour ago...
      expect(store.getSnapshots({ since: oneHourAgo }).length).toBe(3);
      // ...and nothing is newer than one hour from now.
      expect(store.getSnapshots({ since: oneHourAhead }).length).toBe(0);
    });

    it('should filter by until date', () => {
      const oneHourAgo = new Date(Date.now() - 3600000);
      const oneHourAhead = new Date(Date.now() + 3600000);

      expect(store.getSnapshots({ until: oneHourAhead }).length).toBe(3);
      expect(store.getSnapshots({ until: oneHourAgo }).length).toBe(0);
    });
  });

  describe('getLatestSnapshot', () => {
    it('should return the most recent snapshot', () => {
      const stats1 = createDetailedIndexStats({ filesScanned: 10 });
      const stats2 = createDetailedIndexStats({ filesScanned: 20 });

      // Use explicit timestamps to ensure deterministic ordering
      store.recordSnapshot(stats1, 'index', new Date('2024-01-01T10:00:00Z'));
      const latestId = store.recordSnapshot(stats2, 'update', new Date('2024-01-01T11:00:00Z'));

      const latest = store.getLatestSnapshot();
      expect(latest?.id).toBe(latestId);
      expect(latest?.stats.filesScanned).toBe(20);
    });

    it('should filter by repository path', () => {
      store.recordSnapshot(
        createDetailedIndexStats({ repositoryPath: '/repo1' }),
        'index',
        new Date('2024-01-01T10:00:00Z')
      );
      const repo2Id = store.recordSnapshot(
        createDetailedIndexStats({ repositoryPath: '/repo2' }),
        'index',
        new Date('2024-01-01T11:00:00Z')
      );

      const latest = store.getLatestSnapshot('/repo2');
      expect(latest?.id).toBe(repo2Id);
    });

    it('should return null when no snapshots exist', () => {
      const latest = store.getLatestSnapshot();
      expect(latest).toBeNull();
    });
  });

  describe('getCount', () => {
    it('should return correct count of all snapshots', () => {
      store.recordSnapshot(createDetailedIndexStats(), 'index');
      store.recordSnapshot(createDetailedIndexStats(), 'update');
      store.recordSnapshot(createDetailedIndexStats(), 'index');

      expect(store.getCount()).toBe(3);
    });

    it('should return correct count filtered by repository path', () => {
      store.recordSnapshot(createDetailedIndexStats({ repositoryPath: '/repo1' }), 'index');
      store.recordSnapshot(createDetailedIndexStats({ repositoryPath: '/repo1' }), 'update');
      store.recordSnapshot(createDetailedIndexStats({ repositoryPath: '/repo2' }), 'index');

      expect(store.getCount('/repo1')).toBe(2);
      expect(store.getCount('/repo2')).toBe(1);
    });

    it('should return 0 for empty database', () => {
      expect(store.getCount()).toBe(0);
    });
  });

  describe('pruneOldSnapshots', () => {
    it('should delete snapshots older than retention period', () => {
      // Back-date the snapshot explicitly instead of sleeping: the prune cutoff
      // is "now - retentionDays", so a one-minute-old snapshot is always older
      // than a 0-day retention window regardless of timer resolution.
      store.recordSnapshot(createDetailedIndexStats(), 'index', new Date(Date.now() - 60000));

      const deleted = store.pruneOldSnapshots(0);
      expect(deleted).toBe(1);
      expect(store.getCount()).toBe(0);
    });

    it('should not delete recent snapshots', () => {
      store.recordSnapshot(createDetailedIndexStats(), 'index');
      store.recordSnapshot(createDetailedIndexStats(), 'update');

      // Prune snapshots older than 90 days (should delete none)
      const deleted = store.pruneOldSnapshots(90);
      expect(deleted).toBe(0);
      expect(store.getCount()).toBe(2);
    });

    it('should return 0 when no snapshots to prune', () => {
      const deleted = store.pruneOldSnapshots(30);
      expect(deleted).toBe(0);
    });
  });

  describe('close', () => {
    it('should close database without error', () => {
      expect(() => store.close()).not.toThrow();
    });

    it('should not throw when closed multiple times', () => {
      store.close();
      expect(() => store.close()).not.toThrow();
    });
  });

  describe('logger integration', () => {
    it('should work without a logger', () => {
      const storeWithoutLogger = new MetricsStore(tempDbPath);

      try {
        const stats = createDetailedIndexStats();

        expect(() => {
          storeWithoutLogger.recordSnapshot(stats, 'index');
        }).not.toThrow();
      } finally {
        // Close even when an expectation fails so the file handle is released
        storeWithoutLogger.close();
      }
    });

    it('should call logger methods when provided', () => {
      const mockLogger = {
        trace: vi.fn(),
        debug: vi.fn(),
        info: vi.fn(),
        warn: vi.fn(),
        error: vi.fn(),
        success: vi.fn(),
        fatal: vi.fn(),
        child: vi.fn(),
        startTimer: vi.fn(),
        isLevelEnabled: vi.fn(),
        level: 'info' as const,
      };

      // Lives in the per-test temp directory, so afterEach removes it
      const tempDbPath2 = path.join(tempDir, 'test-metrics-2.db');
      const storeWithLogger = new MetricsStore(tempDbPath2, mockLogger);

      try {
        storeWithLogger.recordSnapshot(createDetailedIndexStats(), 'index');

        expect(mockLogger.info).toHaveBeenCalled();
        expect(mockLogger.debug).toHaveBeenCalled();
      } finally {
        storeWithLogger.close();
      }
    });
  });
});
/**
 * File metrics with activity classification
 *
 * Each bucketed field (`activity`, `size`, `ownership`) is derived from the
 * raw number stored next to it (`commitCount`, `linesOfCode`, `authorCount`).
 */
export interface FileMetrics {
  filePath: string;
  activity: 'very-high' | 'high' | 'medium' | 'low' | 'minimal';
  commitCount: number;
  size: 'very-large' | 'large' | 'medium' | 'small' | 'tiny';
  linesOfCode: number;
  ownership: 'single' | 'pair' | 'small-team' | 'shared';
  authorCount: number;
  lastModified?: Date;
  numFunctions: number;
  numImports: number;
}

/**
 * Classify activity level based on commit count
 *
 * Thresholds (inclusive lower bounds): 100 very-high, 50 high, 20 medium, 5 low.
 *
 * @param commits - Number of commits touching the file
 * @returns Activity bucket
 */
function classifyActivity(commits: number): FileMetrics['activity'] {
  if (commits >= 100) return 'very-high';
  if (commits >= 50) return 'high';
  if (commits >= 20) return 'medium';
  if (commits >= 5) return 'low';
  return 'minimal';
}

/**
 * Classify size based on lines of code
 *
 * Thresholds (inclusive lower bounds): 2000 very-large, 1000 large, 500 medium, 100 small.
 *
 * @param loc - Lines of code in the file
 * @returns Size bucket
 */
function classifySize(loc: number): FileMetrics['size'] {
  if (loc >= 2000) return 'very-large';
  if (loc >= 1000) return 'large';
  if (loc >= 500) return 'medium';
  if (loc >= 100) return 'small';
  return 'tiny';
}

/**
 * Classify ownership based on author count
 *
 * A count of 0 (or below) means "no author information"; it is treated like a
 * single author, the same default callers apply with `authorCount || 1`,
 * instead of falling through to 'small-team'.
 *
 * @param authors - Number of distinct authors
 * @returns Ownership bucket
 */
function classifyOwnership(authors: number): FileMetrics['ownership'] {
  if (authors <= 1) return 'single';
  if (authors === 2) return 'pair';
  if (authors <= 5) return 'small-team';
  return 'shared';
}
/**
 * Get file metrics from a snapshot
 *
 * Reads per-file metadata for the snapshot from the store and attaches the
 * activity / size / ownership buckets. Missing commit counts are reported as 0
 * and missing author counts as 1.
 *
 * @param store - MetricsStore instance
 * @param snapshotId - Snapshot ID to analyze
 * @param options - Query options (`sortBy` defaults to 'activity', `limit` to 100)
 * @returns Array of file metrics, in the order returned by the store
 */
export function getFileMetrics(
  store: MetricsStore,
  snapshotId: string,
  options?: {
    sortBy?: 'activity' | 'size' | 'ownership';
    limit?: number;
  }
): FileMetrics[] {
  const requestedSort = options?.sortBy ? options.sortBy : 'activity';
  const maxRows = options?.limit ? options.limit : 100;

  // Translate the public sort key into the store's sort key.
  // 'ownership' is served by the risk ordering, whose formula divides by author count.
  let storeSort: 'commits_desc' | 'lines_desc' | 'risk_desc' | undefined;
  if (requestedSort === 'activity') {
    storeSort = 'commits_desc';
  } else if (requestedSort === 'size') {
    storeSort = 'lines_desc';
  } else if (requestedSort === 'ownership') {
    storeSort = 'risk_desc';
  }

  const rows = store.getCodeMetadata({
    snapshotId,
    sortBy: storeSort,
    limit: maxRows,
  });

  const results: FileMetrics[] = [];
  for (const row of rows) {
    const commits = row.commitCount || 0;
    const authors = row.authorCount || 1;

    results.push({
      filePath: row.filePath,
      activity: classifyActivity(commits),
      commitCount: commits,
      size: classifySize(row.linesOfCode),
      linesOfCode: row.linesOfCode,
      ownership: classifyOwnership(authors),
      authorCount: authors,
      lastModified: row.lastModified,
      numFunctions: row.numFunctions,
      numImports: row.numImports,
    });
  }
  return results;
}

/**
 * Get most active files (by commit count)
 *
 * @param store - MetricsStore instance
 * @param snapshotId - Snapshot ID to analyze
 * @param limit - Maximum number of files to return (default: 10)
 */
export function getMostActive(store: MetricsStore, snapshotId: string, limit = 10): FileMetrics[] {
  const query = { sortBy: 'activity' as const, limit };
  return getFileMetrics(store, snapshotId, query);
}

/**
 * Get largest files (by LOC)
 *
 * @param store - MetricsStore instance
 * @param snapshotId - Snapshot ID to analyze
 * @param limit - Maximum number of files to return (default: 10)
 */
export function getLargestFiles(
  store: MetricsStore,
  snapshotId: string,
  limit = 10
): FileMetrics[] {
  const query = { sortBy: 'size' as const, limit };
  return getFileMetrics(store, snapshotId, query);
}

/**
 * Get files with concentrated ownership (single/pair authors)
 *
 * Only the first 1000 files of the 'ownership' ordering are inspected before
 * filtering, so files beyond that window are never returned.
 *
 * @param store - MetricsStore instance
 * @param snapshotId - Snapshot ID to analyze
 * @param limit - Maximum number of files to return (default: 10)
 */
export function getConcentratedOwnership(
  store: MetricsStore,
  snapshotId: string,
  limit = 10
): FileMetrics[] {
  const candidates = getFileMetrics(store, snapshotId, { sortBy: 'ownership', limit: 1000 });
  const concentrated = candidates.filter(
    (file) => !(file.ownership !== 'single' && file.ownership !== 'pair')
  );
  return concentrated.slice(0, limit);
}
/**
 * Get trend for a specific file across snapshots
 *
 * Thin wrapper over the store's per-file history query.
 *
 * @param store - MetricsStore instance
 * @param filePath - File path to analyze
 * @param limit - Number of snapshots to analyze (default: 10)
 * @returns Array of metadata ordered by time (newest first)
 */
export function getFileTrend(store: MetricsStore, filePath: string, limit = 10): CodeMetadata[] {
  const history = store.getCodeMetadataForFile(filePath, limit);
  return history;
}

/**
 * Get summary statistics for a snapshot
 *
 * Aggregates totals and distribution counts over the snapshot's files
 * (at most 10000 are read). The "high activity" and "large" counts are
 * cumulative: they include the "very active" / "very large" files too.
 * Files without an author count are counted as single-author.
 *
 * @param store - MetricsStore instance
 * @param snapshotId - Snapshot ID to analyze
 * @returns Summary statistics, or null when the snapshot has no file metadata
 */
export function getSnapshotSummary(store: MetricsStore, snapshotId: string) {
  const files = store.getCodeMetadata({
    snapshotId,
    limit: 10000, // Get all files
  });

  const fileCount = files.length;
  if (fileCount === 0) {
    return null;
  }

  let totalLOC = 0;
  let totalFunctions = 0;
  let veryActiveFiles = 0;
  let highActivityFiles = 0;
  let veryLargeFiles = 0;
  let largeFiles = 0;
  let singleAuthorFiles = 0;
  let pairAuthorFiles = 0;

  // Single pass over the files instead of one pass per statistic
  for (const file of files) {
    totalLOC += file.linesOfCode;
    totalFunctions += file.numFunctions;

    const commits = file.commitCount || 0;
    if (commits >= 100) veryActiveFiles += 1;
    if (commits >= 50) highActivityFiles += 1;

    if (file.linesOfCode >= 2000) veryLargeFiles += 1;
    if (file.linesOfCode >= 1000) largeFiles += 1;

    const authors = file.authorCount || 1;
    if (authors === 1) {
      singleAuthorFiles += 1;
    } else if (authors === 2) {
      pairAuthorFiles += 1;
    }
  }

  // Whole-number percentage of all files in the snapshot
  const percentOfFiles = (count: number): number => Math.round((count / fileCount) * 100);

  return {
    totalFiles: fileCount,
    totalLOC,
    totalFunctions,
    avgLOC: Math.round(totalLOC / fileCount),

    // Activity metrics
    veryActiveFiles,
    highActivityFiles,
    veryActivePercent: percentOfFiles(veryActiveFiles),

    // Size metrics
    veryLargeFiles,
    largeFiles,
    veryLargePercent: percentOfFiles(veryLargeFiles),

    // Ownership metrics
    singleAuthorFiles,
    pairAuthorFiles,
    singleAuthorPercent: percentOfFiles(singleAuthorFiles),
  };
}
/**
 * Count lines of text in a snippet
 *
 * - An empty string has 0 lines.
 * - A single trailing line terminator does not start an extra line
 *   (`"a\nb\n"` is 2 lines, not 3).
 * - `\n`, `\r\n` and `\r` are all accepted as line terminators.
 *
 * @param content - Text to measure
 * @returns Number of lines in `content`
 */
function countLines(content: string): number {
  if (content === '') return 0;

  const lines = content.split(/\r\n|\r|\n/);

  // A terminator at the very end yields one empty trailing element; drop it.
  if (lines[lines.length - 1] === '') {
    lines.pop();
  }

  return lines.length;
}
/**
 * Estimate a file's lines of code from its scanned documents.
 *
 * Uses the furthest `endLine` reached by any document of the file, which
 * approximates the file length far better than the (truncated) snippet of the
 * first document alone. Falls back to the first document's snippet length when
 * no usable line information is present.
 *
 * NOTE(review): assumes `metadata.endLine` is a 1-based line number within the
 * file - confirm against the scanner.
 */
function estimateLinesOfCode(docs: Document[]): number {
  let furthestLine = 0;
  for (const doc of docs) {
    const endLine = doc.metadata.endLine;
    if (typeof endLine === 'number' && Number.isFinite(endLine) && endLine > furthestLine) {
      furthestLine = endLine;
    }
  }
  if (furthestLine > 0) return furthestLine;

  const snippet = docs[0]?.metadata.snippet;
  return snippet ? countLines(snippet) : 0;
}

/**
 * Build code metadata from indexer state
 *
 * Combines data from:
 * - Scanner results (documents, imports)
 * - Git history (change frequency)
 *
 * Produces one entry per distinct `metadata.file`. If the change-frequency
 * lookup fails, the commit/author/last-modified fields are left undefined
 * rather than failing the whole build.
 *
 * @param repositoryPath - Repository path
 * @param documents - Scanned documents
 * @returns Array of code metadata, one per file, in first-seen order
 */
export async function buildCodeMetadata(
  repositoryPath: string,
  documents: Document[]
): Promise<CodeMetadata[]> {
  // Calculate change frequency for all files.
  // Best effort: any failure degrades to "no history" instead of throwing.
  const changeFreq = await calculateChangeFrequency({ repositoryPath }).catch(() => new Map());

  // Group documents by file
  const fileToDocuments = new Map<string, Document[]>();
  for (const doc of documents) {
    const filePath = doc.metadata.file;
    const group = fileToDocuments.get(filePath);
    if (group) {
      group.push(doc);
    } else {
      fileToDocuments.set(filePath, [doc]);
    }
  }

  // Build metadata for each file
  const metadata: CodeMetadata[] = [];

  for (const [filePath, docs] of fileToDocuments) {
    const freq = changeFreq.get(filePath);

    // Count unique imports across all documents in this file
    const allImports = new Set<string>();
    for (const doc of docs) {
      for (const imp of doc.metadata.imports ?? []) {
        allImports.add(imp);
      }
    }

    metadata.push({
      filePath,
      commitCount: freq?.commitCount,
      lastModified: freq?.lastModified,
      authorCount: freq?.authorCount,
      linesOfCode: estimateLinesOfCode(docs),
      numFunctions: docs.length, // Each document is a function/component
      numImports: allImports.size,
    });
  }

  return metadata;
}
import type Database from 'better-sqlite3';

/**
 * Schema version 1: snapshots plus per-file code metadata
 *
 * Defines two tables and their indexes:
 * - `snapshots`: one row per index/update run, with the full stats as JSON
 *   and a few denormalized columns for fast filtering.
 * - `code_metadata`: per-file metrics belonging to a snapshot; rows are tied
 *   to their snapshot with `ON DELETE CASCADE`.
 *
 * Design philosophy:
 * - JSON storage for flexibility (no schema migrations needed)
 * - Denormalized fields for fast queries
 * - Every statement uses IF NOT EXISTS, so executing the script again on an
 *   existing database is harmless
 * - Future tables can be added without breaking this
 */
export const METRICS_SCHEMA_V1 = `
  -- Core snapshots table
  CREATE TABLE IF NOT EXISTS snapshots (
    id TEXT PRIMARY KEY,
    timestamp INTEGER NOT NULL,
    repository_path TEXT NOT NULL,
    stats TEXT NOT NULL, -- JSON serialized DetailedIndexStats

    -- Denormalized for fast queries (avoid parsing JSON)
    trigger TEXT CHECK(trigger IN ('index', 'update')),
    total_files INTEGER,
    total_documents INTEGER,
    total_vectors INTEGER,
    duration_ms INTEGER,

    created_at INTEGER NOT NULL
  );

  -- Index for time-based queries (most common)
  CREATE INDEX IF NOT EXISTS idx_snapshots_timestamp
    ON snapshots(timestamp DESC);

  -- Index for repository-specific queries
  CREATE INDEX IF NOT EXISTS idx_snapshots_repo
    ON snapshots(repository_path, timestamp DESC);

  -- Index for filtering by trigger type
  CREATE INDEX IF NOT EXISTS idx_snapshots_trigger
    ON snapshots(trigger, timestamp DESC);

  -- Code metadata table (per-file metrics for hotspot detection)
  CREATE TABLE IF NOT EXISTS code_metadata (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    snapshot_id TEXT NOT NULL,
    file_path TEXT NOT NULL,

    -- Data we have or can easily get:
    commit_count INTEGER, -- From change frequency
    last_modified INTEGER, -- From change frequency (timestamp)
    author_count INTEGER, -- From change frequency
    lines_of_code INTEGER, -- Count lines during scan
    num_functions INTEGER, -- From document count
    num_imports INTEGER, -- From DocumentMetadata.imports

    -- Calculated risk score
    risk_score REAL, -- (commit_count * lines_of_code) / max(author_count, 1)

    FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
    UNIQUE (snapshot_id, file_path)
  );

  -- Index for querying by snapshot
  CREATE INDEX IF NOT EXISTS idx_code_metadata_snapshot
    ON code_metadata(snapshot_id);

  -- Index for finding hotspots (highest risk files)
  CREATE INDEX IF NOT EXISTS idx_code_metadata_risk
    ON code_metadata(risk_score DESC);

  -- Index for file-specific queries
  CREATE INDEX IF NOT EXISTS idx_code_metadata_file
    ON code_metadata(file_path);
`;

/**
 * Initialize database with schema and optimizations
 *
 * Applies the connection pragmas first, then executes METRICS_SCHEMA_V1.
 *
 * @param db - Open better-sqlite3 connection to configure
 */
export function initializeDatabase(db: Database.Database): void {
  // Enable WAL (Write-Ahead Logging) mode for better concurrency
  // This allows readers and writers to operate concurrently
  db.pragma('journal_mode = WAL');

  // Use NORMAL synchronous mode for better performance
  // Still safe with WAL mode enabled
  db.pragma('synchronous = NORMAL');

  // Enable foreign keys
  // SQLite does not enforce foreign keys unless this is switched on for the
  // connection; the ON DELETE CASCADE on code_metadata.snapshot_id relies on it.
  db.pragma('foreign_keys = ON');

  // Create schema
  db.exec(METRICS_SCHEMA_V1);
}
/** Columns of a `snapshots` row that this module reads. */
type SnapshotRow = {
  id: string;
  timestamp: number;
  repository_path: string;
  stats: string;
  trigger: 'index' | 'update';
};

/** Columns of a `code_metadata` row that this module reads. */
type CodeMetadataRow = {
  file_path: string;
  commit_count: number | null;
  last_modified: number | null;
  author_count: number | null;
  lines_of_code: number;
  num_functions: number;
  num_imports: number;
  risk_score: number;
};

/**
 * Metrics Store Class
 *
 * Stores snapshots of repository statistics over time, plus optional per-file
 * code metadata attached to a snapshot, in a SQLite database.
 * All methods are synchronous. The optional logger is only used for
 * diagnostics; the store works without one.
 */
export class MetricsStore {
  private readonly db: Database.Database;

  /**
   * Open (or create) the metrics database and make sure the schema exists.
   *
   * @param dbPath - Path of the SQLite database file
   * @param logger - Optional logger for diagnostics
   * @throws Error if the database cannot be opened or initialized
   */
  constructor(
    dbPath: string,
    private logger?: Logger
  ) {
    try {
      this.db = new Database(dbPath);
      initializeDatabase(this.db);
      this.logger?.info({ dbPath }, 'Metrics store initialized');
    } catch (error) {
      this.logger?.error({ error }, 'Failed to initialize metrics DB');
      throw error;
    }
  }

  /** Convert a raw `snapshots` row into the public Snapshot shape. */
  private static toSnapshot(row: SnapshotRow): Snapshot {
    return {
      id: row.id,
      timestamp: new Date(row.timestamp),
      repositoryPath: row.repository_path,
      // Stored by recordSnapshot() as JSON.stringify(stats)
      stats: JSON.parse(row.stats) as DetailedIndexStats,
      trigger: row.trigger,
    };
  }

  /**
   * Convert a raw `code_metadata` row into the public CodeMetadata shape.
   * Only SQL NULL maps to `undefined`; a stored 0 stays 0.
   */
  private static toCodeMetadata(row: CodeMetadataRow): CodeMetadata {
    return {
      filePath: row.file_path,
      commitCount: row.commit_count ?? undefined,
      lastModified: row.last_modified !== null ? new Date(row.last_modified) : undefined,
      authorCount: row.author_count ?? undefined,
      linesOfCode: row.lines_of_code,
      numFunctions: row.num_functions,
      numImports: row.num_imports,
      riskScore: row.risk_score,
    };
  }

  /**
   * Record a snapshot
   *
   * @param stats - Repository statistics to record
   * @param trigger - What triggered this snapshot ('index' or 'update')
   * @param customTimestamp - Optional timestamp (for testing)
   * @returns Snapshot ID
   * @throws Error if database write fails
   */
  recordSnapshot(
    stats: DetailedIndexStats,
    trigger: 'index' | 'update',
    customTimestamp?: Date
  ): string {
    const id = crypto.randomUUID();
    const timestamp = customTimestamp ? customTimestamp.getTime() : Date.now();

    try {
      this.db
        .prepare(
          `
        INSERT INTO snapshots
        (id, timestamp, repository_path, stats, trigger,
         total_files, total_documents, total_vectors, duration_ms, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `
        )
        .run(
          id,
          timestamp,
          stats.repositoryPath,
          JSON.stringify(stats),
          trigger,
          stats.filesScanned,
          stats.documentsIndexed,
          stats.vectorsStored,
          stats.duration,
          timestamp
        );

      this.logger?.debug(
        {
          id,
          trigger,
          files: stats.filesScanned,
          documents: stats.documentsIndexed,
        },
        'Recorded snapshot'
      );

      return id;
    } catch (error) {
      this.logger?.error({ error }, 'Failed to record snapshot');
      throw error;
    }
  }

  /**
   * Query snapshots with filters
   *
   * The query is validated with SnapshotQuerySchema first (which also applies
   * the default limit). Results are ordered newest first.
   *
   * @param query - Query parameters (since, until, limit, etc.)
   * @returns Array of snapshots matching the query
   * @throws If the query fails schema validation
   */
  getSnapshots(query: SnapshotQuery): Snapshot[] {
    // Validate query with Zod
    const validated = SnapshotQuerySchema.parse(query);
    const { since, until, limit, repositoryPath, trigger } = validated;

    // "WHERE 1=1" lets every optional filter be appended uniformly with AND
    let sql = 'SELECT * FROM snapshots WHERE 1=1';
    const params: unknown[] = [];

    if (since) {
      sql += ' AND timestamp >= ?';
      params.push(since.getTime());
    }

    if (until) {
      sql += ' AND timestamp <= ?';
      params.push(until.getTime());
    }

    if (repositoryPath) {
      sql += ' AND repository_path = ?';
      params.push(repositoryPath);
    }

    if (trigger) {
      sql += ' AND trigger = ?';
      params.push(trigger);
    }

    sql += ' ORDER BY timestamp DESC LIMIT ?';
    params.push(limit);

    const rows = this.db.prepare(sql).all(...params) as SnapshotRow[];
    return rows.map((row) => MetricsStore.toSnapshot(row));
  }

  /**
   * Get the latest snapshot
   *
   * @param repositoryPath - Optional repository path filter
   * @returns Latest snapshot or null if none exist
   */
  getLatestSnapshot(repositoryPath?: string): Snapshot | null {
    const snapshots = this.getSnapshots({ limit: 1, repositoryPath });
    return snapshots[0] ?? null;
  }

  /**
   * Get count of snapshots
   *
   * @param repositoryPath - Optional repository path filter
   * @returns Total number of snapshots
   */
  getCount(repositoryPath?: string): number {
    let sql = 'SELECT COUNT(*) as count FROM snapshots';
    const params: unknown[] = [];

    if (repositoryPath) {
      sql += ' WHERE repository_path = ?';
      params.push(repositoryPath);
    }

    const result = this.db.prepare(sql).get(...params) as { count: number };
    return result.count;
  }

  /**
   * Get a specific snapshot by ID
   *
   * @param id - Snapshot ID
   * @returns Snapshot or null if not found
   */
  getSnapshot(id: string): Snapshot | null {
    const row = this.db.prepare('SELECT * FROM snapshots WHERE id = ?').get(id) as
      | SnapshotRow
      | undefined;

    return row ? MetricsStore.toSnapshot(row) : null;
  }

  /**
   * Delete old snapshots based on retention policy
   *
   * Deletes every snapshot whose timestamp is strictly older than
   * `now - retentionDays` (so 0 removes everything recorded before this call).
   *
   * @param retentionDays - Number of days to keep
   * @returns Number of snapshots deleted
   */
  pruneOldSnapshots(retentionDays: number): number {
    const cutoff = Date.now() - retentionDays * 86400000; // 86,400,000 ms per day

    const result = this.db.prepare('DELETE FROM snapshots WHERE timestamp < ?').run(cutoff);

    if (result.changes > 0) {
      this.logger?.info(
        {
          deleted: result.changes,
          retentionDays,
        },
        'Pruned old snapshots'
      );
    }

    return result.changes;
  }

  /**
   * Calculate risk score for a file
   * Formula: (commit_count * lines_of_code) / max(author_count, 1)
   *
   * Rationale:
   * - High commit count = frequently changed (more bugs)
   * - High LOC = more complex (harder to maintain)
   * - Low author count = knowledge concentrated (bus factor risk)
   *
   * A missing commit count is treated as 0 and a missing author count as 1.
   */
  private calculateRiskScore(metadata: CodeMetadata): number {
    const commitCount = metadata.commitCount || 0;
    const authorCount = Math.max(metadata.authorCount || 1, 1);
    const linesOfCode = metadata.linesOfCode;

    return (commitCount * linesOfCode) / authorCount;
  }

  /**
   * Append code metadata for a snapshot
   *
   * All rows are inserted in a single transaction, so either every row is
   * stored or none is.
   *
   * @param snapshotId - Snapshot ID to associate metadata with
   * @param metadata - Array of file metadata to store
   * @returns Number of records inserted
   * @throws Error if any insert fails
   */
  appendCodeMetadata(snapshotId: string, metadata: CodeMetadata[]): number {
    if (metadata.length === 0) return 0;

    const stmt = this.db.prepare(`
      INSERT INTO code_metadata
      (snapshot_id, file_path, commit_count, last_modified, author_count,
       lines_of_code, num_functions, num_imports, risk_score)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);

    const insert = this.db.transaction((items: CodeMetadata[]) => {
      for (const item of items) {
        const riskScore = this.calculateRiskScore(item);
        stmt.run(
          snapshotId,
          item.filePath,
          // `??` (not `||`): a real count of 0 must be stored as 0, not NULL
          item.commitCount ?? null,
          item.lastModified ? item.lastModified.getTime() : null,
          item.authorCount ?? null,
          item.linesOfCode,
          item.numFunctions,
          item.numImports,
          riskScore
        );
      }
    });

    try {
      insert(metadata);
      this.logger?.debug({ snapshotId, count: metadata.length }, 'Appended code metadata');
      return metadata.length;
    } catch (error) {
      this.logger?.error({ error, snapshotId }, 'Failed to append code metadata');
      throw error;
    }
  }

  /**
   * Get code metadata for a snapshot
   *
   * @param query - Query parameters (sort defaults to 'risk_desc', limit to 100)
   * @returns Array of code metadata
   */
  getCodeMetadata(query: CodeMetadataQuery): CodeMetadata[] {
    let sql = 'SELECT * FROM code_metadata WHERE snapshot_id = ?';
    const params: unknown[] = [query.snapshotId];

    if (query.minRiskScore !== undefined) {
      sql += ' AND risk_score >= ?';
      params.push(query.minRiskScore);
    }

    // Sort order: only fixed fragments are appended, never caller-supplied text
    const sortBy = query.sortBy || 'risk_desc';
    switch (sortBy) {
      case 'risk_desc':
        sql += ' ORDER BY risk_score DESC';
        break;
      case 'risk_asc':
        sql += ' ORDER BY risk_score ASC';
        break;
      case 'lines_desc':
        sql += ' ORDER BY lines_of_code DESC';
        break;
      case 'commits_desc':
        sql += ' ORDER BY commit_count DESC';
        break;
    }

    sql += ' LIMIT ?';
    params.push(query.limit || 100);

    const rows = this.db.prepare(sql).all(...params) as CodeMetadataRow[];
    return rows.map((row) => MetricsStore.toCodeMetadata(row));
  }

  /**
   * Get code metadata for a specific file across snapshots
   *
   * @param filePath - File path to query
   * @param limit - Maximum number of snapshots to return (default: 10)
   * @returns Array of code metadata ordered by snapshot timestamp (newest first)
   */
  getCodeMetadataForFile(filePath: string, limit = 10): CodeMetadata[] {
    const sql = `
      SELECT cm.*, s.timestamp
      FROM code_metadata cm
      JOIN snapshots s ON cm.snapshot_id = s.id
      WHERE cm.file_path = ?
      ORDER BY s.timestamp DESC
      LIMIT ?
    `;

    const rows = this.db.prepare(sql).all(filePath, limit) as CodeMetadataRow[];
    return rows.map((row) => MetricsStore.toCodeMetadata(row));
  }

  /**
   * Get count of code metadata records for a snapshot
   *
   * @param snapshotId - Snapshot ID
   * @returns Total number of code metadata records
   */
  getCodeMetadataCount(snapshotId: string): number {
    const result = this.db
      .prepare('SELECT COUNT(*) as count FROM code_metadata WHERE snapshot_id = ?')
      .get(snapshotId) as { count: number };
    return result.count;
  }

  /**
   * Close the database connection
   *
   * Safe to call more than once; later calls do nothing. Never throws:
   * a failure to close is logged instead.
   */
  close(): void {
    try {
      // Already closed: nothing to do (and nothing to log a second time)
      if (!this.db.open) return;

      this.db.close();
      this.logger?.debug({}, 'Metrics store closed');
    } catch (error) {
      this.logger?.error({ error }, 'Failed to close metrics store');
    }
  }
}
import { z } from 'zod';
import type { DetailedIndexStats } from '../indexer/types.js';

/**
 * A single metrics snapshot
 */
export interface Snapshot {
  id: string;
  timestamp: Date;
  repositoryPath: string;
  stats: DetailedIndexStats;
  trigger: 'index' | 'update';
}

/**
 * Query parameters for retrieving snapshots
 */
export interface SnapshotQuery {
  /** Start date (inclusive) */
  since?: Date;

  /** End date (inclusive) */
  until?: Date;

  /** Maximum number of results (default: 100, max: 1000) */
  limit?: number;

  /** Filter by repository path */
  repositoryPath?: string;

  /** Filter by trigger type */
  trigger?: 'index' | 'update';
}

/**
 * Zod schema for validating snapshot queries
 *
 * Dates are coerced, and `limit` must be a positive integer of at most 1000
 * (100 when omitted).
 */
export const SnapshotQuerySchema = z.object({
  since: z.coerce.date().optional(),
  until: z.coerce.date().optional(),
  limit: z.number().int().positive().max(1000).default(100),
  repositoryPath: z.string().optional(),
  trigger: z.enum(['index', 'update']).optional(),
});

/**
 * Metrics store configuration
 */
export interface MetricsConfig {
  /** Enable metrics collection (default: true) */
  enabled?: boolean;

  /** Retention period in days (default: 90) */
  retentionDays?: number;

  /** Maximum database size in MB (default: 100) */
  maxSizeMB?: number;
}

/**
 * Default metrics configuration
 *
 * Typed as `Required<MetricsConfig>` so that every option is guaranteed to
 * have a default.
 */
export const DEFAULT_METRICS_CONFIG: Required<MetricsConfig> = {
  enabled: true,
  retentionDays: 90,
  maxSizeMB: 100,
};

/**
 * Per-file code metadata for hotspot detection
 */
export interface CodeMetadata {
  filePath: string;
  commitCount?: number;
  lastModified?: Date;
  authorCount?: number;
  linesOfCode: number;
  numFunctions: number;
  numImports: number;
  riskScore?: number;
}

/**
 * Zod schema for code metadata
 */
export const CodeMetadataSchema = z.object({
  filePath: z.string().min(1),
  commitCount: z.number().int().nonnegative().optional(),
  lastModified: z.coerce.date().optional(),
  authorCount: z.number().int().positive().optional(),
  linesOfCode: z.number().int().nonnegative(),
  numFunctions: z.number().int().nonnegative(),
  numImports: z.number().int().nonnegative(),
  riskScore: z.number().nonnegative().optional(),
});

/**
 * Query parameters for retrieving code metadata
 */
export interface CodeMetadataQuery {
  /** Snapshot ID to query */
  snapshotId: string;

  /** Minimum risk score threshold */
  minRiskScore?: number;

  /** Maximum number of results (default: 100) */
  limit?: number;

  /** Sort order (default: 'risk_desc') */
  sortBy?: 'risk_desc' | 'risk_asc' | 'lines_desc' | 'commits_desc';
}

/**
 * Hotspot detection result
 */
export interface Hotspot {
  filePath: string;
  riskScore: number;
  commitCount: number;
  authorCount: number;
  linesOfCode: number;
  numFunctions: number;
  lastModified?: Date;
  reason: string; // Human-readable explanation
}

/**
 * Zod schema for hotspot results
 */
export const HotspotSchema = z.object({
  filePath: z.string(),
  riskScore: z.number().nonnegative(),
  commitCount: z.number().int().nonnegative(),
  authorCount: z.number().int().positive(),
  linesOfCode: z.number().int().nonnegative(),
  numFunctions: z.number().int().nonnegative(),
  lastModified: z.coerce.date().optional(),
  reason: z.string(),
});
export const THEME_CONFIG: { dark: boolean; primary: string } = { // ============================================ // NON-EXPORTED - Should NOT be extracted // ============================================ -// biome-ignore lint/correctness/noUnusedVariables: Test fixtures for non-extraction // Plain constant (primitive) - never extracted const plainConstant = 42; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 817d015..eb93145 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -103,6 +103,9 @@ importers: '@xenova/transformers': specifier: ^2.17.2 version: 2.17.2 + better-sqlite3: + specifier: ^12.5.0 + version: 12.5.0 globby: specifier: ^16.0.0 version: 16.0.0 @@ -128,6 +131,9 @@ importers: specifier: ^4.1.13 version: 4.1.13 devDependencies: + '@types/better-sqlite3': + specifier: ^7.6.13 + version: 7.6.13 '@types/mdast': specifier: ^4.0.4 version: 4.0.4 @@ -1844,6 +1850,12 @@ packages: resolution: {integrity: sha512-Kgq5yXTvnUnvlhob0xJpOH4na9PWtuFhHSf94MpDwnENWgiFeJKDNANQV2MT1WpXZYkK2WSWfVYKhVkR7bc8TA==} dev: true + /@types/better-sqlite3@7.6.13: + resolution: {integrity: sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA==} + dependencies: + '@types/node': 24.10.1 + dev: true + /@types/chai@5.2.3: resolution: {integrity: sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==} dependencies: @@ -2332,6 +2344,21 @@ packages: is-windows: 1.0.2 dev: true + /better-sqlite3@12.5.0: + resolution: {integrity: sha512-WwCZ/5Diz7rsF29o27o0Gcc1Du+l7Zsv7SYtVPG0X3G/uUI1LqdxrQI7c9Hs2FWpqXXERjW9hp6g3/tH7DlVKg==} + engines: {node: 20.x || 22.x || 23.x || 24.x || 25.x} + requiresBuild: true + dependencies: + bindings: 1.5.0 + prebuild-install: 7.1.3 + dev: false + + /bindings@1.5.0: + resolution: {integrity: sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==} + dependencies: + file-uri-to-path: 1.0.0 + dev: false + /bl@4.1.0: resolution: {integrity: 
sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==} dependencies: @@ -2941,6 +2968,10 @@ packages: dependencies: picomatch: 4.0.3 + /file-uri-to-path@1.0.0: + resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==} + dev: false + /fill-range@7.1.1: resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==} engines: {node: '>=8'}