Skip to content

Commit 2ee6ad9

Browse files
committed
feat(metrics): add code_metadata table and MetricsStore methods
Phase 2.1: Code Metadata Schema & Store Methods

Database Schema:
- Added code_metadata table with foreign key to snapshots
- Stores per-file metrics: commit_count, author_count, LOC, functions, imports
- Includes calculated risk_score for hotspot detection
- Indexes for efficient querying (by snapshot, risk, file)
- CASCADE DELETE when snapshots are removed

Types & Schemas:
- Added CodeMetadata interface with Zod schema
- Added CodeMetadataQuery for filtering/sorting
- Added Hotspot interface for analysis results
- Exported all new types from metrics module

MetricsStore Methods:
- appendCodeMetadata() - Bulk insert with transaction
- getCodeMetadata() - Query with filtering and sorting
- getCodeMetadataForFile() - File history across snapshots
- getCodeMetadataCount() - Count records per snapshot
- calculateRiskScore() - Risk formula: (commits * LOC) / max(authors, 1)

Risk Score Formula:
- High commits = frequently changed (more bugs)
- High LOC = more complex (harder to maintain)
- Low authors = knowledge concentrated (bus factor)

Next: Analytics module and CLI integration
1 parent ec6448f commit 2ee6ad9

File tree

4 files changed

+290
-1
lines changed

4 files changed

+290
-1
lines changed

packages/core/src/metrics/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,16 @@
77
export { initializeDatabase, METRICS_SCHEMA_V1 } from './schema.js';
88
export { MetricsStore } from './store.js';
99
export type {
10+
CodeMetadata,
11+
CodeMetadataQuery,
12+
Hotspot,
1013
MetricsConfig,
1114
Snapshot,
1215
SnapshotQuery,
1316
} from './types.js';
1417
export {
18+
CodeMetadataSchema,
1519
DEFAULT_METRICS_CONFIG,
20+
HotspotSchema,
1621
SnapshotQuerySchema,
1722
} from './types.js';

packages/core/src/metrics/schema.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,39 @@ export const METRICS_SCHEMA_V1 = `
4444
-- Index for filtering by trigger type
4545
CREATE INDEX IF NOT EXISTS idx_snapshots_trigger
4646
ON snapshots(trigger, timestamp DESC);
47+
48+
-- Code metadata table (per-file metrics for hotspot detection)
49+
CREATE TABLE IF NOT EXISTS code_metadata (
50+
id INTEGER PRIMARY KEY AUTOINCREMENT,
51+
snapshot_id TEXT NOT NULL,
52+
file_path TEXT NOT NULL,
53+
54+
-- Data we have or can easily get:
55+
commit_count INTEGER, -- From change frequency
56+
last_modified INTEGER, -- From change frequency (timestamp)
57+
author_count INTEGER, -- From change frequency
58+
lines_of_code INTEGER, -- Count lines during scan
59+
num_functions INTEGER, -- From document count
60+
num_imports INTEGER, -- From DocumentMetadata.imports
61+
62+
-- Calculated risk score
63+
risk_score REAL, -- (commit_count * lines_of_code) / max(author_count, 1)
64+
65+
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
66+
UNIQUE (snapshot_id, file_path)
67+
);
68+
69+
-- Index for querying by snapshot
70+
CREATE INDEX IF NOT EXISTS idx_code_metadata_snapshot
71+
ON code_metadata(snapshot_id);
72+
73+
-- Index for finding hotspots (highest risk files)
74+
CREATE INDEX IF NOT EXISTS idx_code_metadata_risk
75+
ON code_metadata(risk_score DESC);
76+
77+
-- Index for file-specific queries
78+
CREATE INDEX IF NOT EXISTS idx_code_metadata_file
79+
ON code_metadata(file_path);
4780
`;
4881

4982
/**

packages/core/src/metrics/store.ts

Lines changed: 179 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,13 @@ import type { Logger } from '@lytics/kero';
1010
import Database from 'better-sqlite3';
1111
import type { DetailedIndexStats } from '../indexer/types.js';
1212
import { initializeDatabase } from './schema.js';
13-
import { type Snapshot, type SnapshotQuery, SnapshotQuerySchema } from './types.js';
13+
import {
14+
type CodeMetadata,
15+
type CodeMetadataQuery,
16+
type Snapshot,
17+
type SnapshotQuery,
18+
SnapshotQuerySchema,
19+
} from './types.js';
1420

1521
/**
1622
* Metrics Store Class
@@ -223,6 +229,178 @@ export class MetricsStore {
223229
return result.changes;
224230
}
225231

232+
/**
 * Compute the hotspot risk score for a single file.
 *
 * Formula: (commitCount * linesOfCode) / max(authorCount, 1)
 *
 * A missing or zero commit count contributes 0 to the numerator, and a
 * missing or zero author count is treated as 1, so the divisor is never 0.
 *
 * Rationale:
 * - High commit count = frequently changed (more bugs)
 * - High LOC = more complex (harder to maintain)
 * - Low author count = knowledge concentrated (bus factor risk)
 *
 * @param metadata - Per-file metrics to score
 * @returns The computed risk score
 */
private calculateRiskScore(metadata: CodeMetadata): number {
  const { commitCount, authorCount, linesOfCode } = metadata;
  const churn = (commitCount || 0) * linesOfCode;
  const owners = Math.max(authorCount || 1, 1);

  return churn / owners;
}
248+
249+
/**
 * Append code metadata for a snapshot.
 *
 * Every record is inserted through one prepared statement inside a single
 * `db.transaction` wrapper. The risk score is computed per record via
 * `calculateRiskScore` and stored alongside the raw metrics.
 *
 * Optional numeric fields are written with `??` rather than `||`, so a
 * legitimate value of 0 (e.g. `commitCount: 0`) is stored as 0 and only
 * `undefined`/`null` become SQL NULL.
 *
 * @param snapshotId - Snapshot ID to associate metadata with
 * @param metadata - Array of file metadata to store
 * @returns Number of records inserted (0 when `metadata` is empty)
 * @throws Rethrows any error raised while inserting, after logging it
 */
appendCodeMetadata(snapshotId: string, metadata: CodeMetadata[]): number {
  if (metadata.length === 0) return 0;

  const stmt = this.db.prepare(`
    INSERT INTO code_metadata
    (snapshot_id, file_path, commit_count, last_modified, author_count,
    lines_of_code, num_functions, num_imports, risk_score)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const insert = this.db.transaction((items: CodeMetadata[]) => {
    for (const item of items) {
      const riskScore = this.calculateRiskScore(item);
      stmt.run(
        snapshotId,
        item.filePath,
        // Preserve an explicit 0; only a missing value becomes NULL.
        item.commitCount ?? null,
        // Dates are persisted as epoch milliseconds.
        item.lastModified?.getTime() ?? null,
        item.authorCount ?? null,
        item.linesOfCode,
        item.numFunctions,
        item.numImports,
        riskScore
      );
    }
  });

  try {
    insert(metadata);
    this.logger?.debug({ snapshotId, count: metadata.length }, 'Appended code metadata');
    return metadata.length;
  } catch (error) {
    this.logger?.error({ error, snapshotId }, 'Failed to append code metadata');
    throw error;
  }
}
292+
293+
/**
 * Get code metadata for a snapshot.
 *
 * Rows are restricted to `query.snapshotId`, optionally filtered by
 * `query.minRiskScore`, ordered according to `query.sortBy`
 * (default: 'risk_desc') and capped at `query.limit` rows (default: 100).
 *
 * Nullable columns are mapped back with explicit null checks rather than
 * truthiness, so a stored 0 (commit count, author count, or an epoch-0
 * timestamp) is returned as-is instead of being turned into `undefined`.
 *
 * @param query - Query parameters
 * @returns Array of code metadata
 */
getCodeMetadata(query: CodeMetadataQuery): CodeMetadata[] {
  let sql = 'SELECT * FROM code_metadata WHERE snapshot_id = ?';
  const params: unknown[] = [query.snapshotId];

  if (query.minRiskScore !== undefined) {
    sql += ' AND risk_score >= ?';
    params.push(query.minRiskScore);
  }

  // Sort order
  const sortBy = query.sortBy || 'risk_desc';
  switch (sortBy) {
    case 'risk_desc':
      sql += ' ORDER BY risk_score DESC';
      break;
    case 'risk_asc':
      sql += ' ORDER BY risk_score ASC';
      break;
    case 'lines_desc':
      sql += ' ORDER BY lines_of_code DESC';
      break;
    case 'commits_desc':
      sql += ' ORDER BY commit_count DESC';
      break;
  }

  // A missing (or 0) limit falls back to the default of 100 rows.
  sql += ' LIMIT ?';
  params.push(query.limit || 100);

  const rows = this.db.prepare(sql).all(...params) as Array<{
    file_path: string;
    commit_count: number | null;
    last_modified: number | null;
    author_count: number | null;
    lines_of_code: number;
    num_functions: number;
    num_imports: number;
    risk_score: number;
  }>;

  return rows.map((row) => ({
    filePath: row.file_path,
    // `??` keeps a stored 0; only NULL maps to undefined.
    commitCount: row.commit_count ?? undefined,
    // last_modified is stored as epoch milliseconds.
    lastModified: row.last_modified != null ? new Date(row.last_modified) : undefined,
    authorCount: row.author_count ?? undefined,
    linesOfCode: row.lines_of_code,
    numFunctions: row.num_functions,
    numImports: row.num_imports,
    riskScore: row.risk_score,
  }));
}
350+
351+
/**
 * Get code metadata for a specific file across snapshots.
 *
 * Joins `code_metadata` with `snapshots` so the rows can be ordered by the
 * snapshot timestamp.
 *
 * Nullable columns are mapped back with explicit null checks rather than
 * truthiness, so a stored 0 (commit count, author count, or an epoch-0
 * timestamp) is returned as-is instead of being turned into `undefined`.
 *
 * @param filePath - File path to query
 * @param limit - Maximum number of snapshots to return (default: 10)
 * @returns Array of code metadata ordered by snapshot timestamp (newest first)
 */
getCodeMetadataForFile(filePath: string, limit = 10): CodeMetadata[] {
  const sql = `
    SELECT cm.*, s.timestamp
    FROM code_metadata cm
    JOIN snapshots s ON cm.snapshot_id = s.id
    WHERE cm.file_path = ?
    ORDER BY s.timestamp DESC
    LIMIT ?
  `;

  const rows = this.db.prepare(sql).all(filePath, limit) as Array<{
    file_path: string;
    commit_count: number | null;
    last_modified: number | null;
    author_count: number | null;
    lines_of_code: number;
    num_functions: number;
    num_imports: number;
    risk_score: number;
  }>;

  return rows.map((row) => ({
    filePath: row.file_path,
    // `??` keeps a stored 0; only NULL maps to undefined.
    commitCount: row.commit_count ?? undefined,
    // last_modified is stored as epoch milliseconds.
    lastModified: row.last_modified != null ? new Date(row.last_modified) : undefined,
    authorCount: row.author_count ?? undefined,
    linesOfCode: row.lines_of_code,
    numFunctions: row.num_functions,
    numImports: row.num_imports,
    riskScore: row.risk_score,
  }));
}
390+
391+
/**
 * Count the code metadata records stored for one snapshot.
 *
 * @param snapshotId - Snapshot ID
 * @returns Total number of code metadata records
 */
getCodeMetadataCount(snapshotId: string): number {
  const countStmt = this.db.prepare(
    'SELECT COUNT(*) as count FROM code_metadata WHERE snapshot_id = ?'
  );
  const { count } = countStmt.get(snapshotId) as { count: number };
  return count;
}
403+
226404
/**
227405
* Close the database connection
228406
*/

packages/core/src/metrics/types.ts

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,76 @@ export const DEFAULT_METRICS_CONFIG: Required<MetricsConfig> = {
7171
retentionDays: 90,
7272
maxSizeMB: 100,
7373
};
74+
75+
/**
 * Per-file code metadata for hotspot detection.
 *
 * The commit, author, last-modified and risk fields are optional; the
 * line, function and import counts are always required.
 */
export interface CodeMetadata {
  /** Path of the file these metrics describe */
  filePath: string;
  /** Number of commits touching the file, when known */
  commitCount?: number;
  /** When the file was last modified, when known */
  lastModified?: Date;
  /** Number of distinct authors, when known */
  authorCount?: number;
  /** Lines of code in the file */
  linesOfCode: number;
  /** Number of functions in the file */
  numFunctions: number;
  /** Number of imports in the file */
  numImports: number;
  /** Calculated risk score, when available */
  riskScore?: number;
}
88+
89+
/**
 * Zod schema for code metadata.
 *
 * Mirrors the fields of the `CodeMetadata` interface and adds runtime
 * constraints to each one.
 */
export const CodeMetadataSchema = z.object({
  // Non-empty path
  filePath: z.string().min(1),
  // Optional integer >= 0
  commitCount: z.number().int().nonnegative().optional(),
  // Optional; coerced to a Date
  lastModified: z.coerce.date().optional(),
  // Optional integer > 0
  authorCount: z.number().int().positive().optional(),
  // Required integers >= 0
  linesOfCode: z.number().int().nonnegative(),
  numFunctions: z.number().int().nonnegative(),
  numImports: z.number().int().nonnegative(),
  // Optional number >= 0
  riskScore: z.number().nonnegative().optional(),
});
102+
103+
/**
 * Query parameters for retrieving code metadata.
 *
 * Only `snapshotId` is required; the remaining fields are optional
 * filters and ordering controls.
 */
export interface CodeMetadataQuery {
  /** Snapshot ID to query */
  snapshotId: string;

  /** Minimum risk score threshold */
  minRiskScore?: number;

  /** Maximum number of results (default: 100) */
  limit?: number;

  /** Sort order (default: 'risk_desc') */
  sortBy?: 'risk_desc' | 'risk_asc' | 'lines_desc' | 'commits_desc';
}
119+
120+
/**
 * Hotspot detection result.
 *
 * Unlike `CodeMetadata`, the risk score, commit count and author count are
 * required here; only `lastModified` is optional.
 */
export interface Hotspot {
  /** Path of the file flagged as a hotspot */
  filePath: string;
  /** Risk score of the file */
  riskScore: number;
  /** Number of commits touching the file */
  commitCount: number;
  /** Number of distinct authors */
  authorCount: number;
  /** Lines of code in the file */
  linesOfCode: number;
  /** Number of functions in the file */
  numFunctions: number;
  /** When the file was last modified, when known */
  lastModified?: Date;
  /** Human-readable explanation */
  reason: string;
}
133+
134+
/**
 * Zod schema for hotspot results.
 *
 * Mirrors the fields of the `Hotspot` interface and adds runtime
 * constraints to the numeric ones.
 */
export const HotspotSchema = z.object({
  filePath: z.string(),
  // Number >= 0
  riskScore: z.number().nonnegative(),
  // Integer >= 0
  commitCount: z.number().int().nonnegative(),
  // Integer > 0
  authorCount: z.number().int().positive(),
  // Integers >= 0
  linesOfCode: z.number().int().nonnegative(),
  numFunctions: z.number().int().nonnegative(),
  // Optional; coerced to a Date
  lastModified: z.coerce.date().optional(),
  reason: z.string(),
});

0 commit comments

Comments
 (0)