Skip to content

Commit 7576454

Browse files
committed
feat(core): add git commit indexing (#92)
- Add GitIndexer for indexing commits into vector store
- Support semantic search over commit messages
- Support file-specific history retrieval
- Batch embedding and storage with progress reporting
- Include file paths in embeddings for better search
- Store full commit metadata including issue/PR refs
- Add createGitIndexer factory function
- Add 17 comprehensive unit tests

Part of Epic: Intelligent Git History (v0.4.0) #90
1 parent 813f37e commit 7576454

File tree

3 files changed

+614
-1
lines changed

3 files changed

+614
-1
lines changed
Lines changed: 318 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,318 @@
1+
import { beforeEach, describe, expect, it, vi } from 'vitest';
2+
import type { VectorStorage } from '../../vector';
3+
import type { SearchResult } from '../../vector/types';
4+
import type { GitExtractor } from '../extractor';
5+
import { GitIndexer } from '../indexer';
6+
import type { GitCommit } from '../types';
7+
8+
// Mock commit data
9+
/**
 * Builds a fully populated `GitCommit` fixture for the indexer tests.
 *
 * Every field has a fixed default so a test only needs to spell out what it
 * cares about. `overrides` is spread last, which makes this a shallow merge:
 * overriding a nested object such as `refs` or `author` replaces it wholesale,
 * so the caller must pass the complete nested value.
 *
 * @param overrides - Top-level fields that replace the defaults.
 * @returns A commit touching two files and referencing issue 123.
 */
const createMockCommit = (overrides: Partial<GitCommit> = {}): GitCommit => ({
  hash: 'abc123def456789012345678901234567890abcd',
  shortHash: 'abc123d',
  message: 'feat: add new feature\n\nThis adds a great new feature.',
  subject: 'feat: add new feature',
  body: 'This adds a great new feature.',
  author: {
    name: 'Test User',
    // The document-structure test requires `authorEmail` to be a string;
    // presumably it is read from this field, so the fixture must carry one.
    email: 'test@example.com',
    date: '2025-01-15T10:00:00Z',
  },
  committer: {
    name: 'Test User',
    email: 'test@example.com',
    date: '2025-01-15T10:00:00Z',
  },
  files: [
    { path: 'src/feature.ts', status: 'added', additions: 50, deletions: 0 },
    { path: 'src/index.ts', status: 'modified', additions: 5, deletions: 2 },
  ],
  // Totals match the per-file numbers above (50 + 5 additions, 0 + 2 deletions).
  stats: {
    additions: 55,
    deletions: 2,
    filesChanged: 2,
  },
  refs: {
    branches: [],
    tags: [],
    issueRefs: [123],
    prRefs: [],
  },
  parents: ['parent123'],
  ...overrides,
});
43+
44+
describe('GitIndexer', () => {
45+
// Shared fixtures. All three are rebuilt in `beforeEach`, so mock call history
// and per-test overrides never carry over from one test to the next.
let mockExtractor: GitExtractor;
let mockVectorStorage: VectorStorage;
let indexer: GitIndexer;

beforeEach(() => {
  // Default history: the stock fixture plus a bug-fix commit referencing issue 456.
  const defaultHistory = [
    createMockCommit(),
    createMockCommit({
      hash: 'def456abc789012345678901234567890abcdef',
      shortHash: 'def456a',
      subject: 'fix: resolve bug #456',
      body: 'Fixes the critical bug.',
      refs: { branches: [], tags: [], issueRefs: [456], prRefs: [] },
    }),
  ];

  // Extractor stub: only `getCommits` has a canned result.
  mockExtractor = {
    getCommits: vi.fn().mockResolvedValue(defaultHistory),
    getCommit: vi.fn(),
    getBlame: vi.fn(),
    getRepositoryInfo: vi.fn(),
  };

  // Storage stub: writes succeed and searches come back empty unless a test overrides them.
  const storageStub = {
    initialize: vi.fn().mockResolvedValue(undefined),
    addDocuments: vi.fn().mockResolvedValue(undefined),
    search: vi.fn().mockResolvedValue([]),
    getDocument: vi.fn(),
    deleteDocuments: vi.fn(),
    getStats: vi.fn(),
    optimize: vi.fn(),
    close: vi.fn(),
  };
  mockVectorStorage = storageStub as unknown as VectorStorage;

  // System under test, wired to both stubs.
  indexer = new GitIndexer({
    extractor: mockExtractor,
    vectorStorage: mockVectorStorage,
    commitLimit: 100,
    batchSize: 10,
  });
});
86+
87+
describe('index', () => {
  // Happy path: the configured commit limit is forwarded and both fixture commits are indexed.
  it('should extract and index commits', async () => {
    const outcome = await indexer.index();

    const expectedQuery = {
      limit: 100,
      since: undefined,
      until: undefined,
      author: undefined,
      noMerges: true,
    };
    expect(mockExtractor.getCommits).toHaveBeenCalledWith(expectedQuery);

    expect(mockVectorStorage.addDocuments).toHaveBeenCalled();
    expect(outcome.commitsIndexed).toBe(2);
    expect(outcome.errors).toHaveLength(0);
  });

  it('should respect limit option', async () => {
    await indexer.index({ limit: 50 });

    const withLimit = expect.objectContaining({ limit: 50 });
    expect(mockExtractor.getCommits).toHaveBeenCalledWith(withLimit);
  });

  it('should pass date filters to extractor', async () => {
    const since = '2025-01-01';
    const until = '2025-01-31';

    await indexer.index({ since, until });

    expect(mockExtractor.getCommits).toHaveBeenCalledWith(
      expect.objectContaining({ since, until })
    );
  });

  it('should pass author filter to extractor', async () => {
    await indexer.index({ author: '[email protected]' });

    const withAuthor = expect.objectContaining({ author: '[email protected]' });
    expect(mockExtractor.getCommits).toHaveBeenCalledWith(withAuthor);
  });

  // No commits means nothing to store, so storage must not be touched at all.
  it('should handle empty repository', async () => {
    vi.mocked(mockExtractor.getCommits).mockResolvedValue([]);

    const outcome = await indexer.index();

    expect(outcome.commitsIndexed).toBe(0);
    expect(mockVectorStorage.addDocuments).not.toHaveBeenCalled();
  });

  // An extractor failure is reported through `errors` on the result.
  it('should handle extraction errors', async () => {
    vi.mocked(mockExtractor.getCommits).mockRejectedValue(new Error('Git error'));

    const outcome = await indexer.index();

    expect(outcome.commitsIndexed).toBe(0);
    expect(outcome.errors).toHaveLength(1);
    expect(outcome.errors[0]).toContain('Git error');
  });

  // A storage failure is likewise collected into `errors`.
  it('should handle storage errors gracefully', async () => {
    vi.mocked(mockVectorStorage.addDocuments).mockRejectedValue(new Error('Storage error'));

    const outcome = await indexer.index();

    expect(outcome.errors).toHaveLength(1);
    expect(outcome.errors[0]).toContain('Storage error');
  });

  it('should report progress', async () => {
    const seen: Array<{ phase: string; percentComplete: number }> = [];

    await indexer.index({
      onProgress: ({ phase, percentComplete }) => {
        seen.push({ phase, percentComplete });
      },
    });

    // Every intermediate phase must be announced at least once...
    for (const phase of ['extracting', 'embedding', 'storing']) {
      expect(seen).toContainEqual(expect.objectContaining({ phase }));
    }
    // ...and the run must end with a 100% completion event.
    expect(seen).toContainEqual(
      expect.objectContaining({ phase: 'complete', percentComplete: 100 })
    );
  });

  it('should batch documents correctly', async () => {
    // 25 distinct commits, enough to overflow two batches of 10.
    const bulkCommits = Array.from({ length: 25 }, (_, n) =>
      createMockCommit({
        hash: `hash${n.toString().padStart(38, '0')}`,
        shortHash: `h${n}`,
        subject: `Commit ${n}`,
      })
    );
    vi.mocked(mockExtractor.getCommits).mockResolvedValue(bulkCommits);

    await indexer.index();

    // batchSize is 10, so 25 commits are written as 10 + 10 + 5.
    expect(mockVectorStorage.addDocuments).toHaveBeenCalledTimes(3);
  });
});
197+
198+
describe('search', () => {
  // A stored hit carrying `_commit` metadata is returned to the caller as a commit.
  it('should search for commits by semantic query', async () => {
    const storedCommit = createMockCommit();
    const hit = {
      id: `commit:${storedCommit.hash}`,
      score: 0.9,
      metadata: {
        type: 'commit',
        hash: storedCommit.hash,
        _commit: storedCommit,
      },
    } as SearchResult;
    vi.mocked(mockVectorStorage.search).mockResolvedValue([hit]);

    const found = await indexer.search('add new feature');

    // With no options given, the storage query uses these values.
    const expectedOptions = {
      limit: 10,
      scoreThreshold: 0,
      filter: { type: 'commit' },
    };
    expect(mockVectorStorage.search).toHaveBeenCalledWith('add new feature', expectedOptions);
    expect(found).toHaveLength(1);
    expect(found[0].hash).toBe(storedCommit.hash);
  });

  it('should respect limit option', async () => {
    await indexer.search('query', { limit: 5 });

    const withLimit = expect.objectContaining({ limit: 5 });
    expect(mockVectorStorage.search).toHaveBeenCalledWith('query', withLimit);
  });

  it('should filter out results without commit metadata', async () => {
    // This hit has no `_commit` in its metadata, so it must be dropped.
    const incompleteHit = {
      id: 'commit:abc',
      score: 0.9,
      metadata: { type: 'commit' },
    } as SearchResult;
    vi.mocked(mockVectorStorage.search).mockResolvedValue([incompleteHit]);

    const found = await indexer.search('query');

    expect(found).toHaveLength(0);
  });
});
247+
248+
describe('getFileHistory', () => {
  // File history is expected to come straight from the extractor, queried by path.
  it('should get history for a specific file', async () => {
    const history = [createMockCommit()];
    vi.mocked(mockExtractor.getCommits).mockResolvedValue(history);

    const returned = await indexer.getFileHistory('src/feature.ts');

    // With no options given, the extractor query uses these values.
    const expectedQuery = {
      path: 'src/feature.ts',
      limit: 20,
      follow: true,
      noMerges: true,
    };
    expect(mockExtractor.getCommits).toHaveBeenCalledWith(expectedQuery);
    expect(returned).toEqual(history);
  });

  it('should respect limit option', async () => {
    await indexer.getFileHistory('src/file.ts', { limit: 5 });

    const withLimit = expect.objectContaining({ limit: 5 });
    expect(mockExtractor.getCommits).toHaveBeenCalledWith(withLimit);
  });
});
270+
271+
describe('document preparation', () => {
  // Runs a full index pass and returns the documents passed to the first `addDocuments` call.
  const indexAndCaptureFirstBatch = async () => {
    await indexer.index();

    const storeCalls = vi.mocked(mockVectorStorage.addDocuments).mock.calls;
    return storeCalls[0][0];
  };

  it('should create proper document structure', async () => {
    const documents = await indexAndCaptureFirstBatch();

    // Expected metadata shape; `_commit` is the field the search tests read back.
    const expectedMetadata = expect.objectContaining({
      type: 'commit',
      hash: expect.any(String),
      shortHash: expect.any(String),
      subject: expect.any(String),
      author: expect.any(String),
      authorEmail: expect.any(String),
      date: expect.any(String),
      filesChanged: expect.any(Number),
      additions: expect.any(Number),
      deletions: expect.any(Number),
      issueRefs: expect.any(Array),
      prRefs: expect.any(Array),
      _commit: expect.any(Object),
    });

    expect(documents[0]).toMatchObject({
      id: expect.stringMatching(/^commit:/),
      text: expect.stringContaining('feat: add new feature'),
      metadata: expectedMetadata,
    });
  });

  it('should include file paths in text for better search', async () => {
    const documents = await indexAndCaptureFirstBatch();
    const embeddedText = documents[0].text;

    expect(embeddedText).toContain('src/feature.ts');
    expect(embeddedText).toContain('src/index.ts');
  });

  it('should include issue refs in metadata', async () => {
    const documents = await indexAndCaptureFirstBatch();

    expect(documents[0].metadata.issueRefs).toContain(123);
  });
});
318+
});

packages/core/src/git/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
/**
22
* Git Module
33
*
4-
* Provides git history extraction and types for semantic search.
4+
* Provides git history extraction, indexing, and types for semantic search.
55
*/
66

77
export * from './extractor';
8+
export * from './indexer';
89
export * from './types';

0 commit comments

Comments
 (0)