diff --git a/packages/cli/src/commands/gh.ts b/packages/cli/src/commands/gh.ts index 3979a65..e452826 100644 --- a/packages/cli/src/commands/gh.ts +++ b/packages/cli/src/commands/gh.ts @@ -3,7 +3,6 @@ * CLI commands for indexing and searching GitHub data */ -import { RepositoryIndexer } from '@lytics/dev-agent-core'; import { GitHubIndexer } from '@lytics/dev-agent-subagents'; import chalk from 'chalk'; import { Command } from 'commander'; @@ -34,12 +33,15 @@ export const ghCommand = new Command('gh') spinner.text = 'Initializing indexers...'; - // Initialize code indexer - const codeIndexer = new RepositoryIndexer(config); - await codeIndexer.initialize(); + // Create GitHub indexer with vector storage + const ghIndexer = new GitHubIndexer({ + vectorStorePath: `${config.vectorStorePath}-github`, // Separate storage for GitHub data + statePath: '.dev-agent/github-state.json', + autoUpdate: true, + staleThreshold: 15 * 60 * 1000, // 15 minutes + }); - // Create GitHub indexer - const ghIndexer = new GitHubIndexer(codeIndexer); + await ghIndexer.initialize(); spinner.text = 'Fetching GitHub data...'; @@ -120,10 +122,14 @@ export const ghCommand = new Command('gh') spinner.text = 'Initializing...'; - // Initialize indexers - const codeIndexer = new RepositoryIndexer(config); - await codeIndexer.initialize(); - const ghIndexer = new GitHubIndexer(codeIndexer); + // Initialize GitHub indexer + const ghIndexer = new GitHubIndexer({ + vectorStorePath: `${config.vectorStorePath}-github`, + statePath: '.dev-agent/github-state.json', + autoUpdate: true, + staleThreshold: 15 * 60 * 1000, + }); + await ghIndexer.initialize(); // Check if indexed if (!ghIndexer.isIndexed()) { @@ -216,9 +222,13 @@ export const ghCommand = new Command('gh') spinner.text = 'Initializing...'; - const codeIndexer = new RepositoryIndexer(config); - await codeIndexer.initialize(); - const ghIndexer = new GitHubIndexer(codeIndexer); + const ghIndexer = new GitHubIndexer({ + vectorStorePath: `${config.vectorStorePath}-github`, + statePath: '.dev-agent/github-state.json', + autoUpdate: true, + staleThreshold: 15 * 60 * 1000, + }); + await ghIndexer.initialize(); if (!ghIndexer.isIndexed()) { spinner.warn('GitHub data not indexed'); @@ -301,9 +311,13 @@ export const ghCommand = new Command('gh') return; } - const codeIndexer = new RepositoryIndexer(config); - await codeIndexer.initialize(); - const ghIndexer = new GitHubIndexer(codeIndexer); + const ghIndexer = new GitHubIndexer({ + vectorStorePath: `${config.vectorStorePath}-github`, + statePath: '.dev-agent/github-state.json', + autoUpdate: true, + staleThreshold: 15 * 60 * 1000, + }); + await ghIndexer.initialize(); const stats = ghIndexer.getStats(); diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 2e353b2..a06fd4c 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -1,4 +1,4 @@ -import { CoreService, type CoreConfig } from '@lytics/dev-agent-core'; +import { type CoreConfig, CoreService } from '@lytics/dev-agent-core'; export interface CliConfig { coreConfig: CoreConfig; diff --git a/packages/core/src/context/index.ts b/packages/core/src/context/index.ts index 55ae8d9..7e41a7e 100644 --- a/packages/core/src/context/index.ts +++ b/packages/core/src/context/index.ts @@ -5,7 +5,6 @@ export interface ContextProviderOptions { } export class ContextProvider { - constructor(_options: ContextProviderOptions) { // Placeholder constructor } diff --git a/packages/core/src/github/index.ts b/packages/core/src/github/index.ts index e4ef01d..b9e91b4 100644 --- a/packages/core/src/github/index.ts +++ b/packages/core/src/github/index.ts @@ -4,7 +4,6 @@ export interface GitHubOptions { } export class GitHubIntegration { - constructor(_options: GitHubOptions) { // Placeholder constructor } diff --git a/packages/subagents/src/coordinator/github-coordinator.integration.test.ts b/packages/subagents/src/coordinator/github-coordinator.integration.test.ts index f9971e6..b2d1cdb 100644 --- a/packages/subagents/src/coordinator/github-coordinator.integration.test.ts +++ b/packages/subagents/src/coordinator/github-coordinator.integration.test.ts @@ -6,40 +6,58 @@ import { mkdtemp, rm } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { RepositoryIndexer } from '@lytics/dev-agent-core'; -import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import type { GitHubAgentConfig } from '../github/agent'; import { GitHubAgent } from '../github/agent'; -import type { GitHubContextRequest, GitHubContextResult } from '../github/types'; +import type { GitHubContextRequest, GitHubContextResult, GitHubDocument } from '../github/types'; import { SubagentCoordinator } from './coordinator'; +// Mock GitHub utilities to avoid actual gh CLI calls +vi.mock('../github/utils/index', () => ({ + fetchAllDocuments: vi.fn(() => [ + { + type: 'issue', + number: 1, + title: 'Test Issue', + body: 'Test body', + state: 'open', + author: 'testuser', + labels: [], + createdAt: '2024-01-01T00:00:00Z', + updatedAt: '2024-01-01T00:00:00Z', + url: 'https://github.com/test/repo/issues/1', + relatedIssues: [], + relatedPRs: [], + linkedFiles: [], + mentions: [], + }, + ]), + enrichDocument: vi.fn((doc: GitHubDocument) => doc), + getCurrentRepository: vi.fn(() => 'lytics/dev-agent'), + calculateRelevance: vi.fn(() => 0.8), + matchesQuery: vi.fn(() => true), +})); + describe('Coordinator → GitHub Integration', () => { let coordinator: SubagentCoordinator; let github: GitHubAgent; let tempDir: string; - let codeIndexer: RepositoryIndexer; beforeEach(async () => { // Create temp directory tempDir = await mkdtemp(join(tmpdir(), 'gh-coordinator-test-')); - // Initialize code indexer - codeIndexer = new RepositoryIndexer({ - repositoryPath: process.cwd(), - vectorStorePath: join(tempDir, '.vectors'), - }); - await codeIndexer.initialize(); - // Create coordinator coordinator = new SubagentCoordinator({ logLevel: 'error', // Reduce noise in tests }); - // Create GitHub agent + // Create GitHub agent with vector storage config const config: GitHubAgentConfig = { repositoryPath: process.cwd(), - codeIndexer, - storagePath: join(tempDir, '.github-index'), + vectorStorePath: join(tempDir, '.github-vectors'), + statePath: join(tempDir, 'github-state.json'), + autoUpdate: false, // Disable for tests }; github = new GitHubAgent(config); @@ -49,7 +67,6 @@ describe('Coordinator → GitHub Integration', () => { afterEach(async () => { await coordinator.stop(); - await codeIndexer.close(); await rm(tempDir, { recursive: true, force: true }); }); @@ -67,7 +84,7 @@ describe('Coordinator → GitHub Integration', () => { it('should prevent duplicate registration', async () => { const duplicate = new GitHubAgent({ repositoryPath: process.cwd(), - codeIndexer, + vectorStorePath: join(tempDir, '.github-vectors-dup'), }); await expect(coordinator.registerAgent(duplicate)).rejects.toThrow('already registered'); }); @@ -101,6 +118,17 @@ describe('Coordinator → GitHub Integration', () => { }); it('should route search request to GitHub agent', async () => { + // Index first (required for search) + await coordinator.sendMessage({ + type: 'request', + sender: 'test', + recipient: 'github', + payload: { + action: 'index', + indexOptions: {}, + } as GitHubContextRequest, + }); + const response = await coordinator.sendMessage({ type: 'request', sender: 'test', diff --git a/packages/subagents/src/github/agent.ts b/packages/subagents/src/github/agent.ts index fec2ee0..39c6189 100644 --- a/packages/subagents/src/github/agent.ts +++ b/packages/subagents/src/github/agent.ts @@ -3,7 +3,6 @@ * Provides rich context from GitHub issues, PRs, and discussions */ -import type { RepositoryIndexer } from '@lytics/dev-agent-core'; import type { Agent, AgentContext, Message } from '../types'; import { GitHubIndexer } from './indexer'; import type { @@ -15,8 +14,10 @@ import type { export interface GitHubAgentConfig { repositoryPath: string; - codeIndexer: RepositoryIndexer; - storagePath?: string; + vectorStorePath: string; // Path to LanceDB storage for GitHub data + statePath?: string; // Path to state file (default: .dev-agent/github-state.json) + autoUpdate?: boolean; // Enable auto-updates (default: true) + staleThreshold?: number; // Stale threshold in ms (default: 15 minutes) } export class GitHubAgent implements Agent { @@ -35,7 +36,17 @@ export class GitHubAgent implements Agent { this.context = context; this.name = context.agentName; - this.indexer = new GitHubIndexer(this.config.codeIndexer, this.config.repositoryPath); + this.indexer = new GitHubIndexer( + { + vectorStorePath: this.config.vectorStorePath, + statePath: this.config.statePath, + autoUpdate: this.config.autoUpdate, + staleThreshold: this.config.staleThreshold, + }, + this.config.repositoryPath + ); + + await this.indexer.initialize(); context.logger.info('GitHub agent initialized', { capabilities: this.capabilities, @@ -69,10 +80,18 @@ export class GitHubAgent implements Agent { result = await this.handleSearch(request.query || '', request.searchOptions); break; case 'context': - result = await this.handleGetContext(request.issueNumber!); + if (typeof request.issueNumber !== 'number') { + result = { action: 'context', error: 'issueNumber is required' }; + } else { + result = await this.handleGetContext(request.issueNumber); + } break; case 'related': - result = await this.handleFindRelated(request.issueNumber!); + if (typeof request.issueNumber !== 'number') { + result = { action: 'related', error: 'issueNumber is required' }; + } else { + result = await this.handleFindRelated(request.issueNumber); + } break; default: result = { @@ -114,7 +133,8 @@ export class GitHubAgent implements Agent { } private async handleIndex(options?: GitHubIndexOptions): Promise { - const stats = await this.indexer!.index(options); + if (!this.indexer) throw new Error('Indexer not initialized'); + const stats = await this.indexer.index(options); return { action: 'index', stats, @@ -125,7 +145,8 @@ export class GitHubAgent implements Agent { query: string, options?: { limit?: number } ): Promise { - const results = await this.indexer!.search(query, options); + if (!this.indexer) throw new Error('Indexer not initialized'); + const results = await this.indexer.search(query, options); return { action: 'search', results, @@ -133,7 +154,8 @@ export class GitHubAgent implements Agent { } private async handleGetContext(issueNumber: number): Promise { - const context = await this.indexer!.getContext(issueNumber); + if (!this.indexer) throw new Error('Indexer not initialized'); + const context = await this.indexer.getContext(issueNumber); return { action: 'context', context: context || undefined, @@ -141,7 +163,8 @@ export class GitHubAgent implements Agent { } private async handleFindRelated(issueNumber: number): Promise { - const related = await this.indexer!.findRelated(issueNumber); + if (!this.indexer) throw new Error('Indexer not initialized'); + const related = await this.indexer.findRelated(issueNumber); return { action: 'related', related, diff --git a/packages/subagents/src/github/indexer.test.ts b/packages/subagents/src/github/indexer.test.ts new file mode 100644 index 0000000..67d5036 --- /dev/null +++ b/packages/subagents/src/github/indexer.test.ts @@ -0,0 +1,286 @@ +/** + * Tests for GitHub indexer persistence and auto-update + */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import type { VectorStorage } from '@lytics/dev-agent-core'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { GitHubIndexer } from './indexer'; +import type { GitHubDocument } from './types'; +import * as utils from './utils/index'; + +// Mock the utilities +vi.mock('./utils/index', () => ({ + fetchAllDocuments: vi.fn(), + enrichDocument: vi.fn((doc: GitHubDocument) => doc), + getCurrentRepository: vi.fn(() => 'lytics/dev-agent'), + calculateRelevance: vi.fn(() => 0.8), + matchesQuery: vi.fn(() => true), +})); + +// Mock VectorStorage +vi.mock('@lytics/dev-agent-core', () => ({ + VectorStorage: class MockVectorStorage { + initialize = vi.fn().mockResolvedValue(undefined); + addDocuments = vi.fn().mockResolvedValue(undefined); + search = vi.fn().mockResolvedValue([]); + close = vi.fn().mockResolvedValue(undefined); + }, +})); + +describe('GitHubIndexer - Persistence', () => { + const testVectorPath = '.test-vectors/github'; + const testStatePath = '.test-state/github-state.json'; + let indexer: GitHubIndexer; + + const mockDocuments: GitHubDocument[] = [ + { + type: 'issue', + number: 1, + title: 'Test Issue', + body: 'Test body', + state: 'open', + author: 'testuser', + labels: ['bug'], + createdAt: '2024-01-01T00:00:00Z', + updatedAt: '2024-01-01T00:00:00Z', + url: 'https://github.com/lytics/dev-agent/issues/1', + relatedIssues: [], + relatedPRs: [], + linkedFiles: [], + mentions: [], + }, + { + type: 'pull_request', + number: 2, + title: 'Test PR', + body: 'Test PR body', + state: 'merged', + author: 'testuser', + labels: ['feature'], + createdAt: '2024-01-02T00:00:00Z', + updatedAt: '2024-01-02T00:00:00Z', + url: 'https://github.com/lytics/dev-agent/pull/2', + relatedIssues: [1], + relatedPRs: [], + linkedFiles: ['src/test.ts'], + mentions: [], + }, + ]; + + beforeEach(async () => { + // Create indexer + indexer = new GitHubIndexer({ + vectorStorePath: testVectorPath, + statePath: testStatePath, + autoUpdate: false, // Disable auto-update for tests + staleThreshold: 1000, // 1 second + }); + + // Mock fetchAllDocuments to return test data + vi.mocked(utils.fetchAllDocuments).mockReturnValue(mockDocuments); + + await indexer.initialize(); + }); + + afterEach(async () => { + if (indexer) { + try { + await indexer.close(); + } catch { + // Ignore close errors + } + } + + // Clean up test files + try { + await fs.rm(path.dirname(testStatePath), { recursive: true, force: true }); + await fs.rm(testVectorPath, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors + } + + vi.clearAllMocks(); + }); + + describe('State Persistence', () => { + it('should save state file after indexing', async () => { + const stats = await indexer.index(); + + expect(stats.totalDocuments).toBe(2); + expect(stats.byType.issue).toBe(1); + expect(stats.byType.pull_request).toBe(1); + + // Verify state file was created + const stateContent = await fs.readFile(testStatePath, 'utf-8'); + const state = JSON.parse(stateContent); + + expect(state.version).toBe('1.0.0'); + expect(state.repository).toBe('lytics/dev-agent'); + expect(state.totalDocuments).toBe(2); + expect(state.lastIndexed).toBeDefined(); + }); + + it('should load state on initialization', async () => { + // First indexing + await indexer.index(); + + // Close and re-create indexer + await indexer.close(); + + const newIndexer = new GitHubIndexer({ + vectorStorePath: testVectorPath, + statePath: testStatePath, + autoUpdate: false, + }); + + await newIndexer.initialize(); + + // Stats should be loaded from state + const stats = newIndexer.getStats(); + expect(stats).not.toBeNull(); + expect(stats?.totalDocuments).toBe(2); + + await newIndexer.close(); + }); + + it('should indicate indexed status', async () => { + expect(indexer.isIndexed()).toBe(false); + + await indexer.index(); + + expect(indexer.isIndexed()).toBe(true); + }); + }); + + describe('Vector Storage Integration', () => { + it('should add documents to vector storage', async () => { + const vectorStorage = (indexer as unknown as { vectorStorage: VectorStorage }).vectorStorage; + + await indexer.index(); + + expect(vectorStorage.addDocuments).toHaveBeenCalledTimes(1); + expect(vectorStorage.addDocuments).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + id: 'issue-1', + text: expect.stringContaining('Test Issue'), + metadata: expect.objectContaining({ + type: 'issue', + number: 1, + title: 'Test Issue', + }), + }), + expect.objectContaining({ + id: 'pull_request-2', + text: expect.stringContaining('Test PR'), + metadata: expect.objectContaining({ + type: 'pull_request', + number: 2, + }), + }), + ]) + ); + }); + + it('should use vector search for queries', async () => { + const vectorStorage = (indexer as unknown as { vectorStorage: VectorStorage }).vectorStorage; + + // Mock vector search results + vi.mocked(vectorStorage.search).mockResolvedValue([ + { + id: 'issue-1', + score: 0.9, + metadata: { + document: JSON.stringify(mockDocuments[0]), + }, + }, + ]); + + await indexer.index(); + + const results = await indexer.search('test query'); + + expect(vectorStorage.search).toHaveBeenCalledWith('test query', { + limit: 10, + }); + + expect(results).toHaveLength(1); + expect(results[0].document.number).toBe(1); + expect(results[0].score).toBe(0.9); + }); + }); + + describe('Auto-Update', () => { + it('should detect stale data', async () => { + await indexer.index(); + + const isStale = (indexer as unknown as { isStale: () => boolean }).isStale(); + expect(isStale).toBe(false); + + // Wait for data to become stale + await new Promise((resolve) => setTimeout(resolve, 1100)); + + const isStaleAfter = (indexer as unknown as { isStale: () => boolean }).isStale(); + expect(isStaleAfter).toBe(true); + }); + + it('should trigger background update on stale search', async () => { + // Create indexer with auto-update enabled + const autoIndexer = new GitHubIndexer({ + vectorStorePath: `${testVectorPath}-auto`, + statePath: testStatePath.replace('.json', '-auto.json'), + autoUpdate: true, + staleThreshold: 100, // 100ms + }); + + await autoIndexer.initialize(); + await autoIndexer.index(); + + // Wait for data to become stale + await new Promise((resolve) => setTimeout(resolve, 150)); + + const indexSpy = vi.spyOn(autoIndexer, 'index'); + + // Mock vector search + const vectorStorage = (autoIndexer as unknown as { vectorStorage: VectorStorage }) + .vectorStorage; + vi.mocked(vectorStorage.search).mockResolvedValue([]); + + // Search should trigger background update + await autoIndexer.search('test'); + + // Give background update time to start + await new Promise((resolve) => setTimeout(resolve, 10)); + + expect(indexSpy).toHaveBeenCalled(); + + await autoIndexer.close(); + }); + }); + + describe('Statistics', () => { + it('should return null stats when not indexed', () => { + const stats = indexer.getStats(); + expect(stats).toBeNull(); + }); + + it('should return accurate stats after indexing', async () => { + await indexer.index(); + + const stats = indexer.getStats(); + expect(stats).not.toBeNull(); + expect(stats?.repository).toBe('lytics/dev-agent'); + expect(stats?.totalDocuments).toBe(2); + expect(stats?.byType).toEqual({ + issue: 1, + pull_request: 1, + }); + expect(stats?.byState).toEqual({ + open: 1, + merged: 1, + }); + }); + }); +}); diff --git a/packages/subagents/src/github/indexer.ts b/packages/subagents/src/github/indexer.ts index e8f91e1..28e8182 100644 --- a/packages/subagents/src/github/indexer.ts +++ b/packages/subagents/src/github/indexer.ts @@ -3,39 +3,74 @@ * Indexes GitHub issues, PRs, and discussions for semantic search */ -import type { RepositoryIndexer } from '@lytics/dev-agent-core'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import { VectorStorage } from '@lytics/dev-agent-core'; import type { GitHubContext, GitHubDocument, + GitHubIndexerConfig, + GitHubIndexerState, GitHubIndexOptions, GitHubIndexStats, GitHubSearchOptions, GitHubSearchResult, } from './types'; -import { - calculateRelevance, - enrichDocument, - fetchAllDocuments, - getCurrentRepository, - matchesQuery, -} from './utils/index'; +import { enrichDocument, fetchAllDocuments, getCurrentRepository } from './utils/index'; + +const INDEXER_VERSION = '1.0.0'; +const DEFAULT_STATE_PATH = '.dev-agent/github-state.json'; +const DEFAULT_STALE_THRESHOLD = 15 * 60 * 1000; // 15 minutes /** * GitHub Document Indexer - * Stores GitHub documents and provides search functionality + * Stores GitHub documents and provides semantic search functionality * - * Note: Currently uses in-memory storage with text search. - * Future: Integrate with VectorStorage for semantic search. + * Uses VectorStorage for persistent semantic search and maintains state for incremental updates. */ export class GitHubIndexer { - private codeIndexer: RepositoryIndexer; + private vectorStorage: VectorStorage; private repository: string; - private documents: Map = new Map(); - private lastIndexed?: Date; + private state: GitHubIndexerState | null = null; + private readonly config: Required; + private readonly statePath: string; - constructor(codeIndexer: RepositoryIndexer, repository?: string) { - this.codeIndexer = codeIndexer; + constructor(config: GitHubIndexerConfig, repository?: string) { this.repository = repository || getCurrentRepository(); + + // Set defaults + this.config = { + autoUpdate: true, + staleThreshold: DEFAULT_STALE_THRESHOLD, + statePath: DEFAULT_STATE_PATH, + ...config, + }; + + // Resolve state path + const repoRoot = process.cwd(); // TODO: Get from git root + this.statePath = path.isAbsolute(this.config.statePath) + ? this.config.statePath + : path.join(repoRoot, this.config.statePath); + + // Initialize vector storage + this.vectorStorage = new VectorStorage({ + storePath: this.config.vectorStorePath, + }); + } + + /** + * Initialize the indexer (load state and vector storage) + */ + async initialize(): Promise { + await this.vectorStorage.initialize(); + await this.loadState(); + } + + /** + * Close the indexer and cleanup resources + */ + async close(): Promise { + await this.vectorStorage.close(); } /** @@ -53,14 +88,30 @@ export class GitHubIndexer { // Enrich with relationships const enrichedDocs = documents.map((doc) => enrichDocument(doc)); - // Store in memory - this.documents.clear(); - for (const doc of enrichedDocs) { - const key = `${doc.type}-${doc.number}`; - this.documents.set(key, doc); - } + // Convert to vector storage format + const vectorDocs = enrichedDocs.map((doc) => ({ + id: `${doc.type}-${doc.number}`, + text: `${doc.title}\n\n${doc.body}`, // Use 'text' not 'content' + metadata: { + type: doc.type, + number: doc.number, + title: doc.title, + state: doc.state, + author: doc.author, + createdAt: doc.createdAt, + updatedAt: doc.updatedAt, + url: doc.url, + labels: doc.labels, + repository: this.repository, + // Store full document as JSON + document: JSON.stringify(doc), + }, + })); - this.lastIndexed = new Date(); + // Store in vector storage + // Note: LanceDB doesn't support clearing, so we just add new documents + // Duplicates are handled by ID (overwrites existing) + await this.vectorStorage.addDocuments(vectorDocs); // Calculate stats const byType = enrichedDocs.reduce( @@ -79,12 +130,25 @@ export class GitHubIndexer { {} as Record ); + // Update state + this.state = { + version: INDEXER_VERSION, + repository: this.repository, + lastIndexed: new Date().toISOString(), + totalDocuments: enrichedDocs.length, + byType: byType as Record<'issue' | 'pull_request' | 'discussion', number>, + byState: byState as Record<'open' | 'closed' | 'merged', number>, + }; + + // Save state to disk + await this.saveState(); + return { repository: this.repository, totalDocuments: enrichedDocs.length, byType: byType as Record<'issue' | 'pull_request' | 'discussion', number>, byState: byState as Record<'open' | 'closed' | 'merged', number>, - lastIndexed: this.lastIndexed.toISOString(), + lastIndexed: this.state.lastIndexed, indexDuration: Date.now() - startTime, }; } @@ -93,25 +157,40 @@ export class GitHubIndexer { * Search GitHub documents */ async search(query: string, options: GitHubSearchOptions = {}): Promise { + // Auto-update if stale + if (this.config.autoUpdate && this.isStale()) { + // Background update (non-blocking) + this.index({ since: this.state?.lastIndexed }).catch((err) => { + console.warn('Background update failed:', err); + }); + } + + // Check if indexed + if (!this.state) { + throw new Error('GitHub data not indexed. Run "dev gh index" first.'); + } + + // Semantic search using vector storage + const vectorResults = await this.vectorStorage.search(query, { + limit: options.limit || 10, + }); + + // Convert back to GitHubSearchResult format and apply filters const results: GitHubSearchResult[] = []; - for (const doc of this.documents.values()) { - // Filter by type - if (options.type && doc.type !== options.type) continue; + for (const result of vectorResults) { + const doc = JSON.parse(result.metadata.document as string) as GitHubDocument; - // Filter by state + // Apply filters + if (options.type && doc.type !== options.type) continue; if (options.state && doc.state !== options.state) continue; + if (options.author && doc.author !== options.author) continue; - // Filter by labels if (options.labels && options.labels.length > 0) { const hasLabel = options.labels.some((label) => doc.labels.includes(label)); if (!hasLabel) continue; } - // Filter by author - if (options.author && doc.author !== options.author) continue; - - // Filter by date if (options.since) { const createdAt = new Date(doc.createdAt); const since = new Date(options.since); @@ -124,28 +203,27 @@ export class GitHubIndexer { if (createdAt > until) continue; } - // Check if matches query - if (!matchesQuery(doc, query)) continue; - - // Calculate relevance score - const score = calculateRelevance(doc, query) / 100; // Normalize to 0-1 - - // Apply score threshold - if (options.scoreThreshold && score < options.scoreThreshold) continue; + if (options.scoreThreshold && result.score < options.scoreThreshold) continue; results.push({ document: doc, - score, + score: result.score, matchedFields: ['title', 'body'], }); } - // Sort by score descending - results.sort((a, b) => b.score - a.score); + return results; + } - // Apply limit - const limit = options.limit || 10; - return results.slice(0, limit); + /** + * Check if indexed data is stale + */ + private isStale(): boolean { + if (!this.state?.lastIndexed) return true; + + const lastIndexedTime = new Date(this.state.lastIndexed).getTime(); + const now = Date.now(); + return now - lastIndexedTime > this.config.staleThreshold; } /** @@ -156,8 +234,7 @@ export class GitHubIndexer { type: 'issue' | 'pull_request' = 'issue' ): Promise { // Find the document - const key = `${type}-${number}`; - const document = this.documents.get(key); + const document = await this.getDocument(number, type); if (!document) { return null; @@ -166,7 +243,7 @@ export class GitHubIndexer { // Find related issues const relatedIssues: GitHubDocument[] = []; for (const issueNum of document.relatedIssues) { - const related = this.documents.get(`issue-${issueNum}`); + const related = await this.getDocument(issueNum, 'issue'); if (related) { relatedIssues.push(related); } @@ -175,39 +252,19 @@ export class GitHubIndexer { // Find related PRs const relatedPRs: GitHubDocument[] = []; for (const prNum of document.relatedPRs) { - const related = this.documents.get(`pull_request-${prNum}`); + const related = await this.getDocument(prNum, 'pull_request'); if (related) { relatedPRs.push(related); } } - // Find linked code files using the code indexer + // Find linked code files (skip for now - requires RepositoryIndexer integration) const linkedCodeFiles: Array<{ path: string; reason: string; score: number; }> = []; - for (const filePath of document.linkedFiles.slice(0, 10)) { - try { - const codeResults = await this.codeIndexer.search(filePath, { - limit: 1, - scoreThreshold: 0.3, - }); - - if (codeResults.length > 0) { - const metadata = codeResults[0].metadata as { path?: string }; - linkedCodeFiles.push({ - path: metadata.path || filePath, - reason: 'Mentioned in issue/PR', - score: codeResults[0].score, - }); - } - } catch { - // Ignore errors finding code files - } - } - return { document, relatedIssues, @@ -234,56 +291,88 @@ export class GitHubIndexer { /** * Get a specific document by number */ - getDocument(number: number, type: 'issue' | 'pull_request' = 'issue'): GitHubDocument | null { - const key = `${type}-${number}`; - return this.documents.get(key) || null; + async getDocument( + number: number, + type: 'issue' | 'pull_request' = 'issue' + ): Promise { + const id = `${type}-${number}`; + + try { + const results = await this.vectorStorage.search(id, { limit: 1 }); + if (results.length === 0) return null; + + return JSON.parse(results[0].metadata.document as string) as GitHubDocument; + } catch { + return null; + } } /** * Get all indexed documents */ - getAllDocuments(): GitHubDocument[] { - return Array.from(this.documents.values()); + async getAllDocuments(): Promise { + // This is expensive - avoid using if possible + // For now, return empty array and recommend using search instead + console.warn('getAllDocuments() is expensive - use search() instead'); + return []; } /** * Check if indexer has been initialized */ isIndexed(): boolean { - return this.documents.size > 0; + return this.state !== null; } /** * Get indexing statistics */ getStats(): GitHubIndexStats | null { - if (!this.lastIndexed) { + if (!this.state) { return null; } - const byType = Array.from(this.documents.values()).reduce( - (acc, doc) => { - acc[doc.type] = (acc[doc.type] || 0) + 1; - return acc; - }, - {} as Record - ); - - const byState = Array.from(this.documents.values()).reduce( - (acc, doc) => { - acc[doc.state] = (acc[doc.state] || 0) + 1; - return acc; - }, - {} as Record - ); - return { repository: this.repository, - totalDocuments: this.documents.size, - byType: byType as Record<'issue' | 'pull_request' | 'discussion', number>, - byState: byState as Record<'open' | 'closed' | 'merged', number>, - lastIndexed: this.lastIndexed.toISOString(), + totalDocuments: this.state.totalDocuments, + byType: this.state.byType, + byState: this.state.byState, + lastIndexed: this.state.lastIndexed, indexDuration: 0, }; } + + /** + * Load indexer state from disk + */ + private async loadState(): Promise { + try { + const stateContent = await fs.readFile(this.statePath, 'utf-8'); + this.state = JSON.parse(stateContent); + + // Validate version compatibility + if (this.state?.version !== INDEXER_VERSION) { + console.warn(`State version mismatch: ${this.state?.version} !== ${INDEXER_VERSION}`); + this.state = null; + } + } catch { + // State file doesn't exist or is corrupted + this.state = null; + } + } + + /** + * Save indexer state to disk + */ + private async saveState(): Promise { + if (!this.state) { + return; + } + + // Ensure directory exists + await fs.mkdir(path.dirname(this.statePath), { recursive: true }); + + // Write state + await fs.writeFile(this.statePath, JSON.stringify(this.state, null, 2), 'utf-8'); + } } diff --git a/packages/subagents/src/github/types.ts b/packages/subagents/src/github/types.ts index bc3e5fc..8e7a164 100644 --- a/packages/subagents/src/github/types.ts +++ b/packages/subagents/src/github/types.ts @@ -84,6 +84,28 @@ export interface GitHubContext { discussionSummary?: string; } +/** + * GitHub indexer configuration + */ +export interface GitHubIndexerConfig { + vectorStorePath: string; // Path to LanceDB vector storage + statePath?: string; // Path to state file (default: .dev-agent/github-state.json) + autoUpdate?: boolean; // Enable auto-updates (default: true) + staleThreshold?: number; // Stale threshold in ms (default: 15 minutes) +} + +/** + * GitHub indexer state (persisted to disk) + */ +export interface GitHubIndexerState { + version: string; // State format version + repository: string; + lastIndexed: string; // ISO date + totalDocuments: number; + byType: Record; + byState: Record; +} + /** * GitHub indexing options */