|
| 1 | +/** |
| 2 | + * Context Assembler |
| 3 | + * Assembles rich context packages for LLM consumption |
| 4 | + * |
| 5 | + * Philosophy: Provide raw, structured context - let the LLM do the reasoning |
| 6 | + */ |
| 7 | + |
| 8 | +import type { RepositoryIndexer } from '@lytics/dev-agent-core'; |
| 9 | +import type { |
| 10 | + CodebasePatterns, |
| 11 | + ContextAssemblyOptions, |
| 12 | + ContextMetadata, |
| 13 | + ContextPackage, |
| 14 | + IssueContext, |
| 15 | + RelatedHistory, |
| 16 | + RelevantCodeContext, |
| 17 | +} from '../context-types'; |
| 18 | +import type { GitHubIssue } from '../types'; |
| 19 | +import { fetchGitHubIssue } from './github'; |
| 20 | + |
/**
 * Baseline values for context assembly.
 *
 * Typed as `Required<ContextAssemblyOptions>` so every option has a concrete value;
 * `assembleContext` spreads caller-supplied options on top of this object.
 */
const DEFAULT_OPTIONS: Required<ContextAssemblyOptions> = {
  // Section toggles
  includeCode: true,
  includePatterns: true,
  includeHistory: true,

  // Per-section result limits
  maxCodeResults: 10,
  maxHistoryResults: 5,

  // Default token budget
  tokenBudget: 4000,
};
| 30 | + |
/**
 * Assemble a context package for a GitHub issue.
 *
 * Fetches the issue (with comments), optionally runs code search and pattern
 * detection against the indexer, estimates the token footprint of the result,
 * and bundles everything together with metadata.
 *
 * @param issueNumber - GitHub issue number
 * @param indexer - Repository indexer for code search; when absent, code search
 *   and pattern detection are skipped
 * @param repositoryPath - Path to repository
 * @param options - Assembly options; omitted or `undefined` fields fall back to
 *   `DEFAULT_OPTIONS`
 * @returns Complete context package
 */
export async function assembleContext(
  issueNumber: number,
  indexer: RepositoryIndexer | null,
  repositoryPath: string,
  options: ContextAssemblyOptions = {}
): Promise<ContextPackage> {
  // Drop explicitly-undefined overrides before merging: spreading
  // `{ maxCodeResults: undefined }` would otherwise replace the default with `undefined`.
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  const opts: Required<ContextAssemblyOptions> = { ...DEFAULT_OPTIONS, ...overrides };

  // 1. Fetch issue with comments
  const issue = await fetchGitHubIssue(issueNumber, repositoryPath, { includeComments: true });
  const issueContext = convertToIssueContext(issue);

  // 2. Search for relevant code
  // Computed once so the metadata flag always agrees with whether the search branch ran.
  const codeSearchUsed = opts.includeCode && indexer != null;
  let relevantCode: RelevantCodeContext[] = [];
  if (codeSearchUsed && indexer) {
    relevantCode = await findRelevantCode(issue, indexer, opts.maxCodeResults);
  }

  // 3. Detect codebase patterns
  let codebasePatterns: CodebasePatterns = {};
  if (opts.includePatterns && indexer) {
    codebasePatterns = await detectCodebasePatterns(indexer);
  }

  // 4. Find related history (TODO: implement when GitHub indexer is available)
  const relatedHistory: RelatedHistory[] = [];
  // if (opts.includeHistory && githubIndexer) {
  //   relatedHistory = await findRelatedHistory(issue, githubIndexer, opts.maxHistoryResults);
  // }

  // 5. Calculate approximate token count
  const tokensUsed = estimateTokens(issueContext, relevantCode, codebasePatterns, relatedHistory);

  // 6. Assemble metadata
  const metadata: ContextMetadata = {
    generatedAt: new Date().toISOString(),
    tokensUsed,
    codeSearchUsed,
    // Always false until step 4 is implemented, because relatedHistory stays empty.
    historySearchUsed: opts.includeHistory && relatedHistory.length > 0,
    repositoryPath,
  };

  return {
    issue: issueContext,
    relevantCode,
    codebasePatterns,
    relatedHistory,
    metadata,
  };
}
| 90 | + |
/**
 * Map a fetched GitHubIssue onto the IssueContext shape.
 *
 * Falsy body/author values are replaced with '' / 'unknown', a missing comment
 * list becomes an empty array, and a comment without a timestamp is stamped
 * with the current time.
 */
function convertToIssueContext(issue: GitHubIssue): IssueContext {
  const sourceComments = issue.comments || [];

  const comments = sourceComments.map((comment) => {
    const author = comment.author || 'unknown';
    const body = comment.body || '';
    const createdAt = comment.createdAt || new Date().toISOString();
    return { author, body, createdAt };
  });

  const issueBody = issue.body || '';
  const issueAuthor = issue.author || 'unknown';

  return {
    number: issue.number,
    title: issue.title,
    body: issueBody,
    labels: issue.labels,
    author: issueAuthor,
    createdAt: issue.createdAt,
    updatedAt: issue.updatedAt,
    state: issue.state,
    comments,
  };
}
| 111 | + |
/**
 * Query the indexer for code related to the issue.
 *
 * The query text comes from buildSearchQuery; hits are requested with a 0.5
 * score threshold and capped at `maxResults`. Any error thrown while searching
 * or mapping the hits yields an empty list instead of propagating.
 */
async function findRelevantCode(
  issue: GitHubIssue,
  indexer: RepositoryIndexer,
  maxResults: number
): Promise<RelevantCodeContext[]> {
  const query = buildSearchQuery(issue);

  try {
    const hits = await indexer.search(query, {
      limit: maxResults,
      scoreThreshold: 0.5,
    });

    return hits.map((hit) => {
      const meta = hit.metadata;
      // Prefer `path`, then `file`; fall back to an empty string.
      const file = (meta.path as string) || (meta.file as string) || '';
      const name = (meta.name as string) || 'unknown';
      const type = (meta.type as string) || 'unknown';
      const snippet = (meta.snippet as string) || '';

      return {
        file,
        name,
        type,
        snippet,
        relevanceScore: hit.score,
        reason: inferRelevanceReason(meta, issue),
      };
    });
  } catch {
    // Best effort: a failed search simply contributes no code context
    return [];
  }
}
| 142 | + |
/**
 * Build a search query from issue content.
 *
 * The title and body are each stripped of Markdown, the cleaned body is cut to
 * its first 500 characters, and the two parts are joined with a space.
 *
 * Stripping happens before truncation on purpose: cutting first can split a
 * fenced code block, leaving an unterminated fence whose code then leaks into
 * the query.
 *
 * @param issue - Issue whose title and (possibly empty) body seed the query
 * @returns Plain-text query, trimmed of surrounding whitespace
 */
function buildSearchQuery(issue: GitHubIssue): string {
  const stripMarkdown = (text: string): string =>
    text
      .replace(/```[\s\S]*?```/g, '') // Remove fenced code blocks
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // Remove links, keep text
      .replace(/[#*`]/g, '') // Remove heading, emphasis and inline-code markers
      // Remove underscores only at word edges (emphasis markers); underscores
      // inside identifiers such as `parse_config` are kept for code search.
      .replace(/(?<![A-Za-z0-9])_+|_+(?![A-Za-z0-9])/g, '');

  const title = stripMarkdown(issue.title);
  const bodyPreview = stripMarkdown(issue.body || '').slice(0, 500);

  return `${title} ${bodyPreview}`.trim();
}
| 162 | + |
/**
 * Infer a short, human-readable reason why a code result is relevant.
 *
 * Checks, in order: whether the result's name occurs in the issue title
 * (case-insensitive), then the result's type; falls back to a generic label.
 *
 * @param metadata - Search-result metadata; only string `name` and `type` fields are used
 * @param issue - Issue the result was retrieved for
 * @returns One of the fixed reason labels
 */
function inferRelevanceReason(metadata: Record<string, unknown>, issue: GitHubIssue): string {
  // Narrow at runtime rather than asserting: metadata values are `unknown`.
  const name = typeof metadata.name === 'string' ? metadata.name.toLowerCase() : '';
  const type = typeof metadata.type === 'string' ? metadata.type : '';
  const title = issue.title.toLowerCase();

  // A blank name must not count as a match: `title.includes('')` is always true.
  if (name.trim() !== '' && title.includes(name)) {
    return `Name matches issue title`;
  }

  switch (type) {
    case 'function':
    case 'method':
      return `Similar function pattern`;
    case 'class':
      return `Related class structure`;
    case 'interface':
    case 'type':
      return `Relevant type definition`;
    default:
      return `Semantic similarity`;
  }
}
| 190 | + |
/**
 * Detect codebase test conventions from indexed data.
 *
 * Runs one search for test-like content and inspects the paths of the hits
 * (up to 5) until both a test-file naming pattern and a test directory
 * convention have been found. Search failures are ignored and leave both
 * fields undefined.
 *
 * @param indexer - Repository indexer to query
 * @returns Detected `testPattern` (e.g. `*.test.ts`) and `testLocation`
 *   (`__tests__/` or `test/`), each undefined when not detected
 */
async function detectCodebasePatterns(indexer: RepositoryIndexer): Promise<CodebasePatterns> {
  let testPattern: string | undefined;
  let testLocation: string | undefined;

  try {
    const testResults = await indexer.search('test describe it expect', {
      limit: 5,
      scoreThreshold: 0.5,
    });

    // Look at every hit: the top-ranked one is not guaranteed to be a test file.
    for (const result of testResults) {
      if (testPattern !== undefined && testLocation !== undefined) {
        break;
      }

      // Same path/file fallback as findRelevantCode uses for result metadata.
      const rawPath: unknown = result.metadata.path || result.metadata.file;
      if (typeof rawPath !== 'string') {
        continue;
      }
      // Normalise Windows separators so the directory checks below apply.
      const testPath = rawPath.replace(/\\/g, '/');

      if (testPattern === undefined) {
        // Report the file's real extension instead of assuming `.ts`.
        const suffix = /\.(test|spec)\.([A-Za-z0-9]+)$/.exec(testPath);
        if (suffix) {
          testPattern = `*.${suffix[1]}.${suffix[2]}`;
        } else if (testPath.includes('.test.')) {
          testPattern = '*.test.ts';
        } else if (testPath.includes('.spec.')) {
          testPattern = '*.spec.ts';
        }
      }

      if (testLocation === undefined) {
        if (testPath.includes('__tests__')) {
          testLocation = '__tests__/';
        } else if (/(^|\/)test\//.test(testPath)) {
          // Also matches a `test/` directory at the start of a relative path.
          testLocation = 'test/';
        }
      }
    }
  } catch {
    // Pattern detection is best effort; a failed search leaves both fields undefined
  }

  return {
    testPattern,
    testLocation,
  };
}
| 228 | + |
/**
 * Approximate the token footprint of a context package.
 *
 * Counts characters in the issue title, body and comment bodies, in each code
 * entry's snippet, file and name, in the JSON form of the patterns object, and
 * in each history item's title and summary, then divides by four (rounding up).
 */
function estimateTokens(
  issue: IssueContext,
  code: RelevantCodeContext[],
  patterns: CodebasePatterns,
  history: RelatedHistory[]
): number {
  // Issue text: title, body and every comment body
  let issueChars = issue.title.length + issue.body.length;
  for (const comment of issue.comments) {
    issueChars += comment.body.length;
  }

  // Code entries: snippet (when present) plus file path and symbol name
  let codeChars = 0;
  for (const entry of code) {
    codeChars += (entry.snippet?.length || 0) + entry.file.length + entry.name.length;
  }

  // Patterns are small, so their serialized length is used
  const patternChars = JSON.stringify(patterns).length;

  // History items: title plus optional summary
  let historyChars = 0;
  for (const item of history) {
    historyChars += item.title.length + (item.summary?.length || 0);
  }

  // Rough estimation: ~4 chars per token
  const totalChars = issueChars + codeChars + patternChars + historyChars;
  return Math.ceil(totalChars / 4);
}
| 260 | + |
/**
 * Format a context package as Markdown for LLM consumption.
 *
 * Sections are emitted in a fixed order: issue header and description,
 * comments, relevant code, codebase patterns, related history, and a footer
 * with the generation time and token estimate. Sections with no content are
 * omitted; the header, description and footer are always present.
 *
 * @param context - Assembled context package
 * @returns Markdown document with lines joined by `\n`
 */
export function formatContextPackage(context: ContextPackage): string {
  const { issue, relevantCode, codebasePatterns, relatedHistory, metadata } = context;
  const lines: string[] = [];

  // Issue section
  lines.push(`# Issue #${issue.number}: ${issue.title}`);
  lines.push('');
  lines.push(
    `**Author:** ${issue.author} | **State:** ${issue.state} | **Labels:** ${issue.labels.join(', ') || 'none'}`
  );
  lines.push('');
  lines.push('## Description');
  lines.push('');
  lines.push(issue.body || '_No description provided_');
  lines.push('');

  // Comments
  if (issue.comments.length > 0) {
    lines.push('## Comments');
    lines.push('');
    for (const comment of issue.comments) {
      lines.push(`**${comment.author}** (${comment.createdAt}):`);
      lines.push(comment.body);
      lines.push('');
    }
  }

  // Relevant code
  if (relevantCode.length > 0) {
    lines.push('## Relevant Code');
    lines.push('');
    for (const code of relevantCode) {
      lines.push(`### ${code.name} (${code.type})`);
      lines.push(
        `**File:** \`${code.file}\` | **Relevance:** ${(code.relevanceScore * 100).toFixed(0)}%`
      );
      lines.push(`**Reason:** ${code.reason}`);
      lines.push('');
      if (code.snippet) {
        // A snippet that itself contains ``` would end a three-backtick fence early
        // and corrupt the rest of the document, so the fence is made one backtick
        // longer than the longest backtick run in the snippet (minimum three).
        const backtickRuns: string[] = code.snippet.match(/`+/g) ?? [];
        const longestRun = Math.max(2, ...backtickRuns.map((run: string) => run.length));
        const fence = '`'.repeat(longestRun + 1);

        lines.push(`${fence}typescript`);
        lines.push(code.snippet);
        lines.push(fence);
        lines.push('');
      }
    }
  }

  // Codebase patterns
  const { testPattern, testLocation } = codebasePatterns;
  if (testPattern || testLocation) {
    lines.push('## Codebase Patterns');
    lines.push('');
    if (testPattern) {
      lines.push(`- **Test naming:** ${testPattern}`);
    }
    if (testLocation) {
      lines.push(`- **Test location:** ${testLocation}`);
    }
    lines.push('');
  }

  // Related history
  if (relatedHistory.length > 0) {
    lines.push('## Related History');
    lines.push('');
    for (const item of relatedHistory) {
      const typeLabel = item.type === 'pr' ? 'PR' : 'Issue';
      lines.push(`- **${typeLabel} #${item.number}:** ${item.title} (${item.state})`);
    }
    lines.push('');
  }

  // Metadata footer
  lines.push('---');
  lines.push(`*Context assembled at ${metadata.generatedAt} | ~${metadata.tokensUsed} tokens*`);

  return lines.join('\n');
}
0 commit comments