diff --git a/apps/cli/src/main.ts b/apps/cli/src/main.ts index cc2c01c..a44a64c 100644 --- a/apps/cli/src/main.ts +++ b/apps/cli/src/main.ts @@ -5,7 +5,7 @@ import path from 'node:path'; import { parseArgs } from 'node:util'; import { fileURLToPath } from 'node:url'; -import { createApiServer, GHCrawlService } from '@ghcrawl/api-core'; +import { createApiServer, formatTriageMarkdown, GHCrawlService } from '@ghcrawl/api-core'; import { runInitWizard } from './init-wizard.js'; import { startTui } from './tui/app.js'; @@ -25,6 +25,7 @@ type CommandName = | 'cluster' | 'clusters' | 'cluster-detail' + | 'triage' | 'search' | 'neighbors' | 'tui' @@ -52,6 +53,7 @@ function usage(devMode = false): string { ' cluster [--k ] [--threshold ]', ' clusters [--min-size ] [--limit ] [--sort recent|size] [--search ] [--include-closed]', ' cluster-detail --id [--member-limit ] [--body-chars ] [--include-closed]', + ' triage [--limit ] [--min-size ] [--json]', ' search --query [--mode keyword|semantic|hybrid]', ' neighbors --number [--limit ] [--threshold ]', ' tui [owner/repo]', @@ -120,6 +122,7 @@ export function parseRepoFlags(args: string[]): { owner: string; repo: string; v 'no-sync': { type: 'boolean' }, 'no-embed': { type: 'boolean' }, 'no-cluster': { type: 'boolean' }, + json: { type: 'boolean' }, }, }); @@ -494,6 +497,21 @@ export async function run(argv: string[], stdout: NodeJS.WritableStream = proces stdout.write(`${JSON.stringify(result, null, 2)}\n`); return; } + case 'triage': { + const { owner, repo, values } = parseRepoFlags(rest); + const report = getService().generateTriageReport({ + owner, + repo, + limit: typeof values.limit === 'string' ? parsePositiveInteger('limit', values.limit) : undefined, + minSize: typeof values['min-size'] === 'string' ? parsePositiveInteger('min-size', values['min-size']) : undefined, + }); + if (values.json === true) { + stdout.write(`${JSON.stringify(report, null, 2)}\n`); + } else { + stdout.write(`${formatTriageMarkdown(report)}\n`); + } + return; + } case 'search': { const { owner, repo, values } = parseRepoFlags(rest); if (typeof values.query !== 'string') { diff --git a/packages/api-core/src/api/server.ts b/packages/api-core/src/api/server.ts index 79032c8..50112b2 100644 --- a/packages/api-core/src/api/server.ts +++ b/packages/api-core/src/api/server.ts @@ -3,6 +3,7 @@ import http from 'node:http'; import { actionRequestSchema, closeClusterRequestSchema, closeThreadRequestSchema, refreshRequestSchema } from '@ghcrawl/api-contract'; import { ZodError } from 'zod'; +import { formatTriageMarkdown } from '../report/triage.js'; import { GHCrawlService, parseRepoParams } from '../service.js'; function sendJson(res: http.ServerResponse, status: number, payload: unknown): void { @@ -167,6 +168,21 @@ export function createApiServer(service: GHCrawlService): http.Server { return; } + if (req.method === 'GET' && url.pathname === '/triage') { + const params = parseRepoParams(url); + const limit = url.searchParams.has('limit') ? Number(url.searchParams.get('limit')) : undefined; + const minSize = url.searchParams.has('min-size') ? Number(url.searchParams.get('min-size')) : undefined; + const format = url.searchParams.get('format'); + const report = service.generateTriageReport({ ...params, limit, minSize }); + if (format === 'markdown') { + res.writeHead(200, { 'Content-Type': 'text/markdown' }); + res.end(formatTriageMarkdown(report)); + } else { + sendJson(res, 200, report); + } + return; + } + if (req.method === 'POST' && url.pathname === '/actions/rerun') { const body = actionRequestSchema.parse(await readBody(req)); sendJson(res, 200, await service.rerunAction(body)); diff --git a/packages/api-core/src/index.ts b/packages/api-core/src/index.ts index 15471e0..5f7b912 100644 --- a/packages/api-core/src/index.ts +++ b/packages/api-core/src/index.ts @@ -3,4 +3,5 @@ export * from './config.js'; export * from './documents/normalize.js'; export * from './search/exact.js'; export * from './cluster/build.js'; +export * from './report/triage.js'; export * from './service.js'; diff --git a/packages/api-core/src/report/triage.test.ts b/packages/api-core/src/report/triage.test.ts new file mode 100644 index 0000000..f0350b4 --- /dev/null +++ b/packages/api-core/src/report/triage.test.ts @@ -0,0 +1,119 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; + +import type { ClusterSummaryDto, RepoStatsDto, RepositoryDto } from '@ghcrawl/api-contract'; + +import { formatTriageMarkdown, generateSuggestedActions, type TriageReport } from './triage.js'; + +function makeCluster(overrides: Partial = {}): ClusterSummaryDto { + return { + clusterId: 1, + displayTitle: 'Download stalls on large files', + isClosed: false, + closedAtLocal: null, + closeReasonLocal: null, + totalCount: 7, + issueCount: 7, + pullRequestCount: 0, + latestUpdatedAt: '2026-01-15T00:00:00Z', + representativeThreadId: 100, + representativeNumber: 42, + representativeKind: 'issue', + ...overrides, + }; +} + +const repository: RepositoryDto = { + id: 1, + owner: 'openclaw', + name: 'openclaw', + fullName: 'openclaw/openclaw', + githubRepoId: '123', + updatedAt: '2026-03-01T00:00:00Z', +}; + +const stats: RepoStatsDto = { + openIssueCount: 120, + openPullRequestCount: 14, + lastGithubReconciliationAt: '2026-03-10T00:00:00Z', + lastEmbedRefreshAt: '2026-03-11T00:00:00Z', + staleEmbedThreadCount: 9, + staleEmbedSourceCount: 9, + latestClusterRunId: 88, + latestClusterRunFinishedAt: '2026-03-12T00:00:00Z', +}; + +test('generateSuggestedActions returns close, growth, and stale actions when triggered', () => { + const now = Date.now(); + const threeDaysAgo = new Date(now - 3 * 24 * 60 * 60 * 1000).toISOString(); + const fortyDaysAgo = new Date(now - 40 * 24 * 60 * 60 * 1000).toISOString(); + + const actions = generateSuggestedActions([ + makeCluster({ clusterId: 1, totalCount: 7, issueCount: 7, pullRequestCount: 0, latestUpdatedAt: threeDaysAgo }), + makeCluster({ + clusterId: 2, + displayTitle: 'Errors spike on nightly builds', + totalCount: 8, + issueCount: 6, + pullRequestCount: 2, + latestUpdatedAt: threeDaysAgo, + }), + makeCluster({ + clusterId: 3, + displayTitle: 'Legacy auth failures', + totalCount: 4, + issueCount: 4, + pullRequestCount: 0, + latestUpdatedAt: fortyDaysAgo, + }), + ]); + + assert(actions.some((action) => action.action === 'review_duplicate_candidates' && action.clusterId === 1)); + assert(actions.some((action) => action.action === 'investigate_growth' && action.clusterId === 1)); + assert(actions.some((action) => action.action === 'investigate_growth' && action.clusterId === 2)); + assert(actions.some((action) => action.action === 'stale_cluster' && action.clusterId === 3)); +}); + +test('generateSuggestedActions returns empty for clusters below thresholds', () => { + const actions = generateSuggestedActions([ + makeCluster({ clusterId: 10, totalCount: 2, issueCount: 2, pullRequestCount: 0, latestUpdatedAt: null }), + makeCluster({ clusterId: 11, totalCount: 4, issueCount: 3, pullRequestCount: 1, latestUpdatedAt: '2026-03-01T00:00:00Z' }), + ]); + + assert.deepEqual(actions, []); +}); + +test('formatTriageMarkdown includes expected sections and content', () => { + const topClusters = [makeCluster({ clusterId: 5, displayTitle: 'High CPU in indexer' })]; + const suggestedActions = generateSuggestedActions(topClusters); + const report: TriageReport = { + repository, + generatedAt: '2026-03-18T10:00:00Z', + stats, + topClusters, + suggestedActions, + }; + + const markdown = formatTriageMarkdown(report); + + assert.match(markdown, /# Triage Report: openclaw\/openclaw/); + assert.match(markdown, /## Summary/); + assert.match(markdown, /## Top Clusters by Size/); + assert.match(markdown, /\| # \| Cluster \| Representative \| Members \| Issues \| PRs \| Last Updated \|/); + assert.match(markdown, /## Suggested Actions/); + assert.match(markdown, /High CPU in indexer/); +}); + +test('stale cluster threshold requires more than 30 days', () => { + const now = Date.now(); + const exactlyThirtyDaysAgo = new Date(now - 30 * 24 * 60 * 60 * 1000).toISOString(); + const thirtyOneDaysAgo = new Date(now - 31 * 24 * 60 * 60 * 1000).toISOString(); + + const actions = generateSuggestedActions([ + makeCluster({ clusterId: 21, totalCount: 3, issueCount: 3, latestUpdatedAt: exactlyThirtyDaysAgo }), + makeCluster({ clusterId: 22, totalCount: 3, issueCount: 3, latestUpdatedAt: thirtyOneDaysAgo }), + ]); + + assert.equal(actions.some((action) => action.action === 'stale_cluster' && action.clusterId === 21), false); + assert.equal(actions.some((action) => action.action === 'stale_cluster' && action.clusterId === 22), true); +}); diff --git a/packages/api-core/src/report/triage.ts b/packages/api-core/src/report/triage.ts new file mode 100644 index 0000000..9f55e8c --- /dev/null +++ b/packages/api-core/src/report/triage.ts @@ -0,0 +1,118 @@ +import type { ClusterSummaryDto, RepoStatsDto, RepositoryDto } from '@ghcrawl/api-contract'; + +export type TriageAction = { + action: 'review_duplicate_candidates' | 'investigate_growth' | 'stale_cluster'; + clusterId: number; + displayTitle: string; + reason: string; +}; + +export type TriageReport = { + repository: RepositoryDto; + generatedAt: string; + stats: RepoStatsDto; + topClusters: ClusterSummaryDto[]; + suggestedActions: TriageAction[]; +}; + +export function generateSuggestedActions(clusters: ClusterSummaryDto[]): TriageAction[] { + const actions: TriageAction[] = []; + const now = new Date(); + + for (const cluster of clusters) { + if (cluster.totalCount >= 5 && cluster.pullRequestCount === 0) { + actions.push({ + action: 'review_duplicate_candidates', + clusterId: cluster.clusterId, + displayTitle: cluster.displayTitle, + reason: `${cluster.totalCount} clustered reports with similar content`, + }); + } + + if (cluster.latestUpdatedAt && cluster.totalCount >= 6) { + const lastUpdated = new Date(cluster.latestUpdatedAt); + const daysSince = (now.getTime() - lastUpdated.getTime()) / (1000 * 60 * 60 * 24); + if (daysSince <= 7) { + actions.push({ + action: 'investigate_growth', + clusterId: cluster.clusterId, + displayTitle: cluster.displayTitle, + reason: `${cluster.totalCount} clustered items with activity in the last ${Math.max(0, Math.floor(daysSince))} days`, + }); + } + } + + if (cluster.latestUpdatedAt && cluster.totalCount >= 3) { + const lastUpdated = new Date(cluster.latestUpdatedAt); + const daysSince = (now.getTime() - lastUpdated.getTime()) / (1000 * 60 * 60 * 24); + if (daysSince > 30) { + actions.push({ + action: 'stale_cluster', + clusterId: cluster.clusterId, + displayTitle: cluster.displayTitle, + reason: `last updated ${Math.floor(daysSince)} days ago with ${cluster.totalCount} clustered items`, + }); + } + } + } + + return actions; +} + +export function formatTriageMarkdown(report: TriageReport): string { + const lines: string[] = []; + + lines.push(`# Triage Report: ${report.repository.fullName}`); + lines.push(''); + lines.push(`Generated: ${report.generatedAt}`); + + const lastSync = report.stats.lastGithubReconciliationAt ?? 'never'; + const lastEmbed = report.stats.lastEmbedRefreshAt ?? 'never'; + const lastCluster = report.stats.latestClusterRunFinishedAt ?? 'never'; + lines.push(`Data freshness: last sync ${lastSync}, last embed ${lastEmbed}, last cluster ${lastCluster}`); + lines.push(''); + + lines.push('## Summary'); + lines.push(''); + lines.push(`- Open issues: ${report.stats.openIssueCount} | Open PRs: ${report.stats.openPullRequestCount}`); + lines.push(`- Clusters: ${report.topClusters.length} shown`); + lines.push(`- Stale embeddings: ${report.stats.staleEmbedThreadCount} threads need re-embedding`); + lines.push(''); + + lines.push('## Top Clusters by Size'); + lines.push(''); + lines.push('| # | Cluster | Representative | Members | Issues | PRs | Last Updated |'); + lines.push('|---|---------|---------------|---------|--------|-----|-------------|'); + + for (let index = 0; index < report.topClusters.length; index += 1) { + const cluster = report.topClusters[index]; + const representative = cluster.representativeNumber + ? `#${cluster.representativeNumber} (${cluster.representativeKind ?? 'unknown'})` + : '-'; + const updatedAt = cluster.latestUpdatedAt ? cluster.latestUpdatedAt.split('T')[0] : '-'; + lines.push( + `| ${index + 1} | ${cluster.displayTitle} | ${representative} | ${cluster.totalCount} | ${cluster.issueCount} | ${cluster.pullRequestCount} | ${updatedAt} |`, + ); + } + + lines.push(''); + + if (report.suggestedActions.length > 0) { + lines.push('## Suggested Actions'); + lines.push(''); + + for (const action of report.suggestedActions) { + const prefix = + action.action === 'review_duplicate_candidates' + ? 'Review candidates in' + : action.action === 'investigate_growth' + ? 'Investigate' + : 'Stale:'; + lines.push(`- **${prefix} Cluster ${action.clusterId}** - ${action.reason}: "${action.displayTitle}"`); + } + + lines.push(''); + } + + return lines.join('\n'); +} diff --git a/packages/api-core/src/service.ts b/packages/api-core/src/service.ts index b03674f..0d9259f 100644 --- a/packages/api-core/src/service.ts +++ b/packages/api-core/src/service.ts @@ -62,6 +62,7 @@ import { openDb, type SqliteDatabase } from './db/sqlite.js'; import { buildCanonicalDocument, isBotLikeAuthor } from './documents/normalize.js'; import { makeGitHubClient, type GitHubClient } from './github/client.js'; import { OpenAiProvider, type AiProvider } from './openai/provider.js'; +import { generateSuggestedActions, type TriageReport } from './report/triage.js'; import { cosineSimilarity, normalizeEmbedding, rankNearestNeighbors } from './search/exact.js'; type RunTable = 'sync_runs' | 'summary_runs' | 'embedding_runs' | 'cluster_runs'; @@ -1473,6 +1474,37 @@ export class GHCrawlService { }); } + generateTriageReport(params: { owner: string; repo: string; limit?: number; minSize?: number }): TriageReport { + const repository = this.requireRepository(params.owner, params.repo); + const limit = params.limit ?? 20; + const minSize = params.minSize ?? 3; + + const summariesResponse = this.listClusterSummaries({ + owner: params.owner, + repo: params.repo, + minSize, + limit, + sort: 'size', + }); + + const suggestedActions = generateSuggestedActions(summariesResponse.clusters); + + return { + repository: { + id: repository.id, + owner: repository.owner, + name: repository.name, + fullName: repository.fullName, + githubRepoId: repository.githubRepoId ?? null, + updatedAt: repository.updatedAt, + }, + generatedAt: new Date().toISOString(), + stats: summariesResponse.stats, + topClusters: summariesResponse.clusters, + suggestedActions, + }; + } + getClusterDetailDump(params: { owner: string; repo: string;