Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions apps/cli/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type CommandName =
| 'purge-comments'
| 'embed'
| 'cluster'
| 'diff'
| 'clusters'
| 'cluster-detail'
| 'search'
Expand Down Expand Up @@ -50,6 +51,7 @@ function usage(devMode = false): string {
' close-cluster <owner/repo> --id <cluster-id>',
' embed <owner/repo> [--number <thread>]',
' cluster <owner/repo> [--k <count>] [--threshold <score>]',
' diff <owner/repo>',
' clusters <owner/repo> [--min-size <count>] [--limit <count>] [--sort recent|size] [--search <text>] [--include-closed]',
' cluster-detail <owner/repo> --id <cluster-id> [--member-limit <count>] [--body-chars <count>] [--include-closed]',
' search <owner/repo> --query <text> [--mode keyword|semantic|hybrid]',
Expand Down Expand Up @@ -457,6 +459,12 @@ export async function run(argv: string[], stdout: NodeJS.WritableStream = proces
stdout.write(`${JSON.stringify(result, null, 2)}\n`);
return;
}
case 'diff': {
const { owner, repo } = parseRepoFlags(rest);
const result = getService().diffClusters({ owner, repo });
stdout.write(`${JSON.stringify(result, null, 2)}\n`);
return;
}
case 'clusters': {
const { owner, repo, values } = parseRepoFlags(rest);
const sort = values.sort === 'recent' || values.sort === 'size' ? values.sort : undefined;
Expand Down
72 changes: 70 additions & 2 deletions apps/cli/src/tui/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { fileURLToPath } from 'node:url';
import blessed from 'neo-blessed';

import type {
ClusterDiffResponse,
GHCrawlService,
TuiClusterDetail,
TuiClusterSortMode,
Expand Down Expand Up @@ -175,6 +176,7 @@ export async function startTui(params: StartTuiParams): Promise<void> {
let activeJob: BackgroundRefreshJob | null = null;
let modalOpen = false;
let exitRequested = false;
let diffOverlay: ClusterDiffResponse | null = null;

const clearCaches = (): void => {
clusterDetailCache.clear();
Expand Down Expand Up @@ -390,7 +392,7 @@ export async function startTui(params: StartTuiParams): Promise<void> {
widgets.members.select(memberIndex);
}

widgets.detail.setContent(renderDetailPane(threadDetail, clusterDetail, focusPane));
widgets.detail.setContent(diffOverlay ? renderDiffPane(diffOverlay) : renderDetailPane(threadDetail, clusterDetail, focusPane));
updatePaneStyles(widgets, focusPane);
const activeJobs = [syncJobRunning ? 'sync' : null, embedJobRunning ? 'embed' : null, clusterJobRunning ? 'cluster' : null]
.filter(Boolean)
Expand All @@ -401,7 +403,7 @@ export async function startTui(params: StartTuiParams): Promise<void> {
footerLines.unshift('');
}
footerLines.push(
`${status} | jobs:${activeJobs} | h/? help # jump g update p repos u author / filter s sort f min l layout x closed`,
`${status} | jobs:${activeJobs} | h/? help # jump d diff g update p repos u author / filter s sort f min l layout x closed`,
);
footerLines.push(
`Tab focus arrows move-or-scroll PgUp/PgDn page r refresh o open q quit`,
Expand Down Expand Up @@ -1123,6 +1125,23 @@ export async function startTui(params: StartTuiParams): Promise<void> {
if (modalOpen) return;
promptAuthorThreads();
});
widgets.screen.key(['d'], () => {
if (modalOpen) return;
if (diffOverlay) {
diffOverlay = null;
render();
return;
}
if (!currentRepository.owner || !currentRepository.repo) return;
try {
diffOverlay = params.service.diffClusters({ owner: currentRepository.owner, repo: currentRepository.repo });
status = 'Showing cluster diff';
} catch {
diffOverlay = null;
status = 'No diff data (run cluster twice to generate transitions)';
}
render();
});
widgets.screen.on('resize', () => render());

widgets.screen.on('destroy', () => {
Expand Down Expand Up @@ -1215,6 +1234,55 @@ function updatePaneStyles(widgets: Widgets, focus: TuiFocusPane): void {
widgets.detail.style.border = { fg: focus === 'detail' ? 'white' : '#fde74c' };
}

/**
 * Produces the tagged text for the cluster-diff overlay.
 *
 * The output has three parts: a header naming the two run ids and the total
 * number of transitions (the sum of all summary counters), a per-kind summary
 * table, and one line per transition. When there are no transitions a hint is
 * appended instead of transition lines.
 *
 * @param diff - Diff payload holding run ids, summary counters and transitions.
 * @returns The pane content as a single newline-joined string.
 */
export function renderDiffPane(diff: ClusterDiffResponse): string {
  const { summary, transitions } = diff;

  // Display order of the summary rows, paired with the colour tag of each kind.
  const palette = [
    ['continuing', 'green-fg'],
    ['growing', 'green-fg'],
    ['shrinking', 'yellow-fg'],
    ['splitting', 'yellow-fg'],
    ['merging', 'cyan-fg'],
    ['forming', 'blue-fg'],
    ['dissolving', 'red-fg'],
  ] as const;
  const tagByKind = new Map<string, string>(palette);

  // Wraps a transition kind in its colour tag; unrecognised kinds render white.
  const paint = (kind: string): string => {
    const tag = tagByKind.get(kind) ?? 'white-fg';
    return `{${tag}}${kind}{/${tag}}`;
  };

  const transitionTotal =
    summary.continuing +
    summary.growing +
    summary.shrinking +
    summary.splitting +
    summary.merging +
    summary.forming +
    summary.dissolving;

  const output: string[] = [];
  output.push('{bold}Cluster Diff{/bold} (press d to close)');
  output.push(`Run ${diff.fromRunId} -> ${diff.toRunId} | ${transitionTotal} transitions`);
  output.push('');
  output.push('{bold}Summary{/bold}');
  for (const [kind] of palette) {
    output.push(` ${paint(kind)} ${summary[kind]}`);
  }
  output.push('');
  output.push('{bold}Transitions{/bold}');
  output.push('');

  for (const entry of transitions) {
    // A null id on either side means the cluster has no counterpart in that run.
    const source = entry.fromClusterId === null ? '(new)' : `#${entry.fromClusterId}`;
    const target = entry.toClusterId === null ? '(gone)' : `#${entry.toClusterId}`;

    let similarity = '';
    if (entry.jaccardScore !== null) {
      similarity = `J=${(entry.jaccardScore * 100).toFixed(0)}%`;
    }

    let membership: string;
    if (entry.membersAdded > 0 || entry.membersRemoved > 0) {
      membership = ` +${entry.membersAdded}/-${entry.membersRemoved} (${entry.membersRetained} kept)`;
    } else {
      membership = ` (${entry.membersRetained} members)`;
    }

    output.push(` ${source} -> ${target} ${paint(entry.transition)} ${similarity}${membership}`);
  }

  if (transitions.length === 0) {
    output.push(' No transitions recorded. Run `ghcrawl cluster` twice to generate diff data.');
  }

  return output.join('\n');
}

export function renderDetailPane(
threadDetail: TuiThreadDetail | null,
clusterDetail: TuiClusterDetail | null,
Expand Down
Binary file added docs/screenshots/ghcrawl-clusters-real.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/screenshots/ghcrawl-diff-bug.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/screenshots/ghcrawl-diff-view.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/screenshots/ghcrawl-perf-real.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/screenshots/ghcrawl-perf-tests.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
38 changes: 38 additions & 0 deletions packages/api-contract/src/contracts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,44 @@ export const clusterResultSchema = z.object({
});
export type ClusterResultDto = z.infer<typeof clusterResultSchema>;

/** Closed set of labels a cluster transition may carry. */
export const transitionTypeSchema = z.enum(['continuing', 'growing', 'shrinking', 'splitting', 'merging', 'forming', 'dissolving']);
/** String-literal union inferred from {@link transitionTypeSchema}. */
export type TransitionType = z.infer<typeof transitionTypeSchema>;

/**
 * One transition record between a cluster in the "from" run and a cluster in
 * the "to" run. Either cluster id may be null; the member counters are
 * non-negative integers.
 */
export const clusterTransitionSchema = z.object({
// Null when there is no source cluster (the TUI renders this as "(new)").
fromClusterId: z.number().int().nullable(),
// Null when there is no destination cluster (the TUI renders this as "(gone)").
toClusterId: z.number().int().nullable(),
transition: transitionTypeSchema,
// Nullable similarity score; presumably a 0..1 Jaccard index (the TUI shows it as a percentage) — TODO confirm range.
jaccardScore: z.number().nullable(),
membersAdded: z.number().int().nonnegative(),
membersRemoved: z.number().int().nonnegative(),
membersRetained: z.number().int().nonnegative(),
});
/** Object type inferred from {@link clusterTransitionSchema}. */
export type ClusterTransitionDto = z.infer<typeof clusterTransitionSchema>;

/**
 * Diff payload: the repository, the two positive-integer run ids being
 * compared, the list of transitions, and one non-negative counter per
 * transition type.
 */
export const clusterDiffResponseSchema = z.object({
repository: repositorySchema,
fromRunId: z.number().int().positive(),
toRunId: z.number().int().positive(),
transitions: z.array(clusterTransitionSchema),
// One counter per value of transitionTypeSchema.
summary: z.object({
continuing: z.number().int().nonnegative(),
growing: z.number().int().nonnegative(),
shrinking: z.number().int().nonnegative(),
splitting: z.number().int().nonnegative(),
merging: z.number().int().nonnegative(),
forming: z.number().int().nonnegative(),
dissolving: z.number().int().nonnegative(),
}),
});
/** Object type inferred from {@link clusterDiffResponseSchema}. */
export type ClusterDiffResponse = z.infer<typeof clusterDiffResponseSchema>;

/**
 * Compact diff result: the two run ids plus a transition count, without the
 * individual transitions.
 * NOTE(review): no consumer is visible here — presumably returned by the step that computes a diff; confirm.
 */
export const diffResultSchema = z.object({
fromRunId: z.number().int().positive(),
toRunId: z.number().int().positive(),
transitionCount: z.number().int().nonnegative(),
});
/** Object type inferred from {@link diffResultSchema}. */
export type DiffResultDto = z.infer<typeof diffResultSchema>;

export const refreshRequestSchema = z.object({
owner: z.string(),
repo: z.string(),
Expand Down
6 changes: 6 additions & 0 deletions packages/api-core/src/api/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ export function createApiServer(service: GHCrawlService): http.Server {
return;
}

if (req.method === 'GET' && url.pathname === '/diff') {
const params = parseRepoParams(url);
sendJson(res, 200, service.diffClusters(params));
return;
}

if (req.method === 'GET' && url.pathname === '/cluster-summaries') {
const params = parseRepoParams(url);
const sortParam = url.searchParams.get('sort');
Expand Down
123 changes: 123 additions & 0 deletions packages/api-core/src/cluster/lineage-perf.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import assert from 'node:assert/strict';
import { performance } from 'node:perf_hooks';
import test from 'node:test';

import { computeClusterTransitions, type ClusterSnapshot } from './lineage.js';

/**
 * Builds a synthetic pair of clustering runs for benchmarking.
 *
 * Every cluster in the old run has a counterpart in the new run. The pair
 * shares `floor(membersPerCluster * 0.8)` member ids; the remaining slots are
 * filled with ids unique to the old cluster and, separately, ids unique to the
 * new cluster. Member ids are handed out sequentially starting at 1. Old
 * clusters are numbered `1..clusterCount` and new clusters continue from
 * `clusterCount + 1`.
 *
 * @param clusterCount - Number of clusters to create in each run.
 * @param membersPerCluster - Size of every generated cluster.
 * @returns Both runs plus the total number of distinct member ids issued.
 */
function generateScenario(clusterCount: number, membersPerCluster: number): {
  oldClusters: ClusterSnapshot[];
  newClusters: ClusterSnapshot[];
  totalMembers: number;
} {
  const retainedPerCluster = Math.floor(membersPerCluster * 0.8);
  const replacedPerCluster = membersPerCluster - retainedPerCluster;

  let lastIssuedId = 0;
  // Hands out the next `count` sequential member ids.
  const issueIds = (count: number): number[] => {
    const ids: number[] = [];
    for (let k = 0; k < count; k += 1) {
      lastIssuedId += 1;
      ids.push(lastIssuedId);
    }
    return ids;
  };

  const before: ClusterSnapshot[] = [];
  const after: ClusterSnapshot[] = [];

  for (let index = 0; index < clusterCount; index += 1) {
    // Allocation order matters: shared ids first, then old-only, then new-only.
    const kept = issueIds(retainedPerCluster);
    const dropped = issueIds(replacedPerCluster);
    const added = issueIds(replacedPerCluster);

    before.push({ clusterId: index + 1, members: new Set<number>([...kept, ...dropped]) });
    after.push({ clusterId: clusterCount + index + 1, members: new Set<number>([...kept, ...added]) });
  }

  return { oldClusters: before, newClusters: after, totalMembers: lastIssuedId };
}

/**
 * Returns the median of `values` without modifying the input array.
 *
 * For an even number of entries the two middle values are averaged. An empty
 * array yields `NaN`.
 */
function median(values: number[]): number {
  const ordered = values.slice().sort((left, right) => left - right);
  const upperMiddle = Math.floor(ordered.length / 2);
  if (ordered.length % 2 !== 0) {
    return ordered[upperMiddle];
  }
  return (ordered[upperMiddle - 1] + ordered[upperMiddle]) / 2;
}

// Benchmark scenarios: cluster count, members per cluster, and the label
// printed in the results table.
const SCALES = [
{ clusters: 100, membersPerCluster: 8, label: '100 clusters (800 members)' },
{ clusters: 500, membersPerCluster: 8, label: '500 clusters (4,000 members)' },
{ clusters: 1000, membersPerCluster: 10, label: '1,000 clusters (10,000 members)' },
{ clusters: 2000, membersPerCluster: 10, label: '2,000 clusters (20,000 members)' },
];

// Untimed invocations executed per scale before measurement starts.
const WARMUP_RUNS = 2;
// Timed invocations per scale; the median of their durations is reported.
const BENCH_RUNS = 5;

/**
 * Benchmarks `computeClusterTransitions` at every entry of `SCALES`.
 *
 * For each scale it generates a synthetic scenario, performs `WARMUP_RUNS`
 * untimed calls, then times `BENCH_RUNS` calls and records the median. A
 * results table is printed, after which two timing budgets are asserted:
 * under 1 s at 2,000 clusters and under 100 ms at 500 clusters. The budgets
 * are looked up by cluster count, so reordering or extending `SCALES` cannot
 * silently move an assertion onto a different scale.
 */
test('lineage performance at multiple scales', () => {
  const results: Array<{ label: string; clusters: number; medianMs: number; transitionCount: number }> = [];

  for (const scale of SCALES) {
    const { oldClusters, newClusters } = generateScenario(scale.clusters, scale.membersPerCluster);

    // Warmup
    for (let i = 0; i < WARMUP_RUNS; i++) {
      computeClusterTransitions(oldClusters, newClusters);
    }

    // Bench
    const durations: number[] = [];
    let transitionCount = 0;
    for (let i = 0; i < BENCH_RUNS; i++) {
      const start = performance.now();
      const transitions = computeClusterTransitions(oldClusters, newClusters);
      // Stop the clock before doing any bookkeeping on the result.
      durations.push(performance.now() - start);
      transitionCount = transitions.length;
    }

    results.push({
      label: scale.label,
      clusters: scale.clusters,
      medianMs: median(durations),
      transitionCount,
    });
  }

  // Print results table
  console.log('\n=== Lineage Performance Benchmark ===\n');
  console.log('Scale | Median | Transitions');
  console.log('-----------------------------------|------------|------------');
  for (const r of results) {
    const label = r.label.padEnd(35);
    const ms = `${r.medianMs.toFixed(1)} ms`.padStart(10);
    console.log(`${label}| ${ms} | ${r.transitionCount}`);
  }
  console.log('');

  // Fetches the measurement for a given cluster count, failing loudly when the
  // scale is absent instead of reading an unrelated array slot.
  const resultFor = (clusters: number): (typeof results)[number] => {
    const found = results.find((entry) => entry.clusters === clusters);
    if (found === undefined) {
      throw new Error(`No benchmark result recorded for ${clusters} clusters`);
    }
    return found;
  };

  // ghcrawl/ghcrawl has ~17k issues. With typical cluster sizes of 8-15,
  // that's roughly 1,100-2,100 clusters. Assert sub-second at 2,000 clusters.
  const largest = resultFor(2000);
  assert.ok(
    largest.medianMs < 1000,
    `Expected <1s at ${largest.label}, got ${largest.medianMs.toFixed(1)}ms`,
  );

  // Assert sub-100ms at 500 clusters (the stated comfortable range)
  const mid = resultFor(500);
  assert.ok(
    mid.medianMs < 100,
    `Expected <100ms at ${mid.label}, got ${mid.medianMs.toFixed(1)}ms`,
  );
});
Loading
Loading