diff --git a/apps/cli/src/main.ts b/apps/cli/src/main.ts index cc2c01c..e17cb43 100644 --- a/apps/cli/src/main.ts +++ b/apps/cli/src/main.ts @@ -42,8 +42,8 @@ function usage(devMode = false): string { ' init [--reconfigure]', ' doctor', ' version', - ' sync [--since ] [--limit ] [--include-comments] [--full-reconcile]', - ' refresh [--no-sync] [--no-embed] [--no-cluster]', + ' sync [--since ] [--limit ] [--include-comments] [--include-discussions] [--full-reconcile]', + ' refresh [--no-sync] [--no-embed] [--no-cluster] [--include-discussions]', ' threads [--numbers ] [--kind issue|pull_request] [--include-closed]', ' author --login [--include-closed]', ' close-thread --number ', @@ -100,6 +100,7 @@ export function parseRepoFlags(args: string[]): { owner: string; repo: string; v since: { type: 'string' }, limit: { type: 'string' }, 'include-comments': { type: 'boolean' }, + 'include-discussions': { type: 'boolean' }, 'full-reconcile': { type: 'boolean' }, 'include-closed': { type: 'boolean' }, kind: { type: 'string' }, @@ -339,6 +340,7 @@ export async function run(argv: string[], stdout: NodeJS.WritableStream = proces since: typeof values.since === 'string' ? resolveSinceValue(values.since) : undefined, limit: typeof values.limit === 'string' ? Number(values.limit) : undefined, includeComments: values['include-comments'] === true, + includeDiscussions: values['include-discussions'] === true, fullReconcile: values['full-reconcile'] === true, onProgress: writeProgress, }); @@ -353,6 +355,7 @@ export async function run(argv: string[], stdout: NodeJS.WritableStream = proces sync: values['no-sync'] === true ? false : undefined, embed: values['no-embed'] === true ? false : undefined, cluster: values['no-cluster'] === true ? false : undefined, + includeDiscussions: values['include-discussions'] === true, onProgress: writeProgress, }); stdout.write(`${JSON.stringify(result, null, 2)}\n`); diff --git a/packages/api-contract/src/contracts.ts b/packages/api-contract/src/contracts.ts index 1771e66..9c5ada6 100644 --- a/packages/api-contract/src/contracts.ts +++ b/packages/api-contract/src/contracts.ts @@ -1,6 +1,6 @@ import { z } from 'zod'; -export const threadKindSchema = z.enum(['issue', 'pull_request']); +export const threadKindSchema = z.enum(['issue', 'pull_request', 'discussion']); export type ThreadKind = z.infer; export const searchModeSchema = z.enum(['keyword', 'semantic', 'hybrid']); @@ -217,6 +217,7 @@ export const refreshRequestSchema = z.object({ sync: z.boolean().optional(), embed: z.boolean().optional(), cluster: z.boolean().optional(), + includeDiscussions: z.boolean().optional(), }); export type RefreshRequest = z.infer; diff --git a/packages/api-core/src/api/server.ts b/packages/api-core/src/api/server.ts index 79032c8..91634a2 100644 --- a/packages/api-core/src/api/server.ts +++ b/packages/api-core/src/api/server.ts @@ -42,7 +42,7 @@ export function createApiServer(service: GHCrawlService): http.Server { if (req.method === 'GET' && url.pathname === '/threads') { const params = parseRepoParams(url); const kindParam = url.searchParams.get('kind'); - const kind = kindParam === 'issue' || kindParam === 'pull_request' ? kindParam : undefined; + const kind = kindParam === 'issue' || kindParam === 'pull_request' || kindParam === 'discussion' ? kindParam : undefined; const numbersValue = url.searchParams.get('numbers'); const numbers = numbersValue && numbersValue.trim().length > 0 diff --git a/packages/api-core/src/github/client.test.ts b/packages/api-core/src/github/client.test.ts new file mode 100644 index 0000000..59684b2 --- /dev/null +++ b/packages/api-core/src/github/client.test.ts @@ -0,0 +1,65 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; + +import { mapDiscussionToRecord } from './client.js'; + +function makeDiscussionNode(overrides: Record = {}): Record { + return { + number: 42, + title: 'Discussion title', + body: 'Discussion body', + author: { login: 'alice' }, + labels: { nodes: [{ name: 'help wanted' }, { name: 'good first discussion' }] }, + createdAt: '2026-03-09T00:00:00Z', + updatedAt: '2026-03-10T00:00:00Z', + closed: false, + url: 'https://github.com/openclaw/openclaw/discussions/42', + category: { name: 'Ideas' }, + ...overrides, + }; +} + +test('mapDiscussionToRecord maps a normal discussion node correctly', () => { + const mapped = mapDiscussionToRecord(makeDiscussionNode()); + assert.equal(mapped.number, 42); + assert.equal(mapped.title, 'Discussion title'); + assert.equal(mapped.body, 'Discussion body'); + assert.deepEqual(mapped.user, { login: 'alice', type: 'User' }); + assert.equal(mapped.html_url, 'https://github.com/openclaw/openclaw/discussions/42'); + assert.equal(mapped.state, 'open'); + assert.equal(mapped.created_at, '2026-03-09T00:00:00Z'); + assert.equal(mapped.updated_at, '2026-03-10T00:00:00Z'); + assert.equal(mapped._ghcrawl_kind, 'discussion'); + assert.deepEqual(mapped.labels, [{ name: 'Ideas' }, { name: 'help wanted' }, { name: 'good first discussion' }]); +}); + +test('mapDiscussionToRecord handles null author', () => { + const mapped = mapDiscussionToRecord(makeDiscussionNode({ author: null })); + assert.deepEqual(mapped.user, { login: null, type: 'User' }); +}); + +test('mapDiscussionToRecord handles null body', () => { + const mapped = mapDiscussionToRecord(makeDiscussionNode({ body: null })); + assert.equal(mapped.body, ''); +}); + +test('mapDiscussionToRecord handles null category', () => { + const mapped = mapDiscussionToRecord(makeDiscussionNode({ category: null })); + assert.deepEqual(mapped.labels, [{ name: 'discussion' }, { name: 'help wanted' }, { name: 'good first discussion' }]); +}); + +test("mapDiscussionToRecord maps closed discussions to state 'closed'", () => { + const mapped = mapDiscussionToRecord(makeDiscussionNode({ closed: true })); + assert.equal(mapped.state, 'closed'); +}); + +test("mapDiscussionToRecord maps open discussions to state 'open'", () => { + const mapped = mapDiscussionToRecord(makeDiscussionNode({ closed: false })); + assert.equal(mapped.state, 'open'); +}); + +test('mapDiscussionToRecord includes category as first label', () => { + const mapped = mapDiscussionToRecord(makeDiscussionNode({ category: { name: 'Q&A' } })); + const labels = mapped.labels as Array<{ name: string }>; + assert.equal(labels[0]?.name, 'Q&A'); +}); diff --git a/packages/api-core/src/github/client.ts b/packages/api-core/src/github/client.ts index dc2f733..11db804 100644 --- a/packages/api-core/src/github/client.ts +++ b/packages/api-core/src/github/client.ts @@ -13,6 +13,13 @@ export type GitHubClient = { reporter?: GitHubReporter, state?: 'open' | 'closed', ) => Promise>>; + listRepositoryDiscussions?: ( + owner: string, + repo: string, + since?: string, + limit?: number, + reporter?: GitHubReporter, + ) => Promise>>; getIssue: (owner: string, repo: string, number: number, reporter?: GitHubReporter) => Promise>; getPull: (owner: string, repo: string, number: number, reporter?: GitHubReporter) => Promise>; listIssueComments: (owner: string, repo: string, number: number, reporter?: GitHubReporter) => Promise>>; @@ -48,6 +55,19 @@ type OctokitPage = { data: T[]; }; +type DiscussionNode = { + number: number; + title: string; + body: string | null; + author: { login: string } | null; + labels: { nodes: Array<{ name: string }> }; + createdAt: string; + updatedAt: string; + closed: boolean; + url: string; + category: { name: string } | null; +}; + function delay(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } @@ -71,6 +91,38 @@ function formatResetTime(resetSeconds: string | null | undefined): string | null return new Date(value * 1000).toISOString(); } +function isDiscussionFeatureDisabledError(error: unknown): boolean { + const message = error instanceof Error ? error.message : String(error); + return /discussion/i.test(message) && /disabled|not enabled|enable/i.test(message); +} + +export function mapDiscussionToRecord(node: Record): Record { + const labelNodes = ((node.labels as { nodes?: unknown } | undefined)?.nodes ?? []) as unknown[]; + const mappedLabels = labelNodes + .map((label) => (label && typeof label === 'object' && typeof (label as { name?: unknown }).name === 'string' ? { name: String((label as { name: unknown }).name) } : null)) + .filter((label): label is { name: string } => label !== null); + const categoryName = + node.category && typeof node.category === 'object' && typeof (node.category as { name?: unknown }).name === 'string' + ? String((node.category as { name: unknown }).name) + : 'discussion'; + const authorLogin = + node.author && typeof node.author === 'object' && typeof (node.author as { login?: unknown }).login === 'string' + ? String((node.author as { login: unknown }).login) + : null; + return { + number: Number(node.number), + title: String(node.title ?? ''), + body: typeof node.body === 'string' ? node.body : '', + user: { login: authorLogin, type: 'User' }, + html_url: String(node.url ?? ''), + state: node.closed ? 'closed' : 'open', + labels: [{ name: categoryName }, ...mappedLabels], + created_at: typeof node.createdAt === 'string' ? node.createdAt : null, + updated_at: typeof node.updatedAt === 'string' ? node.updatedAt : null, + _ghcrawl_kind: 'discussion', + }; +} + export function makeGitHubClient(options: RequestOptions): GitHubClient { const userAgent = options.userAgent ?? 'ghcrawl'; const timeoutMs = options.timeoutMs ?? 30_000; @@ -184,6 +236,88 @@ export function makeGitHubClient(options: RequestOptions): GitHubClient { }) as AsyncIterable>>, ); }, + async listRepositoryDiscussions(owner, repo, since, limit, reporter) { + reporter?.(`[github] request GRAPHQL repository discussions for ${owner}/${repo}`); + const octokit = createOctokit(reporter); + const out: Array> = []; + const sinceMs = since ? Date.parse(since) : Number.NEGATIVE_INFINITY; + let hasNextPage = true; + let cursor: string | null = null; + let pageIndex = 0; + try { + while (hasNextPage) { + const response = (await octokit.graphql( + `query($owner: String!, $name: String!, $cursor: String) { + repository(owner: $owner, name: $name) { + discussions(first: 100, after: $cursor, orderBy: {field: UPDATED_AT, direction: DESC}) { + pageInfo { + hasNextPage + endCursor + } + nodes { + number + title + body + author { login } + labels(first: 10) { nodes { name } } + createdAt + updatedAt + closed + url + category { name } + } + } + } + }`, + { owner, name: repo, cursor }, + )) as { + repository: { + discussions: { + pageInfo: { hasNextPage: boolean; endCursor: string | null }; + nodes: DiscussionNode[]; + }; + } | null; + }; + pageIndex += 1; + const discussions = response.repository?.discussions; + if (!discussions) { + return out; + } + let stopForSince = false; + for (const node of discussions.nodes) { + const updatedAtMs = Date.parse(node.updatedAt); + if (Number.isFinite(sinceMs) && Number.isFinite(updatedAtMs) && updatedAtMs < sinceMs) { + stopForSince = true; + break; + } + out.push(mapDiscussionToRecord(node as unknown as Record)); + if (typeof limit === 'number' && out.length >= limit) { + break; + } + } + + reporter?.(`[github] page ${pageIndex} fetched discussions=${discussions.nodes.length} accumulated=${out.length}`); + if ((typeof limit === 'number' && out.length >= limit) || stopForSince) { + break; + } + hasNextPage = discussions.pageInfo.hasNextPage; + cursor = discussions.pageInfo.endCursor; + if (hasNextPage) { + await delay(pageDelayMs); + } + } + return out; + } catch (error) { + if (isDiscussionFeatureDisabledError(error)) { + const message = error instanceof Error ? error.message : String(error); + reporter?.(`[github] warning discussions unavailable for ${owner}/${repo}; skipping: ${message}`); + return []; + } + const message = error instanceof Error ? error.message : String(error); + const status = typeof (error as { status?: unknown })?.status === 'number' ? Number((error as { status?: unknown }).status) : undefined; + throw new GitHubRequestError(`GitHub request failed for GRAPHQL discussions ${owner}/${repo}: ${message}`, status); + } + }, async getIssue(owner, repo, number, reporter) { return request(`GET /repos/${owner}/${repo}/issues/${number}`, reporter, async (octokit) => { const response = await octokit.rest.issues.get({ owner, repo, issue_number: number }); diff --git a/packages/api-core/src/service.ts b/packages/api-core/src/service.ts index b03674f..c96947b 100644 --- a/packages/api-core/src/service.ts +++ b/packages/api-core/src/service.ts @@ -70,7 +70,7 @@ type ThreadRow = { id: number; repo_id: number; number: number; - kind: 'issue' | 'pull_request'; + kind: 'issue' | 'pull_request' | 'discussion'; state: string; closed_at_gh: string | null; closed_at_local: string | null; @@ -150,6 +150,7 @@ type SyncRunStats = { effectiveSince: string | null; limit: number | null; includeComments: boolean; + includeDiscussions?: boolean; fullReconcile?: boolean; isFullOpenScan: boolean; isOverlappingOpenScan: boolean; @@ -182,14 +183,14 @@ export type TuiClusterSummary = { latestUpdatedAt: string | null; representativeThreadId: number | null; representativeNumber: number | null; - representativeKind: 'issue' | 'pull_request' | null; + representativeKind: 'issue' | 'pull_request' | 'discussion' | null; searchText: string; }; export type TuiClusterMember = { id: number; number: number; - kind: 'issue' | 'pull_request'; + kind: 'issue' | 'pull_request' | 'discussion'; isClosed: boolean; title: string; updatedAtGh: string | null; @@ -210,7 +211,7 @@ export type TuiClusterDetail = { latestUpdatedAt: string | null; representativeThreadId: number | null; representativeNumber: number | null; - representativeKind: 'issue' | 'pull_request' | null; + representativeKind: 'issue' | 'pull_request' | 'discussion' | null; members: TuiClusterMember[]; }; @@ -251,6 +252,7 @@ type SyncOptions = { since?: string; limit?: number; includeComments?: boolean; + includeDiscussions?: boolean; fullReconcile?: boolean; onProgress?: (message: string) => void; startedAt?: string; @@ -317,6 +319,7 @@ function parseSyncRunStats(statsJson: string | null): SyncRunStats | null { effectiveSince: typeof parsed.effectiveSince === 'string' ? parsed.effectiveSince : null, limit: typeof parsed.limit === 'number' ? parsed.limit : null, includeComments: parsed.includeComments === true, + includeDiscussions: parsed.includeDiscussions === true, isFullOpenScan: parsed.isFullOpenScan === true, isOverlappingOpenScan: parsed.isOverlappingOpenScan === true, overlapReferenceAt: typeof parsed.overlapReferenceAt === 'string' ? parsed.overlapReferenceAt : null, @@ -410,7 +413,7 @@ function threadToDto(row: ThreadRow, clusterId?: number | null): ThreadDto { id: row.id, repoId: row.repo_id, number: row.number, - kind: row.kind, + kind: row.kind as ThreadDto['kind'], state: row.state, isClosed: isEffectivelyClosed(row), closedAtGh: row.closed_at_gh ?? null, @@ -523,7 +526,7 @@ export class GHCrawlService { return repositoriesResponseSchema.parse({ repositories: rows.map(repositoryToDto) }); } - listThreads(params: { owner: string; repo: string; kind?: 'issue' | 'pull_request'; numbers?: number[]; includeClosed?: boolean }): ThreadsResponse { + listThreads(params: { owner: string; repo: string; kind?: 'issue' | 'pull_request' | 'discussion'; numbers?: number[]; includeClosed?: boolean }): ThreadsResponse { const repository = this.requireRepository(params.owner, params.repo); const clusterIds = new Map(); const clusterRows = this.db @@ -755,6 +758,7 @@ export class GHCrawlService { ): Promise { const crawlStartedAt = params.startedAt ?? nowIso(); const includeComments = params.includeComments ?? false; + const includeDiscussions = params.includeDiscussions ?? false; const github = this.requireGithub(); params.onProgress?.(`[sync] fetching repository metadata for ${params.owner}/${params.repo}`); const reporter = params.onProgress ? (message: string) => params.onProgress?.(message.replace(/^\[github\]/, '[sync/github]')) : undefined; @@ -780,6 +784,9 @@ export class GHCrawlService { ? '[sync] comment hydration enabled; fetching issue comments, reviews, and review comments' : '[sync] metadata-only mode; skipping comment, review, and review-comment fetches', ); + if (includeDiscussions) { + params.onProgress?.('[sync] discussion sync enabled; crawling GitHub Discussions metadata'); + } if (isFullOpenScan) { params.onProgress?.('[sync] full open scan; no prior completed overlap/full cursor was found for this repository'); } else if (params.since === undefined && effectiveSince && overlapReferenceAt) { @@ -819,6 +826,35 @@ export class GHCrawlService { } } + if (includeDiscussions && github.listRepositoryDiscussions) { + const discussions = await github.listRepositoryDiscussions( + params.owner, + params.repo, + effectiveSince, + params.limit, + reporter, + ); + params.onProgress?.(`[sync] discovered ${discussions.length} discussions to process`); + for (const [index, discussion] of discussions.entries()) { + if (index > 0 && index % SYNC_BATCH_SIZE === 0) { + params.onProgress?.(`[sync] discussion batch boundary reached at ${index} threads; sleeping 5s before continuing`); + await new Promise((resolve) => setTimeout(resolve, SYNC_BATCH_DELAY_MS)); + } + const number = Number(discussion.number); + params.onProgress?.(`[sync] discussion ${index + 1}/${discussions.length} #${number}`); + try { + const threadId = this.upsertThread(repoId, 'discussion', discussion, crawlStartedAt); + this.refreshDocument(threadId); + threadsSynced += 1; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`sync failed while processing discussion #${number}: ${message}`); + } + } + } else if (includeDiscussions) { + params.onProgress?.('[sync] discussion sync requested but GitHub client does not support discussions; skipping'); + } + const shouldSweepClosedOverlap = params.limit === undefined && effectiveSince !== undefined; if (!shouldSweepClosedOverlap) { params.onProgress?.('[sync] skipping closed overlap sweep because this scan has no overlap window'); @@ -876,6 +912,7 @@ export class GHCrawlService { effectiveSince: effectiveSince ?? null, limit: params.limit ?? null, includeComments, + includeDiscussions, fullReconcile: params.fullReconcile ?? false, isFullOpenScan, isOverlappingOpenScan, @@ -1274,7 +1311,7 @@ export class GHCrawlService { targetBySource.set(row.source_kind, row.embedding); } - const aggregated = new Map(); + const aggregated = new Map(); for (const row of rows) { if (row.id === targetRow.id) continue; const targetEmbedding = targetBySource.get(row.source_kind); @@ -1384,6 +1421,7 @@ export class GHCrawlService { sync?: boolean; embed?: boolean; cluster?: boolean; + includeDiscussions?: boolean; onProgress?: (message: string) => void; }): Promise { const selected = { @@ -1406,6 +1444,7 @@ export class GHCrawlService { sync = await this.syncRepository({ owner: params.owner, repo: params.repo, + includeDiscussions: params.includeDiscussions, onProgress: params.onProgress, }); } @@ -1850,7 +1889,7 @@ export class GHCrawlService { where repo_id = ? and state = 'open' and closed_at_local is null group by kind`, ) - .all(repoId) as Array<{ kind: 'issue' | 'pull_request'; count: number }>; + .all(repoId) as Array<{ kind: 'issue' | 'pull_request' | 'discussion'; count: number }>; const latestRun = this.getLatestClusterRun(repoId); const latestSync = (this.db .prepare("select finished_at from sync_runs where repo_id = ? and status = 'completed' order by id desc limit 1") @@ -2005,7 +2044,7 @@ export class GHCrawlService { close_reason_local: string | null; representative_thread_id: number | null; representative_number: number | null; - representative_kind: 'issue' | 'pull_request' | null; + representative_kind: 'issue' | 'pull_request' | 'discussion' | null; representative_title: string | null; latest_updated_at: string | null; issue_count: number; @@ -2071,7 +2110,7 @@ export class GHCrawlService { close_reason_local: string | null; representative_thread_id: number | null; representative_number: number | null; - representative_kind: 'issue' | 'pull_request' | null; + representative_kind: 'issue' | 'pull_request' | 'discussion' | null; representative_title: string | null; latest_updated_at: string | null; issue_count: number; @@ -2212,7 +2251,7 @@ export class GHCrawlService { private upsertThread( repoId: number, - kind: 'issue' | 'pull_request', + kind: 'issue' | 'pull_request' | 'discussion', payload: Record, pulledAt: string, ): number { @@ -2294,6 +2333,7 @@ export class GHCrawlService { where repo_id = ? and state = 'open' and closed_at_local is null + and kind in ('issue', 'pull_request') and (last_pulled_at is null or last_pulled_at < ?) order by number asc`, ) @@ -2377,6 +2417,7 @@ export class GHCrawlService { where repo_id = ? and state = 'open' and closed_at_local is null + and kind in ('issue', 'pull_request') and (last_pulled_at is null or last_pulled_at < ?) order by number asc`, )