diff --git a/README.md b/README.md index 19eb8c7..adb181f 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,8 @@ These commands are intended more for scripts, bots, and agent integrations than ```bash ghcrawl threads owner/repo --numbers 42,43,44 ghcrawl threads owner/repo --numbers 42,43,44 --include-closed +ghcrawl pr-template owner/repo --template-file ./pull_request_template.md +ghcrawl pr-template owner/repo --max-distance 200 ghcrawl author owner/repo --login lqquan ghcrawl close-thread owner/repo --number 42 ghcrawl close-cluster owner/repo --id 123 @@ -178,6 +180,8 @@ ghcrawl search owner/repo --query "download stalls" Use `threads --numbers ...` when you want several specific issue or PR records in one CLI call instead of paying process startup overhead repeatedly. +Use `pr-template` when you want to flag pull requests that still contain the repository PR template verbatim or are only a small edit-distance away from it. Pass `--template-file` to force a local template snapshot, or omit it to let `ghcrawl` probe common GitHub PR-template paths for the target repo. When the body contains the `## Summary` ... `## Risks and Mitigations` template block, `levenshteinDistance` is computed on that extracted section and `fullBodyLevenshteinDistance` keeps the broader whole-description score. + Use `author --login ...` when you want all currently open issue/PR records from one user plus the strongest stored same-author similarity match for each item. By default, JSON list commands filter out locally closed issues/PRs and completely closed clusters. Use `--include-closed` when you need to inspect those records too. 
@@ -220,6 +224,7 @@ The skill is built around the stable JSON CLI surface and is intentionally conse ghcrawl doctor --json ghcrawl refresh owner/repo ghcrawl threads owner/repo --numbers 42,43,44 +ghcrawl pr-template owner/repo --max-distance 200 ghcrawl clusters owner/repo --min-size 10 --limit 20 --sort recent ghcrawl cluster-detail owner/repo --id 123 --member-limit 20 --body-chars 280 ``` diff --git a/apps/cli/README.md b/apps/cli/README.md index 4e3f6ce..5fd1bc2 100644 --- a/apps/cli/README.md +++ b/apps/cli/README.md @@ -151,6 +151,8 @@ These commands are intended more for scripts, bots, and agent integrations than ```bash ghcrawl threads owner/repo --numbers 42,43,44 +ghcrawl pr-template owner/repo --template-file ./pull_request_template.md +ghcrawl pr-template owner/repo --max-distance 200 ghcrawl author owner/repo --login lqquan ghcrawl cluster owner/repo ghcrawl clusters owner/repo --min-size 10 --limit 20 @@ -160,6 +162,8 @@ ghcrawl search owner/repo --query "download stalls" Use `threads --numbers ...` when you want several specific issue or PR records in one CLI call instead of paying process startup overhead repeatedly. +Use `pr-template` when you want to flag pull requests that still contain the repository PR template verbatim or are only a small edit-distance away from it. Pass `--template-file` to force a local template snapshot, or omit it to let `ghcrawl` probe common GitHub PR-template paths for the target repo. When the body contains the `## Summary` ... `## Risks and Mitigations` template block, `levenshteinDistance` is computed on that extracted section and `fullBodyLevenshteinDistance` keeps the broader whole-description score. + Use `author --login ...` when you want all currently open issue/PR records from one user plus the strongest stored same-author similarity match for each item. 
## Cost To Operate @@ -195,6 +199,7 @@ The skill is built around the stable JSON CLI surface and is intentionally conse ghcrawl doctor --json ghcrawl refresh owner/repo ghcrawl threads owner/repo --numbers 42,43,44 +ghcrawl pr-template owner/repo --max-distance 200 ghcrawl clusters owner/repo --min-size 10 --limit 20 --sort recent ghcrawl cluster-detail owner/repo --id 123 --member-limit 20 --body-chars 280 ``` diff --git a/apps/cli/src/main.test.ts b/apps/cli/src/main.test.ts index a0c99ce..2ef9a6d 100644 --- a/apps/cli/src/main.test.ts +++ b/apps/cli/src/main.test.ts @@ -20,6 +20,7 @@ test('run prints usage with no command', async () => { assert.match(output, /\n version\n/); assert.match(output, /refresh /); assert.match(output, /threads /); + assert.match(output, /pr-template /); assert.match(output, /author --login /); assert.match(output, /close-thread --number /); assert.match(output, /close-cluster --id /); @@ -43,6 +44,7 @@ test('run prints usage for help flag', async () => { assert.match(output, /\n version\n/); assert.match(output, /refresh /); assert.match(output, /threads /); + assert.match(output, /pr-template /); assert.match(output, /author --login /); assert.match(output, /close-thread --number /); assert.match(output, /tui \[owner\/repo\]/); @@ -168,6 +170,14 @@ test('parseRepoFlags accepts kind filter for threads', () => { assert.equal(parsed.repo, 'openclaw'); assert.equal(parsed.values.kind, 'pull_request'); }); + +test('parseRepoFlags accepts pr-template options', () => { + const parsed = parseRepoFlags(['openclaw/openclaw', '--template-file', './template.md', '--max-distance', '200']); + assert.equal(parsed.owner, 'openclaw'); + assert.equal(parsed.repo, 'openclaw'); + assert.equal(parsed.values['template-file'], './template.md'); + assert.equal(parsed.values['max-distance'], '200'); +}); test('resolveSinceValue keeps ISO timestamps', () => { assert.equal(resolveSinceValue('2026-03-01T00:00:00Z'), '2026-03-01T00:00:00.000Z'); }); diff --git 
/**
 * Parses a CLI flag value as a non-negative safe integer.
 *
 * @param name - Flag name; used only to build the error message.
 * @param value - Raw string value taken from the command line.
 * @returns The parsed integer (zero is allowed).
 * @throws Error with message `Invalid <name>: <value>` when the value is
 *   blank, is not a safe integer, or is negative.
 */
function parseNonNegativeInteger(name: string, value: string): number {
  // Number('') and Number('   ') both evaluate to 0, so a blank value must be
  // rejected explicitly; otherwise it would be silently accepted as zero.
  const parsed = value.trim().length === 0 ? Number.NaN : Number(value);
  if (!Number.isSafeInteger(parsed) || parsed < 0) {
    throw new Error(`Invalid ${name}: ${value}`);
  }
  return parsed;
}
{ + text: readFileSync(templatePath, 'utf8'), + source: { + mode: 'file' as const, + label: templatePath, + }, + } + : await getService().getPullRequestTemplate({ owner, repo }); + const result = getService().findPullRequestTemplateMatches({ + owner, + repo, + templateText: template.text, + templateSource: template.source, + maxDistance: + typeof values['max-distance'] === 'string' + ? parseNonNegativeInteger('max-distance', values['max-distance']) + : undefined, + limit: typeof values.limit === 'string' ? parsePositiveInteger('limit', values.limit) : undefined, + includeClosed: values['include-closed'] === true, + }); + stdout.write(`${JSON.stringify(result, null, 2)}\n`); + return; + } case 'author': { const { owner, repo, values } = parseRepoFlags(rest); if (typeof values.login !== 'string' || values.login.trim().length === 0) { diff --git a/packages/api-contract/src/contracts.test.ts b/packages/api-contract/src/contracts.test.ts index 3c88d08..3e7d0f5 100644 --- a/packages/api-contract/src/contracts.test.ts +++ b/packages/api-contract/src/contracts.test.ts @@ -1,7 +1,13 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { actionRequestSchema, healthResponseSchema, neighborsResponseSchema, searchResponseSchema } from './contracts.js'; +import { + actionRequestSchema, + healthResponseSchema, + neighborsResponseSchema, + prTemplateMatchesResponseSchema, + searchResponseSchema, +} from './contracts.js'; test('health schema accepts configured status payload', () => { const parsed = healthResponseSchema.parse({ @@ -87,3 +93,62 @@ test('neighbors schema accepts repository, source thread, and neighbor list', () assert.equal(parsed.neighbors[0].number, 43); }); + +test('pr template matches schema accepts heuristic match payload', () => { + const parsed = prTemplateMatchesResponseSchema.parse({ + repository: { + id: 1, + owner: 'openclaw', + name: 'openclaw', + fullName: 'openclaw/openclaw', + githubRepoId: null, + updatedAt: new 
Date().toISOString(), + }, + template: { + source: { + mode: 'github', + label: '.github/pull_request_template.md', + }, + length: 128, + }, + filters: { + exact: true, + maxDistance: 200, + includeClosed: false, + }, + matches: [ + { + thread: { + id: 11, + repoId: 1, + number: 43, + kind: 'pull_request', + state: 'open', + isClosed: false, + closedAtGh: null, + closedAtLocal: null, + closeReasonLocal: null, + title: 'Fix downloader hang', + body: 'Checklist here', + authorLogin: 'alice', + htmlUrl: 'https://github.com/openclaw/openclaw/pull/43', + labels: ['bug'], + updatedAtGh: new Date().toISOString(), + clusterId: null, + }, + exactMatch: true, + exactMatchOffset: 12, + templateSectionFound: true, + templateSectionExactMatch: false, + templateSectionStartOffset: 12, + templateSectionEndOffset: 140, + levenshteinDistance: 42, + fullBodyLevenshteinDistance: 55, + bodyLength: 150, + }, + ], + }); + + assert.equal(parsed.matches[0].exactMatch, true); + assert.equal(parsed.matches[0].levenshteinDistance, 42); +}); diff --git a/packages/api-contract/src/contracts.ts b/packages/api-contract/src/contracts.ts index 1771e66..6c5b43e 100644 --- a/packages/api-contract/src/contracts.ts +++ b/packages/api-contract/src/contracts.ts @@ -36,6 +36,41 @@ export const threadSchema = z.object({ }); export type ThreadDto = z.infer; +export const prTemplateSourceSchema = z.object({ + mode: z.enum(['file', 'github']), + label: z.string(), +}); +export type PrTemplateSourceDto = z.infer; + +export const prTemplateMatchSchema = z.object({ + thread: threadSchema, + exactMatch: z.boolean(), + exactMatchOffset: z.number().int().nonnegative().nullable(), + templateSectionFound: z.boolean(), + templateSectionExactMatch: z.boolean(), + templateSectionStartOffset: z.number().int().nonnegative().nullable(), + templateSectionEndOffset: z.number().int().nonnegative().nullable(), + levenshteinDistance: z.number().int().nonnegative().nullable(), + fullBodyLevenshteinDistance: 
z.number().int().nonnegative().nullable(), + bodyLength: z.number().int().nonnegative(), +}); +export type PrTemplateMatchDto = z.infer; + +export const prTemplateMatchesResponseSchema = z.object({ + repository: repositorySchema, + template: z.object({ + source: prTemplateSourceSchema, + length: z.number().int().positive(), + }), + filters: z.object({ + exact: z.boolean(), + maxDistance: z.number().int().nonnegative().nullable(), + includeClosed: z.boolean(), + }), + matches: z.array(prTemplateMatchSchema), +}); +export type PrTemplateMatchesResponse = z.infer; + export const healthResponseSchema = z.object({ ok: z.boolean(), configPath: z.string(), diff --git a/packages/api-core/src/api/server.test.ts b/packages/api-core/src/api/server.test.ts index 0661f04..21e286e 100644 --- a/packages/api-core/src/api/server.test.ts +++ b/packages/api-core/src/api/server.test.ts @@ -30,6 +30,7 @@ test('health endpoint returns contract payload', async () => { github: { checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -80,6 +81,7 @@ test('neighbors endpoint returns contract payload', async () => { github: { checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -172,6 +174,7 @@ test('threads endpoint can filter by a bulk number list', async () => { github: { checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -241,6 +244,7 @@ test('author-threads endpoint returns one author with strongest same-author matc github: { checkAuth: async () => undefined, getRepo: async () 
=> ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -320,6 +324,7 @@ test('close-thread and includeClosed thread routes expose locally closed items', github: { checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -403,6 +408,7 @@ test('server returns 400 for malformed request inputs', async () => { github: { checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -457,6 +463,7 @@ test('cluster summary and detail endpoints return contract payloads', async () = github: { checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), diff --git a/packages/api-core/src/cluster/perf.integration.ts b/packages/api-core/src/cluster/perf.integration.ts index a5796e9..e2357e6 100644 --- a/packages/api-core/src/cluster/perf.integration.ts +++ b/packages/api-core/src/cluster/perf.integration.ts @@ -95,6 +95,7 @@ function createGitHubStub(): GHCrawlService['github'] { return { checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), diff --git a/packages/api-core/src/github/client.ts b/packages/api-core/src/github/client.ts index dc2f733..25ce60e 100644 --- a/packages/api-core/src/github/client.ts +++ b/packages/api-core/src/github/client.ts @@ -5,6 +5,13 @@ import { Octokit } from 'octokit'; export 
type GitHubClient = { checkAuth: (reporter?: GitHubReporter) => Promise; getRepo: (owner: string, repo: string, reporter?: GitHubReporter) => Promise>; + getFileContents: ( + owner: string, + repo: string, + filePath: string, + ref?: string, + reporter?: GitHubReporter, + ) => Promise; listRepositoryIssues: ( owner: string, repo: string, @@ -167,6 +174,23 @@ export function makeGitHubClient(options: RequestOptions): GitHubClient { return response.data as Record; }); }, + async getFileContents(owner, repo, filePath, ref, reporter) { + return request(`GET /repos/${owner}/${repo}/contents/${filePath}`, reporter, async (octokit) => { + const response = await octokit.rest.repos.getContent({ + owner, + repo, + path: filePath, + ref, + mediaType: { + format: 'raw', + }, + }); + if (typeof response.data !== 'string') { + throw new Error(`GitHub content for ${filePath} was not returned as raw text.`); + } + return response.data; + }); + }, async listRepositoryIssues(owner, repo, since, limit, reporter, state = 'open') { return paginate( `GET /repos/${owner}/${repo}/issues state=${state} per_page=100`, diff --git a/packages/api-core/src/heuristics/pr-template.test.ts b/packages/api-core/src/heuristics/pr-template.test.ts new file mode 100644 index 0000000..290a0ea --- /dev/null +++ b/packages/api-core/src/heuristics/pr-template.test.ts @@ -0,0 +1,82 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { + boundedLevenshteinDistance, + extractPrTemplateSection, + findExactTemplateOffset, + normalizePrTemplateText, +} from './pr-template.js'; + +test('normalizePrTemplateText trims outer whitespace, strips bom, and normalizes newlines', () => { + assert.equal(normalizePrTemplateText('\uFEFF\r\n## Checklist\r\n- [ ] item\r\n'), '## Checklist\n- [ ] item'); +}); + +test('findExactTemplateOffset returns the first offset for exact template containment', () => { + assert.equal(findExactTemplateOffset('before\nTEMPLATE\nafter', 'TEMPLATE'), 7); + 
assert.equal(findExactTemplateOffset('body only', 'missing'), null); +}); + +test('normalizing CRLF and LF text preserves exact template matches across line ending styles', () => { + const unixTemplate = '## Summary\n\n- Problem:\n- Fix:'; + const windowsTemplate = '## Summary\r\n\r\n- Problem:\r\n- Fix:'; + const unixBody = 'Intro\n\n## Summary\n\n- Problem:\n- Fix:\n\nFooter'; + const windowsBody = 'Intro\r\n\r\n## Summary\r\n\r\n- Problem:\r\n- Fix:\r\n\r\nFooter'; + + assert.equal( + findExactTemplateOffset(normalizePrTemplateText(windowsBody), normalizePrTemplateText(unixTemplate)), + 7, + ); + assert.equal( + findExactTemplateOffset(normalizePrTemplateText(unixBody), normalizePrTemplateText(windowsTemplate)), + 7, + ); +}); + +test('extractPrTemplateSection slices the summary-through-risks chunk when both anchors are present', () => { + const template = + 'Intro\n\n## Summary\n\n- Problem:\n\n## Risks and Mitigations\n\nList only real risks.\n\n- Risk:\n - Mitigation:'; + const body = + 'Preamble\n\n## Summary\n\n- Problem: changed\n\n## Risks and Mitigations\n\nList only real risks.\n\n- Risk:\n - Mitigation:\n\nFooter'; + const section = extractPrTemplateSection(body, template); + + assert.equal(section.startOffset, 10); + assert.equal(section.endOffset, 114); + assert.equal( + section.bodySection, + '## Summary\n\n- Problem: changed\n\n## Risks and Mitigations\n\nList only real risks.\n\n- Risk:\n - Mitigation:', + ); +}); + +test('extractPrTemplateSection returns no body section when the ending anchor is missing', () => { + const template = + '## Summary\n\n- Problem:\n\n## Risks and Mitigations\n\nList only real risks.\n\n- Risk:\n - Mitigation:'; + const body = '## Summary\n\n- Problem:\n\n## Risks and Mitigations\n\nChanged tail'; + const section = extractPrTemplateSection(body, template); + + assert.equal(section.startOffset, 0); + assert.equal(section.endOffset, null); + assert.equal(section.bodySection, null); +}); + +test('extractPrTemplateSection 
/** Heading that opens the template block used for section-level comparison. */
export const PULL_REQUEST_TEMPLATE_SECTION_START_MARKER = '## Summary';
/** Heading that opens the last part of the template block. */
export const PULL_REQUEST_TEMPLATE_SECTION_END_MARKER = '## Risks and Mitigations';

/**
 * Result of locating the template block inside a pull request body.
 *
 * Offsets are string indices into the `body` argument that was passed to
 * {@link extractPrTemplateSection}; `endOffset` is exclusive.
 */
export type PrTemplateSectionMatch = {
  templateSection: string;
  bodySection: string | null;
  startOffset: number | null;
  endOffset: number | null;
};

/**
 * Canonicalizes template or body text before comparison: drops one leading
 * byte-order mark, converts CRLF and lone CR line endings to LF, and trims
 * surrounding whitespace.
 */
export function normalizePrTemplateText(value: string): string {
  return value.replace(/^\uFEFF/, '').replace(/\r\n?/g, '\n').trim();
}

/**
 * Returns the index of the first verbatim occurrence of `template` in `body`.
 *
 * @returns The offset, or `null` when `template` is empty or does not occur.
 */
export function findExactTemplateOffset(body: string, template: string): number | null {
  // An empty template would "match" at offset 0 of every body; treat it as no match.
  if (!template) {
    return null;
  }
  const offset = body.indexOf(template);
  return offset >= 0 ? offset : null;
}

/**
 * Extracts the portion of `body` that corresponds to the template block
 * running from the start marker through the end of the template.
 *
 * - `templateSection` is the template from its start marker onward when both
 *   markers are present in the template; otherwise it is the whole template.
 * - The body block starts at the first start marker in `body` and ends where
 *   the template's tail (everything from the template's end marker onward)
 *   occurs verbatim at or after that position.
 * - When the body has no start marker, or the template has no end marker, all
 *   body-related fields are `null`.
 * - When the start marker is found but the tail is not, `startOffset` is set
 *   while `bodySection` and `endOffset` are `null`.
 *
 * NOTE(review): if the template contains the end marker but not the start
 * marker, the body block is still extracted and `templateSection` is the whole
 * template — confirm this is intended by callers.
 */
export function extractPrTemplateSection(body: string, template: string): PrTemplateSectionMatch {
  const templateStartOffset = template.indexOf(PULL_REQUEST_TEMPLATE_SECTION_START_MARKER);
  // A negative start offset is treated by indexOf as 0, i.e. search from the beginning.
  const templateEndOffset = template.indexOf(PULL_REQUEST_TEMPLATE_SECTION_END_MARKER, templateStartOffset);
  const templateSection =
    templateStartOffset >= 0 && templateEndOffset >= 0
      ? template.slice(templateStartOffset)
      : template;

  const bodyStartOffset = body.indexOf(PULL_REQUEST_TEMPLATE_SECTION_START_MARKER);
  if (bodyStartOffset < 0 || templateEndOffset < 0) {
    return {
      templateSection,
      bodySection: null,
      startOffset: null,
      endOffset: null,
    };
  }

  // The body block is considered complete only if the template's closing part
  // appears unchanged somewhere after the body's start marker.
  const templateTail = template.slice(templateEndOffset);
  const bodyTailOffset = body.indexOf(templateTail, bodyStartOffset);
  if (bodyTailOffset < 0) {
    return {
      templateSection,
      bodySection: null,
      startOffset: bodyStartOffset,
      endOffset: null,
    };
  }

  const bodyEndOffset = bodyTailOffset + templateTail.length;
  return {
    templateSection,
    bodySection: body.slice(bodyStartOffset, bodyEndOffset),
    startOffset: bodyStartOffset,
    endOffset: bodyEndOffset,
  };
}

/**
 * Computes the Levenshtein distance between `left` and `right`, giving up as
 * soon as the distance is known to exceed `maxDistance`.
 *
 * Only cells with `|row - column| <= maxDistance` are evaluated, and only the
 * cells bordering that band are reset per row, so each row costs time
 * proportional to the band width rather than to `right.length`.
 *
 * @param left - First string (compared by UTF-16 code unit).
 * @param right - Second string.
 * @param maxDistance - Inclusive upper bound; must be a non-negative safe integer.
 * @returns The exact distance when it is `<= maxDistance`, otherwise `null`.
 * @throws Error when `maxDistance` is negative or not a safe integer.
 */
export function boundedLevenshteinDistance(left: string, right: string, maxDistance: number): number | null {
  if (maxDistance < 0 || !Number.isSafeInteger(maxDistance)) {
    throw new Error(`Invalid maxDistance: ${maxDistance}`);
  }

  if (left === right) {
    return 0;
  }

  const leftLength = left.length;
  const rightLength = right.length;
  // The length difference is a lower bound on the edit distance.
  if (Math.abs(leftLength - rightLength) > maxDistance) {
    return null;
  }
  if (leftLength === 0) {
    return rightLength <= maxDistance ? rightLength : null;
  }
  if (rightLength === 0) {
    return leftLength <= maxDistance ? leftLength : null;
  }

  // Any value above maxDistance is equivalent for our purposes; the sentinel
  // marks cells outside the band as "too far".
  const sentinel = maxDistance + 1;
  let previous: number[] = Array.from({ length: rightLength + 1 }, (_, column) => column);
  let current: number[] = new Array<number>(rightLength + 1).fill(sentinel);

  for (let row = 1; row <= leftLength; row += 1) {
    const minColumn = Math.max(1, row - maxDistance);
    const maxColumn = Math.min(rightLength, row + maxDistance);
    if (minColumn > maxColumn) {
      return null;
    }

    current[0] = row;
    if (minColumn > 1) {
      // Left border of the band: read as the "insertion" neighbour of minColumn.
      current[minColumn - 1] = sentinel;
    }

    const leftChar = left[row - 1];
    let rowMin = sentinel;
    for (let column = minColumn; column <= maxColumn; column += 1) {
      const substitutionCost = leftChar === right[column - 1] ? 0 : 1;
      const insertion = current[column - 1] + 1;
      const deletion = previous[column] + 1;
      const substitution = previous[column - 1] + substitutionCost;
      const value = Math.min(insertion, deletion, substitution);
      current[column] = value;
      if (value < rowMin) {
        rowMin = value;
      }
    }

    if (maxColumn < rightLength) {
      // Right border of the band: the next row reads this cell as its
      // "deletion" neighbour, and the buffer still holds data from two rows ago.
      current[maxColumn + 1] = sentinel;
    }

    // Every cell in the band already exceeds the bound, so later rows cannot recover.
    if (rowMin > maxDistance) {
      return null;
    }

    const swap = previous;
    previous = current;
    current = swap;
  }

  return previous[rightLength] <= maxDistance ? previous[rightLength] : null;
}
includes hydrated human comments when includeComments { checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => { throw new Error('not expected'); @@ -588,6 +594,7 @@ test('purgeComments removes hydrated comments and refreshes canonical documents' const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => { throw new Error('not expected'); @@ -685,6 +692,7 @@ test('embedRepository batches multi-source embeddings and skips unchanged inputs { checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => { throw new Error('not expected'); @@ -794,6 +802,7 @@ test('embedRepository truncates oversized inputs before submission', async () => github: { checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => { throw new Error('not expected'); @@ -918,6 +927,7 @@ test('embedRepository isolates a failing oversized item from a mixed batch and r github: { checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => { throw new Error('not expected'); @@ -1038,6 +1048,7 @@ test('listNeighbors returns exact nearest neighbors for an embedded thread', () const service = makeTestService({ checkAuth: async () => 
undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1121,6 +1132,7 @@ test('listAuthorThreads returns one author view with strongest same-author match const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1194,6 +1206,7 @@ test('clusterRepository emits timed progress updates while identifying similarit const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1250,6 +1263,7 @@ test('clusterRepository merges source kinds into one edge without directional du const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1312,6 +1326,7 @@ test('clusterRepository prunes older cluster runs for the repo after a successfu const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1372,6 +1387,7 @@ test('tui snapshot returns mixed issue and pull request counts with default rece const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), 
@@ -1472,6 +1488,7 @@ test('tui cluster detail and thread detail expose members, summaries, and neighb const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1575,6 +1592,7 @@ test('getTuiThreadDetail prefers stored cluster neighbors over exact embedding s const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1639,6 +1657,7 @@ test('refreshRepository runs sync, embed, and cluster in order and returns the c { checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [ { id: 100, @@ -1710,6 +1729,7 @@ test('agent cluster summary and detail dumps expose repo stats, snippets, and su const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -1856,6 +1876,7 @@ test('getTuiThreadDetail can skip neighbor loading for fast browse paths', () => const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => { throw new Error('not expected'); @@ -1936,6 +1957,7 @@ test('local thread closure updates default thread filters and auto-closes fully const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), 
+ getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -2021,6 +2043,7 @@ test('manual cluster closure is hidden from JSON summaries by default but remain const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({}), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => ({}), getPull: async () => ({}), @@ -2091,6 +2114,7 @@ test('syncRepository reconciles stale open threads and marks confirmed closures const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async (_owner, _repo, _since, _limit, _reporter, state = 'open') => { if (state === 'closed') { closedListCalls += 1; @@ -2198,6 +2222,7 @@ test('syncRepository treats missing stale pull requests as closed and continues' const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => { listRepositoryIssuesCalls += 1; return listRepositoryIssuesCalls === 1 @@ -2284,6 +2309,7 @@ test('syncRepository skips stale-open reconciliation for filtered crawls', async const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async (_owner, _repo, _since, limit) => { listRepositoryIssuesCalls += 1; return listRepositoryIssuesCalls === 1 @@ -2348,6 +2374,7 @@ test('syncRepository leaves unseen stale open items alone by default when closed const service = makeTestService({ checkAuth: async () => undefined, 
getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async (_owner, _repo, _since, _limit, _reporter, state = 'open') => { if (state === 'closed') { return []; @@ -2413,6 +2440,7 @@ test('syncRepository performs direct stale-open reconciliation when fullReconcil const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async (_owner, _repo, _since, _limit, _reporter, state = 'open') => { if (state === 'closed') { return []; @@ -2490,6 +2518,7 @@ test('syncRepository derives the default overlapping since window from the last const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async (_owner, _repo, since, _limit, _reporter, state = 'open') => { if (state === 'closed') { closedSinceValues.push(since); @@ -2591,6 +2620,7 @@ test('syncRepository uses an explicit since window for both open and closed over const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async (_owner, _repo, since, _limit, _reporter, state = 'open') => { if (state === 'closed') { closedSinceValues.push(since); @@ -2663,6 +2693,7 @@ test('syncRepository skips the closed overlap sweep on the first full scan with const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async (_owner, _repo, since, _limit, _reporter, state = 'open') => 
{ if (state === 'closed') { closedSinceValues.push(since); @@ -2718,10 +2749,405 @@ test('syncRepository skips the closed overlap sweep on the first full scan with } }); +test('findPullRequestTemplateMatches returns exact and near-template pull requests', () => { + const service = makeTestService({ + checkAuth: async () => undefined, + getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { + throw new Error('not expected'); + }, + listRepositoryIssues: async () => [], + getIssue: async () => { + throw new Error('not expected'); + }, + getPull: async () => { + throw new Error('not expected'); + }, + listIssueComments: async () => [], + listPullReviews: async () => [], + listPullReviewComments: async () => [], + }); + + try { + const now = '2026-03-09T00:00:00Z'; + service.db + .prepare( + `insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at) + values (?, ?, ?, ?, ?, ?, ?)`, + ) + .run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{"default_branch":"main"}', now); + const insertThread = service.db.prepare( + `insert into threads ( + id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url, + labels_json, assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh, + merged_at_gh, first_pulled_at, last_pulled_at, updated_at + ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ); + insertThread.run( + 11, + 1, + '101', + 43, + 'pull_request', + 'open', + 'Fix downloader hang', + 'Lead-in note.\n\n## Summary\n- [ ] explain the fix\n- [ ] add tests', + 'alice', + 'User', + 'https://github.com/openclaw/openclaw/pull/43', + '[]', + '[]', + '{}', + 'hash-43', + 0, + now, + now, + null, + null, + now, + now, + now, + ); + insertThread.run( + 12, + 1, + '102', + 44, + 'pull_request', + 'open', + 'Partial template edits', + '## Summary\n- [ ] explain teh fix\n- [ ] add tests soon', + 
'bob', + 'User', + 'https://github.com/openclaw/openclaw/pull/44', + '[]', + '[]', + '{}', + 'hash-44', + 0, + now, + now, + null, + null, + now, + now, + now, + ); + insertThread.run( + 13, + 1, + '103', + 45, + 'pull_request', + 'open', + 'Real write-up', + 'Implemented the fix, added tests, and documented the migration.', + 'carol', + 'User', + 'https://github.com/openclaw/openclaw/pull/45', + '[]', + '[]', + '{}', + 'hash-45', + 0, + now, + now, + null, + null, + now, + now, + now, + ); + + const result = service.findPullRequestTemplateMatches({ + owner: 'openclaw', + repo: 'openclaw', + templateText: '## Summary\n- [ ] explain the fix\n- [ ] add tests', + templateSource: { + mode: 'file', + label: '/tmp/pr-template.md', + }, + maxDistance: 12, + }); + + assert.equal(result.template.length, 48); + assert.deepEqual(result.matches.map((match) => match.thread.number), [43, 44]); + assert.equal(result.matches[0]?.exactMatch, true); + assert.equal(result.matches[0]?.exactMatchOffset, 15); + assert.equal(result.matches[0]?.templateSectionFound, false); + assert.equal(result.matches[0]?.templateSectionExactMatch, false); + assert.equal(result.matches[0]?.levenshteinDistance, null); + assert.equal(result.matches[0]?.fullBodyLevenshteinDistance, null); + assert.equal(result.matches[1]?.exactMatch, false); + assert.equal(result.matches[1]?.templateSectionFound, false); + assert.equal(result.matches[1]?.levenshteinDistance, 7); + assert.equal(result.matches[1]?.fullBodyLevenshteinDistance, 7); + } finally { + service.close(); + } +}); + +test('findPullRequestTemplateMatches prefers section distance when summary-to-risks anchors bound the copied template', () => { + const service = makeTestService({ + checkAuth: async () => undefined, + getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { + throw new Error('not expected'); + }, + listRepositoryIssues: async () => [], + getIssue: async () => { + throw new Error('not expected'); 
+ }, + getPull: async () => { + throw new Error('not expected'); + }, + listIssueComments: async () => [], + listPullReviews: async () => [], + listPullReviewComments: async () => [], + }); + + try { + const now = '2026-03-09T00:00:00Z'; + const templateText = + '## Summary\n\nDescribe the problem and fix in 2-5 bullets:\n\n- Problem:\n\n## Change Type (select all)\n\n- [ ] Bug fix\n\n## Risks and Mitigations\n\nList only real risks for this PR. Add/remove entries as needed. If none, write `None`.\n\n- Risk:\n - Mitigation:'; + service.db + .prepare( + `insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at) + values (?, ?, ?, ?, ?, ?, ?)`, + ) + .run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{"default_branch":"main"}', now); + service.db + .prepare( + `insert into threads ( + id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url, + labels_json, assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh, + merged_at_gh, first_pulled_at, last_pulled_at, updated_at + ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ) + .run( + 21, + 1, + '201', + 88, + 'pull_request', + 'open', + 'Template barely changed', + '- Added a short preamble\n- Kept the rest of the template\n\n## Summary\r\n\r\nDescribe the problem and fix in 2-5 bullets:\r\n\r\n- Problem:\r\n\r\n## Change Type (select all)\r\n\r\n- [x] Bug fix\r\n\r\n## Risks and Mitigations\r\n\r\nList only real risks for this PR. Add/remove entries as needed. 
If none, write `None`.\r\n\r\n- Risk:\r\n - Mitigation:\r\n', + 'alice', + 'User', + 'https://github.com/openclaw/openclaw/pull/88', + '[]', + '[]', + '{}', + 'hash-88', + 0, + now, + now, + null, + null, + now, + now, + now, + ); + + const result = service.findPullRequestTemplateMatches({ + owner: 'openclaw', + repo: 'openclaw', + templateText, + templateSource: { + mode: 'file', + label: '/tmp/openclaw-template.md', + }, + maxDistance: 20, + }); + + assert.deepEqual(result.matches.map((match) => match.thread.number), [88]); + assert.equal(result.matches[0]?.exactMatch, false); + assert.equal(result.matches[0]?.templateSectionFound, true); + assert.equal(result.matches[0]?.templateSectionExactMatch, false); + assert.equal(result.matches[0]?.levenshteinDistance, 1); + assert.equal(result.matches[0]?.fullBodyLevenshteinDistance, null); + } finally { + service.close(); + } +}); + +test('findPullRequestTemplateMatches normalizes LF and CRLF bodies before exact template comparison', () => { + const service = makeTestService({ + checkAuth: async () => undefined, + getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async () => { + throw new Error('not expected'); + }, + listRepositoryIssues: async () => [], + getIssue: async () => { + throw new Error('not expected'); + }, + getPull: async () => { + throw new Error('not expected'); + }, + listIssueComments: async () => [], + listPullReviews: async () => [], + listPullReviewComments: async () => [], + }); + + try { + const now = '2026-03-09T00:00:00Z'; + service.db + .prepare( + `insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at) + values (?, ?, ?, ?, ?, ?, ?)`, + ) + .run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{"default_branch":"main"}', now); + const insertThread = service.db.prepare( + `insert into threads ( + id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url, + labels_json, 
assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh, + merged_at_gh, first_pulled_at, last_pulled_at, updated_at + ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ); + insertThread.run( + 31, + 1, + '301', + 91, + 'pull_request', + 'open', + 'Windows body line endings', + 'Intro\r\n\r\n## Summary\r\n\r\n- Problem:\r\n- Fix:\r\n\r\nFooter', + 'alice', + 'User', + 'https://github.com/openclaw/openclaw/pull/91', + '[]', + '[]', + '{}', + 'hash-91', + 0, + now, + now, + null, + null, + now, + now, + now, + ); + insertThread.run( + 32, + 1, + '302', + 92, + 'pull_request', + 'open', + 'Unix body line endings', + 'Intro\n\n## Summary\n\n- Problem:\n- Fix:\n\nFooter', + 'bob', + 'User', + 'https://github.com/openclaw/openclaw/pull/92', + '[]', + '[]', + '{}', + 'hash-92', + 0, + now, + now, + null, + null, + now, + now, + now, + ); + + const windowsTemplate = '## Summary\r\n\r\n- Problem:\r\n- Fix:'; + const unixTemplate = '## Summary\n\n- Problem:\n- Fix:'; + + const windowsResult = service.findPullRequestTemplateMatches({ + owner: 'openclaw', + repo: 'openclaw', + templateText: unixTemplate, + templateSource: { + mode: 'file', + label: '/tmp/unix-template.md', + }, + maxDistance: 10, + }); + const unixResult = service.findPullRequestTemplateMatches({ + owner: 'openclaw', + repo: 'openclaw', + templateText: windowsTemplate, + templateSource: { + mode: 'file', + label: '/tmp/windows-template.md', + }, + maxDistance: 10, + }); + + const windowsMatch = windowsResult.matches.find((match) => match.thread.number === 91); + const unixMatch = unixResult.matches.find((match) => match.thread.number === 92); + + assert.equal(windowsMatch?.exactMatch, true); + assert.equal(windowsMatch?.exactMatchOffset, 7); + assert.equal(unixMatch?.exactMatch, true); + assert.equal(unixMatch?.exactMatchOffset, 7); + } finally { + service.close(); + } +}); + +test('getPullRequestTemplate auto-discovers a common GitHub 
template path', async () => { + const attemptedPaths: string[] = []; + const service = makeTestService({ + checkAuth: async () => undefined, + getRepo: async () => ({ id: 1, full_name: 'openclaw/openclaw' }), + getFileContents: async (_owner, _repo, filePath, ref) => { + attemptedPaths.push(`${ref ?? 'none'}:${filePath}`); + if (filePath === 'pull_request_template.md') { + return '\n## Summary\n- [ ] explain the fix\n'; + } + const error = new Error('Not Found') as Error & { status?: number }; + error.status = 404; + throw error; + }, + listRepositoryIssues: async () => [], + getIssue: async () => { + throw new Error('not expected'); + }, + getPull: async () => { + throw new Error('not expected'); + }, + listIssueComments: async () => [], + listPullReviews: async () => [], + listPullReviewComments: async () => [], + }); + + try { + const now = '2026-03-09T00:00:00Z'; + service.db + .prepare( + `insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at) + values (?, ?, ?, ?, ?, ?, ?)`, + ) + .run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{"default_branch":"main"}', now); + + const result = await service.getPullRequestTemplate({ owner: 'openclaw', repo: 'openclaw' }); + + assert.equal(result.source.mode, 'github'); + assert.equal(result.source.label, 'pull_request_template.md'); + assert.equal(result.text, '## Summary\n- [ ] explain the fix'); + assert.deepEqual(attemptedPaths.slice(0, 3), [ + 'main:.github/pull_request_template.md', + 'main:.github/PULL_REQUEST_TEMPLATE.md', + 'main:pull_request_template.md', + ]); + } finally { + service.close(); + } +}); + test('repository-scoped reads and neighbors do not leak across repos in the same database', () => { const service = makeTestService({ checkAuth: async () => undefined, getRepo: async () => ({ id: 1, full_name: 'owner-one/repo-one' }), + getFileContents: async () => { throw new Error("not expected"); }, listRepositoryIssues: async () => [], getIssue: async () => { 
throw new Error('not expected'); diff --git a/packages/api-core/src/service.ts b/packages/api-core/src/service.ts index b03674f..a0119b3 100644 --- a/packages/api-core/src/service.ts +++ b/packages/api-core/src/service.ts @@ -17,6 +17,7 @@ import { embedResultSchema, healthResponseSchema, neighborsResponseSchema, + prTemplateMatchesResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, @@ -34,6 +35,7 @@ import { type EmbedResultDto, type HealthResponse, type NeighborsResponse, + type PrTemplateMatchesResponse, type RefreshResponse, type RepositoriesResponse, type RepositoryDto, @@ -61,6 +63,12 @@ import { migrate } from './db/migrate.js'; import { openDb, type SqliteDatabase } from './db/sqlite.js'; import { buildCanonicalDocument, isBotLikeAuthor } from './documents/normalize.js'; import { makeGitHubClient, type GitHubClient } from './github/client.js'; +import { + boundedLevenshteinDistance, + extractPrTemplateSection, + findExactTemplateOffset, + normalizePrTemplateText, +} from './heuristics/pr-template.js'; import { OpenAiProvider, type AiProvider } from './openai/provider.js'; import { cosineSimilarity, normalizeEmbedding, rankNearestNeighbors } from './search/exact.js'; @@ -85,6 +93,16 @@ type ThreadRow = { last_pulled_at: string | null; }; +type RepositoryRow = { + id: number; + owner: string; + name: string; + full_name: string; + github_repo_id: string | null; + raw_json: string; + updated_at: string; +}; + type CommentSeed = { githubId: string; commentType: string; @@ -258,6 +276,7 @@ type SyncOptions = { type SearchResultInternal = SearchResponse; type NeighborsResultInternal = NeighborsResponse; +type PullRequestTemplateMatchesInternal = PrTemplateMatchesResponse; const SYNC_BATCH_SIZE = 100; const SYNC_BATCH_DELAY_MS = 5000; @@ -268,6 +287,16 @@ const EMBED_ESTIMATED_CHARS_PER_TOKEN = 3; const EMBED_MAX_ITEM_TOKENS = 7000; const EMBED_MAX_BATCH_TOKENS = 250000; const EMBED_TRUNCATION_MARKER = '\n\n[truncated for 
embedding]'; +const COMMON_PULL_REQUEST_TEMPLATE_PATHS = [ + '.github/pull_request_template.md', + '.github/PULL_REQUEST_TEMPLATE.md', + 'pull_request_template.md', + 'PULL_REQUEST_TEMPLATE.md', + 'docs/pull_request_template.md', + 'docs/PULL_REQUEST_TEMPLATE.md', + '.github/PULL_REQUEST_TEMPLATE/default.md', + '.github/PULL_REQUEST_TEMPLATE/pull_request_template.md', +] as const; function nowIso(): string { return new Date().toISOString(); @@ -335,6 +364,10 @@ function parseArray(value: string): string[] { return JSON.parse(value) as string[]; } +function parseObject(value: string): Record { + return JSON.parse(value) as Record; +} + function userLogin(payload: Record): string | null { const user = payload.user as Record | undefined; const login = user?.login; @@ -682,6 +715,139 @@ export class GHCrawlService { }); } + async getPullRequestTemplate(params: { owner: string; repo: string }): Promise<{ text: string; source: { mode: 'github'; label: string } }> { + const repository = this.getStoredRepositoryRow(params.owner, params.repo); + const raw = parseObject(repository.raw_json); + const defaultBranch = + typeof raw.default_branch === 'string' && raw.default_branch.trim().length > 0 ? raw.default_branch : undefined; + const github = this.requireGithub(); + + for (const candidatePath of COMMON_PULL_REQUEST_TEMPLATE_PATHS) { + try { + const text = await github.getFileContents(params.owner, params.repo, candidatePath, defaultBranch); + const normalized = normalizePrTemplateText(text); + if (normalized.length === 0) { + continue; + } + return { + text: normalized, + source: { + mode: 'github', + label: candidatePath, + }, + }; + } catch (error) { + if (isMissingGitHubResourceError(error)) { + continue; + } + throw error; + } + } + + throw new Error( + `No pull request template was found for ${repository.full_name}. 
Tried ${COMMON_PULL_REQUEST_TEMPLATE_PATHS.join(', ')}.`, + ); + } + + findPullRequestTemplateMatches(params: { + owner: string; + repo: string; + templateText: string; + templateSource: { mode: 'file' | 'github'; label: string }; + maxDistance?: number; + limit?: number; + includeClosed?: boolean; + }): PullRequestTemplateMatchesInternal { + const repository = this.requireRepository(params.owner, params.repo); + const normalizedTemplate = normalizePrTemplateText(params.templateText); + if (!normalizedTemplate) { + throw new Error('Template text is empty after normalization.'); + } + if (params.maxDistance !== undefined && (!Number.isSafeInteger(params.maxDistance) || params.maxDistance < 0)) { + throw new Error(`Invalid maxDistance: ${params.maxDistance}`); + } + + let sql = `select * from threads where repo_id = ? and kind = 'pull_request'`; + const args: Array = [repository.id]; + if (!params.includeClosed) { + sql += " and state = 'open' and closed_at_local is null"; + } + sql += ' order by updated_at_gh desc, number desc'; + const rows = this.db.prepare(sql).all(...args) as ThreadRow[]; + + const matches = rows + .map((row) => { + const normalizedBody = normalizePrTemplateText(row.body ?? ''); + const exactMatchOffset = findExactTemplateOffset(normalizedBody, normalizedTemplate); + const templateSection = extractPrTemplateSection(normalizedBody, normalizedTemplate); + const templateSectionLevenshteinDistance = + params.maxDistance === undefined + ? null + : templateSection.bodySection + ? boundedLevenshteinDistance(templateSection.bodySection, templateSection.templateSection, params.maxDistance) + : null; + const fullBodyLevenshteinDistance = + params.maxDistance === undefined + ? 
null + : boundedLevenshteinDistance(normalizedBody, normalizedTemplate, params.maxDistance); + return { + thread: threadToDto(row), + exactMatch: exactMatchOffset !== null, + exactMatchOffset, + templateSectionFound: templateSection.bodySection !== null, + templateSectionExactMatch: + templateSection.bodySection !== null && templateSection.bodySection === templateSection.templateSection, + templateSectionStartOffset: templateSection.startOffset, + templateSectionEndOffset: templateSection.endOffset, + levenshteinDistance: templateSectionLevenshteinDistance ?? fullBodyLevenshteinDistance, + fullBodyLevenshteinDistance, + bodyLength: normalizedBody.length, + }; + }) + .filter( + (match) => + match.exactMatch || match.templateSectionExactMatch || match.levenshteinDistance !== null || match.fullBodyLevenshteinDistance !== null, + ) + .sort((left, right) => { + if (left.exactMatch !== right.exactMatch) { + return left.exactMatch ? -1 : 1; + } + if (left.templateSectionExactMatch !== right.templateSectionExactMatch) { + return left.templateSectionExactMatch ? -1 : 1; + } + if (left.templateSectionFound !== right.templateSectionFound) { + return left.templateSectionFound ? -1 : 1; + } + const leftDistance = left.levenshteinDistance ?? Number.MAX_SAFE_INTEGER; + const rightDistance = right.levenshteinDistance ?? Number.MAX_SAFE_INTEGER; + if (leftDistance !== rightDistance) { + return leftDistance - rightDistance; + } + const leftFullBodyDistance = left.fullBodyLevenshteinDistance ?? Number.MAX_SAFE_INTEGER; + const rightFullBodyDistance = right.fullBodyLevenshteinDistance ?? Number.MAX_SAFE_INTEGER; + if (leftFullBodyDistance !== rightFullBodyDistance) { + return leftFullBodyDistance - rightFullBodyDistance; + } + const leftUpdatedAt = left.thread.updatedAtGh ? Date.parse(left.thread.updatedAtGh) : 0; + const rightUpdatedAt = right.thread.updatedAtGh ? 
Date.parse(right.thread.updatedAtGh) : 0; + return rightUpdatedAt - leftUpdatedAt || right.thread.number - left.thread.number; + }); + + return prTemplateMatchesResponseSchema.parse({ + repository, + template: { + source: params.templateSource, + length: normalizedTemplate.length, + }, + filters: { + exact: true, + maxDistance: params.maxDistance ?? null, + includeClosed: params.includeClosed === true, + }, + matches: params.limit ? matches.slice(0, params.limit) : matches, + }); + } + closeThreadLocally(params: { owner: string; repo: string; threadNumber: number }): CloseResponse { const repository = this.requireRepository(params.owner, params.repo); const row = this.db @@ -2185,13 +2351,17 @@ export class GHCrawlService { return this.github as GitHubClient; } - private requireRepository(owner: string, repo: string): RepositoryDto { + private getStoredRepositoryRow(owner: string, repo: string): RepositoryRow { const fullName = `${owner}/${repo}`; - const row = this.db.prepare('select * from repositories where full_name = ? limit 1').get(fullName) as Record | undefined; + const row = this.db.prepare('select * from repositories where full_name = ? limit 1').get(fullName) as RepositoryRow | undefined; if (!row) { throw new Error(`Repository ${fullName} not found. Run sync first.`); } - return repositoryToDto(row); + return row; + } + + private requireRepository(owner: string, repo: string): RepositoryDto { + return repositoryToDto(this.getStoredRepositoryRow(owner, repo)); } private upsertRepository(owner: string, repo: string, payload: Record): number {