Skip to content

Commit 88ede2a

Browse files
committed
fix broken githubUrl in sources
1 parent 4d1caff commit 88ede2a

File tree

3 files changed

+68
-40
lines changed

3 files changed

+68
-40
lines changed

mimir-rag/src/ingest/pipeline.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ export async function runIngestionPipeline(
125125
});
126126

127127
preparedChunks.forEach((chunk, index) => {
128-
const links = resolveSourceLinks(filepath, chunk.chunkTitle, appConfig);
128+
const links = resolveSourceLinks(filepath, chunk.chunkTitle, appConfig, document.sourceUrl);
129129
targetState.set(chunk.checksum, {
130130
filepath,
131131
chunkId: index,
@@ -164,7 +164,7 @@ export async function runIngestionPipeline(
164164
});
165165

166166
chunkedFile.chunks.forEach((chunk, index) => {
167-
const links = resolveSourceLinks(filepath, chunk.qualifiedName, appConfig);
167+
const links = resolveSourceLinks(filepath, chunk.qualifiedName, appConfig, document.sourceUrl);
168168
targetState.set(chunk.checksum, {
169169
filepath,
170170
chunkId: index,
@@ -349,7 +349,7 @@ export async function runIngestionPipeline(
349349
if (entry.chunk.sourceType === 'mdx') {
350350
const contextHeader = contexts[index]?.trim() ?? "";
351351
const contextualText = `${contextHeader}---${entry.chunk.chunk.chunkContent}`;
352-
const links = resolveSourceLinks(filepath, entry.chunk.chunk.chunkTitle, appConfig);
352+
const links = resolveSourceLinks(filepath, entry.chunk.chunk.chunkTitle, appConfig, document.sourceUrl);
353353
pendingEmbeddings.push({
354354
filepath,
355355
chunkId: entry.chunkId,
@@ -464,7 +464,7 @@ export async function runIngestionPipeline(
464464
const upsertPayload: DocumentChunk[] = pendingEmbeddings.map((entry, index) => {
465465
const links = entry.githubUrl
466466
? { githubUrl: entry.githubUrl, docsUrl: undefined, finalUrl: entry.githubUrl }
467-
: resolveSourceLinks(entry.filepath, entry.chunkTitle, appConfig);
467+
: resolveSourceLinks(entry.filepath, entry.chunkTitle, appConfig, documentMap.get(entry.filepath)?.sourceUrl);
468468
return {
469469
content: entry.content,
470470
contextualText: entry.contextualText,

mimir-rag/src/query/askAi.ts

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import type { SupabaseVectorStore } from "../supabase/client";
44
import type { RetrievedChunk } from "../supabase/types";
55
import type { Logger } from "pino";
66
import { getLogger } from "../utils/logger";
7+
import { resolveSourceLinks } from "../utils/sourceLinks";
78

89
export interface AskAiOptions {
910
question: string;
@@ -138,21 +139,25 @@ export async function askAi(
138139
}
139140
if (chunk.sources && chunk.sources.length > 0) {
140141
collectedSources.length = 0;
141-
// Map sources from matches (which have entityType, startLine, endLine)
142-
const sourceMap = new Map(matches.map(m => [`${m.filepath}:${m.chunkTitle}`, m]));
143-
collectedSources.push(...chunk.sources.map((src) => {
144-
const match = sourceMap.get(`${src.filepath}:${src.chunkTitle}`);
145-
return {
146-
filepath: src.filepath,
147-
chunkTitle: src.chunkTitle,
148-
githubUrl: match?.githubUrl,
149-
docsUrl: match?.docsUrl,
150-
finalUrl: src.url || match?.githubUrl || match?.finalUrl || src.filepath,
151-
entityType: match?.entityType,
152-
startLine: match?.startLine,
153-
endLine: match?.endLine,
154-
};
155-
}));
142+
collectedSources.push(...chunk.sources
143+
.filter((src) => src.filepath) // Filter out sources without filepath; during the first stream sources are empty
144+
.map((src) => {
145+
// Recompute URLs using resolveSourceLinks for consistency
146+
const links = resolveSourceLinks(
147+
src.filepath!,
148+
src.chunkTitle,
149+
context?.config,
150+
src.url
151+
);
152+
153+
return {
154+
filepath: src.filepath!,
155+
chunkTitle: src.chunkTitle,
156+
githubUrl: links.githubUrl,
157+
docsUrl: links.docsUrl,
158+
finalUrl: links.finalUrl || src.url || src.filepath!,
159+
};
160+
}));
156161
}
157162
}
158163
}
@@ -169,21 +174,25 @@ export async function askAi(
169174
signal: options.signal,
170175
});
171176

172-
// Map sources from matches (which have entityType, startLine, endLine)
173-
const sourceMap = new Map(matches.map(m => [`${m.filepath}:${m.chunkTitle}`, m]));
174-
const sources: AskAiSource[] = result.sources.map((src) => {
175-
const match = sourceMap.get(`${src.filepath}:${src.chunkTitle}`);
176-
return {
177-
filepath: src.filepath,
178-
chunkTitle: src.chunkTitle,
179-
githubUrl: match?.githubUrl,
180-
docsUrl: match?.docsUrl,
181-
finalUrl: src.url || match?.githubUrl || match?.finalUrl || src.filepath,
182-
entityType: match?.entityType,
183-
startLine: match?.startLine,
184-
endLine: match?.endLine,
185-
};
186-
});
177+
// Recompute URLs using resolveSourceLinks for consistency
178+
const sources: AskAiSource[] = result.sources
179+
.filter((src) => src.filepath) // Filter out sources without filepath
180+
.map((src) => {
181+
const links = resolveSourceLinks(
182+
src.filepath!,
183+
src.chunkTitle,
184+
context?.config,
185+
src.url
186+
);
187+
188+
return {
189+
filepath: src.filepath!,
190+
chunkTitle: src.chunkTitle,
191+
githubUrl: links.githubUrl,
192+
docsUrl: links.docsUrl,
193+
finalUrl: links.finalUrl || src.url || src.filepath!,
194+
};
195+
});
187196

188197
activeLogger.info({ answer: result.answer, sourcesCount: sources.length }, "answer from the AI");
189198

mimir-rag/src/utils/sourceLinks.ts

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,32 @@ export interface SourceLinkResult {
1919
export function resolveSourceLinks(
2020
filepath: string,
2121
chunkTitle?: string,
22-
config?: AppConfig
22+
config?: AppConfig,
23+
existingSourceUrl?: string
2324
): SourceLinkResult {
2425
const sanitizedTitle = sanitizeSourceTitle(chunkTitle, filepath);
2526
const slug = slugifyHeading(sanitizedTitle);
2627

27-
const baseGithubUrl = computeGithubUrl(filepath, config?.github);
28+
// For TypeScript files, compute GitHub URL (which checks codeUrl/codeDirectory)
29+
const isTypeScriptFile = filepath?.endsWith(".ts") || filepath?.endsWith(".tsx");
30+
let baseGithubUrl: string | undefined;
31+
if (isTypeScriptFile) {
32+
// Always compute GitHub URL for TypeScript files (uses codeUrl/codeDirectory if available)
33+
baseGithubUrl = computeGithubUrl(filepath, config?.github) || existingSourceUrl;
34+
} else {
35+
// For MDX files, compute it or use existing sourceUrl
36+
baseGithubUrl = computeGithubUrl(filepath, config?.github) || existingSourceUrl;
37+
}
38+
2839
const baseDocsUrl = computeDocsUrl(filepath, config?.docs);
2940

3041
const githubUrl = appendSlug(baseGithubUrl, slug);
3142
const docsUrl = appendSlug(baseDocsUrl, slug);
32-
const finalUrl = docsUrl ?? githubUrl ?? baseDocsUrl ?? baseGithubUrl;
43+
44+
// For TypeScript files, always prefer githubUrl over docsUrl
45+
const finalUrl = isTypeScriptFile
46+
? (githubUrl ?? baseGithubUrl)
47+
: (docsUrl ?? githubUrl ?? baseDocsUrl ?? baseGithubUrl);
3348

3449
return { githubUrl, docsUrl, finalUrl, sanitizedTitle, slug };
3550
}
@@ -41,14 +56,18 @@ export function sanitizeSourceTitle(title?: string, fallback?: string): string {
4156
}
4257

4358
function computeGithubUrl(filepath: string, githubConfig?: GithubConfig): string | undefined {
44-
if (!githubConfig?.githubUrl) {
59+
// Try githubUrl first, then fall back to codeUrl for TypeScript files
60+
const url = githubConfig?.githubUrl || githubConfig?.codeUrl;
61+
if (!url) {
4562
return undefined;
4663
}
4764

4865
try {
49-
const parsed = parseGithubUrl(githubConfig.githubUrl);
66+
const parsed = parseGithubUrl(url);
5067
const branch = githubConfig.branch ?? parsed.branch ?? DEFAULT_BRANCH;
51-
const scopedPath = joinRepoPaths(parsed.path, githubConfig.directory);
68+
// Use codeDirectory if codeUrl is used, otherwise use directory
69+
const directory = githubConfig.codeUrl ? githubConfig.codeDirectory : githubConfig.directory;
70+
const scopedPath = joinRepoPaths(parsed.path, directory);
5271
const repoPath = joinRepoPaths(scopedPath, filepath);
5372
return buildSourceUrl(parsed.owner, parsed.repo, branch, repoPath);
5473
} catch {

0 commit comments

Comments
 (0)