
Commit 1359aee

implement openai compatible endpoints; update llm provider implementations to return structured answers with sources; add mcp/match endpoint for semantic search

1 parent 4340815

14 files changed: +340 −358 lines

mimir-rag/README.md

Lines changed: 44 additions & 12 deletions
@@ -1,6 +1,6 @@
 # mimir-rag
 
-Utility CLI + API that ingests docs into Supabase and exposes `/ask` + `/ingest` endpoints.
+Utility CLI + API that ingests docs into Supabase and exposes OpenAI-compatible chat completions, MCP endpoints, and ingestion endpoints.
 
 ## Local workflow

@@ -66,9 +66,9 @@ Key configuration variables include:
 
 ## API Endpoints
 
-### POST /ask
+### POST /v1/chat/completions
 
-Query your documentation with authentication required.
+OpenAI-compatible chat completions endpoint that queries your documentation with RAG. Requires API key authentication.
 
 **Headers:**
 - `x-api-key: <MIMIR_SERVER_API_KEY>` or `Authorization: Bearer <MIMIR_SERVER_API_KEY>`
@@ -77,21 +77,21 @@ Query your documentation with authentication required.
 **Request body:**
 ```json
 {
-  "question": "How do I implement authentication?",
+  "messages": [
+    {
+      "role": "user",
+      "content": "How do I implement authentication?"
+    }
+  ],
   "matchCount": 10,
   "similarityThreshold": 0.2,
-  "systemPrompt": "You are a helpful coding assistant"
+  "systemPrompt": "You are a helpful coding assistant",
+  "stream": false
 }
 ```
 
 **Response:**
-```json
-{
-  "status": "ok",
-  "answer": "Based on the documentation...",
-  "sources": [...]
-}
-```
+OpenAI-compatible chat completion response format with retrieved documentation context.
 
 ### POST /mcp/ask
 
@@ -124,6 +124,38 @@ Query your documentation via MCP (Model Context Protocol) without server API key
 
 **Note:** The `/mcp/ask` endpoint bypasses the `MIMIR_SERVER_API_KEY` authentication and allows clients to specify their own LLM provider, model, and API key. This is designed for use with the [mimir-mcp](../mimir-mcp) MCP server.
 
+### POST /mcp/match
+
+Semantic search endpoint that returns matching documentation chunks without generating an AI response. No authentication required.
+
+**Headers:**
+- `Content-Type: application/json`
+
+**Request body:**
+```json
+{
+  "question": "How do I implement authentication?",
+  "matchCount": 10,
+  "similarityThreshold": 0.2
+}
+```
+
+**Response:**
+```json
+{
+  "status": "ok",
+  "matches": [
+    {
+      "title": "Authentication Guide",
+      "url": "https://example.com/docs/auth",
+      "similarity": 0.85
+    }
+  ]
+}
+```
+
+**Note:** This endpoint is faster than `/mcp/ask` since it only performs semantic search without LLM inference. Useful for discovering relevant documentation.
+
 ### POST /ingest
 
 Trigger documentation ingestion manually.
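
As a quick sanity check of the documented contract, here is a minimal TypeScript client sketch exercising both new endpoints. The base URL is an assumption (the README does not pin a host or port), and `MIMIR_SERVER_API_KEY` is read from the environment:

```ts
// Hypothetical client for the two endpoints documented above.
// Assumes the server listens on http://localhost:3000 (not specified in the README).
const BASE_URL = "http://localhost:3000";

async function askChat(question: string): Promise<unknown> {
  const res = await fetch(`${BASE_URL}/v1/chat/completions`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": process.env.MIMIR_SERVER_API_KEY ?? "",
    },
    body: JSON.stringify({
      messages: [{ role: "user", content: question }],
      matchCount: 10,
      similarityThreshold: 0.2,
      stream: false,
    }),
  });
  return res.json(); // OpenAI-compatible chat completion payload
}

async function matchDocs(question: string): Promise<unknown> {
  // /mcp/match requires no API key; it only runs semantic search.
  const res = await fetch(`${BASE_URL}/mcp/match`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ question, matchCount: 10, similarityThreshold: 0.2 }),
  });
  return res.json(); // { status: "ok", matches: [...] }
}
```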

mimir-rag/src/config/loadConfig.ts

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ export async function loadAppConfig(configPath?: string): Promise<AppConfig> {
      apiKey: getEnv("MIMIR_LLM_CHAT_API_KEY", false),
      baseUrl: getEnv("MIMIR_LLM_CHAT_BASE_URL", false),
      temperature: getEnvNumber("MIMIR_LLM_CHAT_TEMPERATURE", 0),
-      maxOutputTokens: getEnvNumber("MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS", 2000),
+      maxOutputTokens: getEnvNumber("MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS", 8000),
      limits: {
        concurrency: getEnvNumber("MIMIR_LLM_CHAT_LIMITS_CONCURRENCY", 8),
        maxRequestsPerMinute: getEnvNumber("MIMIR_LLM_CHAT_LIMITS_MAX_REQUESTS_PER_MINUTE", 500),
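
The only change here raises the default output-token budget from 2000 to 8000, presumably to leave room for the new structured answers-with-sources payload. A sketch of how that default resolves, assuming `getEnvNumber` follows the usual env-then-fallback pattern (its implementation is not part of this diff):

```ts
// Illustration only: not the real getEnvNumber from mimir-rag.
function getEnvNumberSketch(name: string, fallback: number): number {
  const raw = process.env[name];
  const parsed = raw === undefined ? NaN : Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

// With MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS unset, this now yields 8000 (previously 2000).
const maxOutputTokens = getEnvNumberSketch("MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS", 8000);
```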

mimir-rag/src/llm/base.ts

Lines changed: 7 additions & 7 deletions
@@ -3,7 +3,7 @@ import pLimit from "p-limit";
 import pRetry from "p-retry";
 import { ChatModelConfig, EmbeddingModelConfig } from "../config/types";
 import { batchChunks } from "../utils/batchChunks";
-import type { ChatProvider, EmbedOptions, EmbeddingProvider, GenerateAnswerOptions } from "./types";
+import type { ChatProvider, EmbedOptions, EmbeddingProvider, GenerateAnswerOptions, StructuredAnswerResult } from "./types";
 import Bottleneck from "bottleneck";
 import { Logger } from "pino";
 import { countTokensInBatch, countTokens } from "../utils/tokenEncoder";
@@ -134,9 +134,9 @@
     }
   }
 
-  async generateAnswer(options: GenerateAnswerOptions & { stream?: false }): Promise<string>;
-  async generateAnswer(options: GenerateAnswerOptions & { stream: true }): Promise<AsyncIterable<string>>;
-  async generateAnswer(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>> {
+  async generateAnswer(options: GenerateAnswerOptions & { stream?: false }): Promise<StructuredAnswerResult>;
+  async generateAnswer(options: GenerateAnswerOptions & { stream: true }): Promise<AsyncIterable<StructuredAnswerResult>>;
+  async generateAnswer(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>> {
     const tokens = this.estimateChatTokens(options);
     return this.scheduleWithRateLimits(tokens, () => this.complete(options), {
       logPrefix: `${this.config.provider}:chat`,
@@ -151,7 +151,7 @@
 
     const systemPrompt = "Please give a short succinct context (150-250 tokens) to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.";
 
-    const userPrompt = "Summarize how this chunk fits into the broader file. Highlight the chunks role, upstream dependencies, and any follow-on sections a reader should review."
+    const userPrompt = "Summarize how this chunk fits into the broader file. Highlight the chunk's role, upstream dependencies, and any follow-on sections a reader should review."
 
     const limit = pLimit(Math.max(1, this.concurrencyLimit));
     const response = await Promise.all(
@@ -167,7 +167,7 @@
       })))
     );
 
-    return response.map((summary) => summary.trim());
+    return response.map((result) => result.answer.trim());
   }
 
   protected estimateChatTokens(options: GenerateAnswerOptions): number {
@@ -188,7 +188,7 @@
     return tokens;
   }
 
-  protected abstract complete(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>>;
+  protected abstract complete(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>>;
 
   private async scheduleWithRateLimits<T>(tokens: number, task: () => Promise<T>, { logPrefix, signal }: ScheduleOptions): Promise<T> {
     await this.reserveTokens(tokens);
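
The diff references `StructuredAnswerResult` from `./types` without showing its definition. Judging from the `result.answer` access above and `answerWithSourcesSchema` in the prompt.ts diff below, it plausibly has this shape (a sketch, not the actual file):

```ts
// Presumed shape of StructuredAnswerResult; src/llm/types.ts is not in this diff,
// and the real definition may be derived via z.infer<typeof answerWithSourcesSchema>.
export interface StructuredAnswerSource {
  filepath: string;
  chunkTitle: string;
  url?: string;
}

export interface StructuredAnswerResult {
  sources: StructuredAnswerSource[];
  answer: string;
}
```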

mimir-rag/src/llm/prompt.ts

Lines changed: 41 additions & 7 deletions
@@ -1,23 +1,57 @@
 import type { DocumentChunk } from "../supabase/types";
 import type { GenerateAnswerOptions, contextualChunkInput } from "./types";
+import { z } from "zod";
 
 const DEFAULT_SYSTEM_PROMPT = [
-  "You are a meticulous assistant that answers questions using the provided documentation context.",
-  "Use only the supplied context to craft your answer.",
-  "If the answer cannot be determined from the context, say you do not know.",
-  "Whenever you reference a Source include an inline citation like [S2], and finish your response with a line formatted as `Sources: S1, S2` listing the unique sources you relied on.",
+  "You are a MeshJS expert assistant. Help developers with MeshJS questions using the provided context.",
+  "Use the documentation context to answer questions about MeshJS and Cardano development.",
+  "Provide accurate code examples and explanations based on the context provided.",
+  "",
+  "When answering:",
+  "- Give direct, helpful answers based on the context",
+  "- Include relevant code examples when available",
+  "- Explain concepts clearly for developers",
+  "- If the context doesn't cover the question, say so clearly.",
+  "- Do not invent or assume APIs, methods, or functionality not in the documentation.",
+  "",
+  "IMPORTANT:",
+  "- Do NOT add conclusions, summary sections, or 'For more information' references at the end",
+  "- Do NOT suggest referring to documentation or additional resources",
+  "- Sources are handled separately by the system - just provide the answer content",
+  "- End your response when the answer is complete, without extra closing remarks",
+  "",
+  "Be concise but thorough. Focus on practical, actionable guidance for MeshJS development.",
 ].join(" ");
 
+export const sourceSchema = z.object({
+  filepath: z.string().describe("The file path of the source"),
+  chunkTitle: z.string().describe("The title or description of the source chunk"),
+  url: z.string().optional().describe("The URL to access the source"),
+});
+
+export const answerWithSourcesSchema = z.object({
+  sources: z.array(sourceSchema).describe("Array of sources that were used to generate the answer. Provide this FIRST."),
+  answer: z.string().describe("The answer to the user's question"),
+});
+
 function formatDocumentChunks(chunks: DocumentChunk[]): string {
-  return chunks
+  const formattedChunks = chunks
     .map((chunk, index) => {
       const header = `Source ${index + 1}: ${chunk.filepath}#${chunk.chunkId}`;
       const title = chunk.chunkTitle ? ` (${chunk.chunkTitle})` : "";
       const body = chunk.contextualText?.trim() || chunk.content.trim();
       return `${header}${title}\n${body}`;
     })
-    .join("\n\n")
-    .trim();
+    .join("\n\n");
+
+  // Add available sources metadata for structured output
+  const availableSources = chunks.map((chunk, index) => {
+    const title = chunk.chunkTitle || `${chunk.filepath}#${chunk.chunkId}`;
+    const url = chunk.finalUrl || chunk.githubUrl || chunk.docsUrl || chunk.filepath;
+    return `${index + 1}. filepath: "${chunk.filepath}", chunkTitle: "${title}", url: "${url}"`;
+  }).join("\n");
+
+  return `${formattedChunks}\n\n---\n\nAvailable sources (select only the sources you actually used):\n${availableSources}`.trim();
 }
 
 function formatSingleChunkContext(context: contextualChunkInput): string {
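
To make the new trailing "Available sources" block concrete, this is roughly what `formatDocumentChunks` would now produce for a single chunk (all field values invented for illustration):

```ts
// Hypothetical input chunk:
const chunk = {
  filepath: "docs/auth.md",
  chunkId: 3,
  chunkTitle: "Authentication Guide",
  content: "Configure the provider with an API key...",
  finalUrl: "https://example.com/docs/auth",
};

// formatDocumentChunks([chunk]) would yield approximately:
//
//   Source 1: docs/auth.md#3 (Authentication Guide)
//   Configure the provider with an API key...
//
//   ---
//
//   Available sources (select only the sources you actually used):
//   1. filepath: "docs/auth.md", chunkTitle: "Authentication Guide", url: "https://example.com/docs/auth"
```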
mimir-rag/src/llm/providers/anthropic.ts

Lines changed: 18 additions & 30 deletions
@@ -1,32 +1,14 @@
 import { Logger } from "pino";
-import { BaseChatProvider, type ProviderRateLimits } from "../base";
-import type { ChatModelConfig, ProviderLimitsConfig } from "../../config/types";
-import type { GenerateAnswerOptions } from "../types";
-import { buildPromptMessages } from "../prompt";
+import { BaseChatProvider } from "../base";
+import type { ChatModelConfig } from "../../config/types";
+import type { GenerateAnswerOptions, StructuredAnswerResult } from "../types";
+import { buildPromptMessages, answerWithSourcesSchema } from "../prompt";
 import { createAnthropic } from '@ai-sdk/anthropic';
-import { generateText, streamText } from 'ai';
+import { generateObject, streamObject } from 'ai';
+import { resolveBaseUrl, mergeLimits } from "../../utils/providerUtils";
 
 const ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com/";
 
-function resolveBaseUrl(url?: string): string {
-  if (!url) {
-    return ANTHROPIC_DEFAULT_BASE_URL;
-  }
-
-  return url.endsWith("/") ? url : `${url}/`;
-}
-
-function mergeLimits(defaults: ProviderRateLimits, override?: ProviderLimitsConfig): ProviderRateLimits {
-  if (!override) {
-    return defaults;
-  }
-
-  return {
-    ...defaults,
-    ...override,
-  };
-}
-
 export class AnthropicChatProvider extends BaseChatProvider {
   private readonly sdk: ReturnType<typeof createAnthropic>;
 
@@ -51,11 +33,11 @@ export class AnthropicChatProvider extends BaseChatProvider {
 
     this.sdk = createAnthropic({
       apiKey: config.apiKey,
-      baseURL: resolveBaseUrl(config.baseUrl),
+      baseURL: resolveBaseUrl(config.baseUrl, ANTHROPIC_DEFAULT_BASE_URL),
     });
   }
 
-  protected async complete(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>> {
+  protected async complete(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>> {
     const { system, user } = buildPromptMessages(options);
     const model = this.sdk(this.config.model);
 
@@ -69,11 +51,17 @@ export class AnthropicChatProvider extends BaseChatProvider {
     };
 
     if (options.stream) {
-      const { textStream } = await streamText(baseOptions);
-      return textStream;
+      const { partialObjectStream } = streamObject({
+        ...baseOptions,
+        schema: answerWithSourcesSchema,
+      });
+      return partialObjectStream as AsyncIterable<StructuredAnswerResult>;
     }
 
-    const { text } = await generateText(baseOptions);
-    return text.trim();
+    const { object } = await generateObject({
+      ...baseOptions,
+      schema: answerWithSourcesSchema,
+    });
+    return object as StructuredAnswerResult;
   }
 }
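
A consequence of switching from `streamText` to `streamObject` is that streaming consumers now receive progressively filled objects rather than text deltas. A minimal consumption sketch (provider construction and `renderAnswer` are hypothetical placeholders):

```ts
// partialObjectStream yields partial objects, so fields may be absent or
// truncated until the stream completes — guard before using them.
const stream = await provider.generateAnswer({ ...options, stream: true });
for await (const partial of stream) {
  if (partial.answer !== undefined) {
    renderAnswer(partial.answer); // e.g. re-render the growing answer in a UI
  }
}
```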

mimir-rag/src/llm/providers/google.ts

Lines changed: 19 additions & 31 deletions
@@ -1,32 +1,14 @@
 import { Logger } from "pino";
-import { BaseChatProvider, BaseEmbeddingProvider, type ProviderRateLimits } from "../base";
-import type { ChatModelConfig, EmbeddingModelConfig, ProviderLimitsConfig } from "../../config/types";
-import type { EmbedOptions, GenerateAnswerOptions } from "../types";
-import { buildPromptMessages } from "../prompt";
+import { BaseChatProvider, BaseEmbeddingProvider } from "../base";
+import type { ChatModelConfig, EmbeddingModelConfig } from "../../config/types";
+import type { EmbedOptions, GenerateAnswerOptions, StructuredAnswerResult } from "../types";
+import { buildPromptMessages, answerWithSourcesSchema } from "../prompt";
 import { createGoogleGenerativeAI } from '@ai-sdk/google';
-import { embedMany, generateText, streamText } from 'ai';
+import { embedMany, generateObject, streamObject } from 'ai';
+import { resolveBaseUrl, mergeLimits } from "../../utils/providerUtils";
 
 const GEMINI_DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/";
 
-function resolveBaseUrl(url?: string): string {
-  if (!url) {
-    return GEMINI_DEFAULT_BASE_URL;
-  }
-
-  return url.endsWith("/") ? url : `${url}/`;
-}
-
-function mergeLimits(defaults: ProviderRateLimits, override?: ProviderLimitsConfig): ProviderRateLimits {
-  if (!override) {
-    return defaults;
-  }
-
-  return {
-    ...defaults,
-    ...override,
-  };
-}
-
 export class GoogleEmbeddingProvider extends BaseEmbeddingProvider {
   private readonly sdk: ReturnType<typeof createGoogleGenerativeAI>;
 
@@ -52,7 +34,7 @@ export class GoogleEmbeddingProvider extends BaseEmbeddingProvider {
 
     this.sdk = createGoogleGenerativeAI({
      apiKey: config.apiKey,
-      baseURL: resolveBaseUrl(config.baseUrl),
+      baseURL: resolveBaseUrl(config.baseUrl, GEMINI_DEFAULT_BASE_URL),
     });
   }
 
@@ -92,11 +74,11 @@ export class GoogleChatProvider extends BaseChatProvider {
 
     this.sdk = createGoogleGenerativeAI({
       apiKey: config.apiKey,
-      baseURL: resolveBaseUrl(config.baseUrl),
+      baseURL: resolveBaseUrl(config.baseUrl, GEMINI_DEFAULT_BASE_URL),
     });
   }
 
-  protected async complete(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>> {
+  protected async complete(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>> {
     const { system, user } = buildPromptMessages(options);
     const model = this.sdk(this.config.model);
 
@@ -110,11 +92,17 @@
     };
 
     if (options.stream) {
-      const { textStream } = await streamText(baseOptions);
-      return textStream;
+      const { partialObjectStream } = streamObject({
+        ...baseOptions,
+        schema: answerWithSourcesSchema,
+      });
+      return partialObjectStream as AsyncIterable<StructuredAnswerResult>;
     }
 
-    const { text } = await generateText(baseOptions);
-    return text.trim();
+    const { object } = await generateObject({
+      ...baseOptions,
+      schema: answerWithSourcesSchema,
+    });
+    return object as StructuredAnswerResult;
   }
 }
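
Both providers now import `resolveBaseUrl` and `mergeLimits` from `../../utils/providerUtils`, a file this diff does not show. Given the identical per-provider copies deleted above, the shared module is presumably along these lines, with the default URL promoted to a parameter (a reconstruction, not the committed file):

```ts
// Presumed src/utils/providerUtils.ts, reconstructed from the helpers removed
// in anthropic.ts and google.ts.
import type { ProviderRateLimits } from "../llm/base";
import type { ProviderLimitsConfig } from "../config/types";

export function resolveBaseUrl(url: string | undefined, defaultUrl: string): string {
  if (!url) {
    return defaultUrl;
  }
  return url.endsWith("/") ? url : `${url}/`;
}

export function mergeLimits(defaults: ProviderRateLimits, override?: ProviderLimitsConfig): ProviderRateLimits {
  if (!override) {
    return defaults;
  }
  return { ...defaults, ...override };
}
```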
