
Commit 1359aee

implement openai compatible endpoints; update llm provider implementations to return structured answers with sources; add mcp/match endpoint for semantic search

1 parent 4340815

14 files changed: +340 −358 lines

mimir-rag/README.md

Lines changed: 44 additions & 12 deletions
@@ -1,6 +1,6 @@
 # mimir-rag
 
-Utility CLI + API that ingests docs into Supabase and exposes `/ask` + `/ingest` endpoints.
+Utility CLI + API that ingests docs into Supabase and exposes OpenAI-compatible chat completions, MCP endpoints, and ingestion endpoints.
 
 ## Local workflow

@@ -66,9 +66,9 @@ Key configuration variables include:
 
 ## API Endpoints
 
-### POST /ask
+### POST /v1/chat/completions
 
-Query your documentation with authentication required.
+OpenAI-compatible chat completions endpoint that queries your documentation with RAG. Requires API key authentication.
 
 **Headers:**
 - `x-api-key: <MIMIR_SERVER_API_KEY>` or `Authorization: Bearer <MIMIR_SERVER_API_KEY>`
@@ -77,21 +77,21 @@ Query your documentation with authentication required.
 **Request body:**
 ```json
 {
-  "question": "How do I implement authentication?",
+  "messages": [
+    {
+      "role": "user",
+      "content": "How do I implement authentication?"
+    }
+  ],
   "matchCount": 10,
   "similarityThreshold": 0.2,
-  "systemPrompt": "You are a helpful coding assistant"
+  "systemPrompt": "You are a helpful coding assistant",
+  "stream": false
 }
 ```
 
 **Response:**
-```json
-{
-  "status": "ok",
-  "answer": "Based on the documentation...",
-  "sources": [...]
-}
-```
+OpenAI-compatible chat completion response format with retrieved documentation context.
 
 ### POST /mcp/ask
 
@@ -124,6 +124,38 @@ Query your documentation via MCP (Model Context Protocol) without server API key
 
 **Note:** The `/mcp/ask` endpoint bypasses the `MIMIR_SERVER_API_KEY` authentication and allows clients to specify their own LLM provider, model, and API key. This is designed for use with the [mimir-mcp](../mimir-mcp) MCP server.
 
+### POST /mcp/match
+
+Semantic search endpoint that returns matching documentation chunks without generating an AI response. No authentication required.
+
+**Headers:**
+- `Content-Type: application/json`
+
+**Request body:**
+```json
+{
+  "question": "How do I implement authentication?",
+  "matchCount": 10,
+  "similarityThreshold": 0.2
+}
+```
+
+**Response:**
+```json
+{
+  "status": "ok",
+  "matches": [
+    {
+      "title": "Authentication Guide",
+      "url": "https://example.com/docs/auth",
+      "similarity": 0.85
+    }
+  ]
+}
+```
+
+**Note:** This endpoint is faster than `/mcp/ask` since it only performs semantic search without LLM inference. Useful for discovering relevant documentation.
+
 ### POST /ingest
 
 Trigger documentation ingestion manually.
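
As a quick sanity check of the documented contract, here is a minimal TypeScript client sketch exercising both new endpoints. The base URL is an assumption (the README does not pin a host or port), and `MIMIR_SERVER_API_KEY` is read from the environment:

```ts
// Hypothetical client for the two endpoints documented above.
// Assumes the server listens on http://localhost:3000 (not specified in the README).
const BASE_URL = "http://localhost:3000";

async function askChat(question: string): Promise<unknown> {
  const res = await fetch(`${BASE_URL}/v1/chat/completions`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": process.env.MIMIR_SERVER_API_KEY ?? "",
    },
    body: JSON.stringify({
      messages: [{ role: "user", content: question }],
      matchCount: 10,
      similarityThreshold: 0.2,
      stream: false,
    }),
  });
  return res.json(); // OpenAI-compatible chat completion payload
}

async function matchDocs(question: string): Promise<unknown> {
  // /mcp/match requires no API key; it only runs semantic search.
  const res = await fetch(`${BASE_URL}/mcp/match`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ question, matchCount: 10, similarityThreshold: 0.2 }),
  });
  return res.json(); // { status: "ok", matches: [...] }
}
```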

mimir-rag/src/config/loadConfig.ts

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ export async function loadAppConfig(configPath?: string): Promise<AppConfig> {
      apiKey: getEnv("MIMIR_LLM_CHAT_API_KEY", false),
      baseUrl: getEnv("MIMIR_LLM_CHAT_BASE_URL", false),
      temperature: getEnvNumber("MIMIR_LLM_CHAT_TEMPERATURE", 0),
-      maxOutputTokens: getEnvNumber("MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS", 2000),
+      maxOutputTokens: getEnvNumber("MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS", 8000),
      limits: {
        concurrency: getEnvNumber("MIMIR_LLM_CHAT_LIMITS_CONCURRENCY", 8),
        maxRequestsPerMinute: getEnvNumber("MIMIR_LLM_CHAT_LIMITS_MAX_REQUESTS_PER_MINUTE", 500),
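
The only change here raises the default output-token budget from 2000 to 8000, presumably to leave room for the new structured answers-with-sources payload. A sketch of how that default resolves, assuming `getEnvNumber` follows the usual env-then-fallback pattern (its implementation is not part of this diff):

```ts
// Illustration only: not the real getEnvNumber from mimir-rag.
function getEnvNumberSketch(name: string, fallback: number): number {
  const raw = process.env[name];
  const parsed = raw === undefined ? NaN : Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

// With MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS unset, this now yields 8000 (previously 2000).
const maxOutputTokens = getEnvNumberSketch("MIMIR_LLM_CHAT_MAX_OUTPUT_TOKENS", 8000);
```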

mimir-rag/src/llm/base.ts

Lines changed: 7 additions & 7 deletions
@@ -3,7 +3,7 @@ import pLimit from "p-limit";
 import pRetry from "p-retry";
 import { ChatModelConfig, EmbeddingModelConfig } from "../config/types";
 import { batchChunks } from "../utils/batchChunks";
-import type { ChatProvider, EmbedOptions, EmbeddingProvider, GenerateAnswerOptions } from "./types";
+import type { ChatProvider, EmbedOptions, EmbeddingProvider, GenerateAnswerOptions, StructuredAnswerResult } from "./types";
 import Bottleneck from "bottleneck";
 import { Logger } from "pino";
 import { countTokensInBatch, countTokens } from "../utils/tokenEncoder";
@@ -134,9 +134,9 @@
     }
   }
 
-  async generateAnswer(options: GenerateAnswerOptions & { stream?: false }): Promise<string>;
-  async generateAnswer(options: GenerateAnswerOptions & { stream: true }): Promise<AsyncIterable<string>>;
-  async generateAnswer(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>> {
+  async generateAnswer(options: GenerateAnswerOptions & { stream?: false }): Promise<StructuredAnswerResult>;
+  async generateAnswer(options: GenerateAnswerOptions & { stream: true }): Promise<AsyncIterable<StructuredAnswerResult>>;
+  async generateAnswer(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>> {
     const tokens = this.estimateChatTokens(options);
     return this.scheduleWithRateLimits(tokens, () => this.complete(options), {
       logPrefix: `${this.config.provider}:chat`,
@@ -151,7 +151,7 @@
 
     const systemPrompt = "Please give a short succinct context (150-250 tokens) to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.";
 
-    const userPrompt = "Summarize how this chunk fits into the broader file. Highlight the chunks role, upstream dependencies, and any follow-on sections a reader should review."
+    const userPrompt = "Summarize how this chunk fits into the broader file. Highlight the chunk's role, upstream dependencies, and any follow-on sections a reader should review."
 
     const limit = pLimit(Math.max(1, this.concurrencyLimit));
     const response = await Promise.all(
@@ -167,7 +167,7 @@
       })))
     );
 
-    return response.map((summary) => summary.trim());
+    return response.map((result) => result.answer.trim());
   }
 
   protected estimateChatTokens(options: GenerateAnswerOptions): number {
@@ -188,7 +188,7 @@
     return tokens;
   }
 
-  protected abstract complete(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>>;
+  protected abstract complete(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>>;
 
   private async scheduleWithRateLimits<T>(tokens: number, task: () => Promise<T>, { logPrefix, signal }: ScheduleOptions): Promise<T> {
     await this.reserveTokens(tokens);
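
The diff references `StructuredAnswerResult` from `./types` without showing its definition. Judging from the `result.answer` access above and `answerWithSourcesSchema` in the prompt.ts diff below, it plausibly has this shape (a sketch, not the actual file):

```ts
// Presumed shape of StructuredAnswerResult; src/llm/types.ts is not in this diff,
// and the real definition may be derived via z.infer<typeof answerWithSourcesSchema>.
export interface StructuredAnswerSource {
  filepath: string;
  chunkTitle: string;
  url?: string;
}

export interface StructuredAnswerResult {
  sources: StructuredAnswerSource[];
  answer: string;
}
```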

mimir-rag/src/llm/prompt.ts

Lines changed: 41 additions & 7 deletions
@@ -1,23 +1,57 @@
 import type { DocumentChunk } from "../supabase/types";
 import type { GenerateAnswerOptions, contextualChunkInput } from "./types";
+import { z } from "zod";
 
 const DEFAULT_SYSTEM_PROMPT = [
-  "You are a meticulous assistant that answers questions using the provided documentation context.",
-  "Use only the supplied context to craft your answer.",
-  "If the answer cannot be determined from the context, say you do not know.",
-  "Whenever you reference a Source include an inline citation like [S2], and finish your response with a line formatted as `Sources: S1, S2` listing the unique sources you relied on.",
+  "You are a MeshJS expert assistant. Help developers with MeshJS questions using the provided context.",
+  "Use the documentation context to answer questions about MeshJS and Cardano development.",
+  "Provide accurate code examples and explanations based on the context provided.",
+  "",
+  "When answering:",
+  "- Give direct, helpful answers based on the context",
+  "- Include relevant code examples when available",
+  "- Explain concepts clearly for developers",
+  "- If the context doesn't cover the question, say so clearly.",
+  "- Do not invent or assume APIs, methods, or functionality not in the documentation.",
+  "",
+  "IMPORTANT:",
+  "- Do NOT add conclusions, summary sections, or 'For more information' references at the end",
+  "- Do NOT suggest referring to documentation or additional resources",
+  "- Sources are handled separately by the system - just provide the answer content",
+  "- End your response when the answer is complete, without extra closing remarks",
+  "",
+  "Be concise but thorough. Focus on practical, actionable guidance for MeshJS development.",
 ].join(" ");
 
+export const sourceSchema = z.object({
+  filepath: z.string().describe("The file path of the source"),
+  chunkTitle: z.string().describe("The title or description of the source chunk"),
+  url: z.string().optional().describe("The URL to access the source"),
+});
+
+export const answerWithSourcesSchema = z.object({
+  sources: z.array(sourceSchema).describe("Array of sources that were used to generate the answer. Provide this FIRST."),
+  answer: z.string().describe("The answer to the user's question"),
+});
+
 function formatDocumentChunks(chunks: DocumentChunk[]): string {
-  return chunks
+  const formattedChunks = chunks
     .map((chunk, index) => {
       const header = `Source ${index + 1}: ${chunk.filepath}#${chunk.chunkId}`;
       const title = chunk.chunkTitle ? ` (${chunk.chunkTitle})` : "";
       const body = chunk.contextualText?.trim() || chunk.content.trim();
       return `${header}${title}\n${body}`;
     })
-    .join("\n\n")
-    .trim();
+    .join("\n\n");
+
+  // Add available sources metadata for structured output
+  const availableSources = chunks.map((chunk, index) => {
+    const title = chunk.chunkTitle || `${chunk.filepath}#${chunk.chunkId}`;
+    const url = chunk.finalUrl || chunk.githubUrl || chunk.docsUrl || chunk.filepath;
+    return `${index + 1}. filepath: "${chunk.filepath}", chunkTitle: "${title}", url: "${url}"`;
+  }).join("\n");
+
+  return `${formattedChunks}\n\n---\n\nAvailable sources (select only the sources you actually used):\n${availableSources}`.trim();
 }
 
 function formatSingleChunkContext(context: contextualChunkInput): string {
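
To make the new trailing "Available sources" block concrete, this is roughly what `formatDocumentChunks` would now produce for a single chunk (all field values invented for illustration):

```ts
// Hypothetical input chunk:
const chunk = {
  filepath: "docs/auth.md",
  chunkId: 3,
  chunkTitle: "Authentication Guide",
  content: "Configure the provider with an API key...",
  finalUrl: "https://example.com/docs/auth",
};

// formatDocumentChunks([chunk]) would yield approximately:
//
//   Source 1: docs/auth.md#3 (Authentication Guide)
//   Configure the provider with an API key...
//
//   ---
//
//   Available sources (select only the sources you actually used):
//   1. filepath: "docs/auth.md", chunkTitle: "Authentication Guide", url: "https://example.com/docs/auth"
```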
mimir-rag/src/llm/providers/anthropic.ts

Lines changed: 18 additions & 30 deletions
@@ -1,32 +1,14 @@
 import { Logger } from "pino";
-import { BaseChatProvider, type ProviderRateLimits } from "../base";
-import type { ChatModelConfig, ProviderLimitsConfig } from "../../config/types";
-import type { GenerateAnswerOptions } from "../types";
-import { buildPromptMessages } from "../prompt";
+import { BaseChatProvider } from "../base";
+import type { ChatModelConfig } from "../../config/types";
+import type { GenerateAnswerOptions, StructuredAnswerResult } from "../types";
+import { buildPromptMessages, answerWithSourcesSchema } from "../prompt";
 import { createAnthropic } from '@ai-sdk/anthropic';
-import { generateText, streamText } from 'ai';
+import { generateObject, streamObject } from 'ai';
+import { resolveBaseUrl, mergeLimits } from "../../utils/providerUtils";
 
 const ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com/";
 
-function resolveBaseUrl(url?: string): string {
-  if (!url) {
-    return ANTHROPIC_DEFAULT_BASE_URL;
-  }
-
-  return url.endsWith("/") ? url : `${url}/`;
-}
-
-function mergeLimits(defaults: ProviderRateLimits, override?: ProviderLimitsConfig): ProviderRateLimits {
-  if (!override) {
-    return defaults;
-  }
-
-  return {
-    ...defaults,
-    ...override,
-  };
-}
-
 export class AnthropicChatProvider extends BaseChatProvider {
   private readonly sdk: ReturnType<typeof createAnthropic>;
 
@@ -51,11 +33,11 @@ export class AnthropicChatProvider extends BaseChatProvider {
 
     this.sdk = createAnthropic({
       apiKey: config.apiKey,
-      baseURL: resolveBaseUrl(config.baseUrl),
+      baseURL: resolveBaseUrl(config.baseUrl, ANTHROPIC_DEFAULT_BASE_URL),
     });
   }
 
-  protected async complete(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>> {
+  protected async complete(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>> {
     const { system, user } = buildPromptMessages(options);
     const model = this.sdk(this.config.model);
 
@@ -69,11 +51,17 @@ export class AnthropicChatProvider extends BaseChatProvider {
     };
 
     if (options.stream) {
-      const { textStream } = await streamText(baseOptions);
-      return textStream;
+      const { partialObjectStream } = streamObject({
+        ...baseOptions,
+        schema: answerWithSourcesSchema,
+      });
+      return partialObjectStream as AsyncIterable<StructuredAnswerResult>;
     }
 
-    const { text } = await generateText(baseOptions);
-    return text.trim();
+    const { object } = await generateObject({
+      ...baseOptions,
+      schema: answerWithSourcesSchema,
+    });
+    return object as StructuredAnswerResult;
   }
 }
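
A consequence of switching from `streamText` to `streamObject` is that streaming consumers now receive progressively filled objects rather than text deltas. A minimal consumption sketch (provider construction and `renderAnswer` are hypothetical placeholders):

```ts
// partialObjectStream yields partial objects, so fields may be absent or
// truncated until the stream completes — guard before using them.
const stream = await provider.generateAnswer({ ...options, stream: true });
for await (const partial of stream) {
  if (partial.answer !== undefined) {
    renderAnswer(partial.answer); // e.g. re-render the growing answer in a UI
  }
}
```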

mimir-rag/src/llm/providers/google.ts

Lines changed: 19 additions & 31 deletions
@@ -1,32 +1,14 @@
 import { Logger } from "pino";
-import { BaseChatProvider, BaseEmbeddingProvider, type ProviderRateLimits } from "../base";
-import type { ChatModelConfig, EmbeddingModelConfig, ProviderLimitsConfig } from "../../config/types";
-import type { EmbedOptions, GenerateAnswerOptions } from "../types";
-import { buildPromptMessages } from "../prompt";
+import { BaseChatProvider, BaseEmbeddingProvider } from "../base";
+import type { ChatModelConfig, EmbeddingModelConfig } from "../../config/types";
+import type { EmbedOptions, GenerateAnswerOptions, StructuredAnswerResult } from "../types";
+import { buildPromptMessages, answerWithSourcesSchema } from "../prompt";
 import { createGoogleGenerativeAI } from '@ai-sdk/google';
-import { embedMany, generateText, streamText } from 'ai';
+import { embedMany, generateObject, streamObject } from 'ai';
+import { resolveBaseUrl, mergeLimits } from "../../utils/providerUtils";
 
 const GEMINI_DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/";
 
-function resolveBaseUrl(url?: string): string {
-  if (!url) {
-    return GEMINI_DEFAULT_BASE_URL;
-  }
-
-  return url.endsWith("/") ? url : `${url}/`;
-}
-
-function mergeLimits(defaults: ProviderRateLimits, override?: ProviderLimitsConfig): ProviderRateLimits {
-  if (!override) {
-    return defaults;
-  }
-
-  return {
-    ...defaults,
-    ...override,
-  };
-}
-
 export class GoogleEmbeddingProvider extends BaseEmbeddingProvider {
   private readonly sdk: ReturnType<typeof createGoogleGenerativeAI>;
 
@@ -52,7 +34,7 @@ export class GoogleEmbeddingProvider extends BaseEmbeddingProvider {
 
     this.sdk = createGoogleGenerativeAI({
      apiKey: config.apiKey,
-      baseURL: resolveBaseUrl(config.baseUrl),
+      baseURL: resolveBaseUrl(config.baseUrl, GEMINI_DEFAULT_BASE_URL),
     });
   }
 
@@ -92,11 +74,11 @@ export class GoogleChatProvider extends BaseChatProvider {
 
     this.sdk = createGoogleGenerativeAI({
       apiKey: config.apiKey,
-      baseURL: resolveBaseUrl(config.baseUrl),
+      baseURL: resolveBaseUrl(config.baseUrl, GEMINI_DEFAULT_BASE_URL),
     });
   }
 
-  protected async complete(options: GenerateAnswerOptions): Promise<string | AsyncIterable<string>> {
+  protected async complete(options: GenerateAnswerOptions): Promise<StructuredAnswerResult | AsyncIterable<StructuredAnswerResult>> {
     const { system, user } = buildPromptMessages(options);
     const model = this.sdk(this.config.model);
 
@@ -110,11 +92,17 @@
     };
 
     if (options.stream) {
-      const { textStream } = await streamText(baseOptions);
-      return textStream;
+      const { partialObjectStream } = streamObject({
+        ...baseOptions,
+        schema: answerWithSourcesSchema,
+      });
+      return partialObjectStream as AsyncIterable<StructuredAnswerResult>;
     }
 
-    const { text } = await generateText(baseOptions);
-    return text.trim();
+    const { object } = await generateObject({
+      ...baseOptions,
+      schema: answerWithSourcesSchema,
+    });
+    return object as StructuredAnswerResult;
   }
 }
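
Both providers now import `resolveBaseUrl` and `mergeLimits` from `../../utils/providerUtils`, a file this diff does not show. Given the identical per-provider copies deleted above, the shared module is presumably along these lines, with the default URL promoted to a parameter (a reconstruction, not the committed file):

```ts
// Presumed src/utils/providerUtils.ts, reconstructed from the helpers removed
// in anthropic.ts and google.ts.
import type { ProviderRateLimits } from "../llm/base";
import type { ProviderLimitsConfig } from "../config/types";

export function resolveBaseUrl(url: string | undefined, defaultUrl: string): string {
  if (!url) {
    return defaultUrl;
  }
  return url.endsWith("/") ? url : `${url}/`;
}

export function mergeLimits(defaults: ProviderRateLimits, override?: ProviderLimitsConfig): ProviderRateLimits {
  if (!override) {
    return defaults;
  }
  return { ...defaults, ...override };
}
```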
