Skip to content

Commit 3697ed5

Browse files
authored
fix(embedder): route batch embedding to /v1/embeddings (Issue #629) (#631)
* fix(embedder): route batch embedding to /v1/embeddings Fix Issue #629: Ollama batch embedding fails after PR #621 PR #621 fixed single embedding by switching to /api/embeddings with prompt field. However, batch requests send payload.input as string[], which causes Ollama to reject with 'cannot unmarshal array into prompt'. Changes: - Route single input (string) to /api/embeddings + prompt (PR #621) - Route batch input (string[]) to /v1/embeddings + input[] - Add response validation for count and non-empty embeddings - Add test/embedder-ollama-batch-routing.test.mjs with 5 test cases Testing: - Local testing with jina-v5-retrieval-test confirms both paths work - All 5 new tests pass Closes #629 * fix(embedder): address PR review comments (Issue #629) - Add embedder-ollama-batch-routing.test.mjs to CI manifest - Add comments explaining why provider options are omitted for Ollama batch - Add note about /v1/embeddings no-fallback assumption Reviewed by: rwmjhb * fix(embedder): address PR review comments (Issue #629) - Add embedder-ollama-batch-routing.test.mjs to CI manifest - Add comments explaining why provider options are omitted for Ollama batch - Add note about /v1/embeddings no-fallback assumption Reviewed by: rwmjhb
1 parent 16ee3e4 commit 3697ed5

File tree

4 files changed

+342
-14
lines changed

4 files changed

+342
-14
lines changed

commit_msg.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fix(embedder): address PR review comments (Issue #629)
2+
3+
- Add embedder-ollama-batch-routing.test.mjs to CI manifest
4+
- Add comments explaining why provider options are omitted for Ollama batch
5+
- Add note about /v1/embeddings no-fallback assumption
6+
7+
Reviewed by: rwmjhb

scripts/ci-test-manifest.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ export const CI_TEST_MANIFEST = [
4747
{ group: "core-regression", runner: "node", file: "test/store-serialization.test.mjs" },
4848
{ group: "core-regression", runner: "node", file: "test/access-tracker-retry.test.mjs" },
4949
{ group: "core-regression", runner: "node", file: "test/embedder-cache.test.mjs" },
50+
// Issue #629 batch embedding fix
51+
{ group: "llm-clients-and-auth", runner: "node", file: "test/embedder-ollama-batch-routing.test.mjs" },
5052
];
5153

5254
export function getEntriesForGroup(group) {

src/embedder.ts

Lines changed: 64 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -569,39 +569,89 @@ export class Embedder {
569569
* Call embeddings.create using native fetch (bypasses OpenAI SDK).
570570
* Used exclusively for Ollama endpoints where AbortController must work
571571
* correctly to avoid long-lived stalled sockets.
572+
*
573+
* For Ollama 0.20.5+: /v1/embeddings may return empty arrays for some models,
574+
* so we use /api/embeddings with "prompt" field for single requests (PR #621).
575+
* For batch requests, we use /v1/embeddings with "input" array as it's more
576+
* efficient and confirmed working in local testing.
577+
*
578+
* See: https://github.com/CortexReach/memory-lancedb-pro/issues/620
579+
* Fix: https://github.com/CortexReach/memory-lancedb-pro/issues/629
572580
*/
573581
private async embedWithNativeFetch(payload: any, signal?: AbortSignal): Promise<any> {
574582
if (!this._baseURL) {
575583
throw new Error("embedWithNativeFetch requires a baseURL");
576584
}
577585

578-
// Fix for Ollama 0.20.5+: /v1/embeddings returns empty arrays for both `input` and `prompt`.
579-
// Only /api/embeddings + `prompt` parameter works correctly.
580-
// See: https://github.com/CortexReach/memory-lancedb-pro/issues/620
581586
const base = this._baseURL.replace(/\/$/, "").replace(/\/v1$/, "");
582-
const endpoint = base + "/api/embeddings";
583-
584587
const apiKey = this.clients[0]?.apiKey ?? "ollama";
585588

586-
// Ollama's /api/embeddings requires "prompt" field, not "input"
587-
const ollamaPayload = {
588-
model: payload.model,
589-
prompt: payload.input,
590-
};
589+
// Handle batch requests with /v1/embeddings + input array
590+
// NOTE: /v1/embeddings is used unconditionally for batch with no fallback.
591+
// If a model doesn't support that endpoint, failure will be silent from the user's perspective.
592+
// This is acceptable because most Ollama embedding models support /v1/embeddings.
593+
if (Array.isArray(payload.input)) {
594+
const response = await fetch(base + "/v1/embeddings", {
595+
method: "POST",
596+
headers: {
597+
"Content-Type": "application/json",
598+
"Authorization": `Bearer ${apiKey}`,
599+
},
600+
body: JSON.stringify({
601+
model: payload.model,
602+
input: payload.input,
603+
// NOTE: Other provider options (encoding_format, normalized, dimensions, etc.)
604+
// from buildPayload() are intentionally not included. Ollama embedding models
605+
// do not support these parameters, so omitting them is correct.
606+
}),
607+
signal,
608+
});
591609

592-
const response = await fetch(endpoint, {
610+
if (!response.ok) {
611+
const body = await response.text().catch(() => "");
612+
throw new Error(
613+
`Ollama batch embedding failed: ${response.status} ${response.statusText} — ${body.slice(0, 200)}`
614+
);
615+
}
616+
617+
const data = await response.json();
618+
619+
// Validate response count and non-empty embeddings
620+
if (
621+
!Array.isArray(data?.data) ||
622+
data.data.length !== payload.input.length ||
623+
data.data.some((item: any) => {
624+
const embedding = item?.embedding;
625+
return !Array.isArray(embedding) || embedding.length === 0;
626+
})
627+
) {
628+
throw new Error(
629+
`Ollama batch embedding returned invalid response for ${payload.input.length} inputs`
630+
);
631+
}
632+
633+
return data;
634+
}
635+
636+
// Single request: use /api/embeddings + prompt (PR #621 fix)
637+
const response = await fetch(base + "/api/embeddings", {
593638
method: "POST",
594639
headers: {
595640
"Content-Type": "application/json",
596641
"Authorization": `Bearer ${apiKey}`,
597642
},
598-
body: JSON.stringify(ollamaPayload),
599-
signal: signal,
643+
body: JSON.stringify({
644+
model: payload.model,
645+
prompt: payload.input,
646+
}),
647+
signal,
600648
});
601649

602650
if (!response.ok) {
603651
const body = await response.text().catch(() => "");
604-
throw new Error(`Ollama embedding failed: ${response.status} ${response.statusText} — ${body.slice(0, 200)}`);
652+
throw new Error(
653+
`Ollama embedding failed: ${response.status} ${response.statusText} — ${body.slice(0, 200)}`
654+
);
605655
}
606656

607657
const data = await response.json();
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
import assert from "node:assert/strict";
2+
import http from "node:http";
3+
import { test } from "node:test";
4+
5+
import jitiFactory from "jiti";
6+
7+
const jiti = jitiFactory(import.meta.url, { interopDefault: true });
8+
const { Embedder } = jiti("../src/embedder.ts");
9+
10+
const DIMS = 1024;
11+
12+
/**
13+
* Test: Ollama embedWithNativeFetch routes single vs batch requests correctly.
14+
*
15+
* Issue #629: After PR #621 fixed single embedding, batch embedding failed
16+
* because /api/embeddings only accepts a single string prompt.
17+
*
18+
* Fix:
19+
* - Single requests: use /api/embeddings + prompt
20+
* - Batch requests: use /v1/embeddings + input array
21+
*
22+
* This test verifies the routing and validation:
23+
* 1. Single requests hit /api/embeddings
24+
* 2. Batch requests hit /v1/embeddings
25+
* 3. Batch responses with wrong count are rejected
26+
* 4. Batch responses with empty embeddings are rejected
27+
* 5. Single-element batch still routes to /v1/embeddings
28+
*
29+
* NOTE: Uses port 0 to let OS assign an available port, avoiding EADDRINUSE
30+
* when developers have Ollama running locally on port 11434.
31+
*/
32+
33+
function readJson(req) {
34+
return new Promise((resolve, reject) => {
35+
let body = "";
36+
req.on("data", (chunk) => { body += chunk; });
37+
req.on("end", () => resolve(JSON.parse(body)));
38+
req.on("error", reject);
39+
});
40+
}
41+
42+
function makeOllamaMock(handler) {
43+
return http.createServer((req, res) => {
44+
if (req.method === "POST" && req.url === "/api/embeddings") {
45+
handler(req, res, "api");
46+
return;
47+
}
48+
if (req.method === "POST" && req.url === "/v1/embeddings") {
49+
handler(req, res, "v1");
50+
return;
51+
}
52+
res.writeHead(404, { "Content-Type": "text/plain" });
53+
res.end("unexpected endpoint");
54+
});
55+
}
56+
57+
function dims() {
58+
return Array.from({ length: DIMS }, () => Math.random() * 0.1);
59+
}
60+
61+
/**
62+
* Helper to start a mock server and get its actual port.
63+
* Uses port 0 to let OS assign an available port.
64+
*/
65+
async function startMockServer(server) {
66+
return new Promise((resolve, reject) => {
67+
server.listen(0, "127.0.0.1", () => {
68+
const addr = server.address();
69+
if (addr && typeof addr === "object") {
70+
resolve(addr.port);
71+
} else {
72+
reject(new Error("Failed to get server port"));
73+
}
74+
});
75+
server.on("error", reject);
76+
});
77+
}
78+
79+
test("single requests use /api/embeddings with prompt field", async () => {
80+
let capturedBody = null;
81+
82+
const server = makeOllamaMock(async (req, res, route) => {
83+
capturedBody = await readJson(req);
84+
res.writeHead(200, { "Content-Type": "application/json" });
85+
res.end(JSON.stringify({ embedding: dims() }));
86+
});
87+
88+
const port = await startMockServer(server);
89+
const baseURL = `http://127.0.0.1:${port}/v1`;
90+
91+
try {
92+
const embedder = new Embedder({
93+
provider: "openai-compatible",
94+
apiKey: "test-key",
95+
model: "mxbai-embed-large",
96+
baseURL,
97+
dimensions: DIMS,
98+
});
99+
100+
const result = await embedder.embedPassage("hello world");
101+
102+
assert.equal(capturedBody?.model, "mxbai-embed-large");
103+
assert.equal(capturedBody?.prompt, "hello world");
104+
assert.equal(Array.isArray(capturedBody?.prompt), false, "prompt should be a string, not array");
105+
assert.equal(result.length, DIMS);
106+
} finally {
107+
await new Promise((resolve) => server.close(resolve));
108+
}
109+
});
110+
111+
test("batch requests use /v1/embeddings with input array", async () => {
112+
let capturedBody = null;
113+
114+
const server = makeOllamaMock(async (req, res, route) => {
115+
capturedBody = await readJson(req);
116+
const embeddings = capturedBody.input.map((_, i) => ({
117+
embedding: dims(),
118+
index: i,
119+
}));
120+
res.writeHead(200, { "Content-Type": "application/json" });
121+
res.end(JSON.stringify({ data: embeddings }));
122+
});
123+
124+
const port = await startMockServer(server);
125+
const baseURL = `http://127.0.0.1:${port}/v1`;
126+
127+
try {
128+
const embedder = new Embedder({
129+
provider: "openai-compatible",
130+
apiKey: "test-key",
131+
model: "mxbai-embed-large",
132+
baseURL,
133+
dimensions: DIMS,
134+
});
135+
136+
const inputs = ["a", "b", "c"];
137+
const result = await embedder.embedBatchPassage(inputs);
138+
139+
assert.equal(capturedBody?.model, "mxbai-embed-large");
140+
assert.deepEqual(capturedBody?.input, inputs);
141+
assert.equal(Array.isArray(capturedBody?.input), true, "input should be an array");
142+
assert.equal(result.length, 3);
143+
result.forEach((emb) => assert.equal(emb.length, DIMS));
144+
} finally {
145+
await new Promise((resolve) => server.close(resolve));
146+
}
147+
});
148+
149+
test("batch rejects response with wrong number of embeddings", async () => {
150+
const server = makeOllamaMock(async (req, res, route) => {
151+
if (route !== "v1") {
152+
res.writeHead(404);
153+
res.end("unexpected route");
154+
return;
155+
}
156+
const body = await readJson(req);
157+
// Intentionally return fewer embeddings than requested
158+
const embeddings = Array.from({ length: Math.max(1, body.input.length - 1) }, (_, i) => ({
159+
embedding: dims(),
160+
index: i,
161+
}));
162+
res.writeHead(200, { "Content-Type": "application/json" });
163+
res.end(JSON.stringify({ data: embeddings }));
164+
});
165+
166+
const port = await startMockServer(server);
167+
const baseURL = `http://127.0.0.1:${port}/v1`;
168+
169+
try {
170+
const embedder = new Embedder({
171+
provider: "openai-compatible",
172+
apiKey: "test-key",
173+
model: "mxbai-embed-large",
174+
baseURL,
175+
dimensions: DIMS,
176+
});
177+
178+
const inputs = ["a", "b", "c"];
179+
await assert.rejects(
180+
async () => embedder.embedBatchPassage(inputs),
181+
(err) => {
182+
assert.ok(
183+
/unexpected result count|invalid response/i.test(err.message),
184+
`Expected count validation error, got: ${err.message}`,
185+
);
186+
return true;
187+
},
188+
);
189+
} finally {
190+
await new Promise((resolve) => server.close(resolve));
191+
}
192+
});
193+
194+
test("batch rejects response with empty embedding array", async () => {
195+
const server = makeOllamaMock(async (req, res, route) => {
196+
if (route !== "v1") {
197+
res.writeHead(404);
198+
res.end("unexpected route");
199+
return;
200+
}
201+
const body = await readJson(req);
202+
// Return correct count but one embedding is empty
203+
const embeddings = body.input.map((_, i) => ({
204+
embedding: i === 1 ? [] : dims(), // second one is empty
205+
index: i,
206+
}));
207+
res.writeHead(200, { "Content-Type": "application/json" });
208+
res.end(JSON.stringify({ data: embeddings }));
209+
});
210+
211+
const port = await startMockServer(server);
212+
const baseURL = `http://127.0.0.1:${port}/v1`;
213+
214+
try {
215+
const embedder = new Embedder({
216+
provider: "openai-compatible",
217+
apiKey: "test-key",
218+
model: "mxbai-embed-large",
219+
baseURL,
220+
dimensions: DIMS,
221+
});
222+
223+
const inputs = ["a", "b", "c"];
224+
await assert.rejects(
225+
async () => embedder.embedBatchPassage(inputs),
226+
(err) => {
227+
assert.ok(
228+
/invalid response/i.test(err.message),
229+
`Expected invalid response error, got: ${err.message}`,
230+
);
231+
return true;
232+
},
233+
);
234+
} finally {
235+
await new Promise((resolve) => server.close(resolve));
236+
}
237+
});
238+
239+
test("single-element batch still routes to /v1/embeddings", async () => {
240+
let capturedRoute = null;
241+
242+
const server = makeOllamaMock(async (req, res, route) => {
243+
capturedRoute = route;
244+
res.writeHead(200, { "Content-Type": "application/json" });
245+
res.end(JSON.stringify({ data: [{ embedding: dims(), index: 0 }] }));
246+
});
247+
248+
const port = await startMockServer(server);
249+
const baseURL = `http://127.0.0.1:${port}/v1`;
250+
251+
try {
252+
const embedder = new Embedder({
253+
provider: "openai-compatible",
254+
apiKey: "test-key",
255+
model: "mxbai-embed-large",
256+
baseURL,
257+
dimensions: DIMS,
258+
});
259+
260+
// Even with single element, batch route should be used
261+
const result = await embedder.embedBatchPassage(["only-one"]);
262+
263+
assert.equal(capturedRoute, "v1", "single-element batch should use /v1/embeddings, not /api/embeddings");
264+
assert.equal(result.length, 1);
265+
assert.equal(result[0].length, DIMS);
266+
} finally {
267+
await new Promise((resolve) => server.close(resolve));
268+
}
269+
});

0 commit comments

Comments
 (0)