Skip to content

Commit 3697ed5

Browse files
authored
fix(embedder): route batch embedding to /v1/embeddings (Issue #629) (#631)
* fix(embedder): route batch embedding to /v1/embeddings Fix Issue #629: Ollama batch embedding fails after PR #621 PR #621 fixed single embedding by switching to /api/embeddings with prompt field. However, batch requests send payload.input as string[], which causes Ollama to reject with 'cannot unmarshal array into prompt'. Changes: - Route single input (string) to /api/embeddings + prompt (PR #621) - Route batch input (string[]) to /v1/embeddings + input[] - Add response validation for count and non-empty embeddings - Add test/embedder-ollama-batch-routing.test.mjs with 5 test cases Testing: - Local testing with jina-v5-retrieval-test confirms both paths work - All 5 new tests pass Closes #629 * fix(embedder): address PR review comments (Issue #629) - Add embedder-ollama-batch-routing.test.mjs to CI manifest - Add comments explaining why provider options are omitted for Ollama batch - Add note about /v1/embeddings no-fallback assumption Reviewed by: rwmjhb * fix(embedder): address PR review comments (Issue #629) - Add embedder-ollama-batch-routing.test.mjs to CI manifest - Add comments explaining why provider options are omitted for Ollama batch - Add note about /v1/embeddings no-fallback assumption Reviewed by: rwmjhb
1 parent 16ee3e4 commit 3697ed5

File tree

4 files changed

+342
-14
lines changed

4 files changed

+342
-14
lines changed

commit_msg.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
fix(embedder): address PR review comments (Issue #629)
2+
3+
- Add embedder-ollama-batch-routing.test.mjs to CI manifest
4+
- Add comments explaining why provider options are omitted for Ollama batch
5+
- Add note about /v1/embeddings no-fallback assumption
6+
7+
Reviewed by: rwmjhb

scripts/ci-test-manifest.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ export const CI_TEST_MANIFEST = [
4747
{ group: "core-regression", runner: "node", file: "test/store-serialization.test.mjs" },
4848
{ group: "core-regression", runner: "node", file: "test/access-tracker-retry.test.mjs" },
4949
{ group: "core-regression", runner: "node", file: "test/embedder-cache.test.mjs" },
50+
// Issue #629 batch embedding fix
51+
{ group: "llm-clients-and-auth", runner: "node", file: "test/embedder-ollama-batch-routing.test.mjs" },
5052
];
5153

5254
export function getEntriesForGroup(group) {

src/embedder.ts

Lines changed: 64 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -569,39 +569,89 @@ export class Embedder {
569569
* Call embeddings.create using native fetch (bypasses OpenAI SDK).
570570
* Used exclusively for Ollama endpoints where AbortController must work
571571
* correctly to avoid long-lived stalled sockets.
572+
*
573+
* For Ollama 0.20.5+: /v1/embeddings may return empty arrays for some models,
574+
* so we use /api/embeddings with "prompt" field for single requests (PR #621).
575+
* For batch requests, we use /v1/embeddings with "input" array as it's more
576+
* efficient and confirmed working in local testing.
577+
*
578+
* See: https://github.com/CortexReach/memory-lancedb-pro/issues/620
579+
* Fix: https://github.com/CortexReach/memory-lancedb-pro/issues/629
572580
*/
573581
private async embedWithNativeFetch(payload: any, signal?: AbortSignal): Promise<any> {
574582
if (!this._baseURL) {
575583
throw new Error("embedWithNativeFetch requires a baseURL");
576584
}
577585

578-
// Fix for Ollama 0.20.5+: /v1/embeddings returns empty arrays for both `input` and `prompt`.
579-
// Only /api/embeddings + `prompt` parameter works correctly.
580-
// See: https://github.com/CortexReach/memory-lancedb-pro/issues/620
581586
const base = this._baseURL.replace(/\/$/, "").replace(/\/v1$/, "");
582-
const endpoint = base + "/api/embeddings";
583-
584587
const apiKey = this.clients[0]?.apiKey ?? "ollama";
585588

586-
// Ollama's /api/embeddings requires "prompt" field, not "input"
587-
const ollamaPayload = {
588-
model: payload.model,
589-
prompt: payload.input,
590-
};
589+
// Handle batch requests with /v1/embeddings + input array
590+
// NOTE: /v1/embeddings is used unconditionally for batch with no fallback.
591+
// If a model doesn't support that endpoint, failure will be silent from the user's perspective.
592+
// This is acceptable because most Ollama embedding models support /v1/embeddings.
593+
if (Array.isArray(payload.input)) {
594+
const response = await fetch(base + "/v1/embeddings", {
595+
method: "POST",
596+
headers: {
597+
"Content-Type": "application/json",
598+
"Authorization": `Bearer ${apiKey}`,
599+
},
600+
body: JSON.stringify({
601+
model: payload.model,
602+
input: payload.input,
603+
// NOTE: Other provider options (encoding_format, normalized, dimensions, etc.)
604+
// from buildPayload() are intentionally not included. Ollama embedding models
605+
// do not support these parameters, so omitting them is correct.
606+
}),
607+
signal,
608+
});
591609

592-
const response = await fetch(endpoint, {
610+
if (!response.ok) {
611+
const body = await response.text().catch(() => "");
612+
throw new Error(
613+
`Ollama batch embedding failed: ${response.status} ${response.statusText} — ${body.slice(0, 200)}`
614+
);
615+
}
616+
617+
const data = await response.json();
618+
619+
// Validate response count and non-empty embeddings
620+
if (
621+
!Array.isArray(data?.data) ||
622+
data.data.length !== payload.input.length ||
623+
data.data.some((item: any) => {
624+
const embedding = item?.embedding;
625+
return !Array.isArray(embedding) || embedding.length === 0;
626+
})
627+
) {
628+
throw new Error(
629+
`Ollama batch embedding returned invalid response for ${payload.input.length} inputs`
630+
);
631+
}
632+
633+
return data;
634+
}
635+
636+
// Single request: use /api/embeddings + prompt (PR #621 fix)
637+
const response = await fetch(base + "/api/embeddings", {
593638
method: "POST",
594639
headers: {
595640
"Content-Type": "application/json",
596641
"Authorization": `Bearer ${apiKey}`,
597642
},
598-
body: JSON.stringify(ollamaPayload),
599-
signal: signal,
643+
body: JSON.stringify({
644+
model: payload.model,
645+
prompt: payload.input,
646+
}),
647+
signal,
600648
});
601649

602650
if (!response.ok) {
603651
const body = await response.text().catch(() => "");
604-
throw new Error(`Ollama embedding failed: ${response.status} ${response.statusText} — ${body.slice(0, 200)}`);
652+
throw new Error(
653+
`Ollama embedding failed: ${response.status} ${response.statusText} — ${body.slice(0, 200)}`
654+
);
605655
}
606656

607657
const data = await response.json();
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
import assert from "node:assert/strict";
2+
import http from "node:http";
3+
import { test } from "node:test";
4+
5+
import jitiFactory from "jiti";
6+
7+
const jiti = jitiFactory(import.meta.url, { interopDefault: true });
8+
const { Embedder } = jiti("../src/embedder.ts");
9+
10+
const DIMS = 1024;
11+
12+
/**
13+
* Test: Ollama embedWithNativeFetch routes single vs batch requests correctly.
14+
*
15+
* Issue #629: After PR #621 fixed single embedding, batch embedding failed
16+
* because /api/embeddings only accepts a single string prompt.
17+
*
18+
* Fix:
19+
* - Single requests: use /api/embeddings + prompt
20+
* - Batch requests: use /v1/embeddings + input array
21+
*
22+
* This test verifies the routing and validation:
23+
* 1. Single requests hit /api/embeddings
24+
* 2. Batch requests hit /v1/embeddings
25+
* 3. Batch responses with wrong count are rejected
26+
* 4. Batch responses with empty embeddings are rejected
27+
* 5. Single-element batch still routes to /v1/embeddings
28+
*
29+
* NOTE: Uses port 0 to let OS assign an available port, avoiding EADDRINUSE
30+
* when developers have Ollama running locally on port 11434.
31+
*/
32+
33+
function readJson(req) {
34+
return new Promise((resolve, reject) => {
35+
let body = "";
36+
req.on("data", (chunk) => { body += chunk; });
37+
req.on("end", () => resolve(JSON.parse(body)));
38+
req.on("error", reject);
39+
});
40+
}
41+
42+
function makeOllamaMock(handler) {
43+
return http.createServer((req, res) => {
44+
if (req.method === "POST" && req.url === "/api/embeddings") {
45+
handler(req, res, "api");
46+
return;
47+
}
48+
if (req.method === "POST" && req.url === "/v1/embeddings") {
49+
handler(req, res, "v1");
50+
return;
51+
}
52+
res.writeHead(404, { "Content-Type": "text/plain" });
53+
res.end("unexpected endpoint");
54+
});
55+
}
56+
57+
function dims() {
58+
return Array.from({ length: DIMS }, () => Math.random() * 0.1);
59+
}
60+
61+
/**
62+
* Helper to start a mock server and get its actual port.
63+
* Uses port 0 to let OS assign an available port.
64+
*/
65+
async function startMockServer(server) {
66+
return new Promise((resolve, reject) => {
67+
server.listen(0, "127.0.0.1", () => {
68+
const addr = server.address();
69+
if (addr && typeof addr === "object") {
70+
resolve(addr.port);
71+
} else {
72+
reject(new Error("Failed to get server port"));
73+
}
74+
});
75+
server.on("error", reject);
76+
});
77+
}
78+
79+
test("single requests use /api/embeddings with prompt field", async () => {
80+
let capturedBody = null;
81+
82+
const server = makeOllamaMock(async (req, res, route) => {
83+
capturedBody = await readJson(req);
84+
res.writeHead(200, { "Content-Type": "application/json" });
85+
res.end(JSON.stringify({ embedding: dims() }));
86+
});
87+
88+
const port = await startMockServer(server);
89+
const baseURL = `http://127.0.0.1:${port}/v1`;
90+
91+
try {
92+
const embedder = new Embedder({
93+
provider: "openai-compatible",
94+
apiKey: "test-key",
95+
model: "mxbai-embed-large",
96+
baseURL,
97+
dimensions: DIMS,
98+
});
99+
100+
const result = await embedder.embedPassage("hello world");
101+
102+
assert.equal(capturedBody?.model, "mxbai-embed-large");
103+
assert.equal(capturedBody?.prompt, "hello world");
104+
assert.equal(Array.isArray(capturedBody?.prompt), false, "prompt should be a string, not array");
105+
assert.equal(result.length, DIMS);
106+
} finally {
107+
await new Promise((resolve) => server.close(resolve));
108+
}
109+
});
110+
111+
test("batch requests use /v1/embeddings with input array", async () => {
112+
let capturedBody = null;
113+
114+
const server = makeOllamaMock(async (req, res, route) => {
115+
capturedBody = await readJson(req);
116+
const embeddings = capturedBody.input.map((_, i) => ({
117+
embedding: dims(),
118+
index: i,
119+
}));
120+
res.writeHead(200, { "Content-Type": "application/json" });
121+
res.end(JSON.stringify({ data: embeddings }));
122+
});
123+
124+
const port = await startMockServer(server);
125+
const baseURL = `http://127.0.0.1:${port}/v1`;
126+
127+
try {
128+
const embedder = new Embedder({
129+
provider: "openai-compatible",
130+
apiKey: "test-key",
131+
model: "mxbai-embed-large",
132+
baseURL,
133+
dimensions: DIMS,
134+
});
135+
136+
const inputs = ["a", "b", "c"];
137+
const result = await embedder.embedBatchPassage(inputs);
138+
139+
assert.equal(capturedBody?.model, "mxbai-embed-large");
140+
assert.deepEqual(capturedBody?.input, inputs);
141+
assert.equal(Array.isArray(capturedBody?.input), true, "input should be an array");
142+
assert.equal(result.length, 3);
143+
result.forEach((emb) => assert.equal(emb.length, DIMS));
144+
} finally {
145+
await new Promise((resolve) => server.close(resolve));
146+
}
147+
});
148+
149+
test("batch rejects response with wrong number of embeddings", async () => {
150+
const server = makeOllamaMock(async (req, res, route) => {
151+
if (route !== "v1") {
152+
res.writeHead(404);
153+
res.end("unexpected route");
154+
return;
155+
}
156+
const body = await readJson(req);
157+
// Intentionally return fewer embeddings than requested
158+
const embeddings = Array.from({ length: Math.max(1, body.input.length - 1) }, (_, i) => ({
159+
embedding: dims(),
160+
index: i,
161+
}));
162+
res.writeHead(200, { "Content-Type": "application/json" });
163+
res.end(JSON.stringify({ data: embeddings }));
164+
});
165+
166+
const port = await startMockServer(server);
167+
const baseURL = `http://127.0.0.1:${port}/v1`;
168+
169+
try {
170+
const embedder = new Embedder({
171+
provider: "openai-compatible",
172+
apiKey: "test-key",
173+
model: "mxbai-embed-large",
174+
baseURL,
175+
dimensions: DIMS,
176+
});
177+
178+
const inputs = ["a", "b", "c"];
179+
await assert.rejects(
180+
async () => embedder.embedBatchPassage(inputs),
181+
(err) => {
182+
assert.ok(
183+
/unexpected result count|invalid response/i.test(err.message),
184+
`Expected count validation error, got: ${err.message}`,
185+
);
186+
return true;
187+
},
188+
);
189+
} finally {
190+
await new Promise((resolve) => server.close(resolve));
191+
}
192+
});
193+
194+
test("batch rejects response with empty embedding array", async () => {
195+
const server = makeOllamaMock(async (req, res, route) => {
196+
if (route !== "v1") {
197+
res.writeHead(404);
198+
res.end("unexpected route");
199+
return;
200+
}
201+
const body = await readJson(req);
202+
// Return correct count but one embedding is empty
203+
const embeddings = body.input.map((_, i) => ({
204+
embedding: i === 1 ? [] : dims(), // second one is empty
205+
index: i,
206+
}));
207+
res.writeHead(200, { "Content-Type": "application/json" });
208+
res.end(JSON.stringify({ data: embeddings }));
209+
});
210+
211+
const port = await startMockServer(server);
212+
const baseURL = `http://127.0.0.1:${port}/v1`;
213+
214+
try {
215+
const embedder = new Embedder({
216+
provider: "openai-compatible",
217+
apiKey: "test-key",
218+
model: "mxbai-embed-large",
219+
baseURL,
220+
dimensions: DIMS,
221+
});
222+
223+
const inputs = ["a", "b", "c"];
224+
await assert.rejects(
225+
async () => embedder.embedBatchPassage(inputs),
226+
(err) => {
227+
assert.ok(
228+
/invalid response/i.test(err.message),
229+
`Expected invalid response error, got: ${err.message}`,
230+
);
231+
return true;
232+
},
233+
);
234+
} finally {
235+
await new Promise((resolve) => server.close(resolve));
236+
}
237+
});
238+
239+
test("single-element batch still routes to /v1/embeddings", async () => {
240+
let capturedRoute = null;
241+
242+
const server = makeOllamaMock(async (req, res, route) => {
243+
capturedRoute = route;
244+
res.writeHead(200, { "Content-Type": "application/json" });
245+
res.end(JSON.stringify({ data: [{ embedding: dims(), index: 0 }] }));
246+
});
247+
248+
const port = await startMockServer(server);
249+
const baseURL = `http://127.0.0.1:${port}/v1`;
250+
251+
try {
252+
const embedder = new Embedder({
253+
provider: "openai-compatible",
254+
apiKey: "test-key",
255+
model: "mxbai-embed-large",
256+
baseURL,
257+
dimensions: DIMS,
258+
});
259+
260+
// Even with single element, batch route should be used
261+
const result = await embedder.embedBatchPassage(["only-one"]);
262+
263+
assert.equal(capturedRoute, "v1", "single-element batch should use /v1/embeddings, not /api/embeddings");
264+
assert.equal(result.length, 1);
265+
assert.equal(result[0].length, DIMS);
266+
} finally {
267+
await new Promise((resolve) => server.close(resolve));
268+
}
269+
});

0 commit comments

Comments
 (0)