Skip to content

Commit 17401b2

Browse files
committed
feat: improve typing
1 parent c98a5df commit 17401b2

File tree

1 file changed

+23
-8
lines changed

1 file changed

+23
-8
lines changed

src/services/code-index/embedders/openai-compatible.ts

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,19 @@ import {
88
} from "../constants"
99
import { getDefaultModelId } from "../../../shared/embeddingModels"
1010

11+
/**
 * A single entry of the `data` array in an embeddings response.
 *
 * The request in this file is sent with `encoding_format: "base64"`, so
 * `embedding` normally arrives as a base64 string and is decoded into a
 * numeric array afterwards; both forms are therefore allowed here.
 */
interface EmbeddingItem {
	/** Base64-encoded payload (before decoding) or the decoded vector. */
	embedding: string | number[]
	/**
	 * Any additional fields the endpoint returns alongside the embedding.
	 * Typed as `unknown` so they must be narrowed before use instead of
	 * silently opting out of type checking.
	 */
	[key: string]: unknown
}
15+
16+
/**
 * Minimal shape of the embeddings response that this file reads.
 *
 * Used as the assertion target for the result of `embeddings.create`, so
 * only the fields accessed afterwards are declared.
 */
interface OpenAIEmbeddingResponse {
	/** One entry per returned embedding. */
	data: EmbeddingItem[]
	/** Token accounting; the whole object and each counter may be absent. */
	usage?: {
		prompt_tokens?: number
		total_tokens?: number
	}
}
23+
1124
/**
1225
* OpenAI Compatible implementation of the embedder interface with batching and rate limiting.
1326
* This embedder allows using any OpenAI-compatible API endpoint by specifying a custom baseURL.
@@ -108,21 +121,23 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
108121
): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
109122
for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
110123
try {
111-
const response = await this.embeddingsClient.embeddings.create({
124+
const response = (await this.embeddingsClient.embeddings.create({
112125
input: batchTexts,
113126
model: model,
114-
// The OpenAI package has custom parsing that truncates embedding dimension to 256,
115-
// which destroys accuracy.
116-
// If we pass `encoding_format: "base64"`, it does not perform any parsing,
117-
// leaving parsing up to us.
127+
// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
128+
// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
129+
// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
118130
encoding_format: "base64",
119-
})
131+
})) as OpenAIEmbeddingResponse
120132

121133
// Convert base64 embeddings to float32 arrays
122-
const processedEmbeddings = response.data.map((item: any) => {
134+
const processedEmbeddings = response.data.map((item: EmbeddingItem) => {
123135
if (typeof item.embedding === "string") {
124136
const buffer = Buffer.from(item.embedding, "base64")
137+
138+
// Create Float32Array view over the buffer
125139
const float32Array = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4)
140+
126141
return {
127142
...item,
128143
embedding: Array.from(float32Array),
@@ -134,7 +149,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
134149
// Replace the original data with processed embeddings
135150
response.data = processedEmbeddings
136151

137-
const embeddings = response.data.map((item) => item.embedding)
152+
const embeddings = response.data.map((item) => item.embedding as number[])
138153

139154
return {
140155
embeddings: embeddings,

0 commit comments

Comments
 (0)