1- import { vitest , describe , it , expect , beforeEach , afterEach } from "vitest"
1+ import { vitest , describe , it , expect , beforeEach , afterEach , vi } from "vitest"
22import type { MockedClass , MockedFunction } from "vitest"
33import { OpenAI } from "openai"
44import { OpenAICompatibleEmbedder } from "../openai-compatible"
@@ -110,6 +110,7 @@ describe("OpenAICompatibleEmbedder", () => {
110110 expect ( mockEmbeddingsCreate ) . toHaveBeenCalledWith ( {
111111 input : testTexts ,
112112 model : testModelId ,
113+ encoding_format : "base64" ,
113114 } )
114115 expect ( result ) . toEqual ( {
115116 embeddings : [ [ 0.1 , 0.2 , 0.3 ] ] ,
@@ -130,6 +131,7 @@ describe("OpenAICompatibleEmbedder", () => {
130131 expect ( mockEmbeddingsCreate ) . toHaveBeenCalledWith ( {
131132 input : testTexts ,
132133 model : testModelId ,
134+ encoding_format : "base64" ,
133135 } )
134136 expect ( result ) . toEqual ( {
135137 embeddings : [
@@ -154,6 +156,7 @@ describe("OpenAICompatibleEmbedder", () => {
154156 expect ( mockEmbeddingsCreate ) . toHaveBeenCalledWith ( {
155157 input : testTexts ,
156158 model : customModel ,
159+ encoding_format : "base64" ,
157160 } )
158161 } )
159162
@@ -173,6 +176,97 @@ describe("OpenAICompatibleEmbedder", () => {
173176 } )
174177 } )
175178
179+ /**
180+ * Tests that embeddings delivered as base64 strings in the mocked response are decoded into plain number arrays
181+ */
182+ describe ( "base64 conversion" , ( ) => {
183+ it ( "should convert base64 encoded embeddings to float arrays" , async ( ) => {
184+ const testTexts = [ "Hello world" ]
185+
186+ // Use values that are exactly representable in Float32 so the round trip can be compared with toEqual
187+ const testEmbedding = new Float32Array ( [ 0.25 , 0.5 , 0.75 , 1.0 ] )
188+
189+ // Encode the raw Float32 bytes as a base64 string (simulating what OpenAI API returns)
190+ const buffer = Buffer . from ( testEmbedding . buffer )
191+ const base64String = buffer . toString ( "base64" )
192+
193+ const mockResponse = {
194+ data : [ { embedding : base64String } ] , // Base64 string instead of array
195+ usage : { prompt_tokens : 10 , total_tokens : 15 } ,
196+ }
197+ mockEmbeddingsCreate . mockResolvedValue ( mockResponse )
198+
199+ const result = await embedder . createEmbeddings ( testTexts )
200+
201+ expect ( mockEmbeddingsCreate ) . toHaveBeenCalledWith ( {
202+ input : testTexts ,
203+ model : testModelId ,
204+ encoding_format : "base64" ,
205+ } )
206+
207+ // Verify the base64 string was converted back to the original float array
208+ expect ( result ) . toEqual ( {
209+ embeddings : [ [ 0.25 , 0.5 , 0.75 , 1.0 ] ] ,
210+ usage : { promptTokens : 10 , totalTokens : 15 } ,
211+ } )
212+ } )
213+
214+ it ( "should handle multiple base64 encoded embeddings" , async ( ) => {
215+ const testTexts = [ "Hello world" , "Goodbye world" ]
216+
217+ // Two distinct embeddings, again with values that are exactly representable in Float32
218+ const embedding1 = new Float32Array ( [ 0.25 , 0.5 , 0.75 ] )
219+ const embedding2 = new Float32Array ( [ 1.0 , 1.25 , 1.5 ] )
220+
221+ // Convert each embedding's raw bytes to its own base64 string
222+ const base64String1 = Buffer . from ( embedding1 . buffer ) . toString ( "base64" )
223+ const base64String2 = Buffer . from ( embedding2 . buffer ) . toString ( "base64" )
224+
225+ const mockResponse = {
226+ data : [ { embedding : base64String1 } , { embedding : base64String2 } ] ,
227+ usage : { prompt_tokens : 20 , total_tokens : 30 } ,
228+ }
229+ mockEmbeddingsCreate . mockResolvedValue ( mockResponse )
230+
231+ const result = await embedder . createEmbeddings ( testTexts )
232+
233+ expect ( result ) . toEqual ( {
234+ embeddings : [
235+ [ 0.25 , 0.5 , 0.75 ] ,
236+ [ 1.0 , 1.25 , 1.5 ] ,
237+ ] ,
238+ usage : { promptTokens : 20 , totalTokens : 30 } ,
239+ } )
240+ } )
241+
242+ it ( "should handle mixed base64 and array embeddings" , async ( ) => {
243+ const testTexts = [ "Hello world" , "Goodbye world" ]
244+
245+ // Edge case: one embedding arrives as a base64 string, the other as a regular number array
246+ const embedding1 = new Float32Array ( [ 0.25 , 0.5 , 0.75 ] )
247+ const base64String1 = Buffer . from ( embedding1 . buffer ) . toString ( "base64" )
248+
249+ const mockResponse = {
250+ data : [
251+ { embedding : base64String1 } , // Base64 string
252+ { embedding : [ 1.0 , 1.25 , 1.5 ] } , // Regular array
253+ ] ,
254+ usage : { prompt_tokens : 20 , total_tokens : 30 } ,
255+ }
256+ mockEmbeddingsCreate . mockResolvedValue ( mockResponse )
257+
258+ const result = await embedder . createEmbeddings ( testTexts )
259+
260+ expect ( result ) . toEqual ( {
261+ embeddings : [
262+ [ 0.25 , 0.5 , 0.75 ] ,
263+ [ 1.0 , 1.25 , 1.5 ] ,
264+ ] ,
265+ usage : { promptTokens : 20 , totalTokens : 30 } ,
266+ } )
267+ } )
268+ } )
269+
176270 /**
177271 * Test batching logic when texts exceed token limits
178272 */
@@ -249,11 +343,15 @@ describe("OpenAICompatibleEmbedder", () => {
249343 const testTexts = [ "Hello world" ]
250344 const rateLimitError = { status : 429 , message : "Rate limit exceeded" }
251345
346+ // Create base64 encoded embedding for successful response
347+ const testEmbedding = new Float32Array ( [ 0.25 , 0.5 , 0.75 ] )
348+ const base64String = Buffer . from ( testEmbedding . buffer ) . toString ( "base64" )
349+
252350 mockEmbeddingsCreate
253351 . mockRejectedValueOnce ( rateLimitError )
254352 . mockRejectedValueOnce ( rateLimitError )
255353 . mockResolvedValueOnce ( {
256- data : [ { embedding : [ 0.1 , 0.2 , 0.3 ] } ] ,
354+ data : [ { embedding : base64String } ] ,
257355 usage : { prompt_tokens : 10 , total_tokens : 15 } ,
258356 } )
259357
@@ -268,7 +366,7 @@ describe("OpenAICompatibleEmbedder", () => {
268366 expect ( mockEmbeddingsCreate ) . toHaveBeenCalledTimes ( 3 )
269367 expect ( console . warn ) . toHaveBeenCalledWith ( expect . stringContaining ( "Rate limit hit, retrying in" ) )
270368 expect ( result ) . toEqual ( {
271- embeddings : [ [ 0.1 , 0.2 , 0.3 ] ] ,
369+ embeddings : [ [ 0.25 , 0.5 , 0.75 ] ] ,
272370 usage : { promptTokens : 10 , totalTokens : 15 } ,
273371 } )
274372 } )
@@ -360,5 +458,84 @@ describe("OpenAICompatibleEmbedder", () => {
360458 await expect ( embedder . createEmbeddings ( testTexts ) ) . rejects . toThrow ( )
361459 } )
362460 } )
461+
462+ /**
463+ * Test to confirm the OpenAI package behavior with base64 encoding
464+ * This test verifies that when we request encoding_format: "base64",
465+ * the OpenAI package returns unparsed base64 strings as expected.
466+ * This is the behavior we rely on in our workaround.
467+ */
468+ describe ( "OpenAI package base64 behavior verification" , ( ) => {
469+ it ( "should return unparsed base64 when encoding_format is base64" , async ( ) => {
470+ const testTexts = [ "Hello world" ]
471+
472+ // Create a real OpenAI instance (loaded via vi.importActual) to test the actual package behavior
473+ const realOpenAI = new ( ( await vi . importActual ( "openai" ) ) as any ) . OpenAI ( {
474+ baseURL : testBaseUrl ,
475+ apiKey : testApiKey ,
476+ } )
477+
478+ // Create test embedding data as base64 using values that can be exactly represented in Float32
479+ const testEmbedding = new Float32Array ( [ 0.25 , 0.5 , 0.75 , 1.0 ] )
480+ const buffer = Buffer . from ( testEmbedding . buffer )
481+ const base64String = buffer . toString ( "base64" )
482+
483+ // Mock the raw API response that would come from OpenAI
484+ const mockApiResponse = {
485+ data : [
486+ {
487+ object : "embedding" ,
488+ embedding : base64String , // Raw base64 string from API
489+ index : 0 ,
490+ } ,
491+ ] ,
492+ model : "text-embedding-3-small" ,
493+ object : "list" ,
494+ usage : {
495+ prompt_tokens : 2 ,
496+ total_tokens : 2 ,
497+ } ,
498+ }
499+
500+ // Build a mock request function that the stubbed post() below delegates to; it returns a minimal thenable
501+ const mockMethodRequest = vi . fn ( )
502+ const mockAPIPromise = {
503+ then : vi . fn ( ) . mockImplementation ( ( callback ) => {
504+ return Promise . resolve ( callback ( mockApiResponse ) )
505+ } ) ,
506+ catch : vi . fn ( ) ,
507+ finally : vi . fn ( ) ,
508+ }
509+ mockMethodRequest . mockReturnValue ( mockAPIPromise )
510+
511+ // Replace the post method on the client so the call is routed to the mock above
512+ ; ( realOpenAI as any ) . post = vi . fn ( ) . mockImplementation ( ( path , opts ) => {
513+ return mockMethodRequest ( "post" , path , opts )
514+ } )
515+
516+ // Call the embeddings.create method with base64 encoding
517+ const response = await realOpenAI . embeddings . create ( {
518+ input : testTexts ,
519+ model : "text-embedding-3-small" ,
520+ encoding_format : "base64" ,
521+ } )
522+
523+ // Verify that the response contains the raw base64 string
524+ // This confirms the OpenAI package doesn't parse base64 when explicitly requested
525+ expect ( response . data [ 0 ] . embedding ) . toBe ( base64String )
526+ expect ( typeof response . data [ 0 ] . embedding ) . toBe ( "string" )
527+
528+ // Verify we can manually convert it back to the original float array
529+ const returnedBuffer = Buffer . from ( response . data [ 0 ] . embedding as string , "base64" )
530+ const returnedFloat32Array = new Float32Array (
531+ returnedBuffer . buffer ,
532+ returnedBuffer . byteOffset ,
533+ returnedBuffer . byteLength / 4 , // element count: each Float32 occupies 4 bytes
534+ )
535+ const returnedArray = Array . from ( returnedFloat32Array )
536+
537+ expect ( returnedArray ) . toEqual ( [ 0.25 , 0.5 , 0.75 , 1.0 ] )
538+ } )
539+ } )
363540 } )
364541} )
0 commit comments