@@ -458,5 +458,84 @@ describe("OpenAICompatibleEmbedder", () => {
458458 await expect ( embedder . createEmbeddings ( testTexts ) ) . rejects . toThrow ( )
459459 } )
460460 } )
461+
/**
 * Pins the behavior of the `openai` package that our base64 workaround relies on:
 * when the caller explicitly requests `encoding_format: "base64"`, the client
 * must hand the embedding back as the raw, undecoded base64 string.
 *
 * If this test starts failing after an `openai` upgrade, the workaround's
 * assumption no longer holds and needs to be revisited.
 */
describe("OpenAI package base64 behavior verification", () => {
	it("should return unparsed base64 when encoding_format is base64", async () => {
		const testTexts = ["Hello world"]
		// Values that are exactly representable in Float32, so the round trip
		// can be compared with strict equality.
		const expectedValues = [0.25, 0.5, 0.75, 1.0]

		// Build a client from the actual package (jest.requireActual bypasses any
		// module mock) so that the package's own response handling is exercised.
		const realOpenAI = new (jest.requireActual("openai").OpenAI)({
			baseURL: testBaseUrl,
			apiKey: testApiKey,
		})

		// Encode the floats the same way the API does: raw float32 bytes -> base64.
		const testEmbedding = new Float32Array(expectedValues)
		const base64String = Buffer.from(
			testEmbedding.buffer,
			testEmbedding.byteOffset,
			testEmbedding.byteLength,
		).toString("base64")

		// Raw API payload as it would arrive over the wire.
		const mockApiResponse = {
			data: [
				{
					object: "embedding",
					embedding: base64String, // Raw base64 string from API
					index: 0,
				},
			],
			model: "text-embedding-3-small",
			object: "list",
			usage: {
				prompt_tokens: 2,
				total_tokens: 2,
			},
		}

		// Stub the client's `post` transport method so no network request is made;
		// everything in embeddings.create() around that call still runs for real.
		const postMock = jest.fn().mockResolvedValue(mockApiResponse)
		;(realOpenAI as any).post = postMock

		const response = await realOpenAI.embeddings.create({
			input: testTexts,
			model: "text-embedding-3-small",
			encoding_format: "base64",
		})

		// The request must have gone through the stub with the encoding we asked for;
		// otherwise the assertions below would not be testing the base64 path.
		expect(postMock).toHaveBeenCalledTimes(1)
		expect(postMock).toHaveBeenCalledWith(
			"/embeddings",
			expect.objectContaining({
				body: expect.objectContaining({ input: testTexts, encoding_format: "base64" }),
			}),
		)

		// The package must not decode base64 when it was explicitly requested.
		const returnedEmbedding = response.data[0].embedding
		expect(typeof returnedEmbedding).toBe("string")
		expect(returnedEmbedding).toBe(base64String)

		// Verify the string can be decoded back to the original floats.
		// Small Buffers may share a pooled ArrayBuffer at an arbitrary byteOffset,
		// while Float32Array requires 4-byte alignment, so copy the bytes into a
		// fresh, zero-offset ArrayBuffer before viewing them as floats.
		const returnedBytes = Uint8Array.from(Buffer.from(returnedEmbedding as string, "base64"))
		const returnedArray = Array.from(new Float32Array(returnedBytes.buffer))

		expect(returnedArray).toEqual(expectedValues)
	})
})
461540 } )
462541} )
0 commit comments