Skip to content

Commit c98a5df

Browse files
Dixie Flatline authored and daniel-lxs committed
Add tests to verify openai base64 and brokenness behavior
1 parent b20841e commit c98a5df

File tree

1 file changed

+79
-0
lines changed

1 file changed

+79
-0
lines changed

src/services/code-index/embedders/__tests__/openai-compatible.spec.ts

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,5 +458,84 @@ describe("OpenAICompatibleEmbedder", () => {
458458
await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow()
459459
})
460460
})
461+
462+
/**
463+
* Verifies how the OpenAI package handles base64-encoded embeddings.
464+
* When encoding_format: "base64" is requested explicitly, this test expects
465+
* the package to hand back the raw base64 string without decoding it.
466+
* This is the behavior we rely on in our workaround.
467+
*/
468+
describe("OpenAI package base64 behavior verification", () => {
469+
it("should return unparsed base64 when encoding_format is base64", async () => {
470+
const testTexts = ["Hello world"]
471+
472+
// Instantiate the real (un-mocked) OpenAI client via jest.requireActual so the package's own code is exercised
473+
const realOpenAI = new (jest.requireActual("openai").OpenAI)({
474+
baseURL: testBaseUrl,
475+
apiKey: testApiKey,
476+
})
477+
478+
// Build the base64 fixture from values that are exactly representable in Float32, so the round-trip check below can use exact equality
479+
const testEmbedding = new Float32Array([0.25, 0.5, 0.75, 1.0])
480+
const buffer = Buffer.from(testEmbedding.buffer)
481+
const base64String = buffer.toString("base64")
482+
483+
// Raw response body as the API would return it: the embedding is still a base64 string
484+
const mockApiResponse = {
485+
data: [
486+
{
487+
object: "embedding",
488+
embedding: base64String, // Raw base64 string from API
489+
index: 0,
490+
},
491+
],
492+
model: "text-embedding-3-small",
493+
object: "list",
494+
usage: {
495+
prompt_tokens: 2,
496+
total_tokens: 2,
497+
},
498+
}
499+
500+
// Stub that stands in for the network call; the replacement post() below forwards to it
501+
const mockMethodRequest = jest.fn()
502+
const mockAPIPromise = { // minimal thenable: then() feeds mockApiResponse to the callback it is given
503+
then: jest.fn().mockImplementation((callback) => {
504+
return Promise.resolve(callback(mockApiResponse))
505+
}),
506+
catch: jest.fn(),
507+
finally: jest.fn(),
508+
}
509+
mockMethodRequest.mockReturnValue(mockAPIPromise)
510+
511+
// Override post() on this client instance so it returns the thenable above instead of issuing a request
512+
;(realOpenAI as any).post = jest.fn().mockImplementation((path, opts) => {
513+
return mockMethodRequest("post", path, opts)
514+
})
515+
516+
// Request embeddings with base64 encoding (presumably routed through client.post() - confirm against the installed openai version)
517+
const response = await realOpenAI.embeddings.create({
518+
input: testTexts,
519+
model: "text-embedding-3-small",
520+
encoding_format: "base64",
521+
})
522+
523+
// Verify that the response contains the raw base64 string
524+
// This confirms the OpenAI package doesn't parse base64 when explicitly requested
525+
expect(response.data[0].embedding).toBe(base64String)
526+
expect(typeof response.data[0].embedding).toBe("string")
527+
528+
// Verify we can manually decode it back to the original floats; byteOffset and length limit the view to this Buffer's own bytes
529+
const returnedBuffer = Buffer.from(response.data[0].embedding as string, "base64")
530+
const returnedFloat32Array = new Float32Array(
531+
returnedBuffer.buffer,
532+
returnedBuffer.byteOffset,
533+
returnedBuffer.byteLength / 4,
534+
)
535+
const returnedArray = Array.from(returnedFloat32Array)
536+
537+
expect(returnedArray).toEqual([0.25, 0.5, 0.75, 1.0])
538+
})
539+
})
461540
})
462541
})

0 commit comments

Comments
 (0)