Skip to content

Commit 5645a52

Browse files
committed
fix: sanitize non-ASCII characters in API keys for HTTP headers
- Added sanitizeForHeader() method to replace non-ASCII characters with '?'
- Added isAsciiOnly() method to check whether a string contains only ASCII characters
- Added a warning when the API key contains non-ASCII characters
- Added comprehensive tests for API key sanitization
- Fixed ESLint warnings by using charCodeAt instead of a regex with control characters

Fixes #7959 - ByteString conversion error with Unicode characters
1 parent 72bc790 commit 5645a52

File tree

2 files changed

+190
-2
lines changed

2 files changed

+190
-2
lines changed

src/services/code-index/embedders/__tests__/openai-compatible.spec.ts

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,157 @@ describe("OpenAICompatibleEmbedder", () => {
114114
"embeddings:validation.baseUrlRequired",
115115
)
116116
})
117+
118+
it("should warn when API key contains non-ASCII characters", () => {
	// Install the spy before constructing, since the constructor is what emits the warning.
	const consoleWarn = vitest.spyOn(console, "warn")
	const unicodeKey = "test-key-•-with-unicode"

	embedder = new OpenAICompatibleEmbedder(testBaseUrl, unicodeKey, testModelId)

	// Construction must still succeed; the non-ASCII key only triggers a warning.
	const warningFragment = expect.stringContaining("API key contains non-ASCII characters")
	expect(consoleWarn).toHaveBeenCalledWith(warningFragment)
	expect(embedder).toBeDefined()
})
127+
128+
it("should not warn when API key contains only ASCII characters", () => {
	const consoleWarn = vitest.spyOn(console, "warn")

	embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)

	// Other warnings are tolerated; only the non-ASCII one must be absent.
	const warningFragment = expect.stringContaining("API key contains non-ASCII characters")
	expect(consoleWarn).not.toHaveBeenCalledWith(warningFragment)
})
135+
})
136+
137+
describe("API key sanitization", () => {
	// A full endpoint URL, as used by every request-level test in this suite.
	const fullUrl = "https://api.example.com/v1/embeddings"

	/**
	 * Builds an embedder with `rawKey`, runs `createEmbeddings(texts)` against a mocked
	 * global `fetch`, and asserts that both auth headers carry `expectedKey`.
	 * The previous global `fetch` is restored afterwards so the mock cannot leak into other tests.
	 * @param rawKey The API key handed to the constructor
	 * @param expectedKey The key expected in the `api-key` and `Authorization` headers
	 * @param texts The input texts passed to `createEmbeddings`
	 */
	async function expectSanitizedAuthHeaders(rawKey: string, expectedKey: string, texts: string[]): Promise<void> {
		embedder = new OpenAICompatibleEmbedder(fullUrl, rawKey, testModelId)

		const mockFetch = vitest.fn().mockResolvedValue({
			ok: true,
			status: 200,
			json: async () => ({
				data: [{ embedding: [0.1, 0.2, 0.3] }],
				usage: { prompt_tokens: 10, total_tokens: 15 },
			}),
			text: async () => "",
		})
		const originalFetch = global.fetch
		global.fetch = mockFetch

		try {
			await embedder.createEmbeddings(texts)
		} finally {
			global.fetch = originalFetch
		}

		expect(mockFetch).toHaveBeenCalledWith(
			fullUrl,
			expect.objectContaining({
				headers: expect.objectContaining({
					"api-key": expectedKey,
					Authorization: `Bearer ${expectedKey}`,
				}),
			}),
		)
	}

	it("should sanitize non-ASCII characters in API key for direct HTTP requests", async () => {
		await expectSanitizedAuthHeaders("test-key-•-with-unicode-§", "test-key-?-with-unicode-?", ["test text"])
	})

	it("should handle API keys with emoji and special Unicode characters", async () => {
		// 😀 lies outside the Basic Multilingual Plane, so a JS string stores it as two UTF-16
		// code units (a surrogate pair). Sanitization is per code unit, hence "??" for the emoji
		// and a single "?" for ™.
		await expectSanitizedAuthHeaders("key-😀-test-™-api", "key-??-test-?-api", ["test"])
	})

	it("should preserve ASCII characters when sanitizing", async () => {
		await expectSanitizedAuthHeaders("abc123-•-XYZ789-§-!@#$%^&*()", "abc123-?-XYZ789-?-!@#$%^&*()", ["test"])
	})

	it("should handle empty API key gracefully", () => {
		// An empty key is rejected by constructor validation before any sanitization happens.
		expect(() => new OpenAICompatibleEmbedder(testBaseUrl, "", testModelId)).toThrow(
			"embeddings:validation.apiKeyRequired",
		)
	})

	it("should handle API key that is entirely non-ASCII", async () => {
		await expectSanitizedAuthHeaders("•§™€£¥", "??????", ["test"])
	})
})
118269

119270
describe("embedderInfo", () => {

src/services/code-index/embedders/openai-compatible.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,32 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
4949
mutex: new Mutex(),
5050
}
5151

52+
/**
 * Produces a copy of a string in which everything outside the ASCII range is replaced by '?'.
 * Replacement is done per UTF-16 code unit, so the result always has the same length as the
 * input (a character stored as a surrogate pair becomes two '?').
 * @param value The string to sanitize
 * @returns The input with every code unit above 127 replaced by '?'
 */
private static sanitizeForHeader(value: string): string {
	// Comparing char codes instead of using a regex avoids ESLint's no-control-regex warning.
	let sanitized = ""
	for (let index = 0; index < value.length; index++) {
		const isAscii = value.charCodeAt(index) <= 127
		sanitized += isAscii ? value.charAt(index) : "?"
	}
	return sanitized
}
66+
67+
/**
 * Reports whether a string consists solely of ASCII characters.
 * An empty string is considered ASCII-only.
 * @param value The string to inspect
 * @returns false as soon as a code unit above 127 is found, true otherwise
 */
private static isAsciiOnly(value: string): boolean {
	// Plain char-code comparison; a regex here would trip ESLint's no-control-regex rule.
	for (let index = 0; index < value.length; index++) {
		if (value.charCodeAt(index) > 127) {
			return false
		}
	}
	return true
}
77+
5278
/**
5379
* Creates a new OpenAI Compatible embedder
5480
* @param baseUrl The base URL for the OpenAI-compatible API endpoint
@@ -64,6 +90,14 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
6490
throw new Error(t("embeddings:validation.apiKeyRequired"))
6591
}
6692

93+
// Warn if API key contains non-ASCII characters
94+
if (!OpenAICompatibleEmbedder.isAsciiOnly(apiKey)) {
95+
console.warn(
96+
"API key contains non-ASCII characters. These will be replaced with '?' for HTTP header compatibility. " +
97+
"Please ensure your API key contains only ASCII characters for proper authentication.",
98+
)
99+
}
100+
67101
this.baseUrl = baseUrl
68102
this.apiKey = apiKey
69103
this.embeddingsClient = new OpenAI({
@@ -195,14 +229,17 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
195229
batchTexts: string[],
196230
model: string,
197231
): Promise<OpenAIEmbeddingResponse> {
232+
// Sanitize the API key to ensure it only contains ASCII characters
233+
const sanitizedApiKey = OpenAICompatibleEmbedder.sanitizeForHeader(this.apiKey)
234+
198235
const response = await fetch(url, {
199236
method: "POST",
200237
headers: {
201238
"Content-Type": "application/json",
202239
// Azure OpenAI uses 'api-key' header, while OpenAI uses 'Authorization'
203240
// Both headers are sent on every request so that either kind of endpoint can authenticate
204-
"api-key": this.apiKey,
205-
Authorization: `Bearer ${this.apiKey}`,
241+
"api-key": sanitizedApiKey,
242+
Authorization: `Bearer ${sanitizedApiKey}`,
206243
},
207244
body: JSON.stringify({
208245
input: batchTexts,

0 commit comments

Comments
 (0)