Skip to content

Commit d671376

Browse files
committed
fix: unescape HTML entities in Gemini provider streaming responses
- Apply unescapeHtmlEntities to all text yielded from the Gemini API
- Fixes an issue where square brackets were missing in Python and other code
- Add comprehensive tests for HTML entity unescaping
- Ensures consistency with other providers

Fixes #8107
1 parent 87b45de commit d671376

File tree

2 files changed

+152
-4
lines changed

2 files changed

+152
-4
lines changed

src/api/providers/__tests__/gemini.spec.ts

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,133 @@ describe("GeminiHandler", () => {
9090
)
9191
})
9292

93+
it("should unescape HTML entities in text messages", async () => {
94+
// Setup the mock implementation to return text with HTML entities
95+
;(handler["client"].models.generateContentStream as any).mockResolvedValue({
96+
[Symbol.asyncIterator]: async function* () {
97+
yield { text: "array&#91;0&#93;" }
98+
yield { text: " and &lt;div&gt;" }
99+
yield { text: " with &amp; symbol" }
100+
yield { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }
101+
},
102+
})
103+
104+
const stream = handler.createMessage(systemPrompt, mockMessages)
105+
const chunks = []
106+
107+
for await (const chunk of stream) {
108+
chunks.push(chunk)
109+
}
110+
111+
// Should have unescaped HTML entities
112+
expect(chunks[0]).toEqual({ type: "text", text: "array[0]" })
113+
expect(chunks[1]).toEqual({ type: "text", text: " and <div>" })
114+
expect(chunks[2]).toEqual({ type: "text", text: " with & symbol" })
115+
})
116+
117+
it("should unescape square bracket entities in Python code", async () => {
118+
// Test the exact scenario from the issue
119+
;(handler["client"].models.generateContentStream as any).mockResolvedValue({
120+
[Symbol.asyncIterator]: async function* () {
121+
yield {
122+
candidates: [
123+
{
124+
content: {
125+
parts: [
126+
{
127+
text: 'print(f"The first character is: {populated_variable&#91;0&#93;}")',
128+
},
129+
],
130+
},
131+
},
132+
],
133+
}
134+
yield { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }
135+
},
136+
})
137+
138+
const stream = handler.createMessage(systemPrompt, mockMessages)
139+
const chunks = []
140+
141+
for await (const chunk of stream) {
142+
chunks.push(chunk)
143+
}
144+
145+
// Should have properly unescaped square brackets
146+
expect(chunks[0]).toEqual({
147+
type: "text",
148+
text: 'print(f"The first character is: {populated_variable[0]}")',
149+
})
150+
})
151+
152+
it("should unescape named square bracket entities", async () => {
153+
// Test named entities for square brackets
154+
;(handler["client"].models.generateContentStream as any).mockResolvedValue({
155+
[Symbol.asyncIterator]: async function* () {
156+
yield {
157+
candidates: [
158+
{
159+
content: {
160+
parts: [
161+
{
162+
text: "matrix&lsqb;i&rsqb;&lsqb;j&rsqb;",
163+
},
164+
],
165+
},
166+
},
167+
],
168+
}
169+
yield { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }
170+
},
171+
})
172+
173+
const stream = handler.createMessage(systemPrompt, mockMessages)
174+
const chunks = []
175+
176+
for await (const chunk of stream) {
177+
chunks.push(chunk)
178+
}
179+
180+
// Should have properly unescaped named entities
181+
expect(chunks[0]).toEqual({ type: "text", text: "matrix[i][j]" })
182+
})
183+
184+
it("should unescape HTML entities in thinking/reasoning parts", async () => {
185+
// Test that entities are unescaped in thinking parts too
186+
;(handler["client"].models.generateContentStream as any).mockResolvedValue({
187+
[Symbol.asyncIterator]: async function* () {
188+
yield {
189+
candidates: [
190+
{
191+
content: {
192+
parts: [
193+
{
194+
thought: true,
195+
text: "Need to access array&#91;0&#93; element",
196+
},
197+
],
198+
},
199+
},
200+
],
201+
}
202+
yield { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }
203+
},
204+
})
205+
206+
const stream = handler.createMessage(systemPrompt, mockMessages)
207+
const chunks = []
208+
209+
for await (const chunk of stream) {
210+
chunks.push(chunk)
211+
}
212+
213+
// Should have properly unescaped in reasoning text
214+
expect(chunks[0]).toEqual({
215+
type: "reasoning",
216+
text: "Need to access array[0] element",
217+
})
218+
})
219+
93220
it("should handle API errors", async () => {
94221
const mockError = new Error("Gemini API error")
95222
;(handler["client"].models.generateContentStream as any).mockRejectedValue(mockError)
@@ -143,6 +270,26 @@ describe("GeminiHandler", () => {
143270
const result = await handler.completePrompt("Test prompt")
144271
expect(result).toBe("")
145272
})
273+
274+
it("should unescape HTML entities in completePrompt response", async () => {
275+
// Mock the response with HTML entities
276+
;(handler["client"].models.generateContent as any).mockResolvedValue({
277+
text: "array&#91;0&#93; and &lt;div&gt;",
278+
})
279+
280+
const result = await handler.completePrompt("Test prompt")
281+
expect(result).toBe("array[0] and <div>")
282+
})
283+
284+
it("should handle square brackets in completePrompt for Python code", async () => {
285+
// Test the exact scenario from the issue
286+
;(handler["client"].models.generateContent as any).mockResolvedValue({
287+
text: 'print(f"The first character is: {populated_variable&#91;0&#93;}")',
288+
})
289+
290+
const result = await handler.completePrompt("Update Python script")
291+
expect(result).toBe('print(f"The first character is: {populated_variable[0]}")')
292+
})
146293
})
147294

148295
describe("getModel", () => {

src/api/providers/gemini.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from
1717
import { t } from "i18next"
1818
import type { ApiStream, GroundingSource } from "../transform/stream"
1919
import { getModelParams } from "../transform/model-params"
20+
import { unescapeHtmlEntities } from "../../utils/text-normalization"
2021

2122
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
2223
import { BaseProvider } from "./base-provider"
@@ -109,12 +110,12 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
109110
if (part.thought) {
110111
// This is a thinking/reasoning part
111112
if (part.text) {
112-
yield { type: "reasoning", text: part.text }
113+
yield { type: "reasoning", text: unescapeHtmlEntities(part.text) }
113114
}
114115
} else {
115116
// This is regular content
116117
if (part.text) {
117-
yield { type: "text", text: part.text }
118+
yield { type: "text", text: unescapeHtmlEntities(part.text) }
118119
}
119120
}
120121
}
@@ -123,7 +124,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
123124

124125
// Fallback to the original text property if no candidates structure
125126
else if (chunk.text) {
126-
yield { type: "text", text: chunk.text }
127+
yield { type: "text", text: unescapeHtmlEntities(chunk.text) }
127128
}
128129

129130
if (chunk.usageMetadata) {
@@ -234,7 +235,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
234235
config: promptConfig,
235236
})
236237

237-
let text = result.text ?? ""
238+
let text = unescapeHtmlEntities(result.text ?? "")
238239

239240
const candidate = result.candidates?.[0]
240241
if (candidate?.groundingMetadata) {

0 commit comments

Comments
 (0)