Skip to content

Commit f24c1e6

Browse files
qdaxb and daniel-lxs authored
use assistantMessageParser class instead of parseAssistantMessage (#5341)
Co-authored-by: Daniel Riccio <[email protected]>
1 parent 603c6c6 commit f24c1e6

File tree

25 files changed

+755
-5
lines changed

25 files changed

+755
-5
lines changed

packages/types/src/experiment.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js"
66
* ExperimentId
77
*/
88

9-
export const experimentIds = ["powerSteering", "multiFileApplyDiff", "preventFocusDisruption"] as const
9+
export const experimentIds = ["powerSteering", "multiFileApplyDiff", "preventFocusDisruption", "assistantMessageParser"] as const
1010

1111
export const experimentIdsSchema = z.enum(experimentIds)
1212

@@ -20,6 +20,7 @@ export const experimentsSchema = z.object({
2020
powerSteering: z.boolean().optional(),
2121
multiFileApplyDiff: z.boolean().optional(),
2222
preventFocusDisruption: z.boolean().optional(),
23+
assistantMessageParser: z.boolean().optional(),
2324
})
2425

2526
export type Experiments = z.infer<typeof experimentsSchema>
Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
import { type ToolName, toolNames } from "@roo-code/types"
2+
import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools"
3+
import { AssistantMessageContent } from "./parseAssistantMessage"
4+
5+
/**
 * Incremental parser for streamed assistant messages.
 *
 * The parser keeps its position between calls to {@link processChunk}, so each
 * new chunk is scanned once instead of re-parsing the whole message. Text and
 * tool-use blocks are appended to the block list as soon as they start and are
 * then mutated in place while more characters arrive.
 */
export class AssistantMessageParser {
	/** Blocks discovered so far, in message order. Entries are mutated in place while streaming. */
	private contentBlocks: AssistantMessageContent[] = []
	/** Text block currently receiving characters, if any. */
	private currentTextContent: TextContent | undefined = undefined
	/** Offset in `accumulator` where the current text block begins. */
	private currentTextContentStartIndex = 0
	/** Tool-use block currently open, if any. */
	private currentToolUse: ToolUse | undefined = undefined
	/** Offset in `accumulator` just after the current tool's opening tag. */
	private currentToolUseStartIndex = 0
	/** Parameter of the current tool whose value is being read, if any. */
	private currentParamName: ToolParamName | undefined = undefined
	/** Offset in `accumulator` just after the current parameter's opening tag. */
	private currentParamValueStartIndex = 0
	/** Upper bound on the total message length, measured with `String.length`. */
	private readonly MAX_ACCUMULATOR_SIZE = 1024 * 1024
	/** Upper bound on a single parameter value, measured with `String.length`. */
	private readonly MAX_PARAM_LENGTH = 1024 * 100
	/** Every character received since the last reset. */
	private accumulator = ""

	// The opening tags never change, so build them once per parser instead of
	// once per processed character.
	private readonly toolOpeningTags = toolNames.map((name) => ({ name, tag: `<${name}>` }))
	private readonly paramOpeningTags = toolParamNames.map((name) => ({ name, tag: `<${name}>` }))

	/**
	 * Initialize a new AssistantMessageParser instance.
	 */
	constructor() {
		this.reset()
	}

	/**
	 * Reset the parser state so the instance can be reused for a new message.
	 */
	public reset(): void {
		this.contentBlocks = []
		this.currentTextContent = undefined
		this.currentTextContentStartIndex = 0
		this.currentToolUse = undefined
		this.currentToolUseStartIndex = 0
		this.currentParamName = undefined
		this.currentParamValueStartIndex = 0
		this.accumulator = ""
	}

	/**
	 * Returns the content blocks parsed so far.
	 *
	 * @returns A shallow copy of the internal list. The array itself is safe to
	 * modify, but its elements are the parser's own block objects and keep
	 * changing while further chunks are processed.
	 */
	public getContentBlocks(): AssistantMessageContent[] {
		return this.contentBlocks.slice()
	}

	/**
	 * Process a new chunk of text and update the parser state.
	 *
	 * @param chunk The new chunk of text to process.
	 * @returns The content blocks parsed so far (see {@link getContentBlocks}).
	 * Blocks still being streamed keep `partial: true`; this method never
	 * finalizes them.
	 * @throws Error if accepting the chunk would push the accumulated message
	 * past `MAX_ACCUMULATOR_SIZE`. The check runs before any state changes.
	 */
	public processChunk(chunk: string): AssistantMessageContent[] {
		if (this.accumulator.length + chunk.length > this.MAX_ACCUMULATOR_SIZE) {
			throw new Error("Assistant message exceeds maximum allowed size")
		}

		// Length before this chunk; used to derive each character's absolute offset.
		const accumulatorStartLength = this.accumulator.length

		for (let i = 0; i < chunk.length; i++) {
			this.accumulator += chunk[i]

			if (this.currentToolUse && this.currentParamName) {
				// Inside a parameter value (a parameter only exists within a tool use).
				this.consumeParamChar(this.currentToolUse, this.currentParamName)
			} else if (this.currentToolUse) {
				// Inside a tool use, between parameters.
				this.consumeToolUseChar(this.currentToolUse)
			} else if (!this.tryStartToolUse()) {
				// Plain text at the beginning of the message or between tools.
				this.consumeTextChar(accumulatorStartLength + i)
			}
		}

		return this.getContentBlocks()
	}

	/**
	 * Finalize any partial content blocks.
	 * Should be called after processing the last chunk.
	 */
	public finalizeContentBlocks(): void {
		for (const block of this.contentBlocks) {
			if (block.partial) {
				block.partial = false
			}
			if (block.type === "text" && typeof block.content === "string") {
				block.content = block.content.trim()
			}
		}
	}

	/** Handles one character while a parameter value of `toolUse` is being read. */
	private consumeParamChar(toolUse: ToolUse, paramName: ToolParamName): void {
		const currentParamValue = this.accumulator.slice(this.currentParamValueStartIndex)

		if (currentParamValue.length > this.MAX_PARAM_LENGTH) {
			// Oversized value: stop reading this parameter. The value written on
			// the previous character is left in place.
			this.currentParamName = undefined
			this.currentParamValueStartIndex = 0
			return
		}

		const paramClosingTag = `</${paramName}>`

		if (!currentParamValue.endsWith(paramClosingTag)) {
			// Still streaming: expose the value accumulated so far.
			toolUse.params[paramName] = currentParamValue
			return
		}

		// End of the value. "content" keeps its inner whitespace and only loses
		// one leading and one trailing newline; other parameters are trimmed.
		const paramValue = currentParamValue.slice(0, -paramClosingTag.length)
		toolUse.params[paramName] =
			paramName === "content" ? paramValue.replace(/^\n/, "").replace(/\n$/, "") : paramValue.trim()
		this.currentParamName = undefined
	}

	/** Handles one character inside `toolUse` while no parameter is open. */
	private consumeToolUseChar(toolUse: ToolUse): void {
		const currentToolValue = this.accumulator.slice(this.currentToolUseStartIndex)

		if (currentToolValue.endsWith(`</${toolUse.name}>`)) {
			// End of the tool use.
			toolUse.partial = false
			this.currentToolUse = undefined
			return
		}

		const openedParam = this.paramOpeningTags.find(({ tag }) => this.accumulator.endsWith(tag))
		if (openedParam) {
			// Start of a new parameter; its value begins right after the tag.
			this.currentParamName = openedParam.name
			this.currentParamValueStartIndex = this.accumulator.length
			return
		}

		// Special case for write_to_file: the file contents may themselves
		// contain the closing content tag, which closes the parameter early and
		// leaves the rest of the file here. Each time another closing tag
		// arrives, re-read everything between the first opening tag and the
		// LAST closing tag.
		const contentParamName: ToolParamName = "content"
		const contentEndTag = `</${contentParamName}>`

		if (toolUse.name !== "write_to_file" || !this.accumulator.endsWith(contentEndTag)) {
			// Partial tool value is accumulating.
			return
		}

		const contentStartTag = `<${contentParamName}>`
		const contentStartTagIndex = currentToolValue.indexOf(contentStartTag)
		if (contentStartTagIndex === -1) {
			// No opening tag in this tool use. This must be tested before adding
			// the tag length, otherwise the "not found" result is hidden.
			return
		}

		const contentStartIndex = contentStartTagIndex + contentStartTag.length
		const contentEndIndex = currentToolValue.lastIndexOf(contentEndTag)

		if (contentEndIndex > contentStartIndex) {
			// Don't trim, to preserve newlines; strip one leading and one trailing newline only.
			toolUse.params[contentParamName] = currentToolValue
				.slice(contentStartIndex, contentEndIndex)
				.replace(/^\n/, "")
				.replace(/\n$/, "")
		}
	}

	/**
	 * Opens a new tool-use block if the accumulator now ends with a tool's
	 * opening tag.
	 *
	 * @returns `true` if a tool use was started, `false` otherwise.
	 */
	private tryStartToolUse(): boolean {
		const openedTool = this.toolOpeningTags.find(({ tag }) => this.accumulator.endsWith(tag))
		if (!openedTool) {
			return false
		}

		const toolUse: ToolUse = {
			type: "tool_use",
			name: openedTool.name,
			params: {},
			partial: true,
		}
		this.currentToolUse = toolUse
		this.currentToolUseStartIndex = this.accumulator.length

		// A tool opening also ends the current text block.
		if (this.currentTextContent) {
			this.currentTextContent.partial = false

			// The text was last updated before the tag's final ">" arrived, so it
			// ends with the tag minus that character ("<tool"); cut that off.
			this.currentTextContent.content = this.currentTextContent.content
				.slice(0, -(openedTool.tag.length - 1))
				.trim()

			// No push needed: the text block is already in contentBlocks.
			this.currentTextContent = undefined
		}

		// Publish the new tool use immediately, still marked partial.
		this.contentBlocks.push(toolUse)
		return true
	}

	/**
	 * Handles one character of plain text.
	 *
	 * @param currentPosition Offset in the accumulator of the character just appended.
	 */
	private consumeTextChar(currentPosition: number): void {
		if (this.currentTextContent === undefined) {
			// First character of a new text block: remember where it starts and
			// publish the block right away.
			this.currentTextContentStartIndex = currentPosition
			this.currentTextContent = {
				type: "text",
				content: this.accumulator.slice(this.currentTextContentStartIndex).trim(),
				partial: true,
			}
			this.contentBlocks.push(this.currentTextContent)
			return
		}

		// Refresh the existing text block with everything accumulated so far.
		this.currentTextContent.content = this.accumulator.slice(this.currentTextContentStartIndex).trim()
	}
}

0 commit comments

Comments
 (0)