Skip to content

Commit c3bf7fb

Browse files
committed
fix: prevent browser screenshot memory accumulation
- Remove old screenshots from apiConversationHistory after each browser action
- Simplify BrowserSessionRow to display only the current state, without pagination
- Remove screenshot history and pagination controls

This prevents memory issues where screenshots accumulated indefinitely, consuming significant memory and wasting tokens by sending all historical screenshots to the model on every request. Now only the latest screenshot is kept in memory (~100-200KB) instead of potentially megabytes of accumulated screenshots.
1 parent 98b8d5b commit c3bf7fb

File tree

2 files changed

+53
-273
lines changed

2 files changed

+53
-273
lines changed

src/core/tools/browserActionTool.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,32 @@ export async function browserActionTool(
148148
}
149149
}
150150

151+
// Clean up old browser screenshots from API conversation history to prevent memory accumulation
152+
// Only keep the latest screenshot - old ones are no longer needed by the model
153+
if (browserActionResult?.screenshot) {
154+
const apiHistory = cline.apiConversationHistory
155+
for (let i = apiHistory.length - 1; i >= 0; i--) {
156+
const message = apiHistory[i]
157+
if (Array.isArray(message.content)) {
158+
// Filter out old screenshot image blocks
159+
message.content = message.content.filter((block) => {
160+
// Remove base64 image blocks (browser screenshots)
161+
// Keep text blocks and other content
162+
if (
163+
block.type === "image" &&
164+
"source" in block &&
165+
block.source.type === "base64" &&
166+
(block.source.media_type === "image/webp" || block.source.media_type === "image/png")
167+
) {
168+
// This is likely an old browser screenshot - remove it
169+
return false
170+
}
171+
return true
172+
})
173+
}
174+
}
175+
}
176+
151177
switch (action) {
152178
case "launch":
153179
case "click":

webview-ui/src/components/chat/BrowserSessionRow.tsx

Lines changed: 27 additions & 273 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import React, { memo, useEffect, useMemo, useRef, useState } from "react"
22
import { useSize } from "react-use"
33
import deepEqual from "fast-deep-equal"
44
import { useTranslation } from "react-i18next"
5-
import { VSCodeButton } from "@vscode/webview-ui-toolkit/react"
65

76
import type { ClineMessage } from "@roo-code/types"
87

@@ -12,7 +11,6 @@ import { vscode } from "@src/utils/vscode"
1211
import { useExtensionState } from "@src/context/ExtensionStateContext"
1312

1413
import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock"
15-
import { ChatRowContent } from "./ChatRow"
1614
import { ProgressIndicator } from "./ProgressIndicator"
1715
import { Globe, Pointer, SquareTerminal } from "lucide-react"
1816

@@ -30,7 +28,6 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
3028
const { messages, isLast, onHeightChange, lastModifiedMessage } = props
3129
const { t } = useTranslation()
3230
const prevHeightRef = useRef(0)
33-
const [maxActionHeight, setMaxActionHeight] = useState(0)
3431
const [consoleLogsExpanded, setConsoleLogsExpanded] = useState(false)
3532

3633
const { browserViewportSize = "900x600" } = useExtensionState()
@@ -55,182 +52,58 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
5552
}, [messages, lastModifiedMessage, isLast])
5653

5754
const isBrowsing = useMemo(() => {
58-
return isLast && messages.some((m) => m.say === "browser_action_result") && !isLastApiReqInterrupted // after user approves, browser_action_result with "" is sent to indicate that the session has started
55+
return isLast && messages.some((m) => m.say === "browser_action_result") && !isLastApiReqInterrupted
5956
}, [isLast, messages, isLastApiReqInterrupted])
6057

61-
// Organize messages into pages with current state and next action
62-
const pages = useMemo(() => {
63-
const result: {
64-
currentState: {
65-
url?: string
66-
screenshot?: string
67-
mousePosition?: string
68-
consoleLogs?: string
69-
messages: ClineMessage[] // messages up to and including the result
70-
}
71-
nextAction?: {
72-
messages: ClineMessage[] // messages leading to next result
73-
}
74-
}[] = []
75-
76-
let currentStateMessages: ClineMessage[] = []
77-
let nextActionMessages: ClineMessage[] = []
78-
79-
messages.forEach((message) => {
80-
if (message.ask === "browser_action_launch") {
81-
// Start first page
82-
currentStateMessages = [message]
83-
} else if (message.say === "browser_action_result") {
84-
if (message.text === "") {
85-
// first browser_action_result is an empty string that signals that session has started
86-
return
87-
}
88-
// Complete current state
89-
currentStateMessages.push(message)
90-
const resultData = JSON.parse(message.text || "{}") as BrowserActionResult
91-
92-
// Add page with current state and previous next actions
93-
result.push({
94-
currentState: {
95-
url: resultData.currentUrl,
96-
screenshot: resultData.screenshot,
97-
mousePosition: resultData.currentMousePosition,
98-
consoleLogs: resultData.logs,
99-
messages: [...currentStateMessages],
100-
},
101-
nextAction:
102-
nextActionMessages.length > 0
103-
? {
104-
messages: [...nextActionMessages],
105-
}
106-
: undefined,
107-
})
108-
109-
// Reset for next page
110-
currentStateMessages = []
111-
nextActionMessages = []
112-
} else if (
113-
message.say === "api_req_started" ||
114-
message.say === "text" ||
115-
message.say === "browser_action"
116-
) {
117-
// These messages lead to the next result, so they should always go in nextActionMessages
118-
nextActionMessages.push(message)
119-
} else {
120-
// Any other message types
121-
currentStateMessages.push(message)
122-
}
123-
})
124-
125-
// Add incomplete page if exists
126-
if (currentStateMessages.length > 0 || nextActionMessages.length > 0) {
127-
result.push({
128-
currentState: {
129-
messages: [...currentStateMessages],
130-
},
131-
nextAction:
132-
nextActionMessages.length > 0
133-
? {
134-
messages: [...nextActionMessages],
135-
}
136-
: undefined,
137-
})
138-
}
139-
140-
return result
141-
}, [messages])
142-
143-
// Auto-advance to latest page
144-
const [currentPageIndex, setCurrentPageIndex] = useState(0)
145-
useEffect(() => {
146-
setCurrentPageIndex(pages.length - 1)
147-
}, [pages.length])
148-
14958
// Get initial URL from launch message
15059
const initialUrl = useMemo(() => {
15160
const launchMessage = messages.find((m) => m.ask === "browser_action_launch")
15261
return launchMessage?.text || ""
15362
}, [messages])
15463

155-
// Find the latest available URL and screenshot
64+
// Find the LATEST browser action result only (no history needed)
15665
const latestState = useMemo(() => {
157-
for (let i = pages.length - 1; i >= 0; i--) {
158-
const page = pages[i]
159-
if (page.currentState.url || page.currentState.screenshot) {
66+
// Search backwards to find the most recent browser_action_result
67+
for (let i = messages.length - 1; i >= 0; i--) {
68+
const message = messages[i]
69+
if (message.say === "browser_action_result" && message.text && message.text !== "") {
70+
const resultData = JSON.parse(message.text) as BrowserActionResult
16071
return {
161-
url: page.currentState.url,
162-
mousePosition: page.currentState.mousePosition,
163-
consoleLogs: page.currentState.consoleLogs,
164-
screenshot: page.currentState.screenshot,
72+
url: resultData.currentUrl,
73+
screenshot: resultData.screenshot,
74+
mousePosition: resultData.currentMousePosition,
75+
consoleLogs: resultData.logs,
16576
}
16677
}
16778
}
168-
return { url: undefined, mousePosition: undefined, consoleLogs: undefined, screenshot: undefined }
169-
}, [pages])
170-
171-
const currentPage = pages[currentPageIndex]
172-
const isLastPage = currentPageIndex === pages.length - 1
173-
174-
// Use latest state if we're on the last page and don't have a state yet
175-
const displayState = isLastPage
176-
? {
177-
url: currentPage?.currentState.url || latestState.url || initialUrl,
178-
mousePosition:
179-
currentPage?.currentState.mousePosition || latestState.mousePosition || defaultMousePosition,
180-
consoleLogs: currentPage?.currentState.consoleLogs,
181-
screenshot: currentPage?.currentState.screenshot || latestState.screenshot,
182-
}
183-
: {
184-
url: currentPage?.currentState.url || initialUrl,
185-
mousePosition: currentPage?.currentState.mousePosition || defaultMousePosition,
186-
consoleLogs: currentPage?.currentState.consoleLogs,
187-
screenshot: currentPage?.currentState.screenshot,
188-
}
189-
190-
const [actionContent, { height: actionHeight }] = useSize(
191-
<div>
192-
{currentPage?.nextAction?.messages.map((message) => (
193-
<BrowserSessionRowContent
194-
key={message.ts}
195-
{...props}
196-
message={message}
197-
setMaxActionHeight={setMaxActionHeight}
198-
/>
199-
))}
200-
{!isBrowsing && messages.some((m) => m.say === "browser_action_result") && currentPageIndex === 0 && (
201-
<BrowserActionBox action={"launch"} text={initialUrl} />
202-
)}
203-
</div>,
204-
)
79+
return { url: undefined, screenshot: undefined, mousePosition: undefined, consoleLogs: undefined }
80+
}, [messages])
20581

206-
useEffect(() => {
207-
if (actionHeight === 0 || actionHeight === Infinity) {
208-
return
209-
}
210-
if (actionHeight > maxActionHeight) {
211-
setMaxActionHeight(actionHeight)
212-
}
213-
}, [actionHeight, maxActionHeight])
82+
// Display state is simply the latest state or defaults
83+
const displayState = {
84+
url: latestState.url || initialUrl,
85+
screenshot: latestState.screenshot,
86+
mousePosition: latestState.mousePosition || defaultMousePosition,
87+
consoleLogs: latestState.consoleLogs,
88+
}
21489

215-
// Track latest click coordinate
90+
// Find latest click position for cursor display
21691
const latestClickPosition = useMemo(() => {
21792
if (!isBrowsing) return undefined
21893

219-
// Look through current page's next actions for the latest browser_action
220-
const actions = currentPage?.nextAction?.messages || []
221-
for (let i = actions.length - 1; i >= 0; i--) {
222-
const message = actions[i]
223-
if (message.say === "browser_action") {
224-
const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
94+
// Look through messages backwards for the latest browser_action with click
95+
for (let i = messages.length - 1; i >= 0; i--) {
96+
const message = messages[i]
97+
if (message.say === "browser_action" && message.text) {
98+
const browserAction = JSON.parse(message.text) as ClineSayBrowserAction
22599
if (browserAction.action === "click" && browserAction.coordinate) {
226100
return browserAction.coordinate
227101
}
228102
}
229103
}
230104
return undefined
231-
}, [isBrowsing, currentPage?.nextAction?.messages])
105+
}, [isBrowsing, messages])
232106

233-
// Use latest click position while browsing, otherwise use display state
234107
const mousePosition = isBrowsing
235108
? latestClickPosition || displayState.mousePosition
236109
: displayState.mousePosition || defaultMousePosition
@@ -353,38 +226,6 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
353226
<CodeBlock source={displayState.consoleLogs || t("chat:browser.noNewLogs")} language="shell" />
354227
)}
355228
</div>
356-
357-
{/* Action content with min height */}
358-
<div style={{ minHeight: maxActionHeight }}>{actionContent}</div>
359-
360-
{/* Pagination moved to bottom */}
361-
{pages.length > 1 && (
362-
<div
363-
style={{
364-
display: "flex",
365-
justifyContent: "space-between",
366-
alignItems: "center",
367-
padding: "8px 0px",
368-
marginTop: "15px",
369-
borderTop: "1px solid var(--vscode-editorGroup-border)",
370-
}}>
371-
<div>
372-
{t("chat:browser.navigation.step", { current: currentPageIndex + 1, total: pages.length })}
373-
</div>
374-
<div style={{ display: "flex", gap: "4px" }}>
375-
<VSCodeButton
376-
disabled={currentPageIndex === 0 || isBrowsing}
377-
onClick={() => setCurrentPageIndex((i) => i - 1)}>
378-
{t("chat:browser.navigation.previous")}
379-
</VSCodeButton>
380-
<VSCodeButton
381-
disabled={currentPageIndex === pages.length - 1 || isBrowsing}
382-
onClick={() => setCurrentPageIndex((i) => i + 1)}>
383-
{t("chat:browser.navigation.next")}
384-
</VSCodeButton>
385-
</div>
386-
</div>
387-
)}
388229
</div>,
389230
)
390231

@@ -402,93 +243,6 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
402243
return browserSessionRow
403244
}, deepEqual)
404245

405-
interface BrowserSessionRowContentProps extends Omit<BrowserSessionRowProps, "messages"> {
406-
message: ClineMessage
407-
setMaxActionHeight: (height: number) => void
408-
isStreaming: boolean
409-
}
410-
411-
const BrowserSessionRowContent = ({
412-
message,
413-
isExpanded,
414-
onToggleExpand,
415-
lastModifiedMessage,
416-
isLast,
417-
setMaxActionHeight,
418-
isStreaming,
419-
}: BrowserSessionRowContentProps) => {
420-
const { t } = useTranslation()
421-
const headerStyle: React.CSSProperties = {
422-
display: "flex",
423-
alignItems: "center",
424-
gap: "10px",
425-
marginBottom: "10px",
426-
wordBreak: "break-word",
427-
}
428-
429-
switch (message.type) {
430-
case "say":
431-
switch (message.say) {
432-
case "api_req_started":
433-
case "text":
434-
return (
435-
<div style={{ padding: "10px 0 10px 0" }}>
436-
<ChatRowContent
437-
message={message}
438-
isExpanded={isExpanded(message.ts)}
439-
onToggleExpand={() => {
440-
if (message.say === "api_req_started") {
441-
setMaxActionHeight(0)
442-
}
443-
onToggleExpand(message.ts)
444-
}}
445-
lastModifiedMessage={lastModifiedMessage}
446-
isLast={isLast}
447-
isStreaming={isStreaming}
448-
/>
449-
</div>
450-
)
451-
452-
case "browser_action":
453-
const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
454-
return (
455-
<BrowserActionBox
456-
action={browserAction.action}
457-
coordinate={browserAction.coordinate}
458-
text={browserAction.text}
459-
/>
460-
)
461-
462-
default:
463-
return null
464-
}
465-
466-
case "ask":
467-
switch (message.ask) {
468-
case "browser_action_launch":
469-
return (
470-
<>
471-
<div style={headerStyle}>
472-
<span style={{ fontWeight: "bold" }}>{t("chat:browser.sessionStarted")}</span>
473-
</div>
474-
<div
475-
style={{
476-
borderRadius: 3,
477-
border: "1px solid var(--vscode-editorGroup-border)",
478-
overflow: "hidden",
479-
backgroundColor: CODE_BLOCK_BG_COLOR,
480-
}}>
481-
<CodeBlock source={message.text} language="shell" />
482-
</div>
483-
</>
484-
)
485-
486-
default:
487-
return null
488-
}
489-
}
490-
}
491-
492246
const BrowserActionBox = ({
493247
action,
494248
coordinate,

0 commit comments

Comments
 (0)