Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/evals/.docker/entrypoints/runner.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/bin/bash

# Set environment variable to suppress WSL install prompt for VS Code
export DONT_PROMPT_WSL_INSTALL=1

if [ $# -eq 0 ]; then
exec bash
else
Expand Down
12 changes: 6 additions & 6 deletions packages/evals/Dockerfile.runner
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ ARG PYTHON_EXT_VERSION=2025.6.1
ARG RUST_EXT_VERSION=0.3.2482

RUN mkdir -p /roo/.vscode-template \
&& code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension golang.go@${GOLANG_EXT_VERSION} \
&& code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension dbaeumer.vscode-eslint@${ESLINT_EXT_VERSION} \
&& code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension redhat.java@${JAVA_EXT_VERSION} \
&& code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension ms-python.python@${PYTHON_EXT_VERSION} \
&& code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension rust-lang.rust-analyzer@${RUST_EXT_VERSION}
&& yes | code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension golang.go@${GOLANG_EXT_VERSION} \
&& yes | code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension dbaeumer.vscode-eslint@${ESLINT_EXT_VERSION} \
&& yes | code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension redhat.java@${JAVA_EXT_VERSION} \
&& yes | code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension ms-python.python@${PYTHON_EXT_VERSION} \
&& yes | code --no-sandbox --user-data-dir /roo/.vscode-template --install-extension rust-lang.rust-analyzer@${RUST_EXT_VERSION}

# Copy evals
ARG EVALS_COMMIT=main
Expand Down Expand Up @@ -128,7 +128,7 @@ RUN cp -r /roo/.vscode-template /roo/.vscode

# Build the Roo Code extension
RUN pnpm vsix -- --out ../bin/roo-code.vsix \
&& code --no-sandbox --user-data-dir /roo/.vscode --install-extension bin/roo-code.vsix
&& yes | code --no-sandbox --user-data-dir /roo/.vscode --install-extension bin/roo-code.vsix

# Copy entrypoint script
COPY packages/evals/.docker/entrypoints/runner.sh /usr/local/bin/entrypoint.sh
Expand Down
2 changes: 1 addition & 1 deletion packages/evals/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ The initial build process can take a minute or two. Upon success you should see
Additionally, you'll find in Docker Desktop that database and redis services are running:
<img width="1283" alt="Screenshot 2025-06-05 at 12 07 09 PM" src="https://github.com/user-attachments/assets/ad75d791-9cc7-41e3-8168-df7b21b49da2" />

Navigate to [localhost:3000](http://localhost:3000/) in your browser and click the 🚀 button.
Navigate to [localhost:3446](http://localhost:3446/) in your browser and click the 🚀 button.

By default, an evals run will run all programming exercises in the [Roo Code Evals](https://github.com/RooCodeInc/Roo-Code-Evals) repository with the Claude Sonnet 4 model and default settings. For basic configuration, you can specify the LLM to use and any subset of the exercises you'd like. For advanced configuration, you can import a Roo Code settings file, which will allow you to run the evals with Roo Code configured any way you'd like (this includes custom modes, a footgun prompt, etc.).

Expand Down
2 changes: 1 addition & 1 deletion packages/evals/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ services:
context: ../../
dockerfile: packages/evals/Dockerfile.web
ports:
- "${EVALS_WEB_PORT:-3000}:3000"
- "${EVALS_WEB_PORT:-3446}:3000"
environment:
- HOST_EXECUTION_METHOD=docker
volumes:
Expand Down
62 changes: 62 additions & 0 deletions src/core/assistant-message/DirectiveStreamingParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { Directive, ParsingState, TextContentParser, ToolUseParser, ParameterParser } from "./parsers"

export class DirectiveStreamingParser {
	/**
	 * Walks the assistant message one UTF-16 code unit at a time, growing
	 * `state.accumulator` and handing the shared state to the specialised
	 * parsers in a fixed order: parameter, then tool use, then tool-start /
	 * text detection.
	 *
	 * @param assistantMessage - The (possibly incomplete) assistant message.
	 * @returns The content blocks collected in the parsing state, with any
	 * block that was still open at the end of the input appended last.
	 */
	static parse(assistantMessage: string): Directive[] {
		const state: ParsingState = {
			contentBlocks: [],
			currentTextContent: undefined,
			currentTextContentStartIndex: 0,
			currentToolUse: undefined,
			currentToolUseStartIndex: 0,
			currentParamName: undefined,
			currentParamValueStartIndex: 0,
			accumulator: "",
		}

		for (let position = 0; position < assistantMessage.length; position++) {
			state.accumulator += assistantMessage[position]

			// The parameter parser gets the first look (a param never exists
			// without a tool use); the tool-use parser is consulted only when
			// the parameter parser reports nothing to do.
			const handled = ParameterParser.parse(state) || ToolUseParser.parse(state)

			if (!handled) {
				// Neither a param nor a tool use is in progress: either a
				// tool use starts right here or the input is text.
				const startedToolUse = ToolUseParser.checkForToolStart(state)
				TextContentParser.parse(state, position, startedToolUse)
			}
		}

		// Flush whatever block was still open when the input ran out.
		this.handlePartialContent(state)

		return state.contentBlocks
	}

	/**
	 * Appends the block that was still being built when the input ended.
	 *
	 * An unfinished tool use is pushed as-is; if one of its parameters was
	 * still open, the trimmed text accumulated since that parameter started
	 * is stored as its value first. An unfinished text block is pushed
	 * afterwards.
	 */
	private static handlePartialContent(state: ParsingState): void {
		const { currentToolUse: openToolUse, currentParamName: openParamName } = state

		if (openToolUse) {
			if (openParamName) {
				// The parameter never saw its closing tag; keep what arrived.
				const pendingValue = state.accumulator.slice(state.currentParamValueStartIndex)
				openToolUse.params[openParamName] = pendingValue.trim()
			}

			state.contentBlocks.push(openToolUse)
		}

		// NOTE: The order of the two checks is not significant; only one of
		// currentToolUse / currentTextContent is expected to be partial at a
		// time.
		const openText = state.currentTextContent
		if (openText) {
			state.contentBlocks.push(openText)
		}
	}
}
160 changes: 8 additions & 152 deletions src/core/assistant-message/parseAssistantMessage.ts
Original file line number Diff line number Diff line change
@@ -1,156 +1,12 @@
import { type ToolName, toolNames } from "@roo-code/types"
import { DirectiveStreamingParser } from "./DirectiveStreamingParser"
import type { Directive } from "./parsers/types"

import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools"
// Re-export types for backward compatibility
export type { TextDirective, ToolDirective, Directive } from "./parsers/types"

export type AssistantMessageContent = TextContent | ToolUse
// Backward compatibility alias
export type AssistantMessageContent = Directive

/**
 * Parses an assistant message into an ordered list of text and tool-use
 * blocks.
 *
 * The message is scanned one character at a time while an accumulator of
 * everything seen so far grows. On each character the checks run in a fixed
 * order: an open parameter is tested for its closing tag; otherwise an open
 * tool use is tested for its closing tag or for a parameter opening tag
 * (taken from `toolParamNames`); otherwise the accumulator is tested for a
 * tool opening tag (taken from `toolNames`), and anything else is treated as
 * text. Text content and parameter values are trimmed.
 *
 * @param assistantMessage - The assistant message text to parse.
 * @returns The parsed blocks. A tool use or text block that was still open
 * when the input ended is appended with `partial: true`.
 */
export function parseAssistantMessage(assistantMessage: string): AssistantMessageContent[] {
let contentBlocks: AssistantMessageContent[] = []
let currentTextContent: TextContent | undefined = undefined
let currentTextContentStartIndex = 0
let currentToolUse: ToolUse | undefined = undefined
let currentToolUseStartIndex = 0
let currentParamName: ToolParamName | undefined = undefined
let currentParamValueStartIndex = 0
let accumulator = ""

for (let i = 0; i < assistantMessage.length; i++) {
const char = assistantMessage[i]
accumulator += char

// There should not be a param without a tool use.
if (currentToolUse && currentParamName) {
// Everything accumulated since the param's opening tag.
const currentParamValue = accumulator.slice(currentParamValueStartIndex)
const paramClosingTag = `</${currentParamName}>`
if (currentParamValue.endsWith(paramClosingTag)) {
// End of param value: store it without the closing tag, trimmed.
currentToolUse.params[currentParamName] = currentParamValue.slice(0, -paramClosingTag.length).trim()
currentParamName = undefined
continue
} else {
// Partial param value is accumulating.
continue
}
}

// No currentParamName.

if (currentToolUse) {
const currentToolValue = accumulator.slice(currentToolUseStartIndex)
const toolUseClosingTag = `</${currentToolUse.name}>`
if (currentToolValue.endsWith(toolUseClosingTag)) {
// End of a tool use.
currentToolUse.partial = false
contentBlocks.push(currentToolUse)
currentToolUse = undefined
continue
} else {
const possibleParamOpeningTags = toolParamNames.map((name) => `<${name}>`)
for (const paramOpeningTag of possibleParamOpeningTags) {
if (accumulator.endsWith(paramOpeningTag)) {
// Start of a new parameter; its value begins right after the tag.
currentParamName = paramOpeningTag.slice(1, -1) as ToolParamName
currentParamValueStartIndex = accumulator.length
break
}
}

// There's no current param, and not starting a new param.

// Special case for write_to_file where file contents could
// contain the closing tag, in which case the param would have
// closed and we end up with the rest of the file contents here.
// To work around this, we get the string between the starting
// content tag and the LAST content tag.
const contentParamName: ToolParamName = "content"

if (currentToolUse.name === "write_to_file" && accumulator.endsWith(`</${contentParamName}>`)) {
const toolContent = accumulator.slice(currentToolUseStartIndex)
const contentStartTag = `<${contentParamName}>`
const contentEndTag = `</${contentParamName}>`
// NOTE(review): the tag length is added to the indexOf result, so
// contentStartIndex is never -1 and the `!== -1` test on it below
// cannot detect a missing start tag.
const contentStartIndex = toolContent.indexOf(contentStartTag) + contentStartTag.length
const contentEndIndex = toolContent.lastIndexOf(contentEndTag)

if (contentStartIndex !== -1 && contentEndIndex !== -1 && contentEndIndex > contentStartIndex) {
currentToolUse.params[contentParamName] = toolContent
.slice(contentStartIndex, contentEndIndex)
.trim()
}
}

// Partial tool value is accumulating.
continue
}
}

// No currentToolUse.

let didStartToolUse = false
const possibleToolUseOpeningTags = toolNames.map((name) => `<${name}>`)

for (const toolUseOpeningTag of possibleToolUseOpeningTags) {
if (accumulator.endsWith(toolUseOpeningTag)) {
// Start of a new tool use.
currentToolUse = {
type: "tool_use",
name: toolUseOpeningTag.slice(1, -1) as ToolName,
params: {},
partial: true,
}

currentToolUseStartIndex = accumulator.length

// This also indicates the end of the current text content.
if (currentTextContent) {
currentTextContent.partial = false

// Remove the partially accumulated tool use tag from the
// end of text (<tool).
currentTextContent.content = currentTextContent.content
.slice(0, -toolUseOpeningTag.slice(0, -1).length)
.trim()

contentBlocks.push(currentTextContent)
currentTextContent = undefined
}

didStartToolUse = true
break
}
}

if (!didStartToolUse) {
// No tool use, so it must be text either at the beginning or
// between tools.
if (currentTextContent === undefined) {
currentTextContentStartIndex = i
}

// Rebuilt on every character from the accumulator so the content always
// reflects everything seen since the text block started.
currentTextContent = {
type: "text",
content: accumulator.slice(currentTextContentStartIndex).trim(),
partial: true,
}
}
}

if (currentToolUse) {
// Stream did not complete tool call, add it as partial.
if (currentParamName) {
// Tool call has a parameter that was not completed.
currentToolUse.params[currentParamName] = accumulator.slice(currentParamValueStartIndex).trim()
}

contentBlocks.push(currentToolUse)
}

// NOTE: It doesn't matter whether we check for currentToolUse or
// currentTextContent first; only one of them will be defined since only
// one can be partial at a time.
if (currentTextContent) {
// Stream did not complete text content, add it as partial.
contentBlocks.push(currentTextContent)
}

return contentBlocks
/**
 * Parses an assistant message into directives.
 *
 * Thin wrapper that forwards the message unchanged to
 * `DirectiveStreamingParser.parse` and returns its result.
 *
 * @param assistantMessage - The assistant message text to parse.
 * @returns The directives produced by `DirectiveStreamingParser.parse`.
 */
export function parseAssistantMessage(assistantMessage: string): Directive[] {
return DirectiveStreamingParser.parse(assistantMessage)
}
22 changes: 22 additions & 0 deletions src/core/assistant-message/parsers/ParameterParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { ParsingState } from "./types"

export class ParameterParser {
	/**
	 * Advances parsing of the parameter that is currently open, if any.
	 *
	 * When both a tool use and a parameter name are active, the text
	 * accumulated since the parameter started is inspected. If it ends with
	 * the parameter's closing tag, the value (closing tag removed, trimmed) is
	 * stored on the tool use and the parameter is marked as closed.
	 *
	 * @param state - Shared parsing state; mutated when a parameter closes.
	 * @returns `false` when no tool use or no parameter is active; `true`
	 * otherwise, whether the parameter just closed or is still accumulating.
	 */
	static parse(state: ParsingState): boolean {
		const { currentToolUse: activeTool, currentParamName: activeParam } = state

		// There should not be a param without a tool use.
		if (!(activeTool && activeParam)) {
			return false
		}

		const closingTag = `</${activeParam}>`
		const valueSoFar = state.accumulator.slice(state.currentParamValueStartIndex)

		if (valueSoFar.endsWith(closingTag)) {
			// End of param value: strip the closing tag and record it.
			const rawValue = valueSoFar.slice(0, -closingTag.length)
			activeTool.params[activeParam] = rawValue.trim()
			state.currentParamName = undefined
		}

		// Closed just now or still accumulating: either way this step is
		// handled here.
		return true
	}
}
32 changes: 32 additions & 0 deletions src/core/assistant-message/parsers/TextContentParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { ParsingState } from "./types"

export class TextContentParser {
	/**
	 * Updates the in-progress text block from the accumulator.
	 *
	 * Does nothing when a tool use started on this step. Otherwise the text
	 * block is (re)created as partial, holding the trimmed accumulator
	 * contents from the position where the text block began; that position is
	 * recorded the first time a text block is opened.
	 *
	 * @param state - Shared parsing state; mutated in place.
	 * @param currentIndex - Index of the character being processed.
	 * @param didStartToolUse - Whether a tool use started on this step.
	 */
	static parse(state: ParsingState, currentIndex: number, didStartToolUse: boolean): void {
		if (didStartToolUse) {
			return
		}

		// No tool use, so it must be text either at the beginning or between
		// tools. Remember where a brand-new text block starts.
		if (state.currentTextContent === undefined) {
			state.currentTextContentStartIndex = currentIndex
		}

		const textSoFar = state.accumulator.slice(state.currentTextContentStartIndex).trim()
		state.currentTextContent = { type: "text", content: textSoFar, partial: true }
	}

	/**
	 * Closes the in-progress text block, if there is one, because a tool use
	 * has just started.
	 *
	 * The block is marked complete, the tool opening tag minus its final
	 * character (the part that had already been accumulated into the text,
	 * e.g. `<tool`) is cut from the end of the content, the result is trimmed,
	 * and the block is moved into `state.contentBlocks`.
	 *
	 * @param state - Shared parsing state; mutated in place.
	 * @param toolUseOpeningTag - The full opening tag that was just matched.
	 */
	static finalize(state: ParsingState, toolUseOpeningTag: string): void {
		const textBlock = state.currentTextContent
		if (!textBlock) {
			return
		}

		textBlock.partial = false

		// Remove the partially accumulated tool use tag from the end of text.
		const danglingTagLength = toolUseOpeningTag.slice(0, -1).length
		textBlock.content = textBlock.content.slice(0, -danglingTagLength).trim()

		state.contentBlocks.push(textBlock)
		state.currentTextContent = undefined
	}
}
Loading