RooCodeInc
diff --git a/‎src/services/deep-research/DeepResearchService.ts‎
Lines changed: 76 additions & 14 deletions b/‎src/services/deep-research/DeepResearchService.ts‎
Lines changed: 76 additions & 14 deletions
diff --git a/‎src/services/deep-research/TextSplitter.ts‎
Lines changed: 12 additions & 7 deletions b/‎src/services/deep-research/TextSplitter.ts‎
Lines changed: 12 additions & 7 deletions
diff --git a/‎src/services/deep-research/__tests__/utils/progress.test.ts‎
Lines changed: 0 additions & 19 deletions b/‎src/services/deep-research/__tests__/utils/progress.test.ts‎
Lines changed: 0 additions & 19 deletions
diff --git a/‎src/services/deep-research/utils/progress.ts‎
Lines changed: 0 additions & 17 deletions b/‎src/services/deep-research/utils/progress.ts‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎src/services/deep-research/utils/prompt.ts‎
Lines changed: 0 additions & 54 deletions b/‎src/services/deep-research/utils/prompt.ts‎
Lines changed: 0 additions & 54 deletions
diff --git a/‎webview-ui/package-lock.json‎
Lines changed: 2 additions & 1 deletion b/‎webview-ui/package-lock.json‎
Lines changed: 2 additions & 1 deletion
@@ -24,8 +24,7 @@ import {
 	ResearchQuery,
 	ResearchTokenUsage,
 } from "./types"
-import { truncatePrompt, trimPrompt } from "./utils/prompt"
-import { getTreeSize } from "./utils/progress"
+import { RecursiveCharacterTextSplitter, encoder } from "./TextSplitter"
 
 export class DeepResearchService {
 	public readonly providerId: string
@@ -83,7 +82,7 @@ export class DeepResearchService {
 	private researchSystemPrompt() {
 		const now = new Date().toISOString()
 
-		return trimPrompt(`
+		return this.trimPrompt(`
             You are an expert researcher. Today is ${now}. Follow these instructions when responding:
             - You may be asked to research subjects that is after your knowledge cutoff, assume the user is right when presented with news.
             - The user is a highly experienced analyst, no need to simplify it, be as detailed as possible and make sure your response is correct.
@@ -102,7 +101,7 @@ export class DeepResearchService {
 	private chatSystemPrompt() {
 		const now = new Date().toISOString()
 
-		return trimPrompt(`
+		return this.trimPrompt(`
 			You are an expert research assistant helping to explain and clarify research findings. Today is ${now}. Follow these guidelines:
 
 			- You always answer the with markdown formatting. You will be penalized if you do not answer with markdown when it would be possible.
@@ -139,7 +138,7 @@ export class DeepResearchService {
 	public async generateFollowUps({ query, count = 3 }: { query: string; count?: number }) {
 		console.log(`[generateFollowUps] generating up to ${count} follow-up questions`)
 
-		const prompt = trimPrompt(`
+		const prompt = this.trimPrompt(`
             Given the following query from the user, ask some follow up questions to clarify the research direction.
             Return a maximum of ${count} questions, but return less if the original query is clear.
 			Make sure each question is unique and not similar to each other.
@@ -191,7 +190,7 @@ export class DeepResearchService {
 	}): Promise<ResearchQuery[]> {
 		console.log(`[generateQueries] generating up to ${breadth} queries`)
 
-		const prompt = trimPrompt(`
+		const prompt = this.trimPrompt(`
 			Given the following prompt from the user, generate a list of SERP queries to research the topic.
 			Return a maximum of ${breadth} queries, but feel free to return less if the original prompt is clear.
 			Make sure each query is unique and not similar to each other: <prompt>${query}</prompt>
@@ -251,11 +250,11 @@ export class DeepResearchService {
 		const contents = result.data
 			.map((item) => item.markdown)
 			.filter((content) => content !== undefined)
-			.map((content) => truncatePrompt(content, 25_000))
+			.map((content) => this.truncatePrompt(content, 25_000))
 
 		console.log(`[generateLearnings] extracting learnings from "${query}"`)
 
-		const prompt = trimPrompt(`
+		const prompt = this.trimPrompt(`
 			Given the following contents from a SERP search for the query <query>${query}</query>, generate a list of learnings from the contents.
 			Return a maximum of ${learningsCount} learnings, but feel free to return less if the contents are clear.
 			Make sure each learning is unique and not similar to each other.
@@ -303,12 +302,12 @@ export class DeepResearchService {
 	}
 
 	private async generateReport({ learnings, visitedUrls }: { learnings: string[]; visitedUrls: string[] }) {
-		const learningsString = truncatePrompt(
+		const learningsString = this.truncatePrompt(
 			learnings.map((learning) => `<learning>\n${learning}\n</learning>`).join("\n"),
 			150_000,
 		)
 
-		const prompt = trimPrompt(`
+		const prompt = this.trimPrompt(`
 			Given the following prompt from the user, write a final report on the topic using the learnings from research.
 			Make it as as detailed as possible, aim for 3 or more pages, include ALL the learnings from research:
 
@@ -375,7 +374,7 @@ export class DeepResearchService {
 	private async runDeepResearch() {
 		this.status = "research"
 
-		const query = trimPrompt(`
+		const query = this.trimPrompt(`
 			Initial Query: ${this.inquiry.initialQuery}
 
 			Follow-up Questions and Answers:
@@ -418,7 +417,7 @@ export class DeepResearchService {
 				}),
 			})
 
-		this.progress.expectedQueries = getTreeSize({ breadth: this.breadth, depth: this.depth })
+		this.progress.expectedQueries = this.getTreeSize({ breadth: this.breadth, depth: this.depth })
 		onProgressUpdated()
 
 		console.log(`[transitionToResearch] query = ${query}`)
@@ -462,7 +461,7 @@ export class DeepResearchService {
 
 		this.messages.push({
 			role: "system",
-			content: trimPrompt(`
+			content: this.trimPrompt(`
 				${this.chatSystemPrompt()}
 
 				Here is the complete research context:
@@ -585,7 +584,7 @@ export class DeepResearchService {
 
 					console.log(`[deepResearch] researching deeper, breadth: ${newBreadth}, depth: ${newDepth}`)
 
-					const nextQuery = trimPrompt(`
+					const nextQuery = this.trimPrompt(`
 						Previous research goal: ${researchGoal}
 						Follow-up research directions: ${newLearnings.followUpQuestions.map((q) => `\n${q}`).join("")}
 					`)
@@ -798,4 +797,67 @@ export class DeepResearchService {
 
 		this.postMessage({ type: "research.tokenUsage", text: JSON.stringify(this.tokenUsage) })
 	}
+
+	/**
+	 * Removes leading and trailing whitespace from every line of a prompt.
+	 *
+	 * Line breaks are preserved, so blank lines stay in place (as empty
+	 * strings) and the number of lines does not change.
+	 *
+	 * @param prompt - Multi-line prompt text, typically an indented template literal.
+	 * @returns The prompt with each individual line trimmed.
+	 */
+	private trimPrompt(prompt: string) {
+		const trimmedLines: string[] = []
+
+		for (const rawLine of prompt.split("\n")) {
+			trimmedLines.push(rawLine.trim())
+		}
+
+		return trimmedLines.join("\n")
+	}
+
+	/**
+	 * Shortens a prompt until its token count (measured with `encoder`) fits
+	 * within `contextSize`.
+	 *
+	 * @param prompt - Text to shorten; an empty value yields an empty string.
+	 * @param contextSize - Maximum number of tokens allowed in the result.
+	 * @param minChunkSize - Smallest character budget worth splitting on. When
+	 * the estimated budget drops below it, the first `minChunkSize` characters
+	 * are returned as-is, without a further token check.
+	 * @returns The original prompt when it already fits, otherwise a prefix of it.
+	 */
+	private truncatePrompt(prompt: string, contextSize = 128_000, minChunkSize = 140): string {
+		if (!prompt) {
+			return ""
+		}
+
+		const tokenCount = encoder.encode(prompt).length
+		const excessTokens = tokenCount - contextSize
+
+		// Already within budget: nothing to cut.
+		if (excessTokens <= 0) {
+			return prompt
+		}
+
+		// Rough conversion from tokens to characters: assume about three
+		// characters per token and drop that many characters for the overflow.
+		const targetChars = prompt.length - excessTokens * 3
+
+		if (targetChars < minChunkSize) {
+			return prompt.slice(0, minChunkSize)
+		}
+
+		const chunks = new RecursiveCharacterTextSplitter({
+			chunkSize: targetChars,
+			chunkOverlap: 0,
+		}).splitText(prompt)
+		const firstChunk = chunks[0] ?? ""
+
+		// The splitter can hand back a first chunk as long as the input (token
+		// boundaries and separator handling do not always line up); in that
+		// case fall back to a plain character cut so the input always shrinks.
+		const candidate = firstChunk.length === prompt.length ? prompt.slice(0, targetChars) : firstChunk
+
+		// Re-check the shortened text and keep cutting until it fits.
+		return this.truncatePrompt(candidate, contextSize, minChunkSize)
+	}
+
+	/**
+	 * Estimates how many queries a full research run is expected to issue.
+	 *
+	 * For every level `i` from `depth` down to 0 it adds
+	 * `2^i * ceil(breadth / 2^i)`, i.e. the breadth is halved (rounded up)
+	 * each time the level doubles the number of branches.
+	 *
+	 * Example for breadth = 4, depth = 2:
+	 *   level 2: 2^2 * ceil(4 / 4) = 4
+	 *   level 1: 2^1 * ceil(4 / 2) = 4
+	 *   level 0: 2^0 * ceil(4 / 1) = 4
+	 *   total: 12
+	 *
+	 * @returns The summed query count across all levels (0 when `depth` is negative).
+	 */
+	private getTreeSize = ({ breadth, depth }: { breadth: number; depth: number }) => {
+		let total = 0
+		let level = depth
+
+		while (level >= 0) {
+			const branches = 2 ** level
+			total += branches * Math.ceil(breadth / branches)
+			level -= 1
+		}
+
+		return total
+	}
 }
@@ -1,3 +1,5 @@
+import { getEncoding } from "js-tiktoken"
+
 interface TextSplitterParams {
 	chunkSize: number
 	chunkOverlap: number
@@ -89,29 +91,27 @@ export class RecursiveCharacterTextSplitter extends TextSplitter implements Recu
 	splitText(text: string): string[] {
 		const finalChunks: string[] = []
 
-		// Get appropriate separator to use
+		// Get appropriate separator to use.
 		let separator: string = this.separators[this.separators.length - 1]!
+
 		for (const s of this.separators) {
 			if (s === "") {
 				separator = s
 				break
 			}
+
 			if (text.includes(s)) {
 				separator = s
 				break
 			}
 		}
 
 		// Now that we have the separator, split the text
-		let splits: string[]
-		if (separator) {
-			splits = text.split(separator)
-		} else {
-			splits = text.split("")
-		}
+		const splits = separator ? text.split(separator) : text.split("")
 
 		// Now go merging things, recursively splitting longer texts.
 		let goodSplits: string[] = []
+
 		for (const s of splits) {
 			if (s.length < this.chunkSize) {
 				goodSplits.push(s)
@@ -121,14 +121,19 @@ export class RecursiveCharacterTextSplitter extends TextSplitter implements Recu
 					finalChunks.push(...mergedText)
 					goodSplits = []
 				}
+
 				const otherInfo = this.splitText(s)
 				finalChunks.push(...otherInfo)
 			}
 		}
+
 		if (goodSplits.length) {
 			const mergedText = this.mergeSplits(goodSplits, separator)
 			finalChunks.push(...mergedText)
 		}
+
 		return finalChunks
 	}
 }
+
+export const encoder = getEncoding("o200k_base") // Shared js-tiktoken encoder (o200k_base), exported so other modules can count tokens with it.