Skip to content

Commit f93855c

Browse files
committed
Consolidate files
1 parent e856612 commit f93855c

File tree

6 files changed

+90
-112
lines changed

6 files changed

+90
-112
lines changed

src/services/deep-research/DeepResearchService.ts

Lines changed: 76 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ import {
2424
ResearchQuery,
2525
ResearchTokenUsage,
2626
} from "./types"
27-
import { truncatePrompt, trimPrompt } from "./utils/prompt"
28-
import { getTreeSize } from "./utils/progress"
27+
import { RecursiveCharacterTextSplitter, encoder } from "./TextSplitter"
2928

3029
export class DeepResearchService {
3130
public readonly providerId: string
@@ -83,7 +82,7 @@ export class DeepResearchService {
8382
private researchSystemPrompt() {
8483
const now = new Date().toISOString()
8584

86-
return trimPrompt(`
85+
return this.trimPrompt(`
8786
You are an expert researcher. Today is ${now}. Follow these instructions when responding:
8887
- You may be asked to research subjects that is after your knowledge cutoff, assume the user is right when presented with news.
8988
- The user is a highly experienced analyst, no need to simplify it, be as detailed as possible and make sure your response is correct.
@@ -102,7 +101,7 @@ export class DeepResearchService {
102101
private chatSystemPrompt() {
103102
const now = new Date().toISOString()
104103

105-
return trimPrompt(`
104+
return this.trimPrompt(`
106105
You are an expert research assistant helping to explain and clarify research findings. Today is ${now}. Follow these guidelines:
107106
108107
- You always answer the with markdown formatting. You will be penalized if you do not answer with markdown when it would be possible.
@@ -139,7 +138,7 @@ export class DeepResearchService {
139138
public async generateFollowUps({ query, count = 3 }: { query: string; count?: number }) {
140139
console.log(`[generateFollowUps] generating up to ${count} follow-up questions`)
141140

142-
const prompt = trimPrompt(`
141+
const prompt = this.trimPrompt(`
143142
Given the following query from the user, ask some follow up questions to clarify the research direction.
144143
Return a maximum of ${count} questions, but return less if the original query is clear.
145144
Make sure each question is unique and not similar to each other.
@@ -191,7 +190,7 @@ export class DeepResearchService {
191190
}): Promise<ResearchQuery[]> {
192191
console.log(`[generateQueries] generating up to ${breadth} queries`)
193192

194-
const prompt = trimPrompt(`
193+
const prompt = this.trimPrompt(`
195194
Given the following prompt from the user, generate a list of SERP queries to research the topic.
196195
Return a maximum of ${breadth} queries, but feel free to return less if the original prompt is clear.
197196
Make sure each query is unique and not similar to each other: <prompt>${query}</prompt>
@@ -251,11 +250,11 @@ export class DeepResearchService {
251250
const contents = result.data
252251
.map((item) => item.markdown)
253252
.filter((content) => content !== undefined)
254-
.map((content) => truncatePrompt(content, 25_000))
253+
.map((content) => this.truncatePrompt(content, 25_000))
255254

256255
console.log(`[generateLearnings] extracting learnings from "${query}"`)
257256

258-
const prompt = trimPrompt(`
257+
const prompt = this.trimPrompt(`
259258
Given the following contents from a SERP search for the query <query>${query}</query>, generate a list of learnings from the contents.
260259
Return a maximum of ${learningsCount} learnings, but feel free to return less if the contents are clear.
261260
Make sure each learning is unique and not similar to each other.
@@ -303,12 +302,12 @@ export class DeepResearchService {
303302
}
304303

305304
private async generateReport({ learnings, visitedUrls }: { learnings: string[]; visitedUrls: string[] }) {
306-
const learningsString = truncatePrompt(
305+
const learningsString = this.truncatePrompt(
307306
learnings.map((learning) => `<learning>\n${learning}\n</learning>`).join("\n"),
308307
150_000,
309308
)
310309

311-
const prompt = trimPrompt(`
310+
const prompt = this.trimPrompt(`
312311
Given the following prompt from the user, write a final report on the topic using the learnings from research.
313312
Make it as as detailed as possible, aim for 3 or more pages, include ALL the learnings from research:
314313
@@ -375,7 +374,7 @@ export class DeepResearchService {
375374
private async runDeepResearch() {
376375
this.status = "research"
377376

378-
const query = trimPrompt(`
377+
const query = this.trimPrompt(`
379378
Initial Query: ${this.inquiry.initialQuery}
380379
381380
Follow-up Questions and Answers:
@@ -418,7 +417,7 @@ export class DeepResearchService {
418417
}),
419418
})
420419

421-
this.progress.expectedQueries = getTreeSize({ breadth: this.breadth, depth: this.depth })
420+
this.progress.expectedQueries = this.getTreeSize({ breadth: this.breadth, depth: this.depth })
422421
onProgressUpdated()
423422

424423
console.log(`[transitionToResearch] query = ${query}`)
@@ -462,7 +461,7 @@ export class DeepResearchService {
462461

463462
this.messages.push({
464463
role: "system",
465-
content: trimPrompt(`
464+
content: this.trimPrompt(`
466465
${this.chatSystemPrompt()}
467466
468467
Here is the complete research context:
@@ -585,7 +584,7 @@ export class DeepResearchService {
585584

586585
console.log(`[deepResearch] researching deeper, breadth: ${newBreadth}, depth: ${newDepth}`)
587586

588-
const nextQuery = trimPrompt(`
587+
const nextQuery = this.trimPrompt(`
589588
Previous research goal: ${researchGoal}
590589
Follow-up research directions: ${newLearnings.followUpQuestions.map((q) => `\n${q}`).join("")}
591590
`)
@@ -798,4 +797,67 @@ export class DeepResearchService {
798797

799798
this.postMessage({ type: "research.tokenUsage", text: JSON.stringify(this.tokenUsage) })
800799
}
800+
801+
// Normalizes a multi-line template-literal prompt by stripping the
// leading/trailing whitespace from every line, so indentation used for
// source readability never leaks into the text sent to the model.
private trimPrompt(prompt: string) {
  const trimmedLines: string[] = []

  for (const line of prompt.split("\n")) {
    trimmedLines.push(line.trim())
  }

  return trimmedLines.join("\n")
}
807+
808+
// Truncates `prompt` so that its token count (as measured by the shared
// tiktoken `encoder`) fits within `contextSize` tokens, returning the
// possibly-shortened prompt. `minChunkSize` is the minimum number of
// characters kept when a hard character-level cut is the only option.
private truncatePrompt(prompt: string, contextSize = 128_000, minChunkSize = 140): string {
  // Empty/undefined input: nothing to truncate.
  if (!prompt) {
    return ""
  }

  const length = encoder.encode(prompt).length

  // Already within budget - return unchanged.
  if (length <= contextSize) {
    return prompt
  }

  const overflowTokens = length - contextSize

  // On average it's roughly 3 characters per token, so multiply by 3 to get
  // a rough estimate of how many characters need to be removed.
  const chunkSize = prompt.length - overflowTokens * 3

  // If the estimate undershoots the minimum, fall back to a hard cut of the
  // first `minChunkSize` characters.
  if (chunkSize < minChunkSize) {
    return prompt.slice(0, minChunkSize)
  }

  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize,
    chunkOverlap: 0,
  })

  // Keep only the first chunk; the splitter prefers natural boundaries
  // (paragraphs/sentences) over a mid-word cut.
  const truncated = splitter.splitText(prompt)[0] ?? ""

  // Last catch: there's a chance the truncated prompt is the same length as
  // the original prompt - due to how tokens are split and the inner workings
  // of the splitter - which would loop forever. Handle this case by doing a
  // hard character cut before recursing.
  if (truncated.length === prompt.length) {
    return this.truncatePrompt(prompt.slice(0, chunkSize), contextSize, minChunkSize)
  }

  // Recursively truncate until the prompt is within the context size
  // (the character-per-token estimate may not remove enough in one pass).
  return this.truncatePrompt(truncated, contextSize, minChunkSize)
}
845+
846+
// Calculate total expected queries across all depth levels.
847+
// At each level, the breadth is halved, so level 1 has full breadth,
848+
// level 2 has breadth/2, level 3 has breadth/4, etc.
849+
// For breadth = 4, depth = 2, the expected queries are:
850+
// D2: 2^2 * 1 = 4
851+
// D1: 2^1 * 2 = 4
852+
// D0: 2^0 * 4 = 4
853+
// Total: 12
854+
private getTreeSize = ({ breadth, depth }: { breadth: number; depth: number }) => {
855+
let value = 0
856+
857+
for (let i = depth; i >= 0; i--) {
858+
value = value + Math.pow(2, i) * Math.ceil(breadth / Math.pow(2, i))
859+
}
860+
861+
return value
862+
}
801863
}

src/services/deep-research/TextSplitter.ts

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { getEncoding } from "js-tiktoken"
2+
13
interface TextSplitterParams {
24
chunkSize: number
35
chunkOverlap: number
@@ -89,29 +91,27 @@ export class RecursiveCharacterTextSplitter extends TextSplitter implements Recu
8991
splitText(text: string): string[] {
9092
const finalChunks: string[] = []
9193

92-
// Get appropriate separator to use
94+
// Get appropriate separator to use.
9395
let separator: string = this.separators[this.separators.length - 1]!
96+
9497
for (const s of this.separators) {
9598
if (s === "") {
9699
separator = s
97100
break
98101
}
102+
99103
if (text.includes(s)) {
100104
separator = s
101105
break
102106
}
103107
}
104108

105109
// Now that we have the separator, split the text
106-
let splits: string[]
107-
if (separator) {
108-
splits = text.split(separator)
109-
} else {
110-
splits = text.split("")
111-
}
110+
const splits = separator ? text.split(separator) : text.split("")
112111

113112
// Now go merging things, recursively splitting longer texts.
114113
let goodSplits: string[] = []
114+
115115
for (const s of splits) {
116116
if (s.length < this.chunkSize) {
117117
goodSplits.push(s)
@@ -121,14 +121,19 @@ export class RecursiveCharacterTextSplitter extends TextSplitter implements Recu
121121
finalChunks.push(...mergedText)
122122
goodSplits = []
123123
}
124+
124125
const otherInfo = this.splitText(s)
125126
finalChunks.push(...otherInfo)
126127
}
127128
}
129+
128130
if (goodSplits.length) {
129131
const mergedText = this.mergeSplits(goodSplits, separator)
130132
finalChunks.push(...mergedText)
131133
}
134+
132135
return finalChunks
133136
}
134137
}
138+
139+
export const encoder = getEncoding("o200k_base")

src/services/deep-research/__tests__/utils/progress.test.ts

Lines changed: 0 additions & 19 deletions
This file was deleted.

src/services/deep-research/utils/progress.ts

Lines changed: 0 additions & 17 deletions
This file was deleted.

src/services/deep-research/utils/prompt.ts

Lines changed: 0 additions & 54 deletions
This file was deleted.

webview-ui/package-lock.json

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)