ItzCrazyKns · VibhorGautam · Mar 8, 2026 · Mar 8, 2026
diff --git a/src/lib/agents/search/api.ts b/src/lib/agents/search/api.ts
@@ -49,13 +49,26 @@ class APISearchAgent {
       type: 'researchComplete',
     });
 
-    const finalContext =
-      searchResults?.searchFindings
-        .map(
-          (f, index) =>
-            `<result index=${index + 1} title=${f.metadata.title}>${f.content}</result>`,
-        )
-        .join('\n') || '';
+    // Cap each result and total context to stay within reasonable token budgets
+    const maxCharsPerResult = 24000;
+    const maxTotalChars = 80000;
+
+    let totalChars = 0;
+    const contextParts: string[] = [];
+
+    if (searchResults?.searchFindings) {
+      for (let i = 0; i < searchResults.searchFindings.length; i++) {
+        const f = searchResults.searchFindings[i];
+        const truncated = f.content.slice(0, maxCharsPerResult);
+        const part = `<result index=${i + 1} title=${f.metadata.title}>${truncated}</result>`;
+
+        if (totalChars + part.length > maxTotalChars) break;
+        totalChars += part.length;
+        contextParts.push(part);
+      }
+    }
+
+    const finalContext = contextParts.join('\n');
 
     const widgetContext = widgetOutputs
       .map((o) => {

diff --git a/src/lib/agents/search/index.ts b/src/lib/agents/search/index.ts
@@ -98,13 +98,26 @@ class SearchAgent {
       type: 'researchComplete',
     });
 
-    const finalContext =
-      searchResults?.searchFindings
-        .map(
-          (f, index) =>
-            `<result index=${index + 1} title=${f.metadata.title}>${f.content}</result>`,
-        )
-        .join('\n') || '';
+    // Cap each result and total context to stay within reasonable token budgets
+    const maxCharsPerResult = 24000;
+    const maxTotalChars = 80000;
+
+    let totalChars = 0;
+    const contextParts: string[] = [];
+
+    if (searchResults?.searchFindings) {
+      for (let i = 0; i < searchResults.searchFindings.length; i++) {
+        const f = searchResults.searchFindings[i];
+        const truncated = f.content.slice(0, maxCharsPerResult);
+        const part = `<result index=${i + 1} title=${f.metadata.title}>${truncated}</result>`;
+
+        if (totalChars + part.length > maxTotalChars) break;
+        totalChars += part.length;
+        contextParts.push(part);
+      }
+    }
+
+    const finalContext = contextParts.join('\n');
 
     const widgetContext = widgetOutputs
       .map((o) => {

diff --git a/src/lib/agents/search/researcher/actions/scrapeURL.ts b/src/lib/agents/search/researcher/actions/scrapeURL.ts
@@ -3,6 +3,7 @@ import { ResearchAction } from '../../types';
 import { Chunk, ReadingResearchBlock } from '@/lib/types';
 import TurnDown from 'turndown';
 import path from 'path';
+import { splitText } from '@/lib/utils/splitText';
 
 const turndownService = new TurnDown();
 
@@ -40,11 +41,18 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
       params.urls.map(async (url) => {
         try {
           const res = await fetch(url);
-          const text = await res.text();
+          let text = await res.text();
 
           const title =
             text.match(/<title>(.*?)<\/title>/i)?.[1] || `Content from ${url}`;
 
+          // Cap raw HTML before Turndown so we don't spend CPU converting
+          // megabytes of markup we'll mostly throw away after tokenization.
+          const maxHtmlChars = 200_000;
+          if (text.length > maxHtmlChars) {
+            text = text.slice(0, maxHtmlChars);
+          }
+
           if (
             !readingEmitted &&
             researchBlock &&
@@ -110,8 +118,14 @@ const scrapeURLAction: ResearchAction<typeof schema> = {
 
           const markdown = turndownService.turndown(text);
 
+          // Limit scraped content to avoid blowing up the context window.
+          // splitText chunks by token count — we only keep the first chunk.
+          const maxTokensPerPage = 6000;
+          const chunks = splitText(markdown, maxTokensPerPage, 0);
+          const content = chunks.length > 0 ? chunks[0] : markdown;
+
           results.push({
-            content: markdown,
+            content,
             metadata: {
               url,
               title: title,