|
| 1 | +import { search } from "@/agents/deep-research/tools/search.tool"; |
| 2 | +import { planSearch, PlanSearchOutput } from "@/agents/deep-research/tools/plan-search.tool"; |
| 3 | +import { websiteToMd } from "@/agents/deep-research/tools/website-to-md.tool"; |
| 4 | +import { summarize } from "@/agents/deep-research/tools/summarize.tool"; |
| 5 | +import { judgeResults } from "@/agents/deep-research/tools/judge-results.tool"; |
| 6 | +import { extractFacts } from "@/agents/deep-research/tools/extract-facts.tool"; |
| 7 | +import { judgeFacts, JudgeFactsOutput } from "@/agents/deep-research/tools/judge-facts.tool"; |
| 8 | +import { z } from "zod"; |
| 9 | +import { pickaxe } from "@/pickaxe-client"; |
| 10 | + |
/** Input payload for the agent: a single free-text `message` holding the research query. */
const MessageSchema = z.object({ message: z.string() });
| 14 | + |
/** One source entry reported in the agent output. */
const SourceSchema = z.object({
  /** Numeric position assigned to this source by the agent. */
  index: z.number(),
  /** Address of the source. */
  url: z.string(),
  /** Title of the source; may be absent. */
  title: z.string().optional(),
});
| 20 | + |
/** A single fact: its text plus the `sourceIndex` it was reported with. */
const FactSchema = z.object({
  text: z.string(),
  sourceIndex: z.number(),
});

/** Verdict on whether the collected facts are sufficient, with any aspects still missing. */
const FactsJudgmentSchema = z.object({
  reason: z.string(),
  hasEnoughFacts: z.boolean(),
  missingAspects: z.array(z.string()),
});

/**
 * Output payload of the agent. Everything lives under `result`; only
 * `isComplete`, `reason` and `sources` are required.
 */
const ResponseSchema = z.object({
  result: z.object({
    isComplete: z.boolean(),
    reason: z.string(),
    sources: z.array(SourceSchema),
    summary: z.string().optional(),
    facts: z.array(FactSchema).optional(),
    iterations: z.number().optional(),
    factsJudgment: FactsJudgmentSchema.optional(),
    searchPlans: z.string().optional(),
  }),
});
| 40 | + |
| 41 | + |
/** Source entry type, inferred from `SourceSchema`. */
type Source = z.infer<typeof SourceSchema>;

/** A fact's text together with the `sourceIndex` it was reported with. */
interface Fact {
  text: string;
  sourceIndex: number;
}
| 47 | + |
/**
 * Deep research agent.
 *
 * Runs up to `maxIterations` rounds of: plan search queries, execute them,
 * convert the newly found sources to markdown, extract facts from them, and
 * judge whether the accumulated facts are sufficient. After the loop it always
 * summarizes the accumulated facts and judges the summary.
 *
 * Sources are deduplicated by URL across the whole run and each one receives a
 * run-wide `index` equal to its position in the returned `sources` array, so
 * indices never repeat between iterations.
 *
 * @param input - `{ message }`, the research query.
 * @returns `{ result }` with the completeness verdict, its reason, all sources,
 *   the summary, all facts, the number of iterations run, the last facts
 *   judgment, and the reasoning of the last search plan.
 */
export const deepResearchAgent = pickaxe.agent({
  name: "deep-research-agent",
  description: "A tool that performs deep research on a given query",
  inputSchema: MessageSchema,
  outputSchema: ResponseSchema,
  executionTimeout: "15m",
  fn: async (input, ctx) => {
    ctx.logger.info(`Starting deep research agent with query: ${input.message}`);

    let iteration = 0;
    const maxIterations = 3;
    const allFacts: Fact[] = [];
    const allSources: Source[] = [];
    // URLs already recorded in allSources, used to deduplicate across iterations.
    const seenUrls = new Set<string>();
    let missingAspects: string[] = [];
    let plan: PlanSearchOutput | undefined = undefined;
    let factsJudgment: JudgeFactsOutput | undefined = undefined;

    while (!ctx.cancelled && iteration < maxIterations) {
      iteration++;
      ctx.logger.info(`Starting iteration ${iteration}/${maxIterations}`);

      // Plan the search based on the query, existing facts, and missing aspects
      ctx.logger.info(
        `Planning search with ${allFacts.length} existing facts and ${missingAspects.length} missing aspects`
      );

      plan = await planSearch.run({
        query: input.message,
        existingFacts: allFacts.map((f) => f.text),
        missingAspects: missingAspects,
      });

      ctx.logger.info(
        `Search plan for iteration ${iteration}: ${plan.reasoning}. Queries:`
      );

      for (const query of plan.queries) {
        ctx.logger.info(`${query}`);
      }

      ctx.logger.info(`Executing ${plan.queries.length} search queries`);
      const results = await search.run(
        plan.queries.map((query: string) => ({ query }))
      );

      // Flatten the results, then keep only URLs not seen earlier in this run
      // (including earlier in this same batch).
      const foundSources = results.flatMap((result) => result.sources);
      const freshSources = foundSources
        .filter((source) => {
          if (seenUrls.has(source.url)) {
            return false;
          }
          seenUrls.add(source.url);
          return true;
        })
        // allSources has not been extended yet, so its length is the first
        // free index; this keeps each index equal to the array position.
        .map((source, offset) => ({
          ...source,
          index: allSources.length + offset,
        }));

      ctx.logger.info(
        `Found ${foundSources.length} new sources, ${freshSources.length} unique sources`
      );

      // Add new sources to all sources
      allSources.push(...freshSources);

      let newFacts: Fact[] = [];
      if (freshSources.length > 0) {
        // Convert sources to markdown
        ctx.logger.info(`Converting ${freshSources.length} sources to markdown`);
        const mdResults = await websiteToMd.run(
          freshSources.map((source) => ({
            url: source.url,
            index: source.index,
            title: source.title || "",
          }))
        );

        // Extract facts from each source
        ctx.logger.info("Extracting facts from markdown content");
        const factsResults = await extractFacts.run(
          mdResults.map((result) => ({
            source: result.markdown,
            query: input.message,
            sourceInfo: {
              url: result.url,
              title: result.title,
              index: result.index,
            },
          }))
        );

        newFacts = factsResults.flatMap((result) => result.facts);
      } else {
        // Nothing new to read this round; avoid invoking the tools with empty input.
        ctx.logger.info("No new sources found; skipping markdown conversion and fact extraction");
      }

      // Add new facts to all facts
      allFacts.push(...newFacts);
      ctx.logger.info(
        `Extracted ${newFacts.length} new facts, total facts: ${allFacts.length}`
      );

      // Judge if we have enough facts
      ctx.logger.info("Judging if we have enough facts");
      factsJudgment = await judgeFacts.run({
        query: input.message,
        facts: allFacts.map((f) => f.text),
      });

      // Update missing aspects for next iteration
      missingAspects = factsJudgment.missingAspects;
      ctx.logger.info(`Missing aspects: ${missingAspects.join(", ")}`);

      // If we have enough facts or reached max iterations, generate final summary
      if (factsJudgment.hasEnoughFacts || iteration >= maxIterations) {
        ctx.logger.info(
          `Generating final summary (hasEnoughFacts: ${
            factsJudgment.hasEnoughFacts
          }, reachedMaxIterations: ${iteration >= maxIterations})`
        );
        break;
      }
    }

    // Always summarize and judge results after the loop, whichever way the
    // loop ended (enough facts, max iterations, or cancellation).
    const summarizeResult = await summarize.run({
      text: input.message,
      facts: allFacts,
      sources: allSources,
    });

    ctx.logger.info("Judging final results");
    const judgeResult = await judgeResults.run({
      query: input.message,
      result: summarizeResult.summary,
    });

    ctx.logger.info(
      `Deep research complete (isComplete: ${judgeResult.isComplete}, totalFacts: ${allFacts.length}, totalSources: ${allSources.length}, iterations: ${iteration})`
    );

    return {
      result: {
        isComplete: judgeResult.isComplete,
        reason: judgeResult.reason,
        sources: allSources,
        summary: summarizeResult.summary,
        facts: allFacts,
        iterations: iteration,
        factsJudgment: factsJudgment,
        // Only the most recent plan's reasoning is reported.
        searchPlans: plan?.reasoning,
      },
    };
  },
});
0 commit comments