Skip to content

Commit 758864c

Browse files
committed
2 parents 752099f + c90e83b commit 758864c

File tree

23 files changed

+676
-100
lines changed

23 files changed

+676
-100
lines changed

pnpm-lock.yaml

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
import { search } from "@/agents/deep-research/tools/search.tool";
2+
import { planSearch, PlanSearchOutput } from "@/agents/deep-research/tools/plan-search.tool";
3+
import { websiteToMd } from "@/agents/deep-research/tools/website-to-md.tool";
4+
import { summarize } from "@/agents/deep-research/tools/summarize.tool";
5+
import { judgeResults } from "@/agents/deep-research/tools/judge-results.tool";
6+
import { extractFacts } from "@/agents/deep-research/tools/extract-facts.tool";
7+
import { judgeFacts, JudgeFactsOutput } from "@/agents/deep-research/tools/judge-facts.tool";
8+
import { z } from "zod";
9+
import { pickaxe } from "@/pickaxe-client";
10+
11+
/** Input accepted by the deep research agent: a single free-text `message` (the research query). */
const MessageSchema = z.object({ message: z.string() });
14+
15+
/**
 * One web source gathered during research.
 *
 * `index` is the number that extracted facts carry as `sourceIndex` so a
 * fact can be traced back to the source it came from.
 */
const SourceSchema = z.object({
  // Address of the page.
  url: z.string(),
  // Page title, when one is available.
  title: z.string().optional(),
  // Numeric identifier of this source.
  index: z.number(),
});
20+
21+
/** A single extracted fact together with the index of the source it came from. */
const ResearchFactSchema = z.object({
  text: z.string(),
  sourceIndex: z.number(),
});

/** Verdict on whether the collected facts are sufficient, plus what is still missing. */
const FactsJudgmentSchema = z.object({
  reason: z.string(),
  hasEnoughFacts: z.boolean(),
  missingAspects: z.array(z.string()),
});

/** The fields reported under `result` in the agent's response. */
const ResearchResultSchema = z.object({
  isComplete: z.boolean(),
  reason: z.string(),
  sources: z.array(SourceSchema),
  summary: z.string().optional(),
  facts: z.array(ResearchFactSchema).optional(),
  iterations: z.number().optional(),
  factsJudgment: FactsJudgmentSchema.optional(),
  searchPlans: z.string().optional(),
});

/** Output of the deep research agent: everything is nested under a single `result` key. */
const ResponseSchema = z.object({ result: ResearchResultSchema });
40+
41+
42+
/** Static type of a source, derived from `SourceSchema`. */
type Source = z.infer<typeof SourceSchema>;

/** A fact statement and the index of the source it was extracted from. */
type Fact = { text: string; sourceIndex: number };
47+
48+
/**
 * Deep research agent.
 *
 * Runs up to `maxIterations` rounds of: plan search queries -> execute the
 * searches -> convert newly found pages to markdown -> extract facts ->
 * judge whether the accumulated facts are sufficient. After the loop it
 * always summarizes the collected facts and judges the summary.
 *
 * Source bookkeeping: every distinct URL is registered exactly once for the
 * whole run and receives an `index` equal to its position in `allSources`.
 * Facts carry that index as `sourceIndex`, so the index has to be unique
 * across iterations; numbering sources per iteration would make several
 * sources share the same index and make fact attribution ambiguous.
 *
 * @returns `{ result }` with the completeness verdict and its reason, all
 *   registered sources, the summary, all facts, the number of iterations
 *   run, the last facts judgment and the reasoning of the last search plan.
 */
export const deepResearchAgent = pickaxe.agent({
  name: "deep-research-agent",
  description: "A tool that performs deep research on a given query",
  inputSchema: MessageSchema,
  outputSchema: ResponseSchema,
  executionTimeout: "15m",
  fn: async (input, ctx) => {
    ctx.logger.info(`Starting deep research agent with query: ${input.message}`);

    let iteration = 0;
    const maxIterations = 3;
    const allFacts: Fact[] = [];
    const allSources: Source[] = [];
    // URLs already present in allSources; used to de-duplicate across iterations.
    const seenUrls = new Set<string>();
    let missingAspects: string[] = [];
    let plan: PlanSearchOutput | undefined = undefined;
    let factsJudgment: JudgeFactsOutput | undefined = undefined;

    while (!ctx.cancelled && iteration < maxIterations) {
      iteration++;
      ctx.logger.info(`Starting iteration ${iteration}/${maxIterations}`);

      // Plan the search based on the query, existing facts, and missing aspects
      ctx.logger.info(
        `Planning search with ${allFacts.length} existing facts and ${missingAspects.length} missing aspects`
      );

      plan = await planSearch.run({
        query: input.message,
        existingFacts: allFacts.map((f) => f.text),
        missingAspects: missingAspects,
      });

      ctx.logger.info(
        `Search plan for iteration ${iteration}: ${plan.reasoning}. Queries:`
      );

      for (const query of plan.queries) {
        ctx.logger.info(`${query}`);
      }

      ctx.logger.info(`Executing ${plan.queries.length} search queries`);
      const results = await search.run(
        plan.queries.map((query: string) => ({ query }))
      );

      // Flatten the per-query results, then register only URLs not seen in
      // this or any earlier iteration. The index is the source's position in
      // allSources, which keeps it unique for the whole run.
      const foundSources = results.flatMap((result) => result.sources);
      const newSources: Source[] = [];
      for (const source of foundSources) {
        if (seenUrls.has(source.url)) {
          continue;
        }
        seenUrls.add(source.url);
        const registered = { ...source, index: allSources.length };
        allSources.push(registered);
        newSources.push(registered);
      }

      ctx.logger.info(
        `Found ${foundSources.length} new sources, ${newSources.length} unique sources`
      );

      let newFacts: Fact[] = [];

      // Nothing to convert or extract when the searches only returned
      // already-known URLs (or nothing at all).
      if (newSources.length > 0) {
        // Convert sources to markdown (newSources is already in index order)
        ctx.logger.info(`Converting ${newSources.length} sources to markdown`);
        const mdResults = await websiteToMd.run(
          newSources.map((source) => ({
            url: source.url,
            index: source.index,
            title: source.title ?? "",
          }))
        );

        // Extract facts from each source
        ctx.logger.info("Extracting facts from markdown content");
        const factsResults = await extractFacts.run(
          mdResults.map((result) => ({
            source: result.markdown,
            query: input.message,
            sourceInfo: {
              url: result.url,
              title: result.title,
              index: result.index,
            },
          }))
        );

        newFacts = factsResults.flatMap((result) => result.facts);
      }

      // Add new facts to all facts
      allFacts.push(...newFacts);
      ctx.logger.info(
        `Extracted ${newFacts.length} new facts, total facts: ${allFacts.length}`
      );

      // Judge if we have enough facts
      ctx.logger.info("Judging if we have enough facts");
      factsJudgment = await judgeFacts.run({
        query: input.message,
        facts: allFacts.map((f) => f.text),
      });

      // Update missing aspects for next iteration
      missingAspects = factsJudgment.missingAspects;
      ctx.logger.info(`Missing aspects: ${missingAspects.join(", ")}`);

      // If we have enough facts or reached max iterations, generate final summary
      if (factsJudgment.hasEnoughFacts || iteration >= maxIterations) {
        ctx.logger.info(
          `Generating final summary (hasEnoughFacts: ${
            factsJudgment.hasEnoughFacts
          }, reachedMaxIterations: ${iteration >= maxIterations})`
        );
        break;
      }
    }

    // Always summarize and judge results after the loop
    const summarizeResult = await summarize.run({
      text: input.message,
      facts: allFacts,
      sources: allSources,
    });

    ctx.logger.info("Judging final results");
    const judgeResult = await judgeResults.run({
      query: input.message,
      result: summarizeResult.summary,
    });

    ctx.logger.info(
      `Deep research complete (isComplete: ${judgeResult.isComplete}, totalFacts: ${allFacts.length}, totalSources: ${allSources.length}, iterations: ${iteration})`
    );

    return {
      result: {
        isComplete: judgeResult.isComplete,
        reason: judgeResult.reason,
        sources: allSources,
        summary: summarizeResult.summary,
        facts: allFacts,
        iterations: iteration,
        factsJudgment: factsJudgment,
        // Only the reasoning of the most recent plan is reported.
        searchPlans: plan?.reasoning,
      },
    };
  },
});
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import { search } from '@/agents/deep-research/tools/search.tool';
2+
import { summarize } from '@/agents/deep-research/tools/summarize.tool';
3+
import { pickaxe } from '@/pickaxe-client';
4+
5+
/** Toolbox bundling the `search` and `summarize` tools. */
export const deepResearchTaskbox = pickaxe.toolbox({ tools: [search, summarize] });
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
export * from './deep-research.agent';
2+
export * from './deep-research.toolbox';
3+
export * from './tools';
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import { z } from "zod";
2+
import { generateObject } from "ai";
3+
import { openai } from "@ai-sdk/openai";
4+
import { pickaxe } from "@/pickaxe-client";
5+
6+
/** Identifies the source a piece of content came from. */
const SourceInfoSchema = z.object({
  url: z.string(),
  title: z.string().optional(),
  index: z.number(),
});

/**
 * Input of the extract-facts tool: the source content, the query the facts
 * must relate to, and the identifying info of the source.
 */
const ExtractFactsInputSchema = z.object({
  source: z.string(),
  query: z.string(),
  sourceInfo: SourceInfoSchema,
});

/** Static type of the extract-facts input. */
type ExtractFactsInput = z.infer<typeof ExtractFactsInputSchema>;
17+
18+
/** One extracted fact plus the index of the source it was taken from. */
const FactSchema = z.object({ text: z.string(), sourceIndex: z.number() });

/** Output of the extract-facts tool: the list of extracted facts. */
const ExtractFactsOutputSchema = z.object({ facts: z.array(FactSchema) });
26+
27+
/**
 * Tool that asks the model for the factual statements in `input.source`
 * that are relevant to `input.query`.
 *
 * Every returned fact is tagged with `input.sourceInfo.index` as its
 * `sourceIndex`. A blank source yields an empty fact list without calling
 * the model.
 */
export const extractFacts = pickaxe.tool({
  name: "extract-facts",
  description: "Extract relevant facts from a source that are related to a query",
  inputSchema: ExtractFactsInputSchema,
  outputSchema: ExtractFactsOutputSchema,
  fn: async (input, ctx) => {
    // A source with no content cannot contain facts. Skipping the model call
    // avoids both the cost and the risk of invented facts being attributed
    // to this source.
    if (input.source.trim().length === 0) {
      return { facts: [] };
    }

    const result = await generateObject({
      // Lets the surrounding run abort the in-flight model request.
      abortSignal: ctx.abortController.signal,
      prompt: `
Extract relevant facts from the following source that are related to this query:
"""${input.query}"""

Source:
"""${input.source}"""

Extract only factual statements that are directly relevant to the query. Each fact should be a complete, standalone statement.
`,
      model: openai("gpt-4.1-mini"),
      // The model only returns plain strings; the source index is attached below.
      schema: z.object({
        facts: z.array(z.string()),
      }),
    });

    return {
      facts: result.object.facts.map((fact) => ({
        text: fact,
        sourceIndex: input.sourceInfo.index,
      })),
    };
  },
});
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
export * from './plan-search.tool';
2+
export * from './search.tool';
3+
export * from './summarize.tool';
4+
export * from './extract-facts.tool';
5+
export * from './judge-results.tool';
6+
export * from './judge-facts.tool';
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import { z } from "zod";
2+
import { generateObject } from "ai";
3+
import { openai } from "@ai-sdk/openai";
4+
import { pickaxe } from "@/pickaxe-client";
5+
6+
/** Input of the judge-facts tool: the query and the fact statements gathered so far. */
const JudgeFactsInputSchema = z.object({ query: z.string(), facts: z.array(z.string()) });
10+
11+
/**
 * Output of the judge-facts tool: whether the facts suffice, the reason for
 * that verdict, and the aspects that are still missing.
 */
const JudgeFactsOutputSchema = z.object({
  hasEnoughFacts: z.boolean(),
  reason: z.string(),
  missingAspects: z.array(z.string()),
});

/** Static type of the judge-facts output. */
export type JudgeFactsOutput = z.infer<typeof JudgeFactsOutputSchema>;
18+
19+
/**
 * Tool that asks the model whether the given facts are enough to answer the
 * query, and which aspects are still missing.
 */
export const judgeFacts = pickaxe.tool({
  name: "judge-facts",
  description: "Judge if we have enough facts to comprehensively answer a query",
  inputSchema: JudgeFactsInputSchema,
  outputSchema: JudgeFactsOutputSchema,
  fn: async (input, ctx) => {
    // Render the facts as a 1-based numbered list, one fact per line.
    const numberedFacts = input.facts
      .map((statement, position) => `${position + 1}. ${statement}`)
      .join("\n");

    const prompt = `
Evaluate if we have enough facts to comprehensively answer this query:
"""${input.query}"""

Current facts:
${numberedFacts}

Consider:
1. Are there any key aspects of the query that aren't covered by the current facts?
2. Are the facts diverse enough to provide a complete picture?
3. Are there any gaps in the information that would prevent a comprehensive answer?
4. Are there any technical jargon words that are not defined in the facts that require additional research?
`;

    // Shape the model is asked to produce.
    const verdictSchema = z.object({
      hasEnoughFacts: z.boolean(),
      reason: z.string(),
      missingAspects: z.array(z.string()),
    });

    const { object: verdict } = await generateObject({
      abortSignal: ctx.abortController.signal,
      prompt,
      model: openai("gpt-4.1-mini"),
      schema: verdictSchema,
    });

    const { hasEnoughFacts, reason, missingAspects } = verdict;
    return { hasEnoughFacts, reason, missingAspects };
  },
});

0 commit comments

Comments
 (0)