Skip to content

Commit 0fecc22

Browse files
committed
Add OpenAI Deep Research tutorial implementation using AgentEx state machines
- Implement 5-state workflow (TRIAGE, CLARIFYING, INSTRUCTION_BUILDING, RESEARCHING, WAITING_FOR_INPUT) - Add exact prompts from OpenAI Deep Research cookbook - Configure for o4-mini-deep-research-2025-06-26 model - Integrate WebSearchTool and custom MCP file search server - Include comprehensive error handling and state management - Update .gitignore to exclude development documentation files
1 parent 66a152d commit 0fecc22

File tree

24 files changed

+1539
-0
lines changed

24 files changed

+1539
-0
lines changed

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,12 @@ dist
1313
.envrc
1414
codegen.log
1515
Brewfile.lock.json
16+
17+
# OpenAI Deep Research Tutorial development files
18+
0.0.19
19+
agentex_openai_deep_research.md
20+
clarification_log.md
21+
debug_log.md
22+
deep_research_prompts.md
23+
openai_deep_research.md
24+
to_do.md
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
build/
8+
develop-eggs/
9+
dist/
10+
downloads/
11+
eggs/
12+
.eggs/
13+
lib/
14+
lib64/
15+
parts/
16+
sdist/
17+
var/
18+
wheels/
19+
*.egg-info/
20+
.installed.cfg
21+
*.egg
22+
23+
# Environments
24+
.env**
25+
.venv
26+
env/
27+
venv/
28+
ENV/
29+
env.bak/
30+
venv.bak/
31+
32+
# IDE
33+
.idea/
34+
.vscode/
35+
*.swp
36+
*.swo
37+
38+
# Git
39+
.git
40+
.gitignore
41+
42+
# Misc
43+
.DS_Store
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# syntax=docker/dockerfile:1.3
2+
FROM python:3.12-slim
3+
COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
4+
5+
# Install system dependencies
6+
RUN apt-get update && apt-get install -y \
7+
htop \
8+
vim \
9+
curl \
10+
tar \
11+
python3-dev \
12+
postgresql-client \
13+
build-essential \
14+
libpq-dev \
15+
gcc \
16+
cmake \
17+
netcat-openbsd \
18+
&& apt-get clean \
19+
&& rm -rf /var/lib/apt/lists/*
20+
21+
# Install tctl (Temporal CLI). The release asset is chosen from the image's own architecture (dpkg reports amd64 or arm64) so the build works on both x86_64 and ARM hosts; -f makes curl fail on HTTP errors instead of saving an error page.
22+
RUN curl -fsSL "https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_$(dpkg --print-architecture).tar.gz" -o /tmp/tctl.tar.gz && \
23+
tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \
24+
chmod +x /usr/local/bin/tctl && \
25+
rm /tmp/tctl.tar.gz
26+
27+
RUN uv pip install --system --upgrade pip setuptools wheel
28+
29+
ENV UV_HTTP_TIMEOUT=1000
30+
31+
# Copy just the requirements file to optimize caching
32+
COPY 030_oai_deep_research/requirements.txt /app/requirements.txt
33+
34+
WORKDIR /app/
35+
36+
# Install the required Python packages
37+
RUN uv pip install --system -r requirements.txt
38+
39+
# Copy the project code
40+
COPY 030_oai_deep_research/project /app/project
41+
42+
WORKDIR /app/project
43+
44+
# Run the ACP server using uvicorn
45+
CMD ["uvicorn", "acp:acp", "--host", "0.0.0.0", "--port", "8000"]
46+
47+
# When we deploy the worker, we will replace the CMD with the following
48+
# CMD ["python", "-m", "run_worker"]
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from agentex import Agentex\n",
10+
"\n",
11+
"client = Agentex(base_url=\"http://localhost:5003\")\n"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"AGENT_NAME = \"at030-oai-deep-research\"\n"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": null,
26+
"metadata": {},
27+
"outputs": [],
28+
"source": [
29+
"# (REQUIRED) Create a new task. For Agentic agents, you must create a task for messages to be associated with.\n",
30+
"import uuid\n",
31+
"\n",
32+
"rpc_response = client.agents.create_task(\n",
33+
" agent_name=AGENT_NAME,\n",
34+
" params={\n",
35+
" \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n",
36+
" \"params\": {}\n",
37+
" }\n",
38+
")\n",
39+
"\n",
40+
"task = rpc_response.result\n",
41+
"print(task)\n"
42+
]
43+
},
44+
{
45+
"cell_type": "code",
46+
"execution_count": null,
47+
"metadata": {},
48+
"outputs": [],
49+
"source": [
50+
"# Send an event to the agent\n",
51+
"\n",
52+
"# send_event returns the created event, not the agent's reply. The reply arrives asynchronously as TaskMessage objects whose content is one of the following types:\n",
53+
"# - TextContent: A message with just text content \n",
54+
"# - DataContent: A message with JSON-serializable data content\n",
55+
"# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n",
56+
"# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n",
57+
"\n",
58+
"# When processing the subscribed task messages, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n",
59+
"\n",
60+
"rpc_response = client.agents.send_event(\n",
61+
" agent_name=AGENT_NAME,\n",
62+
" params={\n",
63+
" \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"I want to research the latest developments in artificial intelligence and machine learning\"},\n",
64+
" \"task_id\": task.id,\n",
65+
" }\n",
66+
")\n",
67+
"\n",
68+
"event = rpc_response.result\n",
69+
"print(event)\n"
70+
]
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": null,
75+
"metadata": {},
76+
"outputs": [],
77+
"source": [
78+
"# Subscribe to the async task messages produced by the agent\n",
79+
"from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n",
80+
"\n",
81+
"task_messages = subscribe_to_async_task_messages(\n",
82+
" client=client,\n",
83+
" task=task, \n",
84+
" only_after_timestamp=event.created_at, \n",
85+
" print_messages=True,\n",
86+
" rich_print=True,\n",
87+
" timeout=10, # Short timeout for clarifying questions\n",
88+
")\n"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"# Respond to clarifying questions from the agent\n",
98+
"# Replace this content with your actual response to the clarifying questions\n",
99+
"\n",
100+
"clarification_response = \"\"\"I'm specifically interested in:\n",
101+
"1. Recent breakthroughs in large language models like GPT-4, Claude, and Gemini\n",
102+
"2. Advances in computer vision and image generation models \n",
103+
"3. New developments in AI safety and alignment research\n",
104+
"4. Applications of AI in scientific research and drug discovery\n",
105+
"5. The latest funding rounds and company acquisitions in the AI space\n",
106+
"\n",
107+
"I want a comprehensive report that covers both technical advances and business/industry developments from the past 6 months.\"\"\"\n",
108+
"\n",
109+
"rpc_response = client.agents.send_event(\n",
110+
" agent_name=AGENT_NAME,\n",
111+
" params={\n",
112+
" \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": clarification_response},\n",
113+
" \"task_id\": task.id,\n",
114+
" }\n",
115+
")\n",
116+
"\n",
117+
"event = rpc_response.result\n",
118+
"print(event)\n"
119+
]
120+
},
121+
{
122+
"cell_type": "code",
123+
"execution_count": null,
124+
"metadata": {},
125+
"outputs": [],
126+
"source": [
127+
"# Subscribe to messages after clarification response\n",
128+
"from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n",
129+
"\n",
130+
"task_messages = subscribe_to_async_task_messages(\n",
131+
" client=client,\n",
132+
" task=task, \n",
133+
" only_after_timestamp=event.created_at, \n",
134+
" print_messages=True,\n",
135+
" rich_print=True,\n",
136+
" timeout=60, # Longer timeout for deep research\n",
137+
")\n"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {},
144+
"outputs": [],
145+
"source": [
146+
"# Ask a follow-up question after the research is complete\n",
147+
"follow_up_question = \"Can you provide more details about the specific AI safety techniques that have been developed recently?\"\n",
148+
"\n",
149+
"rpc_response = client.agents.send_event(\n",
150+
" agent_name=AGENT_NAME,\n",
151+
" params={\n",
152+
" \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": follow_up_question},\n",
153+
" \"task_id\": task.id,\n",
154+
" }\n",
155+
")\n",
156+
"\n",
157+
"event = rpc_response.result\n",
158+
"print(event)\n"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": null,
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"# Subscribe to follow-up response\n",
168+
"task_messages = subscribe_to_async_task_messages(\n",
169+
" client=client,\n",
170+
" task=task, \n",
171+
" only_after_timestamp=event.created_at, \n",
172+
" print_messages=True,\n",
173+
" rich_print=True,\n",
174+
" timeout=60,\n",
175+
")\n"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"metadata": {},
182+
"outputs": [],
183+
"source": [
184+
"# Send responses to clarifying questions\n",
185+
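"# The agent will ask 2-3 questions - answer them one by one (the sample answers in the following cells are about quantum computing, not the AI/ML request sent earlier in this notebook; replace them with answers that fit your own request)\n",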
186+
"\n",
187+
"# First clarification response\n",
188+
"rpc_response = client.agents.send_event(\n",
189+
" agent_name=AGENT_NAME,\n",
190+
" params={\n",
191+
" \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"I'm interested in recent hardware developments and companies working on quantum processors.\"},\n",
192+
" \"task_id\": task.id,\n",
193+
" }\n",
194+
")\n",
195+
"\n",
196+
"event = rpc_response.result\n",
197+
"print(event)\n"
198+
]
199+
},
200+
{
201+
"cell_type": "code",
202+
"execution_count": null,
203+
"metadata": {},
204+
"outputs": [],
205+
"source": [
206+
"# Subscribe to see the next clarifying question or research starting\n",
207+
"task_messages = subscribe_to_async_task_messages(\n",
208+
" client=client,\n",
209+
" task=task, \n",
210+
" only_after_timestamp=event.created_at, \n",
211+
" print_messages=True,\n",
212+
" rich_print=True,\n",
213+
" timeout=10,\n",
214+
")\n"
215+
]
216+
},
217+
{
218+
"cell_type": "code",
219+
"execution_count": null,
220+
"metadata": {},
221+
"outputs": [],
222+
"source": [
223+
"# Second clarification response (if asked)\n",
224+
"rpc_response = client.agents.send_event(\n",
225+
" agent_name=AGENT_NAME,\n",
226+
" params={\n",
227+
" \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Yes, I want to know about breakthroughs from 2024-2025, especially around error correction and qubit stability.\"},\n",
228+
" \"task_id\": task.id,\n",
229+
" }\n",
230+
")\n",
231+
"\n",
232+
"event = rpc_response.result\n",
233+
"print(event)\n"
234+
]
235+
},
236+
{
237+
"cell_type": "code",
238+
"execution_count": null,
239+
"metadata": {},
240+
"outputs": [],
241+
"source": [
242+
"# Subscribe to see the deep research results\n",
243+
"# This will take longer as the agent performs multiple searches\n",
244+
"task_messages = subscribe_to_async_task_messages(\n",
245+
" client=client,\n",
246+
" task=task, \n",
247+
" only_after_timestamp=event.created_at, \n",
248+
" print_messages=True,\n",
249+
" rich_print=True,\n",
250+
" timeout=60, # Longer timeout for deep research\n",
251+
")\n"
252+
]
253+
},
254+
{
255+
"cell_type": "code",
256+
"execution_count": null,
257+
"metadata": {},
258+
"outputs": [],
259+
"source": [
260+
"# Optional: Ask a follow-up question after seeing the research\n",
261+
"rpc_response = client.agents.send_event(\n",
262+
" agent_name=AGENT_NAME,\n",
263+
" params={\n",
264+
" \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Can you tell me more about IBM's recent quantum developments?\"},\n",
265+
" \"task_id\": task.id,\n",
266+
" }\n",
267+
")\n",
268+
"\n",
269+
"event = rpc_response.result\n",
270+
"\n",
271+
"# Subscribe to see the follow-up research\n",
272+
"task_messages = subscribe_to_async_task_messages(\n",
273+
" client=client,\n",
274+
" task=task, \n",
275+
" only_after_timestamp=event.created_at, \n",
276+
" print_messages=True,\n",
277+
" rich_print=True,\n",
278+
" timeout=60,\n",
279+
")\n"
280+
]
281+
}
282+
],
283+
"metadata": {
284+
"language_info": {
285+
"name": "python"
286+
}
287+
},
288+
"nbformat": 4,
289+
"nbformat_minor": 2
290+
}

0 commit comments

Comments
 (0)