diff --git a/claude_agent_sdk/00_The_one_liner_research_agent.ipynb b/claude_agent_sdk/00_The_one_liner_research_agent.ipynb
index 556e707a..d14b94ed 100644
--- a/claude_agent_sdk/00_The_one_liner_research_agent.ipynb
+++ b/claude_agent_sdk/00_The_one_liner_research_agent.ipynb
@@ -60,14 +60,11 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "ab9830f9",
"metadata": {},
"outputs": [],
- "source": [
- "%%capture\n",
- "%pip install -U claude-agent-sdk python-dotenv"
- ]
+ "source": "%%capture\n%pip install -U claude-agent-sdk python-dotenv"
},
{
"cell_type": "markdown",
@@ -85,7 +82,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "c41abcdf",
"metadata": {},
"outputs": [],
@@ -94,7 +91,7 @@
"\n",
"load_dotenv()\n",
"\n",
- "MODEL = \"claude-sonnet-4-5\""
+ "MODEL = \"claude-opus-4-5\""
]
},
{
@@ -111,7 +108,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 3,
"id": "b00890fb",
"metadata": {},
"outputs": [
@@ -119,24 +116,28 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "๐ค Thinking...\n",
+ "🤖 Using: WebSearch()\n",
"🤖 Using: WebSearch()\n",
"🤖 Using: WebSearch()\n",
"✓ Tool completed\n",
"✓ Tool completed\n",
+ "✓ Tool completed\n",
"🤖 Thinking...\n"
]
}
],
"source": [
- "from utils.agent_visualizer import print_activity, print_final_result\n",
+ "from utils.agent_visualizer import (\n",
+ " display_agent_response,\n",
+ " print_activity,\n",
+ ")\n",
"\n",
"from claude_agent_sdk import ClaudeAgentOptions, query\n",
"\n",
"messages = []\n",
"async for msg in query(\n",
- " prompt=\"Research the latest trends in AI agents and give me a brief summary\",\n",
- " options=ClaudeAgentOptions(model=\"claude-sonnet-4-5\", allowed_tools=[\"WebSearch\"]),\n",
+ " prompt=\"Research the latest trends in AI agents and give me a brief summary and relevant citation links.\",\n",
+ " options=ClaudeAgentOptions(model=MODEL, allowed_tools=[\"WebSearch\"]),\n",
"):\n",
" print_activity(msg)\n",
" messages.append(msg)"
@@ -144,58 +145,140 @@
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "8f57e1ec",
+ "execution_count": 4,
+ "id": "e4556936",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "๐ Final Result:\n",
- "## Brief Summary: Latest Trends in AI Agents (2025)\n",
- "\n",
- "Based on current research, here are the key trends shaping AI agents in 2025:\n",
- "\n",
- "### ๐ **Explosive Growth & Adoption**\n",
- "- Market projected to reach **$7.38 billion by end of 2025** (doubling from $3.7B in 2023)\n",
- "- **85% of organizations** have integrated AI agents into at least one workflow\n",
- "- **99% of enterprise developers** are exploring or building AI agents\n",
- "\n",
- "### ๐ฏ **Key Technical Trends**\n",
- "\n",
- "1. **Agentic RAG** - Goal-driven systems that combine retrieval, reasoning, and autonomy for smarter assistants\n",
- "\n",
- "2. **Multi-Agent Systems** - The \"orchestra approach\" where specialized agents collaborate on complex tasks\n",
- "\n",
- "3. **Industry Specialization** - Moving beyond general assistants to domain experts (AI lawyers, radiologists, etc.)\n",
- "\n",
- "4. **Enhanced Autonomy** - Agents with memory, planning, reasoning, and self-correction capabilities\n",
- "\n",
- "5. **Interoperability Standards** - New protocols like MCP (Model Context Protocol) and A2A (Agent2Agent) enabling cross-platform communication\n",
- "\n",
- "### ๐ผ **Real-World Impact**\n",
- "- **30-40% productivity gains** in early enterprise deployments\n",
- "- Autonomous task execution freeing humans for higher-value work\n",
- "- Voice-controlled conversational agents handling complex workflows\n",
- "- Proactive problem-solving before issues arise\n",
- "\n",
- "### โ ๏ธ **Important Considerations**\n",
- "- Human oversight remains critical\n",
- "- Challenges with reliability, error handling, and security\n",
- "- Most organizations aren't fully \"agent-ready\" yet\n",
- "- Technology expected to reach maturity in 5-10 years\n",
- "\n",
- "**Bottom line:** 2025 is being called the \"decade of AI agents,\" with rapid evolution from simple chatbots to autonomous, specialized problem-solvers transforming enterprise workflows.\n",
- "\n",
- "๐ Cost: $0.13\n",
- "โฑ๏ธ Duration: 42.04s\n"
- ]
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
Agent Response
Latest Trends in AI Agents (2025) - Summary
\n",
+ "
๐ Market Growth & Adoption
\n",
+ "
The AI agent market is experiencing explosive growth, nearly doubling from $3.7 billion (2023) to $7.38 billion (2025), with projections reaching $103.6 billion by 2032. According to PwC's 2025 survey, 79% of organizations have adopted AI agents, with 88% of executives piloting or scaling autonomous agent systems.
\n",
+ "
๐ Key Trends
\n",
+ "
1. Rise of Multi-Agent Systems
\n",
+ "Instead of single AI systems trying to do everything, 2025 has introduced the \"orchestra approach\" where multiple specialized agents collaborateโone gathers research, another drafts reports, and a third reviews. Frameworks like CrewAI, AutoGen, and LangGraph are enabling this coordination across enterprise departments.
\n",
+ "
2. From Assistants to Autonomous Decision-Makers
\n",
+ "AI agents are evolving from knowledge assistants to self-directed workers that can take initiative, make decisions, and complete multi-step tasks without constant human input. By 2029, 80% of customer service issues are expected to be resolved entirely by autonomous agents.
\n",
+ "
3. Model Context Protocol (MCP)
\n",
+ "Anthropic's open standard provides a \"USB-C for AI\"โstandardizing how language models connect with external systems, enabling structured multi-step workflows and access to real-time information.
\n",
+ "
4. Two-Speed Enterprise Landscape
\n",
+ "A divide is emerging: companies with existing automation are racing ahead with agentic AI, while others watch from the sidelines. Among highly automated enterprises, 50% have either adopted or are preparing to adopt autonomous agents.
\n",
+ "
โ ๏ธ Key Challenges
\n",
+ "
\n",
+ "- Integration with legacy systems (cited by ~60% of AI leaders)
\n",
+ "- Trust issues for high-stakes tasks like financial transactions
\n",
+ "- Enterprise readinessโorganizations need to expose APIs and prepare infrastructure
\n",
+ "- Reliability concernsโagents can misinterpret instructions or fail on edge cases
\n",
+ "
\n",
+ "
๐ผ Impact
\n",
+ "
\n",
+ "- 66% of adopters report measurable productivity gains
\n",
+ "- Early movers are cutting operational costs by up to 40%
\n",
+ "- 75% of executives believe AI agents will reshape the workplace more than the internet did
\n",
+ "- 87% agree AI agents augment roles rather than replace them
\n",
+ "
\n",
+ "
\n",
+ "
Sources:
\n",
+ "
\n",
+ "- The State of AI in 2025 - McKinsey
\n",
+ "- AI Agents in 2025: Expectations vs. Reality - IBM
\n",
+ "- PwC's AI Agent Survey
\n",
+ "- Seizing the Agentic AI Advantage - McKinsey
\n",
+ "- Gartner Hype Cycle Identifies Top AI Innovations in 2025
\n",
+ "- AI Trends 2025: Adoption Barriers - Deloitte
\n",
+ "- The Rise of Autonomous Agents - AWS
\n",
+ "- Multi-Agent AI Systems in 2025 - Terralogic
\n",
+ "- 50+ Key AI Agent Statistics - Index.dev
\n",
+ "- Future of AI Agents 2025 - Salesforce
\n",
+ "- Top 5 Agentic AI Trends - SuperAGI
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
- "print_final_result(messages)"
+ "display_agent_response(messages)"
]
},
{
@@ -203,15 +286,18 @@
"id": "b965c2ee",
"metadata": {},
"source": [
- "\n",
"## What's happening here:\n",
"\n",
"- `query()` creates a single-turn agent interaction (no conversation memory)\n",
"- `allowed_tools=[\"WebSearch\"]` gives Claude permission to search the web without asking for approval\n",
"- The agent autonomously decides when to search, what queries to run, and how to synthesize results\n",
- "- `print_activity()` and `print_final_result` are helper functions that show the agent's actions in real-time and print the agent's final response along with cost and duration information.\n",
"\n",
- "That's it! A functional research agent in 10 lines of code. The agent will search for relevant information, follow up on promising leads, and provide a synthesized summary."
+ "**Visualization utilities from `utils.agent_visualizer`:**\n",
+ "- `print_activity()` - Shows the agent's actions in real-time (tool calls, thinking)\n",
+ "- `display_agent_response()` - Renders the final response as a styled HTML card\n",
+ "- `visualize_conversation()` - Creates a timeline view of the full conversation\n",
+ "\n",
+ "That's it! A functional research agent in just a few lines of code. The agent will search for relevant information, follow up on promising leads, and provide a synthesized summary with citations."
]
},
{
@@ -247,58 +333,148 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
"id": "1d7c6d90",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "============================================================\n",
- "๐ค AGENT CONVERSATION TIMELINE\n",
- "============================================================\n",
- "\n",
- "โ๏ธ System Initialized\n",
- " Session: 6b742cec...\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ฌ I'll research the latest trends in AI agents for you.\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ง Using tool: WebSearch\n",
- " Query: \"latest trends AI agents 2025\"\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ง Using tool: WebSearch\n",
- " Query: \"AI agent developments autonomous systems 2025\"\n",
- "\n",
- "\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ฌ ## Brief Summary: Latest Trends in AI Agents (2025)\n",
- "\n",
- "Based on current research, here are the key trends shaping AI agents in 2025:\n",
- "\n",
- "### ๐ **Explosive Growth & Adoption**\n",
- "- Market projected to reach **$7.38 billion by end of 2025** (doubling from $3.7B in 2023)\n",
- "- **85% of organizations** have integrated AI agents into at least one workflow\n",
- "- **99% of enterprise developers** are exploring or building AI agents\n",
- "\n",
- "### ๐ฏ **Key Technical Trends**\n",
- "\n",
- "1. **Agentic RAG** - Goal-driven systems that combine r...\n",
- "\n",
- "โ
Conversation Complete\n",
- " Turns: 3\n",
- " Cost: $0.13\n",
- " Duration: 42.04s\n",
- " Tokens: 1,833\n",
- "\n",
- "============================================================\n",
- "\n"
- ]
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
โ๏ธ System
Initialized (4e8497a9...)
๐ง Tools
WebSearch: "AI agents trends 2025 latest d..."WebSearch: "autonomous AI agents enterpris..."WebSearch: "multi-agent AI systems trends ..."
๐ค Assistant
Latest Trends in AI Agents (2025) - Summary
\n",
+ "
๐ Market Growth & Adoption
\n",
+ "
The AI agent market is experiencing explosive growth, nearly doubling from $3.7 billion (2023) to $7.38 billion (2025), with projections reaching $103.6 billion by 2032. According to PwC's 2025 survey, 79% of organizations have adopted AI agents, with 88% of executives piloting or scaling autonomous agent systems.
\n",
+ "
๐ Key Trends
\n",
+ "
1. Rise of Multi-Agent Systems
\n",
+ "Instead of single AI systems trying to do everything, 2025 has introduced the \"orchestra approach\" where multiple specialized agents collaborateโone gathers research, another drafts reports, and a third reviews. Frameworks like CrewAI, AutoGen, and LangGraph are enabling this coordination across enterprise departments.
\n",
+ "
2. From Assistants to Autonomous Decision-Makers
\n",
+ "AI agents are evolving from knowledge assistants to self-directed workers that can take initiative, make decisions, and complete multi-step tasks without constant human input. By 2029, 80% of customer service issues are expected to be resolved entirely by autonomous agents.
\n",
+ "
3. Model Context Protocol (MCP)
\n",
+ "Anthropic's open standard provides a \"USB-C for AI\"โstandardizing how language models connect with external systems, enabling structured multi-step workflows and access to real-time information.
\n",
+ "
4. Two-Speed Enterprise Landscape
\n",
+ "A divide is emerging: companies with existing automation are racing ahead with agentic AI, while others watch from the sidelines. Among highly automated enterprises, 50% have either adopted or are preparing to adopt autonomous agents.
\n",
+ "
โ ๏ธ Key Challenges
\n",
+ "
\n",
+ "- Integration with legacy systems (cited by ~60% of AI leaders)
\n",
+ "- Trust issues for high-stakes tasks like financial transactions
\n",
+ "- Enterprise readinessโorganizations need to expose APIs and prepare infrastructure
\n",
+ "- Reliability concernsโagents can misinterpret instructions or fail on edge cases
\n",
+ "
\n",
+ "
๐ผ Impact
\n",
+ "
\n",
+ "- 66% of adopters report measurable productivity gains
\n",
+ "- Early movers are cutting operational costs by up to 40%
\n",
+ "- 75% of executives believe AI agents will reshape the workplace more than the internet did
\n",
+ "- 87% agree AI agents augment roles rather than replace them
\n",
+ "
\n",
+ "
\n",
+ "
Sources:
\n",
+ "
\n",
+ "- The State of AI in 2025 - McKinsey
\n",
+ "- AI Agents in 2025: Expectations vs. Reality - IBM
\n",
+ "- PwC's AI Agent Survey
\n",
+ "- Seizing the Agentic AI Advantage - McKinsey
\n",
+ "- Gartner Hype Cycle Identifies Top AI Innovations in 2025
\n",
+ "- AI Trends 2025: Adoption Barriers - Deloitte
\n",
+ "- The Rise of Autonomous Agents - AWS
\n",
+ "- Multi-Agent AI Systems in 2025 - Terralogic
\n",
+ "- 50+ Key AI Agent Statistics - Index.dev
\n",
+ "- Future of AI Agents 2025 - Salesforce
\n",
+ "- Top 5 Agentic AI Trends - SuperAGI
\n",
+ "
โ
Complete
Turns: 4 Tokens: 4,145 Cost: $0.51 Duration: 51.1s
\n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
@@ -319,7 +495,7 @@
"**1. Conversation Memory with ClaudeSDKClient**: Stateless queries can't build on previous findings. If you ask \"What are the top AI startups?\" then \"How are they funded?\", the second query has no context about which startups you mean. We can use `ClaudeSDKClient` to maintain conversation history across multiple queries.\n",
"\n",
"\n",
- "**2. System Prompts for Specialized Behavior**: Research domains often have specific requirements. Financial analysis needs different rigor than tech news summaries. Use the system prompt to encode your research standards, preferred sources, or output format. See our [agent prompting guide](https://github.com/anthropics/anthropic-cookbook/tree/main/patterns/agents/prompts) for research-specific examples.\n",
+ "**2. System Prompts for Specialized Behavior**: Research domains often have specific requirements. Financial analysis needs different rigor than tech news summaries. Use the system prompt to encode your research standards, preferred sources, citation format, or output structure. See our [agent prompting guide](https://github.com/anthropics/anthropic-cookbook/tree/main/patterns/agents/prompts) for research-specific examples.\n",
"\n",
"**3. Multimodal Research with the Read Tool**: Real research isn't just text. Market reports have charts, technical docs have diagrams, competitive analysis requires screenshot comparison. Enable the `Read` tool so Claude can analyze images, PDFs, and other visual content.\n",
"\n",
@@ -328,7 +504,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 6,
"id": "fa4c4d8f",
"metadata": {},
"outputs": [
@@ -336,16 +512,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "๐ค Thinking...\n",
"๐ค Using: Read()\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
"๐ค Using: Glob()\n",
"โ Tool completed\n",
+ "๐ค Thinking...\n",
"๐ค Using: Read()\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
"๐ค Using: WebSearch()\n",
+ "๐ค Using: WebSearch()\n",
+ "๐ค Using: WebSearch()\n",
+ "๐ค Using: WebSearch()\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
"โ Tool completed\n",
"๐ค Thinking...\n"
]
@@ -354,122 +536,399 @@
"source": [
"from claude_agent_sdk import ClaudeSDKClient\n",
"\n",
+ "# System prompt with citation requirements for research quality\n",
+ "RESEARCH_SYSTEM_PROMPT = \"\"\"You are a research agent specialized in AI.\n",
+ "\n",
+ "When providing research findings:\n",
+ "- Always include source URLs as citations\n",
+ "- Format citations as markdown links: [Source Title](URL)\n",
+ "- Group sources in a \"Sources:\" section at the end of your response\"\"\"\n",
+ "\n",
"messages = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=MODEL,\n",
" cwd=\"research_agent\",\n",
- " system_prompt=\"You are a research agent specialized in AI\",\n",
+ " system_prompt=RESEARCH_SYSTEM_PROMPT,\n",
" allowed_tools=[\"WebSearch\", \"Read\"],\n",
+ " max_buffer_size=10 * 1024 * 1024, # Increase to 10MB for image handling\n",
" )\n",
") as research_agent:\n",
+ " # First query: Analyze the chart image\n",
" await research_agent.query(\"Analyze the chart in research_agent/projects_claude.png\")\n",
" async for msg in research_agent.receive_response():\n",
" print_activity(msg)\n",
" messages.append(msg)\n",
"\n",
- " await research_agent.query(\"Use a single websearch to investigate the insights from the chart.\")\n",
+ " # Second query: Use web search to validate/contextualize the chart findings\n",
+ " await research_agent.query(\n",
+ " \"Based on the chart analysis, search for recent news or data that validates or provides context for these findings. Include source URLs.\"\n",
+ " )\n",
" async for msg in research_agent.receive_response():\n",
" print_activity(msg)\n",
" messages.append(msg)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "i4blqsetg8",
+ "metadata": {},
+ "source": [
+ "### 🔧 Handling Large Responses and Buffer Limits\n",
+ "\n",
+ "When working with images or large data, you may encounter buffer overflow errors:\n",
+ "\n",
+ "```\n",
+ "Fatal error in message reader: Failed to decode JSON: JSON message exceeded maximum buffer size of 1048576 bytes\n",
+ "```\n",
+ "\n",
+ "**Why this happens:**\n",
+ "- The default `max_buffer_size` is 1MB (1,048,576 bytes)\n",
+ "- Images are base64-encoded in messages, significantly increasing size\n",
+ "- The chart image (~200KB on disk) becomes ~270KB+ when base64-encoded, plus message overhead\n",
+ "\n",
+ "**Solution:**\n",
+ "Set `max_buffer_size` in `ClaudeAgentOptions` to a higher value (e.g., 10MB) when working with images or large tool outputs.\n",
+ "\n",
+ "**Best practices:**\n",
+ "- Set buffer size based on your use case: 10MB for typical multimodal work, higher for large document processing\n",
+ "- Consider if you really need to pass full images - sometimes descriptions or smaller thumbnails suffice\n",
+ "- Monitor for buffer errors and adjust accordingly\n",
+ "- Include citation requirements in your system prompt to ensure verifiable research outputs"
+ ]
+ },
{
"cell_type": "markdown",
"id": "6eb4ed21",
"metadata": {},
"source": [
- "This example combines all three improvements: conversation memory via ClaudeSDKClient, a system prompt for AI research specialization, and the Read tool for analyzing visual content.\n",
+ "## What's happening here:\n",
+ "\n",
+ "This example combines all three improvements: conversation memory, citation-aware system prompt, and multimodal analysis.\n",
+ "\n",
+ "**Key components:**\n",
"\n",
- "In the first query call, the agent reads and analyzes a chart image using the Read tool. Next, the Agent searches the web for context about the chart's findingsโand critically, it remembers what it saw in the chart from the first query\n",
+ "| Component | Purpose |\n",
+ "|-----------|---------|\n",
+ "| `ClaudeSDKClient` | Maintains conversation state across multiple queries |\n",
+ "| `RESEARCH_SYSTEM_PROMPT` | Enforces citation formatting and source URLs |\n",
+ "| `allowed_tools=[\"WebSearch\", \"Read\"]` | Enables web search and image/document analysis |\n",
+ "| `max_buffer_size=10MB` | Handles base64-encoded images without overflow |\n",
"\n",
- "The system prompt instruction helps the agent focus on relevant industry context.\n",
+ "**Execution flow:**\n",
"\n",
- "One key difference from the first example: The `async with ClaudeSDKClient()` context manager maintains conversation state. The second query inherits context from the firstโthe agent knows which chart and which insights to investigate."
+ "1. **First query** - Analyzes the chart image using the `Read` tool\n",
+ "2. **First response loop** - Collects all messages until the agent completes\n",
+ "3. **Second query** - Searches the web to validate/contextualize the chart findings\n",
+ "4. **Context inheritance** - The second query remembers the chart analysis from the first\n",
+ "\n",
+ "**Why `ClaudeSDKClient` vs `query()`:**\n",
+ "\n",
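+ "The `async with ClaudeSDKClient()` context manager maintains conversation state for as long as the client is open. Each `research_agent.query()` call builds on the messages exchanged in earlier turns, and `receive_response()` streams back the reply for the current turn. This differs from the standalone `query()` function, which creates independent, stateless sessions."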
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 7,
"id": "7971eae4-3ff1-48d8-99ef-fecca7332163",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "============================================================\n",
- "๐ค AGENT CONVERSATION TIMELINE\n",
- "============================================================\n",
- "\n",
- "โ๏ธ System Initialized\n",
- " Session: fa819270...\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ฌ I'll read and analyze the chart image for you.\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ง Using tool: Read\n",
- "\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ฌ Let me search for the file in the research_agent directory to find the correct path.\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ง Using tool: Glob\n",
- "\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ง Using tool: Read\n",
- "\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ฌ ## Analysis of the Chart: Types of Projects in Claude.ai and Claude Code\n",
- "\n",
- "This chart compares how different user groups utilize **Claude.ai** (blue dots) versus **Claude Code** (orange dots) across seven project categories. Here are the key insights:\n",
- "\n",
- "### **Major Findings:**\n",
- "\n",
- "1. **Personal Projects** (30.2% vs 36.0%)\n",
- " - Highest usage category for both platforms\n",
- " - Claude Code slightly leads, suggesting users prefer the coding interface for personal work\n",
- " - Small gap indicates similar appea...\n",
- "\n",
- "โ
Conversation Complete\n",
- " Turns: 4\n",
- " Cost: $0.04\n",
- " Duration: 35.30s\n",
- " Tokens: 760\n",
- "\n",
- "โ๏ธ System Initialized\n",
- " Session: fa819270...\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ง Using tool: WebSearch\n",
- " Query: \"Claude.ai vs Claude Code usage startup work enterprise personal projects developer tools 2024 2025\"\n",
- "\n",
- "\n",
- "๐ค Assistant:\n",
- " ๐ฌ ## Investigation Results: Validation of Chart Insights\n",
- "\n",
- "The web search confirms and expands on the patterns shown in the chart:\n",
- "\n",
- "### **Why Startups Prefer Claude Code (32.9% vs 13.1%)**\n",
- "\n",
- "The massive gap for startup work is validated by these findings:\n",
- "- **Automation-first approach**: Claude Code connects to command line, sees project files, modifies codebases, runs tests, and commits to GitHub autonomously\n",
- "- **Rapid prototyping strength**: Can generate full MERN stack apps from high-level descri...\n",
- "\n",
- "โ
Conversation Complete\n",
- " Turns: 2\n",
- " Cost: $0.15\n",
- " Duration: 46.35s\n",
- " Tokens: 662\n",
- "\n",
- "============================================================\n",
- "\n"
- ]
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
โ๏ธ System
Initialized (9d494ab0...)
๐ง Tools
Read: projects_claude.png
๐ค Assistant
Let me search for the file in different locations:
๐ค Assistant
Found it. Let me read the image:
๐ง Tools
Read: projects_claude.png
๐ค Assistant
Analysis of the Chart: Types of Projects in Claude.ai and Claude Code
\n",
+ "
This is a dumbbell chart (also called a connected dot plot) that compares the prevalence of different project types between two Anthropic products: Claude.ai (blue dots) and Claude Code (orange dots).
\n",
+ "
Key Findings
\n",
+ "
1. Personal Projects Lead Both Platforms
\n",
+ "
\n",
+ "- Claude Code: 36.0%
\n",
+ "- Claude.ai: 30.2%
\n",
+ "- This is the most common use case for both platforms, with Claude Code having a slight edge (~6% higher).
\n",
+ "
\n",
+ "
2. Startup Work Shows the Largest Gap
\n",
+ "
\n",
+ "- Claude Code: 32.9%
\n",
+ "- Claude.ai: 13.1%
\n",
+ "- This represents the biggest difference between the two platforms (~20% gap), indicating Claude Code is heavily favored for startup development work.
\n",
+ "
\n",
+ "
3. Enterprise Work is Relatively Balanced
\n",
+ "
\n",
+ "- Claude.ai: 25.9%
\n",
+ "- Claude Code: 23.8%
\n",
+ "- Both platforms see similar usage for enterprise work, with Claude.ai slightly ahead (~2% difference).
\n",
+ "
\n",
+ "
4. Learning & Academic Use Favors Claude.ai
\n",
+ "
\n",
+ "- Tutorial or Learning: Claude.ai (12.2%) vs Claude Code (0.7%)
\n",
+ "- Academic Research: Claude.ai (11.9%) vs Claude Code (2.2%)
\n",
+ "- Coursework: Claude.ai (4.9%) vs Claude Code (0.4%)
\n",
+ "- These categories show Claude.ai is significantly more popular for educational purposes, likely due to its conversational interface being more accessible for learning.
\n",
+ "
\n",
+ "
5. Open Source Projects are Low for Both
\n",
+ "
\n",
+ "- Claude Code: 4.0%
\n",
+ "- Claude.ai: 1.8%
\n",
+ "- Neither platform sees heavy use for open source contributions, though Claude Code is used slightly more.
\n",
+ "
\n",
+ "
Insights & Interpretation
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Pattern | \n",
+ "Implication | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Claude Code dominates Startup Work | \n",
+ "Developers building products at startups prefer the code-focused interface for rapid development | \n",
+ "
\n",
+ "\n",
+ "| Claude.ai leads in educational contexts | \n",
+ "The conversational nature of Claude.ai makes it more approachable for learning, research, and coursework | \n",
+ "
\n",
+ "\n",
+ "| Personal Projects are universal | \n",
+ "Both platforms serve individual developers working on side projects equally well | \n",
+ "
\n",
+ "\n",
+ "| Enterprise usage is balanced | \n",
+ "Both products have found their place in professional enterprise environments | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Summary
\n",
+ "
The chart reveals clear product differentiation:
\n",
+ "- Claude Code is preferred for active software development (startups, personal coding projects, open source)
\n",
+ "- Claude.ai excels in knowledge work and learning (tutorials, academic research, coursework)
\n",
+ "
This aligns with the nature of each productโClaude Code is a development tool while Claude.ai is a general-purpose AI assistant with a broader conversational interface.
โ
Complete
Turns: 4 Tokens: 887 Cost: $0.07 Duration: 27.6s
โ๏ธ System
Initialized (9d494ab0...)
๐ง Tools
WebSearch: "Claude Code developer adoption..."WebSearch: "Claude AI education academic r..."WebSearch: "Anthropic Claude Code vs Claud..."WebSearch: "AI coding assistants startup d..."
๐ค Assistant
Validation & Context for Chart Findings
\n",
+ "
Based on my research, here's how recent news and data validate or provide context for the chart analysis:
\n",
+ "
\n",
+ "
1. โ
Startup Dominance in Claude Code - VALIDATED
\n",
+ "
The chart showed Claude Code usage for Startup Work at 32.9% vs only 13.1% for Claude.aiโthe largest gap in the data. Recent news strongly confirms this:
\n",
+ "
\n",
+ "\"Startups are the main early adopters of Claude Code, suggesting that smaller, agile businesses are leveraging this AI tool to gain a technological advantage.\" - Blockchain News
\n",
+ "
\n",
+ "
YC Startups as Case Studies:
\n",
+ "- HumanLayer (F24) - Built their entire platform with Claude Code
\n",
+ "- Ambral (W25) - Scaling AI-powered account management with sub-agent workflows
\n",
+ "- Vulcan Technologies (S25) - Using Claude Code for regulatory complexity
\n",
+ "
As noted by Anthropic's blog: \"Founders can now ship products directly from the terminal, compressing development cycles from weeks to hours.\"
\n",
+ "
\n",
+ "
2. โ
Educational Use Favors Claude.ai - VALIDATED
\n",
+ "
The chart showed Claude.ai leading significantly in:
\n",
+ "- Tutorial or Learning: 12.2% (vs 0.7% for Claude Code)
\n",
+ "- Academic Research: 11.9% (vs 2.2%)
\n",
+ "- Coursework: 4.9% (vs 0.4%)
\n",
+ "
This aligns perfectly with Anthropic's dedicated education initiatives:
\n",
+ "
\n",
+ "Anthropic launched \"Claude for Education\" with features like \"Learning Mode\" that uses Socratic questioning rather than giving direct answers. - VentureBeat
\n",
+ "
\n",
+ "
Key Statistics from Anthropic's Education Report:
\n",
+ "- 39.3% of student conversations involve creating and improving educational content
\n",
+ "- 33.5% involve getting technical explanations for academic assignments
\n",
+ "- 57% of higher ed instructor chats involved developing curricula
\n",
+ "- 13% were conducting academic research
\n",
+ "
Early adopters include Northeastern University (50,000+ students across 13 campuses), London School of Economics, and Champlain College. - Anthropic Education Report
\n",
+ "
\n",
+ "
3. โ
Coding/Development Dominance - VALIDATED
\n",
+ "
The chart showed Personal Projects and Startup Work (both development-heavy) as top uses for Claude Code (36% and 32.9%).
\n",
+ "
Anthropic's own research confirms:
\n",
+ "
\n",
+ "\"About 44% of API traffic involved coding, compared with 36% on Claude.ai.\" - Anthropic Economic Index
\n",
+ "\"Software development remains Claude's most common use case, making up more than a third of activity globally.\" - eWeek
\n",
+ "
\n",
+ "
\n",
+ "
4. โ
Product Differentiation (Claude Code vs Claude.ai) - VALIDATED
\n",
+ "
The chart's overall pattern showing Claude Code for development and Claude.ai for knowledge work is confirmed by Fortune:
\n",
+ "
\n",
+ "\"ChatGPT is emerging increasingly as a personal or exploratory tool... while Claude is a more work-focused productivity tool, used heavily for coding, research, and business automation.\" - Fortune
\n",
+ "
\n",
+ "
Automation patterns differ by platform:
\n",
+ "- 77% of API tasks are automated (full task delegation)
\n",
+ "- ~50% of Claude.ai tasks are automated (more collaborative)
\n",
+ "
\n",
+ "
5. ๐ Broader AI Coding Assistant Context
\n",
+ "
The chart's findings fit into the larger industry trend:
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Metric | \n",
+ "2025 Data | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| AI-generated/assisted code | \n",
+ "41% of all code globally | \n",
+ "
\n",
+ "\n",
+ "| Developers using AI coding assistants | \n",
+ "82% daily or weekly | \n",
+ "
\n",
+ "\n",
+ "| Market size projection | \n",
+ "$30.1 billion by 2032 | \n",
+ "
\n",
+ "\n",
+ "| Google's AI-assisted code | \n",
+ "25% | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Source: AI Coding Assistant Statistics
\n",
+ "
\n",
+ "
6. โ ๏ธ Enterprise Work Balance - PARTIALLY EXPLAINED
\n",
+ "
The chart showed Enterprise Work relatively balanced (Claude.ai: 25.9%, Claude Code: 23.8%). This makes sense given:
\n",
+ "
\n",
+ "- 70-75% of Anthropic's revenue comes from enterprise API consumption
\n",
+ "- Enterprises use both products depending on use case
\n",
+ "- Security features like FedRAMP High certification drive enterprise adoption of both platforms
\n",
+ "
\n",
+ "
\n",
+ "
Summary
\n",
+ "
The chart's findings are strongly validated by recent data:
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Chart Finding | \n",
+ "Validation Status | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Startups prefer Claude Code (32.9% vs 13.1%) | \n",
+ "โ
Confirmed by Anthropic & YC case studies | \n",
+ "
\n",
+ "\n",
+ "| Education favors Claude.ai (12.2% vs 0.7%) | \n",
+ "โ
Confirmed by Anthropic Education Report | \n",
+ "
\n",
+ "\n",
+ "| Personal projects lead both platforms | \n",
+ "โ
Confirmed by usage statistics | \n",
+ "
\n",
+ "\n",
+ "| Claude Code = development tool | \n",
+ "โ
Confirmed (44% API traffic is coding) | \n",
+ "
\n",
+ "\n",
+ "| Claude.ai = knowledge/learning tool | \n",
+ "โ
Confirmed by education initiatives | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
Sources
\n",
+ "
\n",
+ "- How Three YC Startups Built Their Companies with Claude Code
\n",
+ "- Anthropic Startups Program
\n",
+ "- Startups Lead Claude Code Adoption - Blockchain News
\n",
+ "- Anthropic Education Report: How Educators Use Claude
\n",
+ "- How University Students Use Claude
\n",
+ "- Introducing Claude for Education
\n",
+ "- Anthropic's AI Usage Study: Coding Still Dominates - eWeek
\n",
+ "- Anthropic Economic Index September 2025 Report
\n",
+ "- New Studies Show What People Really Use ChatGPT and Claude For - Fortune
\n",
+ "- Claude Statistics 2025 - Backlinko
\n",
+ "- AI Coding Assistant Statistics & Trends 2025
\n",
+ "- Anthropic Flips the Script on AI in Education - VentureBeat
\n",
+ "- 55 Latest Anthropic Claude Stats - Keywords Everywhere
\n",
+ "
โ
Complete
Turns: 5 Tokens: 2,152 Cost: $0.76 Duration: 73.0s
\n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
@@ -483,14 +942,27 @@
"source": [
"## Building for Production\n",
"\n",
- "Jupyter notebooks are great for learning, but production systems need reusable modules. We've packaged the research agent into research_agent/agent.py with a clean interface:\n",
+ "Jupyter notebooks are great for learning, but production systems need reusable modules. We've packaged the research agent into `research_agent/agent.py` with a clean interface:\n",
"\n",
"### Core functions:\n",
"\n",
- "- `print_activity()` - Shows what the agent is doing in real-time\n",
+ "- `print_activity()` - Shows what the agent is doing in real-time (imported from shared utilities)\n",
"- `get_activity_text()` - Extract activity text for custom handlers, such as logging or monitoring\n",
"- `send_query()` - Main entry point for research queries with built-in activity display\n",
"\n",
+ "### Built-in best practices:\n",
+ "\n",
+ "The module includes the `RESEARCH_SYSTEM_PROMPT` which ensures:\n",
+ "- Source URLs are always included as citations\n",
+ "- Citations are formatted as markdown links for clean rendering\n",
+ "- A \"Sources:\" section groups all references\n",
+ "\n",
+ "### Display control:\n",
+ "\n",
+ "The `send_query()` function has a `display_result` parameter (default: `True`):\n",
+ "- `display_result=True` - Renders a styled HTML card in Jupyter notebooks\n",
+ "- `display_result=False` - Returns only the text result for programmatic use\n",
+ "\n",
"This agent can now be used in any Python script!"
]
},
@@ -499,20 +971,138 @@
"id": "e220b5c7-463b-4171-b687-b1ec974958de",
"metadata": {},
"source": [
- "For independent questions where conversation context doesn't matter:\n"
+ "Use a single `send_query()` call for independent questions where conversation context doesn't matter.\n",
+ "\n",
+ "The module automatically handles:\n",
+ "- Activity display during execution\n",
+ "- Context reset for new conversations\n",
+ "- Styled HTML rendering of the final response"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"id": "3c2ca449-7a36-4b67-af47-fdb68fb3e36b",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "๐ค Using: WebSearch()\n",
+ "โ Tool completed\n",
+ "๐ค Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
Claude Code SDK
\n",
+ "
The Claude Code SDK (now renamed to Claude Agent SDK) is a toolkit from Anthropic that allows developers to build AI agents using the same infrastructure that powers Claude Code.
\n",
+ "
Key capabilities:
\n",
+ "- Context management - Automatic compaction to prevent running out of context
\n",
+ "- Rich tool ecosystem - File operations, code execution, web search, MCP extensibility
\n",
+ "- Fine-grained permissions - Control over agent capabilities
\n",
+ "- Production features - Error handling, session management, monitoring
\n",
+ "
Available in:
\n",
+ "- TypeScript (@anthropic-ai/claude-code)
\n",
+ "- Python (pip install claude-code-sdk)
\n",
+ "- Command line
\n",
+ "
It enables building agents for coding automation, customer support, personal assistants, and moreโall using the same core systems that power Claude Code.
\n",
+ "
\n",
+ "
Sources:
\n",
+ "- Agent SDK overview - Claude Docs
\n",
+ "- Building agents with the Claude Agent SDK
\n",
+ "- Anthropic Releases Claude Code SDK - InfoQ
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"from research_agent.agent import send_query\n",
"\n",
- "result = await send_query(\"What is the Claude Code SDK? Only do one websearch and be concise\")\n",
- "print(f\"\\nResult: {result}\\n\")"
+ "# The module handles activity display, context reset, and result visualization internally\n",
+ "result = await send_query(\"What is the Claude Code SDK? Only do one websearch and be concise\")"
]
},
{
@@ -520,33 +1110,244 @@
"id": "466155ec-9f54-49d4-83cb-00032b077147",
"metadata": {},
"source": [
- "Now we test out a multi-turn conversation that reuses the same conversation:"
+ "Now we test a multi-turn conversation in which a follow-up query reuses the context of the previous one.\n",
+ "\n",
+ "Multi-turn conversations work seamlessly—just pass `continue_conversation=True`:"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"id": "38ba1eda",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "๐ค Using: WebSearch()\n",
+ "โ Tool completed\n",
+ "๐ค Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
Anthropic is an American AI safety company founded in 2021 by former OpenAI executives, including siblings Dario and Daniela Amodei. The company builds Claude, a family of large language models, with a focus on creating reliable, interpretable, and safe AI systems. Anthropic is valued at approximately $183 billion and has received major investments from Amazon, Google, Microsoft, and NVIDIA. Its Claude Code product recently reached $1 billion in run-rate revenue.
\n",
+ "
Sources:
\n",
+ "- Anthropic - Wikipedia
\n",
+ "- Anthropic Homepage
\n",
+ "- What's Anthropic AI? - Voiceflow
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "result1 = await send_query(\"What is Anthropic? Only do one websearch and be concise\")\n",
- "print(f\"\\n-----\\n\\nInitial research: {result1}\\n\")"
+ "result1 = await send_query(\"What is Anthropic? Only do one websearch and be concise\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"id": "36931a9e",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "๐ค Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
Based on my previous search results, here are Anthropic's main products:
\n",
+ "
\n",
+ "- \n",
+ "
Claude - Their flagship AI assistant and family of large language models, which incorporates \"Constitutional AI\" for safety. Named after mathematician Claude Shannon.
\n",
+ " \n",
+ "- \n",
+ "
Claude Code - A developer tool that recently reached $1 billion in run-rate revenue just six months after public launch.
\n",
+ " \n",
+ "- \n",
+ "
Bun - A JavaScript runtime that Anthropic recently acquired to accelerate Claude Code's capabilities.
\n",
+ " \n",
+ "
\n",
+ "
Anthropic offers Claude through their website, API access for developers, and enterprise solutions.
\n",
+ "
Sources:
\n",
+ "- Anthropic Homepage
\n",
+ "- Anthropic acquires Bun as Claude Code reaches $1B milestone
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"# Continue the conversation to dig deeper by setting continue_conversation=True\n",
"result2 = await send_query(\n",
" \"What are some of their products?\",\n",
" continue_conversation=True,\n",
- ")\n",
- "print(f\"\\n-----\\n\\nFollow-up: {result2}\\n\")"
+ ")"
]
},
{
@@ -594,7 +1395,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "anthropic-cookbook (3.12.12)",
+ "display_name": "cc-sdk-tutorial",
"language": "python",
"name": "python3"
},
@@ -608,9 +1409,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.12"
+ "version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
-}
+}
\ No newline at end of file
diff --git a/claude_agent_sdk/01_The_chief_of_staff_agent.ipynb b/claude_agent_sdk/01_The_chief_of_staff_agent.ipynb
index 8e8fc99e..bf0ad0c4 100644
--- a/claude_agent_sdk/01_The_chief_of_staff_agent.ipynb
+++ b/claude_agent_sdk/01_The_chief_of_staff_agent.ipynb
@@ -2,27 +2,34 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "False"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "๐ Notebook configured to use: claude-opus-4-5\n"
+ ]
}
],
"source": [
"from dotenv import load_dotenv\n",
- "from utils.agent_visualizer import print_activity, visualize_conversation\n",
+ "from utils.agent_visualizer import (\n",
+ " display_agent_response,\n",
+ " print_activity,\n",
+ " reset_activity_context,\n",
+ " visualize_conversation,\n",
+ ")\n",
"\n",
"from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient\n",
"\n",
- "load_dotenv()"
+ "load_dotenv()\n",
+ "\n",
+ "# Define the model to use throughout this notebook\n",
+ "# Using Opus 4.5 for its superior planning and reasoning capabilities\n",
+ "MODEL = \"claude-opus-4-5\"\n",
+ "print(f\"๐ Notebook configured to use: {MODEL}\")"
]
},
{
@@ -69,26 +76,157 @@
"\n",
"**How**: \n",
"- Have a `CLAUDE.md` file in the working directory - in our example: `chief_of_staff_agent/CLAUDE.md`\n",
- "- Set the `cwd` argument of your ClaudeSDKClient to point to directory of your CLAUDE.md file"
+ "- Set the `cwd` argument of your ClaudeSDKClient to point to directory of your CLAUDE.md file\n",
+ "- Use explicit prompts to guide the agent when you want it to prefer high-level context over detailed data files\n",
+ "\n",
+ "**Important Behavior Note**: When both CLAUDE.md and detailed data files (like CSVs) are available, the agent may prefer to read the more granular data sources to provide precise answers. This is expected behavior - agents naturally seek authoritative data. To ensure the agent uses high-level CLAUDE.md context, use explicit prompt instructions (see example below). This teaches an important lesson: CLAUDE.md provides *context and guidance*, not hard constraints on data sources."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "๐ค Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
Based on the company information, TechStart Inc's current runway is 20 months (until September 2025).
\n",
+ "
Here are the key financial details:
\n",
+ "- Cash in Bank: $10M
\n",
+ "- Monthly Burn Rate: ~$500,000
\n",
+ "- Runway: 20 months
\n",
+ "
This gives you a solid runway to execute on Q2 2024 priorities, including hiring 10 engineers, launching the AI code review feature, European expansion, and beginning Series B conversations (targeting $30M).
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
+ "messages = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=MODEL,\n",
" cwd=\"chief_of_staff_agent\", # Points to subdirectory with our CLAUDE.md\n",
+ " setting_sources=[\"project\"],\n",
" )\n",
") as agent:\n",
" await agent.query(\"What's our current runway?\")\n",
" async for msg in agent.receive_response():\n",
- " if hasattr(msg, \"result\"):\n",
- " print(msg.result)\n",
- "# The agent should know from the CLAUDE.md file: $500K burn, 20 months runway"
+ " print_activity(msg)\n",
+ " messages.append(msg)\n",
+ "\n",
+ "# Display the response with HTML rendering\n",
+ "display_agent_response(messages)\n",
+ "# The agent should answer from the CLAUDE.md context: ~$500K burn, 20 months runway"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Understanding Agent Data Source Preferences\n",
+ "\n",
+ "**What Just Happened:**\n",
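+ "By adding an explicit instruction to the prompt (for example, asking for the \"high-level financial numbers from context\"), you can guide the agent to rely on the CLAUDE.md context rather than seeking more granular data from CSV files.\n",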
+ "\n",
+ "**Key Insights:**\n",
+ "1. **CLAUDE.md as Context, Not Constraint**: When you set `cwd` and include `setting_sources=[\"project\"]`, the CLAUDE.md file in that directory is loaded as background context. However, agents will naturally seek the most authoritative data sources available. If detailed CSV files exist, the agent may prefer them for precision.\n",
+ "\n",
+ "2. **Prompt Engineering Matters**: The phrasing \"high-level financial numbers from context\" signals to the agent that you want the simplified executive summary from CLAUDE.md ($500K burn, 20 months runway) rather than the precise month-by-month data from financial_data/burn_rate.csv ($525K gross, $235K net burn).\n",
+ "\n",
+ "3. **Architectural Design Choice**: This behavior is actually desirable in production systems - you want agents to find the best data source. CLAUDE.md should contain:\n",
+ " - High-level context and strategy\n",
+ " - Company information and standards\n",
+ " - Pointers to where detailed data lives\n",
+ " - Guidelines on when to use high-level vs. detailed numbers\n",
+ "\n",
+ "4. **Real-World Pattern**: Think of CLAUDE.md as an \"onboarding document\" that orients the agent, while detailed files are \"source systems\" the agent can query when precision matters."
]
},
{
@@ -111,13 +249,152 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "๐ค Using: Glob()\n",
+ "๐ค Using: Glob()\n",
+ "๐ค Using: Glob()\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
+ "๐ค Thinking...\n",
+ "๐ค Using: Read()\n",
+ "โ Tool completed\n",
+ "๐ค Thinking...\n",
+ "๐ค Using: Bash()\n",
+ "โ Tool completed\n",
+ "๐ค Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
Here are the financial metrics calculated using the simple calculation script:
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Metric | \n",
+ "Value | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Total Runway | \n",
+ "$2,904,829.00 | \n",
+ "
\n",
+ "\n",
+ "| Monthly Burn | \n",
+ "$121,938.00 | \n",
+ "
\n",
+ "\n",
+ "| Runway Months | \n",
+ "~23.82 months | \n",
+ "
\n",
+ "\n",
+ "| Quarterly Burn | \n",
+ "$365,814.00 | \n",
+ "
\n",
+ "\n",
+ "| Daily Burn Rate | \n",
+ "$4,064.60 | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Based on these calculations, with a total runway of $2,904,829 and a monthly burn rate of $121,938, you have approximately 23.8 months (just under 2 years) of runway remaining.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
+ "messages = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=MODEL,\n",
" allowed_tools=[\"Bash\", \"Read\"],\n",
" cwd=\"chief_of_staff_agent\", # Points to subdirectory where our agent is defined\n",
" )\n",
@@ -127,9 +404,10 @@
" )\n",
" async for msg in agent.receive_response():\n",
" print_activity(msg)\n",
- " if hasattr(msg, \"result\"):\n",
- " print(\"\\n\")\n",
- " print(msg.result)"
+ " messages.append(msg)\n",
+ "\n",
+ "# Display the response with HTML rendering\n",
+ "display_agent_response(messages)"
]
},
{
@@ -145,12 +423,14 @@
"**How**:\n",
 "- Configure a markdown file per style in `chief_of_staff_agent/.claude/output-styles/`. For example, check out the Executive Output style in `.claude/output-styles/executive.md`. Output styles are defined with a simple frontmatter including two fields: name and description. Note: Make sure the name in the frontmatter matches exactly the file's name (case sensitive)\n",
"\n",
- "> **IMPORTANT**: Output styles modify the system prompt that Claude Code has underneath, leaving out the parts focused on software engineering and giving you more control for your specific use case beyond software engineering work."
+ "> **IMPORTANT**: Output styles modify the system prompt that Claude Code has underneath, leaving out the parts focused on software engineering and giving you more control for your specific use case beyond software engineering work.\n",
+ "\n",
+ "> **SDK CONFIGURATION NOTE**: Similar to slash commands (covered in Feature 4), output styles are stored on the filesystem in `.claude/output-styles/`. For the SDK to load these files, you **must** include `setting_sources=[\"project\"]` in your `ClaudeAgentOptions`. The `settings` parameter tells the SDK *which* style to use, but `setting_sources` is required to actually *load* the style definitions. This requirement was identified while debugging later sections and applies to all filesystem-based settings."
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -166,9 +446,12 @@
"messages_executive = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=MODEL,\n",
" cwd=\"chief_of_staff_agent\",\n",
" settings='{\"outputStyle\": \"executive\"}',\n",
+ " # IMPORTANT: setting_sources must include \"project\" to load output styles from .claude/output-styles/\n",
+ " # Without this, the SDK does NOT load filesystem settings (output styles, slash commands, etc.)\n",
+ " setting_sources=[\"project\"],\n",
" )\n",
") as agent:\n",
" await agent.query(\"Tell me in two sentences about your writing output style.\")\n",
@@ -179,9 +462,10 @@
"messages_technical = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=MODEL,\n",
" cwd=\"chief_of_staff_agent\",\n",
" settings='{\"outputStyle\": \"technical\"}',\n",
+ " setting_sources=[\"project\"],\n",
" )\n",
") as agent:\n",
" await agent.query(\"Tell me in two sentences about your writing output style.\")\n",
@@ -192,20 +476,198 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
My writing style is direct, concise, and professionalโI avoid unnecessary filler and get straight to actionable insights. I adapt my tone based on context: more formal for strategic recommendations and board-level communications, more conversational for quick operational questions.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "print(messages_executive[-1].result)"
+ "# Display executive style response\n",
+ "display_agent_response(messages_executive)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Technical Style
My writing style is direct, clear, and professionalโI provide concise answers without unnecessary filler while ensuring the information is complete and actionable. I adapt my tone to the context, being more formal for business analysis and more conversational for general questions, and I use formatting (like bullet points, headers, or code blocks) when it helps organize complex information.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "print(messages_technical[-1].result)"
+ "# Technical output style - detailed, implementation-focused\n",
+ "display_agent_response(messages_technical, title=\"Technical Style\")"
]
},
{
@@ -220,9 +682,307 @@
"\n",
"**How**: Just set `permission_mode=\"plan\"`\n",
"\n",
+ "**Plan Persistence**: Since plans are valuable artifacts for review and decision-making, we'll demonstrate how to capture and save them to persistent markdown files. This enables stakeholders to review plans before approving execution.\n",
+ "\n",
 "> Note: this feature shines in Claude Code but still needs to be fully adapted for headless applications with the SDK. Namely, the agent will try calling its `ExitPlanMode()` tool, which is only relevant in the interactive mode. In this case, you can send a follow-up query with `continue_conversation=True` for the agent to execute its plan in context."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✅ Plan Mode helper functions loaded\n"
+ ]
+ }
+ ],
+ "source": [
+ "# =============================================================================\n",
+ "# Plan Mode Helper Functions\n",
+ "# =============================================================================\n",
+ "# These utilities handle the various ways an agent might output its plan.\n",
+ "# Since agents can output plans via direct text, Write tool, or Claude's\n",
+ "# internal plan directory, we need robust extraction from multiple sources.\n",
+ "\n",
+ "import glob as glob_module\n",
+ "import os\n",
+ "import re\n",
+ "from datetime import datetime\n",
+ "from pathlib import Path\n",
+ "from typing import Any\n",
+ "\n",
+ "\n",
+ "def extract_plan_from_xml(text: str | None, min_length: int = 200) -> str | None:\n",
+ " \"\"\"\n",
+ " Extract content between tags from text.\n",
+ "\n",
+ " Args:\n",
+ " text: The text to search for plan content\n",
+ " min_length: Minimum character count for valid plan (prevents empty matches)\n",
+ "\n",
+ " Returns:\n",
+ " Extracted plan content, or None if not found/too short\n",
+ " \"\"\"\n",
+ " if not text:\n",
+ " return None\n",
+ " match = re.search(r\"<plan>(.*?)</plan>\", text, re.DOTALL)\n",
+ " if match:\n",
+ " extracted = match.group(1).strip()\n",
+ " if len(extracted) > min_length:\n",
+ " return extracted\n",
+ " return None\n",
+ "\n",
+ "\n",
+ "def extract_plan_from_messages(\n",
+ " plan_content: list[str], min_fallback_length: int = 500\n",
+ ") -> tuple[str | None, str | None]:\n",
+ " \"\"\"\n",
+ " Try to extract plan from captured message stream content.\n",
+ "\n",
+ " Args:\n",
+ " plan_content: List of text blocks captured during streaming\n",
+ " min_fallback_length: Minimum length for fallback (no XML tags)\n",
+ "\n",
+ " Returns:\n",
+ " Tuple of (plan_text, source_description)\n",
+ " \"\"\"\n",
+ " combined_text = \"\\n\\n\".join(plan_content)\n",
+ "\n",
+ " # First try: XML tags\n",
+ " plan = extract_plan_from_xml(combined_text)\n",
+ " if plan:\n",
+ " return plan, \"message stream\"\n",
+ "\n",
+ " # Fallback: Use raw content if substantial\n",
+ " if len(combined_text.strip()) > min_fallback_length:\n",
+ " return combined_text.strip(), \"full message content (fallback)\"\n",
+ "\n",
+ " return None, None\n",
+ "\n",
+ "\n",
+ "def extract_plan_from_write_tool(\n",
+ " write_contents: list[str], min_fallback_length: int = 500\n",
+ ") -> tuple[str | None, str | None]:\n",
+ " \"\"\"\n",
+ " Try to extract plan from captured Write tool calls.\n",
+ "\n",
+ " Args:\n",
+ " write_contents: List of content strings from Write tool calls\n",
+ " min_fallback_length: Minimum length for fallback (no XML tags)\n",
+ "\n",
+ " Returns:\n",
+ " Tuple of (plan_text, source_description)\n",
+ " \"\"\"\n",
+ " for content in write_contents:\n",
+ " # Try XML extraction first\n",
+ " plan = extract_plan_from_xml(content)\n",
+ " if plan:\n",
+ " return plan, \"Write tool capture\"\n",
+ "\n",
+ " # Fallback: substantial content without tags\n",
+ " if content and len(content.strip()) > min_fallback_length:\n",
+ " return content.strip(), \"Write tool capture (no XML tags)\"\n",
+ "\n",
+ " return None, None\n",
+ "\n",
+ "\n",
+ "def extract_plan_from_claude_dir(\n",
+ " max_age_seconds: int = 300, min_fallback_length: int = 500\n",
+ ") -> tuple[str | None, str | None]:\n",
+ " \"\"\"\n",
+ " Check Claude's internal plan directory for recently created plans.\n",
+ "\n",
+ " Args:\n",
+ " max_age_seconds: Maximum age of plan file to consider (default: 5 minutes)\n",
+ " min_fallback_length: Minimum length for fallback (no XML tags)\n",
+ "\n",
+ " Returns:\n",
+ " Tuple of (plan_text, source_description)\n",
+ " \"\"\"\n",
+ " claude_plans_dir = os.path.expanduser(\"~/.claude/plans\")\n",
+ "\n",
+ " if not os.path.exists(claude_plans_dir):\n",
+ " return None, None\n",
+ "\n",
+ " # Find most recent plan file\n",
+ " plan_files = sorted(\n",
+ " glob_module.glob(os.path.join(claude_plans_dir, \"*.md\")),\n",
+ " key=os.path.getmtime,\n",
+ " reverse=True,\n",
+ " )\n",
+ "\n",
+ " if not plan_files:\n",
+ " return None, None\n",
+ "\n",
+ " most_recent = plan_files[0]\n",
+ " file_age = datetime.now().timestamp() - os.path.getmtime(most_recent)\n",
+ "\n",
+ " if file_age > max_age_seconds:\n",
+ " return None, None\n",
+ "\n",
+ " with open(most_recent) as f:\n",
+ " content = f.read()\n",
+ "\n",
+ " filename = os.path.basename(most_recent)\n",
+ "\n",
+ " # Try XML extraction first\n",
+ " plan = extract_plan_from_xml(content)\n",
+ " if plan:\n",
+ " return plan, f\"Claude plan file ({filename})\"\n",
+ "\n",
+ " # Fallback: substantial content without tags\n",
+ " if len(content.strip()) > min_fallback_length:\n",
+ " return content.strip(), f\"Claude plan file ({filename}, no XML tags)\"\n",
+ "\n",
+ " return None, None\n",
+ "\n",
+ "\n",
+ "def save_plan_to_file(\n",
+ " plan_content: str,\n",
+ " plan_source: str,\n",
+ " model_name: str,\n",
+ " prompt_summary: str,\n",
+ " output_dir: str = \"chief_of_staff_agent/plans\",\n",
+ " title: str = \"Agent Plan: Engineering Restructure for AI Focus\",\n",
+ ") -> Path:\n",
+ " \"\"\"\n",
+ " Save extracted plan to a timestamped markdown file.\n",
+ "\n",
+ " Args:\n",
+ " plan_content: The plan text to save\n",
+ " plan_source: Description of where plan was extracted from\n",
+ " model_name: The model used to generate the plan\n",
+ " prompt_summary: Brief description of the original prompt\n",
+ " output_dir: Directory to save plan files\n",
+ " title: Title for the plan document\n",
+ "\n",
+ " Returns:\n",
+ " Path to the saved plan file\n",
+ " \"\"\"\n",
+ " plans_dir = Path(output_dir)\n",
+ " plans_dir.mkdir(exist_ok=True)\n",
+ "\n",
+ " timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
+ " plan_file = plans_dir / f\"plan_{timestamp}.md\"\n",
+ "\n",
+ " with open(plan_file, \"w\") as f:\n",
+ " f.write(f\"# {title}\\n\\n\")\n",
+ " f.write(f\"**Created:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\\n\")\n",
+ " f.write(f\"**Prompt:** {prompt_summary}\\n\")\n",
+ " f.write(f\"**Model:** {model_name}\\n\")\n",
+ " f.write(f\"**Plan Source:** {plan_source}\\n\\n\")\n",
+ " f.write(\"---\\n\\n\")\n",
+ " f.write(plan_content)\n",
+ " f.write(\"\\n\\n---\\n\\n\")\n",
+ " f.write(\"*This plan was generated in plan mode and has not been executed.*\\n\")\n",
+ "\n",
+ " return plan_file\n",
+ "\n",
+ "\n",
+ "def capture_message_content(\n",
+ " msg: Any,\n",
+ " plan_content: list[str],\n",
+ " write_tool_content: list[str],\n",
+ " write_tool_paths: list[str],\n",
+ ") -> None:\n",
+ " \"\"\"\n",
+ " Process a streaming message and capture relevant plan content.\n",
+ "\n",
+ " This function extracts content from three potential sources:\n",
+ " 1. Text blocks in message content\n",
+ " 2. Write tool call parameters\n",
+ " 3. Final result attribute\n",
+ "\n",
+ " Args:\n",
+ " msg: The message object from the agent stream\n",
+ " plan_content: List to append text content to\n",
+ " write_tool_content: List to append Write tool content to\n",
+ " write_tool_paths: List to append Write tool file paths to\n",
+ " \"\"\"\n",
+ " # Source 1: Text blocks from message content\n",
+ " if hasattr(msg, \"content\"):\n",
+ " for block in msg.content:\n",
+ " if hasattr(block, \"text\"):\n",
+ " plan_content.append(block.text)\n",
+ "\n",
+ " # Source 2: Write tool calls\n",
+ " if hasattr(block, \"type\") and block.type == \"tool_use\":\n",
+ " if hasattr(block, \"name\") and block.name == \"Write\":\n",
+ " if hasattr(block, \"input\") and isinstance(block.input, dict):\n",
+ " if \"content\" in block.input:\n",
+ " write_tool_content.append(block.input[\"content\"])\n",
+ " if \"file_path\" in block.input:\n",
+ " write_tool_paths.append(block.input[\"file_path\"])\n",
+ "\n",
+ " # Source 3: Final result\n",
+ " if hasattr(msg, \"result\") and msg.result:\n",
+ " plan_content.append(msg.result)\n",
+ "\n",
+ "\n",
+ "print(\"✅ Plan Mode helper functions loaded\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "๐ Plan Mode configured with model: claude-opus-4-5\n",
+ "๐ Prompt length: 1,180 characters\n"
+ ]
+ }
+ ],
+ "source": [
+ "# =============================================================================\n",
+ "# Plan Mode Configuration\n",
+ "# =============================================================================\n",
+ "\n",
+ "# Note: MODEL is defined in cell-0 as \"claude-opus-4-5\"\n",
+ "# Opus excels at complex planning tasks\n",
+ "\n",
+ "# The prompt is carefully crafted to:\n",
+ "# 1. Provide explicit context (since Opus prefers explicit information)\n",
+ "# 2. Request XML-tagged output for reliable extraction\n",
+ "# 3. Prevent file-writing so we can capture the plan programmatically\n",
+ "\n",
+ "PLAN_PROMPT = \"\"\"Restructure our engineering team for AI focus.\n",
+ "\n",
+ "**CONTEXT (from CLAUDE.md):**\n",
+ "You are the Chief of Staff for TechStart Inc, a 50-person B2B SaaS startup that raised $10M Series A.\n",
+ "- Current engineering team: 25 people (Backend: 12, Frontend: 8, DevOps: 5)\n",
+ "- Monthly burn rate: ~$500K, Runway: 20 months\n",
+ "- Senior Engineer compensation: $180K-$220K + equity\n",
+ "\n",
+ "**CRITICAL OUTPUT INSTRUCTIONS:**\n",
+ "\n",
+ "1. **DO NOT use the Write tool** - Output your plan directly in your response text\n",
+ "2. **DO NOT save to any files** - I will handle saving the plan myself\n",
+ "3. **Wrap your ENTIRE plan inside `<plan></plan>` XML tags** in your response\n",
+ "\n",
+ "**Required Format:**\n",
+ "<plan>\n",
+ "[Your complete restructuring plan here - include all sections, timelines, budgets, and recommendations]\n",
+ "</plan>\n",
+ "\n",
+ "**IMPORTANT:**\n",
+ "- The plan content MUST appear directly in your response between the XML tags\n",
+ "- Do NOT use Write, Edit, or any file-saving tools\n",
+ "- You may research and analyze before outputting, but the final plan must be in your response text\n",
+ "- Include: team structure, hiring recommendations, timeline, budget impact, and success metrics\n",
+ "- Use the company context provided above - do NOT ask clarifying questions\"\"\"\n",
+ "\n",
+ "print(f\"๐ Plan Mode configured with model: {MODEL}\")\n",
+ "print(f\"๐ Prompt length: {len(PLAN_PROMPT):,} characters\")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 9,
@@ -233,40 +993,45 @@
"output_type": "stream",
"text": [
"๐ค Thinking...\n",
- "๐ค Using: Read()\n",
- "โ Tool completed\n",
- "๐ค Using: Glob()\n",
- "โ Tool completed\n",
- "๐ค Using: Read()\n",
- "โ Tool completed\n",
- "๐ค Using: Glob()\n",
- "โ Tool completed\n",
- "๐ค Using: Read()\n",
- "โ Tool completed\n",
- "๐ค Using: Read()\n",
- "โ Tool completed\n",
- "๐ค Using: Glob()\n",
+ "๐ค Using: ExitPlanMode()\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
- "๐ค Using: ExitPlanMode()\n",
- "โ Tool completed\n"
+ "\n",
+ "✅ Agent completed. Captured 3 content blocks.\n"
]
}
],
"source": [
+ "# =============================================================================\n",
+ "# Execute Plan Mode Agent\n",
+ "# =============================================================================\n",
+ "# Run the agent with plan mode enabled. The agent will create a detailed plan\n",
+ "# but won't execute any actions. We capture content from multiple sources\n",
+ "# to handle different agent behaviors.\n",
+ "\n",
+ "# Initialize capture lists\n",
"messages = []\n",
- "async with (\n",
- " ClaudeSDKClient(\n",
- " options=ClaudeAgentOptions(\n",
- " model=\"claude-opus-4-1\", # We're using Opus for this as Opus truly shines when it comes to planning!\n",
- " permission_mode=\"plan\",\n",
- " )\n",
- " ) as agent\n",
- "):\n",
- " await agent.query(\"Restructure our engineering team for AI focus.\")\n",
+ "plan_content = [] # Text from message stream\n",
+ "write_tool_content = [] # Content from Write tool calls\n",
+ "write_tool_paths = [] # Paths from Write tool calls\n",
+ "\n",
+ "# Run the agent in plan mode\n",
+ "async with ClaudeSDKClient(\n",
+ " options=ClaudeAgentOptions(\n",
+ " model=MODEL,\n",
+ " permission_mode=\"plan\",\n",
+ " cwd=\"chief_of_staff_agent\",\n",
+ " )\n",
+ ") as agent:\n",
+ " await agent.query(PLAN_PROMPT)\n",
" async for msg in agent.receive_response():\n",
" print_activity(msg)\n",
- " messages.append(msg)"
+ " messages.append(msg)\n",
+ "\n",
+ " # Capture content from this message\n",
+ " capture_message_content(msg, plan_content, write_tool_content, write_tool_paths)\n",
+ "\n",
+ "print(f\"\\n✅ Agent completed. Captured {len(plan_content)} content blocks.\")"
]
},
{
@@ -278,19 +1043,482 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "\n"
+ "✅ Plan extracted from: message stream\n",
+ " Plan length: 6,783 characters\n",
+ "\n",
+ "๐ Plan saved to: chief_of_staff_agent/plans/plan_20251204_152737.md\n"
]
}
],
"source": [
- "print(messages[-1].result)"
+ "# =============================================================================\n",
+ "# Extract and Save the Plan\n",
+ "# =============================================================================\n",
+ "# Try multiple sources in priority order to find the plan content.\n",
+ "# This handles different agent behaviors robustly.\n",
+ "\n",
+ "final_plan = None\n",
+ "plan_source = None\n",
+ "\n",
+ "# Priority 1: Message stream (preferred - direct from agent response)\n",
+ "final_plan, plan_source = extract_plan_from_messages(plan_content)\n",
+ "\n",
+ "# Priority 2: Write tool captures (if agent saved despite instructions)\n",
+ "if not final_plan and write_tool_content:\n",
+ " final_plan, plan_source = extract_plan_from_write_tool(write_tool_content)\n",
+ "\n",
+ "# Priority 3: Claude's internal plan directory (safety net)\n",
+ "if not final_plan:\n",
+ " final_plan, plan_source = extract_plan_from_claude_dir()\n",
+ "\n",
+ "# Report results\n",
+ "if final_plan:\n",
+ " print(f\"✅ Plan extracted from: {plan_source}\")\n",
+ " print(f\" Plan length: {len(final_plan):,} characters\")\n",
+ "\n",
+ " # Save to file\n",
+ " plan_file = save_plan_to_file(\n",
+ " plan_content=final_plan,\n",
+ " plan_source=plan_source,\n",
+ " model_name=MODEL,\n",
+ " prompt_summary=\"Restructure our engineering team for AI focus.\",\n",
+ " )\n",
+ " print(f\"\\n๐ Plan saved to: {plan_file}\")\n",
+ "else:\n",
+ " error_msg = \"Could not extract plan content from any source!\\n\"\n",
+ " error_msg += \" Sources checked: message stream, Write tool, ~/.claude/plans/\"\n",
+ " if write_tool_paths:\n",
+ " error_msg += f\"\\n Write tool attempted to save to: {write_tool_paths}\"\n",
+ " print(f\"โ ERROR: {error_msg}\")\n",
+ " raise RuntimeError(f\"Plan extraction failed: {error_msg}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Engineering Restructure Plan
I've provided the complete engineering team restructuring plan directly in my response within the <plan> tags as requested. The plan includes:
\n",
+ "
\n",
+ "- Team structure changes - From 25 to 29 engineers with 2 new AI-focused teams
\n",
+ "- Hiring roadmap - 6 new hires over 8 months, prioritized by criticality
\n",
+ "- Internal transfers - 4 engineers upskilled from existing teams
\n",
+ "- 12-month timeline - Phased implementation with clear milestones
\n",
+ "- Budget analysis - Monthly burn increases from ~$380K to ~$537K
\n",
+ "- Success metrics - Concrete KPIs for 6 and 12-month checkpoints
\n",
+ "- Risk mitigation - Key risks identified with mitigation strategies
\n",
+ "
\n",
+ "
Ready to proceed when you've reviewed the plan!
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Display the plan result with styled HTML\n",
+ "display_agent_response(messages, title=\"Engineering Restructure Plan\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "As mentioned above, the agent will stop after creating its plan, if you want it to execute on its plan, you need to send a new query with `continue_conversation=True` and removing `permission_mode=\"plan\"` "
+ "#### Executing the Saved Plan\n",
+ "\n",
+ "As mentioned above, the agent will stop after creating its plan. The saved plan file serves as a review artifact for stakeholders.\n",
+ "\n",
+ "**To execute the plan after review:**\n",
+ "1. Review the saved plan in `chief_of_staff_agent/plans/plan_*.md`\n",
+ "2. If approved, send a new query with `continue_conversation=True` and remove `permission_mode=\"plan\"` to execute\n",
+ "\n",
+ "This workflow enables a \"plan → review → approve → execute\" cycle, perfect for high-stakes decisions like organizational restructuring or major infrastructure changes."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### How Plan Persistence Works\n",
+ "\n",
+ "In the code above, we implemented a **robust multi-source plan capture mechanism** that handles the various ways Plan Mode agents may output their plans:\n",
+ "\n",
+ "**The Challenge:**\n",
+ "When using `permission_mode=\"plan\"`, the agent may output the plan in different ways:\n",
+ "1. **Direct text output** in the message stream (ideal case)\n",
+ "2. **Write tool** to save to `~/.claude/plans/` (Claude's internal plan system)\n",
+ "3. **Write tool** to save to a custom path\n",
+ "\n",
+ "Our capture mechanism handles all three scenarios with a **priority-based fallback system**:\n",
+ "\n",
+ "**Source Priority (in order):**\n",
+ "\n",
+ "1. **Message Stream** (Preferred)\n",
+ " - Capture text blocks from `msg.content` during streaming\n",
+ " - Extract content between `<plan>` XML tags\n",
+ " - This is the cleanest approach as content comes directly from the response\n",
+ "\n",
+ "2. **Write Tool Capture**\n",
+ " - Monitor for Write tool calls in the message stream\n",
+ " - Extract the `content` parameter being written\n",
+ " - Useful when the agent decides to save despite prompt instructions\n",
+ "\n",
+ "3. **Claude's Internal Plan Directory**\n",
+ " - Check `~/.claude/plans/` for recently created plan files (within 5 minutes)\n",
+ " - Read and extract content from the most recent file\n",
+ " - Acts as a safety net when other methods fail\n",
+ "\n",
+ "4. **Full Content Fallback**\n",
+ " - If no XML tags found but substantial content exists (>500 chars), use it directly\n",
+ " - Prevents empty plan files while preserving partial information\n",
+ "\n",
+ "**Key Implementation Details:**\n",
+ "\n",
+ "```python\n",
+ "def extract_plan_from_text(text):\n",
+ " \"\"\"Extract content between <plan> tags, return None if not found or empty.\"\"\"\n",
+ " match = re.search(r'<plan>(.*?)</plan>', text, re.DOTALL)\n",
+ " if match:\n",
+ " extracted = match.group(1).strip()\n",
+ " # Validate minimum content length (a real plan should be substantial)\n",
+ " if len(extracted) > 200:\n",
+ " return extracted\n",
+ " return None\n",
+ "```\n",
+ "\n",
+ "**Why Content Validation Matters:**\n",
+ "- Previous versions could produce empty plan files if extraction \"succeeded\" with no content\n",
+ "- We now require a minimum of 200 characters for XML-tagged content\n",
+ "- This prevents false positives where regex matches empty or trivial content\n",
+ "\n",
+ "**Prompt Engineering for Direct Output:**\n",
+ "The prompt explicitly instructs the agent:\n",
+ "- **DO NOT use the Write tool** - prevents file-system detours\n",
+ "- **Output directly in response** - ensures content flows through message stream\n",
+ "- **Use XML tags** - enables clean extraction from potentially verbose responses\n",
+ "\n",
+ "This approach gives you:\n",
+ "- **Reliability**: Plans are captured regardless of agent behavior\n",
+ "- **Transparency**: The saved file indicates which source was used\n",
+ "- **Audit Trail**: History of all plans with timestamps and source metadata\n",
+ "- **Debugging**: Clear error messages when extraction fails\n",
+ "\n",
+ "Let's view the saved plan:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "## ๐ Saved Plan Preview\n",
+ "\n",
+ "# TechStart Inc. Engineering Team Restructuring Plan for AI Focus\n",
+ "\n",
+ "## Executive Summary\n",
+ "Restructure the 25-person engineering team to build AI/ML capabilities while maintaining core product development. This plan balances immediate hiring needs with internal upskilling and strategic reorganization over a 12-month period.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Current State Assessment\n",
+ "\n",
+ "| Team | Headcount | Current Focus |\n",
+ "|------|-----------|---------------|\n",
+ "| Backend | 12 | Core product, APIs, infrastructure |\n",
+ "| Frontend | 8 | Web/mobile interfaces |\n",
+ "| DevOps | 5 | CI/CD, cloud infrastructure |\n",
+ "| **Total** | **25** | |\n",
+ "\n",
+ "**Key Constraints:**\n",
+ "- Monthly burn: ~$500K | Runway: 20 months\n",
+ "- Senior Engineer comp: $180K-$220K + equity\n",
+ "- Series A stage - need to show growth metrics\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Proposed Future State (Month 12)\n",
+ "\n",
+ "### New Team Structure\n",
+ "\n",
+ "| Team | Current | Future | Change |\n",
+ "|------|---------|--------|--------|\n",
+ "| Backend/Core | 12 | 8 | -4 |\n",
+ "| Frontend | 8 | 6 | -2 |\n",
+ "| DevOps/MLOps | 5 | 6 | +1 |\n",
+ "| **AI/ML Platform** | 0 | 5 | +5 |\n",
+ "| **AI Product** | 0 | 4 | +4 |\n",
+ "| **Total** | **25** | **29** | **+4** |\n",
+ "\n",
+ "### New Teams Created\n",
+ "\n",
+ "**1. AI/ML Platform Team (5 engineers)**\n",
+ "- Focus: Infrastructure, model training pipelines, ML tooling\n",
+ "- Composition: 2 new hires (ML Engineers) + 2 internal transfers (Backend) + 1 internal (DevOps)\n",
+ "- Lead: New hire - Senior ML Engineer ($200-220K)\n",
+ "\n",
+ "**2. AI Product Team (4 engineers)**\n",
+ "- Focus: AI features, integrations, user-facing ML applications\n",
+ "- Composition: 2 new hires (ML/AI specialists) + 1 internal (Backend) + 1 internal (Frontend)\n",
+ "- Lead: Promoted internal senior engineer + AI upskilling\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Hiring Plan\n",
+ "\n",
+ "### New Positions (6 total new hires)\n",
+ "\n",
+ "| Role | Priority | Timeline | Comp Range | Notes |\n",
+ "|------|----------|----------|------------|-------|\n",
+ "| Senior ML Engineer (Lead) | P0 | Month 1-2 | $200-220K | Team lead, architecture |\n",
+ "| ML Engineer | P0 | Month 2-3 | $180-200K | Platform focus |\n",
+ "| ML Engineer | P1 | Month 3-4 | $180-200K | Product focus |\n",
+ "| AI/ML Engineer | P1 | Month 4-5 | $170-190K | Generalist |\n",
+ "| MLOps Engineer | P2 | Month 5-6 | $160-180K | DevOps + ML |\n",
+ "| Junior ML Engineer | P2 | Month 6-8 | $130-150K | Growth hire |\n",
+ "\n",
+ "**Total New Hiring Cost:** ~$1.02-1.14M annually\n",
+ "\n",
+ "### Internal Transfers & Upskilling (4 engineers)\n",
+ "\n",
+ "| From Team | # Engineers | New Role | Training Investment |\n",
+ "|-----------|-------------|----------|---------------------|\n",
+ "| Backend | 3 | ML Platform/Product | $15K each (courses, certs) |\n",
+ "| Frontend | 1 | AI Product (UI/UX) | $10K (AI tools training) |\n",
+ "\n",
+ "**Upskilling Budget:** ~$55K total\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Implementation Timeline\n",
+ "\n",
+ "### Phase 1: Foundation (Months 1-3)\n",
+ "- [ ] Hire Senior ML Engineer (Team Lead) - **Critical first hire**\n",
+ "- [ ] Identify 4 internal transfer candidates based on interest/aptitude\n",
+ "- [ ] Begin ML upskilling program for transfer candidates\n",
+ "- [ ] Set up ML infrastructure foundations (MLOps engineer involvement)\n",
+ "- [ ] Define AI product roadmap with Product team\n",
+ "\n",
+ "### Phase 2: Team Formation (Months 4-6)\n",
+ "- [ ] Complete core AI team hiring (4 of 6 hires)\n",
+ "- [ ] Officially launch AI/ML Platform team\n",
+ "- [ ] Internal transfers complete bootcamp/training\n",
+ "- [ ] First AI feature POC delivered\n",
+ "- [ ] MLOps practices integrated into DevOps workflow\n",
+ "\n",
+ "### Phase 3: Scaling (Months 7-9)\n",
+ "- [ ] Complete remaining hires\n",
+ "- [ ] AI Product team fully operational\n",
+ "- [ ] First AI feature in production\n",
+ "- [ ] Cross-team collaboration patterns established\n",
+ "- [ ] Model monitoring and observability in place\n",
+ "\n",
+ "### Phase 4: Optimization (Months 10-12)\n",
+ "- [ ] Team velocity optimization\n",
+ "- [ ] Evaluate team composition effectiveness\n",
+ "- [ ] Plan for next growth phase\n",
+ "- [ ] Document AI development best practices\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Budget Impact Analysis\n",
+ "\n",
+ "### Monthly Cost Changes\n",
+ "\n",
+ "| Category | Current | Month 6 | Month 12 |\n",
+ "|----------|---------|---------|----------|\n",
+ "| Engineering Salaries | ~$375K | ~$430K | ~$485K |\n",
+ "| Training/Upskilling | $0 | $9K | $2K |\n",
+ "| AI Tools/Infrastructure | ~$5K | ~$20K | ~$35K |\n",
+ "| Recruiting Costs | Variable | ~$40K | ~$15K |\n",
+ "| **Monthly Total** | **~$380K** | **~$499K** | **~$537K** |\n",
+ "\n",
+ "### Annual Impact Summary\n",
+ "- **Year 1 Additional Investment:** ~$900K-1.1M\n",
+ "- **New Monthly Burn (Month 12):** ~$537K (+$37K from engineering growth)\n",
+ "- **Runway Impact:** Reduces to ~17 months (still healthy for Series A)\n",
+ "\n",
+ "### ROI Considerations\n",
+ "- AI features typically command 20-40% pricing premium in B2B SaaS\n",
+ "- Competitive differentiation in market\n",
+ "- Potential for AI-driven operational efficiencies\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Risk Mitigation\n",
+ "\n",
+ "| Risk | Likelihood | Impact | Mitigation |\n",
+ "|------|------------|--------|------------|\n",
+ "| Senior ML hire takes >3 months | Medium | High | Engage specialized recruiters early; consider contractor bridge |\n",
+ "| Internal transfers struggle with ML | Low | Medium | Rigorous selection process; extended training runway |\n",
+ "| AI projects don't deliver value | Medium | High | Start with high-impact, lower-complexity features |\n",
+ "| Team culture friction | Low | Medium | Integrate teams gradually; shared goals and rituals |\n",
+ "| Budget overrun | Medium | Medium | Phase hiring based on runway checks quarterly |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Success Metrics\n",
+ "\n",
+ "### 6-Month Milestones\n",
+ "- [ ] AI/ML Platform team fully staffed (5 engineers)\n",
+ "- [ ] First AI-powered feature in beta\n",
+ "- [ ] 4 internal engineers completed ML certification\n",
+ "- [ ] ML infrastructure supporting model training/deployment\n",
+ "\n",
+ "### 12-Month Milestones\n",
+ "- [ ] 2+ AI features in production\n",
+ "- [ ] AI team velocity matches core team benchmarks\n",
+ "- [ ] Customer NPS improvement attributable to AI features\n",
+ "- [ ] 15%+ of product roadmap is AI-focused\n",
+ "- [ ] Retention of transferred engineers >90%\n",
+ "\n",
+ "### KPIs to Track\n",
+ "- Time-to-hire for ML roles\n",
+ "- AI feature adoption rates\n",
+ "- Model performance metrics (latency, accuracy)\n",
+ "- Engineering velocity (story points/sprint) across teams\n",
+ "- Employee satisfaction scores (especially transfers)\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Immediate Next Steps (Week 1-2)\n",
+ "\n",
+ "1. **Executive alignment** - Present plan to leadership for approval\n",
+ "2. **Recruiter engagement** - Brief recruiting on Senior ML Engineer search\n",
+ "3. **Internal survey** - Gauge interest in AI/ML roles among current engineers\n",
+ "4. **Budget approval** - Finance sign-off on increased burn rate\n",
+ "5. **Infrastructure assessment** - DevOps audit of ML infrastructure needs\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Organizational Chart (Future State)\n",
+ "\n",
+ "```\n",
+ "VP Engineering\n",
+ "โโโ Backend/Core Team (8)\n",
+ "โ โโโ 2 squads ร 4 engineers\n",
+ "โโโ Frontend Team (6)\n",
+ "โ โโโ 2 squads ร 3 engineers\n",
+ "โโโ DevOps/MLOps Team (6)\n",
+ "โ โโโ 4 DevOps + 2 MLOps\n",
+ "โโโ AI/ML Platform Team (5) [NEW]\n",
+ "โ โโโ Lead + 4 engineers\n",
+ "โโโ AI Product Team (4) [NEW]\n",
+ " โโโ Lead + 3 engineers\n",
+ "```\n",
+ "\n",
+ "---\n",
+ "\n",
+ "*Plan prepared for TechStart Inc. Series A stage (~$10M raised, 20-month runway)*\n",
+ "*Recommended review: Quarterly budget and hiring progress checkpoints*"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "๐ Full plan with metadata saved to: chief_of_staff_agent/plans/plan_20251204_152737.md\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Display the saved plan with markdown rendering\n",
+ "from IPython.display import Markdown, display\n",
+ "\n",
+ "# Show the plan with proper markdown formatting\n",
+ "display(Markdown(f\"## ๐ Saved Plan Preview\\n\\n{final_plan}\"))\n",
+ "\n",
+ "print(f\"\\n๐ Full plan with metadata saved to: {plan_file}\")"
]
},
{
@@ -314,13 +1542,15 @@
"\n",
"**How**:\n",
"- Define a markdown file in `.claude/commands/`. For example, we defined one in `.claude/commands/slash-command-test.md`. Notice how the command is defined: frontmatter with two fields (name, description) and the expanded prompt with an option to include arguments passed on in the query.\n",
- "- You can add parameters to your prompt using `{{args}}`\n",
- "- The user uses the slash command in their prompt"
+ "- You can add parameters to your prompt using `$ARGUMENTS` (for full argument string) or `$1`, `$2`, etc. (for positional arguments)\n",
+ "- The user uses the slash command in their prompt\n",
+ "\n",
+ "> **CRITICAL SDK CONFIGURATION**: When using the SDK, you **must** set `setting_sources=[\"project\"]` in your `ClaudeAgentOptions` for slash commands to work. By default, the SDK operates in isolation mode and does NOT load filesystem settings (slash commands, CLAUDE.md, subagents, hooks, etc.). This is different from using Claude Code interactively where these are loaded automatically."
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -338,7 +1568,13 @@
"\n",
"messages = []\n",
"async with ClaudeSDKClient(\n",
- " options=ClaudeAgentOptions(model=\"claude-sonnet-4-5\", cwd=\"chief_of_staff_agent\")\n",
+ " options=ClaudeAgentOptions(\n",
+ " model=MODEL,\n",
+ " cwd=\"chief_of_staff_agent\",\n",
+ " # IMPORTANT: setting_sources must include \"project\" to load slash commands from .claude/commands/\n",
+ " # Without this, the SDK does NOT load filesystem settings (slash commands, CLAUDE.md, etc.)\n",
+ " setting_sources=[\"project\"],\n",
+ " )\n",
") as agent:\n",
" await agent.query(\"/slash-command-test this is a test\")\n",
" async for msg in agent.receive_response():\n",
@@ -348,19 +1584,100 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "test a is this\n"
- ]
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Slash Command Result
The sentence \"this is a test\" reversed word-wise is:
\n",
+ "
test a is this
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
- "print(messages[-1].result)"
+ "display_agent_response(messages, title=\"Slash Command Result\")"
]
},
{
@@ -376,72 +1693,223 @@
"**How**:\n",
"- Define hook scripts in `.claude/hooks/` -> _what_ is the behaviour that should be executed when a hook is triggered\n",
"- Define hook configuration in `.claude/settings.local.json` -> _when_ should a hook be triggered\n",
- "- In this case, our hooks are configured to watch specific tool calls (WebSearch, Write, Edit, etc.)\n",
- "- When those tools are called, the hook script either runs first (pre tool use hook) or after (post tool use hook)\n",
+ "- In this case, our hooks are configured to watch specific tool calls (Bash, Write, Edit)\n",
+ "- When those tools are called, the hook script runs after the tool completes (PostToolUse)\n",
+ "\n",
+ "> **SDK CONFIGURATION NOTE**: Hooks configured in `.claude/settings.local.json` require `setting_sources=[\"project\", \"local\"]`. The SDK distinguishes between three setting sources:\n",
+ "> - `\"project\"` → `.claude/settings.json` (version-controlled, team-shared)\n",
+ "> - `\"local\"` → `.claude/settings.local.json` (gitignored, local settings like hooks)\n",
+ "> - `\"user\"` → `~/.claude/settings.json` (global user settings)\n",
+ ">\n",
+ "> Since our hooks are in `settings.local.json`, we must include `\"local\"` in `setting_sources`.\n",
"\n",
"**Example: Report Tracking for Compliance**\n",
"\n",
"A hook to log Write/Edit operations on financial reports for audit and compliance purposes.\n",
- "The hook is defined in `chief_of_staff_agent/.claude/hooks/report-tracker.py` and the logic that enforces it is in `chief_of_staff/.claude/settings.local.json`:\n",
+ "The hook is defined in `chief_of_staff_agent/.claude/hooks/report-tracker.py` and the logic that enforces it is in `chief_of_staff_agent/.claude/settings.local.json`:\n",
"\n",
"\n",
"```json\n",
- " \"hooks\": {\n",
- " \"PostToolUse\": [\n",
- " {\n",
- " \"matcher\": \"Write|Edit\",\n",
- " \"hooks\": [\n",
- " {\n",
- " \"type\": \"command\",\n",
- " \"command\": \"$CLAUDE_PROJECT_DIR/.claude/hooks/report-tracker.py\"\n",
- " }\n",
- " ]\n",
- " }\n",
- " ]\n",
- " }\n",
+ "\"hooks\": {\n",
+ " \"PostToolUse\": [\n",
+ " {\n",
+ " \"matcher\": \"Write\",\n",
+ " \"hooks\": [\n",
+ " {\n",
+ " \"type\": \"command\",\n",
+ " \"command\": \"$CLAUDE_PROJECT_DIR/.claude/hooks/report-tracker.py\"\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " \"matcher\": \"Edit\",\n",
+ " \"hooks\": [\n",
+ " {\n",
+ " \"type\": \"command\",\n",
+ " \"command\": \"$CLAUDE_PROJECT_DIR/.claude/hooks/report-tracker.py\"\n",
+ " }\n",
+ " ]\n",
+ " }\n",
+ " ]\n",
+ "}\n",
"```"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
+ "๐ค Using: Bash()\n",
+ "๐ค Using: Bash()\n",
+ "๐ค Using: Bash()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
"โ Tool completed\n",
+ "โ Tool completed\n",
+ "๐ค Using: Read()\n",
+ "๐ค Using: Read()\n",
+ "๐ค Using: Read()\n",
"๐ค Using: Bash()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
"โ Tool completed\n",
- "๐ค Using: Bash()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
"โ Tool completed\n",
+ "๐ค Thinking...\n",
"๐ค Using: Write()\n",
"โ Tool completed\n",
- "๐ค Using: TodoWrite()\n",
- "โ Tool completed\n",
"๐ค Thinking...\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Q2 Financial Forecast
I've created a comprehensive Q2 Financial Forecast Report and saved it to the output_reports folder. Here's a summary of what's included:
\n",
+ "
Report Summary
\n",
+ "
๐ Q2 2024 Financial Forecast Report Created
\n",
+ "
Location: /output_reports/Q2_2024_Financial_Forecast_Report.md
\n",
+ "
Key Highlights:
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Metric | \n",
+ "Value | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Cash in Bank | \n",
+ "$10M | \n",
+ "
\n",
+ "\n",
+ "| Current Runway | \n",
+ "20 months (until Sept 2025) | \n",
+ "
\n",
+ "\n",
+ "| Monthly Burn | \n",
+ "$500K gross / ~$260K net | \n",
+ "
\n",
+ "\n",
+ "| Q2 Total Net Burn | \n",
+ "$740K | \n",
+ "
\n",
+ "\n",
+ "| ARR | \n",
+ "$2.4M (15% MoM growth) | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Positive Trends:
\n",
+ "
\n",
+ "- โ
Net burn decreasing 9.6% month-over-month (from $260K โ $235K)
\n",
+ "- โ
Revenue growth (20.8%) outpacing burn increase (5%)
\n",
+ "- โ
Revenue per employee improving 14%
\n",
+ "
\n",
+ "
Planning Scenarios:
\n",
+ "
\n",
+ "- Conservative: 38-month runway (if revenue holds flat)
\n",
+ "- With 10 new engineers: ~15-month runway
\n",
+ "- Base case with growth: Path to break-even by Q2 2025
\n",
+ "
\n",
+ "
The report includes detailed monthly breakdowns, hiring cost impact analysis, revenue milestones, risk assessment, and strategic recommendations for Series B timing.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
"messages = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=MODEL,\n",
" cwd=\"chief_of_staff_agent\",\n",
" allowed_tools=[\"Bash\", \"Write\", \"Edit\", \"MultiEdit\"],\n",
+ " # IMPORTANT: setting_sources must include BOTH \"project\" AND \"local\" to load hooks\n",
+ " # - \"project\" loads .claude/settings.json (shared settings, CLAUDE.md, slash commands)\n",
+ " # - \"local\" loads .claude/settings.local.json (where hooks are configured)\n",
+ " setting_sources=[\"project\", \"local\"],\n",
" )\n",
") as agent:\n",
" await agent.query(\n",
@@ -451,7 +1919,8 @@
" print_activity(msg)\n",
" messages.append(msg)\n",
"\n",
- "# The hook will track this in audit/report_history.json"
+ "# The hook will track this in audit/report_history.json\n",
+ "display_agent_response(messages, title=\"Q2 Financial Forecast\")"
]
},
{
@@ -477,12 +1946,18 @@
"**How**:\n",
"- Add `\"Task\"` to allowed_tools\n",
"- Use a system prompt to instruct your agent how to delegate tasks (you can also define this its CLAUDE.md more generally)\n",
- "- Create a markdown file for each agent in `.claude/agents/`. For example, check the one for `.claude/agents/financial-analyst.md` and notice how a (sub)agent can be defined with such an easy and intuitive markdown file: frontmatter with three fields (name, description, and tools) and its system prompt. The description is useful for the main chief of staff agent to know when to invoke each subagent."
+ "- Create a markdown file for each agent in `.claude/agents/`. For example, check the one for `.claude/agents/financial-analyst.md` and notice how a (sub)agent can be defined with such an easy and intuitive markdown file: frontmatter with three fields (name, description, and tools) and its system prompt. The description is useful for the main chief of staff agent to know when to invoke each subagent.\n",
+ "\n",
+ "**Visualization Enhancements**: Our `print_activity()` and `visualize_conversation()` utilities have been enhanced to clearly show subagent operations:\n",
+ "- ๐ indicates when a subagent is being delegated to (with the subagent name)\n",
+ "- ๐ indicates tools being used BY the subagent (indented for visual hierarchy)\n",
+ "- Visual separators clearly mark subagent delegation and completion boundaries\n",
+ "- Task descriptions and prompts are shown in the conversation timeline"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -490,51 +1965,451 @@
"output_type": "stream",
"text": [
"๐ค Thinking...\n",
- "๐ค Using: Task()\n",
- "๐ค Using: Bash()\n",
- "๐ค Using: Read()\n",
- "โ Tool completed\n",
- "โ Tool completed\n",
- "๐ค Using: Bash()\n",
- "๐ค Using: Bash()\n",
- "โ Tool completed\n",
- "โ Tool completed\n",
- "๐ค Using: Read()\n",
- "๐ค Using: Read()\n",
- "๐ค Using: Read()\n",
- "โ Tool completed\n",
- "โ Tool completed\n",
- "โ Tool completed\n",
- "๐ค Using: Bash()\n",
- "โ Tool completed\n",
- "๐ค Using: Bash()\n",
- "โ Tool completed\n",
- "โ Tool completed\n",
- "๐ค Thinking...\n"
+ "๐ Delegating to subagent: financial-analyst\n",
+ " โโ Task: Analyze 5 engineer hiring impact\n",
+ " โ Tool completed\n",
+ " ๐ [financial-analyst] Using: Bash()\n",
+ " ๐ [financial-analyst] Using: Read()\n",
+ " ๐ [financial-analyst] Using: Read()\n",
+ " โ Tool completed\n",
+ " โ Tool completed\n",
+ " โ Tool completed\n",
+ " ๐ [financial-analyst] Using: Read()\n",
+ " ๐ [financial-analyst] Using: Bash()\n",
+ " โ Tool completed\n",
+ " โ Tool completed\n",
+ " ๐ [financial-analyst] Using: Bash()\n",
+ " โ Tool completed\n",
+ " ๐ [financial-analyst] Using: Bash()\n",
+ " โ Tool completed\n",
+ " ๐ [financial-analyst] Using: Bash()\n",
+ " โ Tool completed\n",
+ " ๐ [financial-analyst] Using: Bash()\n",
+ " โ Tool completed\n",
+ " โ Tool completed\n",
+ " ๐ [financial-analyst] Thinking...\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Hiring Impact Analysis
Financial Impact Analysis: Hiring 5 Engineers
\n",
+ "
Executive Summary
\n",
+ "
Recommendation: โ
PROCEED WITH CAUTION - Use a staged hiring approach with 3 Senior + 2 Junior engineers
\n",
+ "
\n",
+ "
Financial Impact at a Glance
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Metric | \n",
+ "Current | \n",
+ "After 5 Hires | \n",
+ "Change | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Monthly Burn | \n",
+ "$500,000 | \n",
+ "$575,833 | \n",
+ "+15.2% | \n",
+ "
\n",
+ "\n",
+ "| Runway | \n",
+ "20 months | \n",
+ "17.1 months | \n",
+ "-2.9 months | \n",
+ "
\n",
+ "\n",
+ "| Headcount | \n",
+ "50 | \n",
+ "55 | \n",
+ "+10% | \n",
+ "
\n",
+ "\n",
+ "| Engineering % | \n",
+ "50% | \n",
+ "54.5% | \n",
+ "+4.5% | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
One-time costs: ~$165K (recruiting + onboarding)
\n",
+ "
\n",
+ "
Three Scenarios Analyzed
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Scenario | \n",
+ "Mix | \n",
+ "New Monthly Burn | \n",
+ "New Runway | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| A (Recommended) | \n",
+ "3 Senior + 2 Junior | \n",
+ "$575,833 | \n",
+ "17.1 months | \n",
+ "
\n",
+ "\n",
+ "| B | \n",
+ "5 Senior | \n",
+ "$591,665 | \n",
+ "16.6 months | \n",
+ "
\n",
+ "\n",
+ "| C | \n",
+ "2 Senior + 3 Junior | \n",
+ "$567,917 | \n",
+ "17.3 months | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
Why This Works โ
\n",
+ "
\n",
+ "- Strong Revenue Growth - 15% MoM ARR growth ($2.4M โ projected $5.5M by Dec 2024) partially offsets increased burn
\n",
+ "- Healthy Runway Post-Hire - 17+ months exceeds the minimum 12-month threshold
\n",
+ "- Series B Alignment - Maintains sufficient runway to close Series B (target Q1 2026)
\n",
+ "- Strategic Necessity - Product launch and European expansion require engineering capacity
\n",
+ "
\n",
+ "
Key Risks โ ๏ธ
\n",
+ "
\n",
+ "- Runway Compression - 2.9-month reduction leaves less buffer
\n",
+ "- Series B Dependency - Must close by Q1 2026 (reduced negotiating flexibility)
\n",
+ "- Revenue Assumptions - Projections depend on continued 15% MoM growth
\n",
+ "
\n",
+ "
\n",
+ "
Recommended Implementation Plan
\n",
+ "
Phase 1 (Immediate): Hire 3 engineers (2 Senior + 1 Junior)
\n",
+ "Phase 2 (30-60 days later): Hire remaining 2 engineers (1 Senior + 1 Junior)
\n",
+ "
Financial Guardrails:
\n",
+ "- Maintain minimum 15-month runway at all times
\n",
+ "- Pause hiring if revenue growth drops below 12% MoM
\n",
+ "- Begin Series B conversations by August 2024
\n",
+ "
\n",
+ "
Bottom Line
\n",
+ "
The hire is financially prudent given your strong growth trajectory and healthy runway buffer. However, it's critical to:
\n",
+ "1. Execute staged hiring to de-risk
\n",
+ "2. Maintain revenue momentum
\n",
+ "3. Stay on track with Series B timeline
\n",
+ "
Confidence Level: 75% โ Proceed, but with active monitoring and clear guardrails.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
+ "# Reset the subagent tracking context before starting a new query\n",
+ "# This ensures clean state for activity display\n",
+ "reset_activity_context()\n",
+ "\n",
"messages = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=MODEL,\n",
" allowed_tools=[\"Task\"], # this enables our Chief agent to invoke subagents\n",
" system_prompt=\"Delegate financial questions to the financial-analyst subagent. Do not try to answer these questions yourself.\",\n",
" cwd=\"chief_of_staff_agent\",\n",
+ " setting_sources=[\"project\", \"local\"],\n",
" )\n",
") as agent:\n",
" await agent.query(\"Should we hire 5 engineers? Analyze the financial impact.\")\n",
" async for msg in agent.receive_response():\n",
" print_activity(msg)\n",
- " messages.append(msg)"
+ " messages.append(msg)\n",
+ "\n",
+ "display_agent_response(messages, title=\"Hiring Impact Analysis\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
โ๏ธ System
Initialized (bf3bc346...)
๐ค Assistant
I'll delegate this financial analysis to the financial-analyst subagent who can properly analyze the impact of hiring 5 engineers.
๐ Subagent: financial-analyst
Analyze 5 engineer hiring impact
๐ง Tools
BashRead: burn_rate.csvRead: revenue_forecast.jsonRead: hiring_costs.csvBashBashBashBashBash
๐ค Assistant
Financial Impact Analysis: Hiring 5 Engineers
\n",
+ "
Executive Summary
\n",
+ "
Recommendation: โ
PROCEED WITH CAUTION - Use a staged hiring approach with 3 Senior + 2 Junior engineers
\n",
+ "
\n",
+ "
Financial Impact at a Glance
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Metric | \n",
+ "Current | \n",
+ "After 5 Hires | \n",
+ "Change | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Monthly Burn | \n",
+ "$500,000 | \n",
+ "$575,833 | \n",
+ "+15.2% | \n",
+ "
\n",
+ "\n",
+ "| Runway | \n",
+ "20 months | \n",
+ "17.1 months | \n",
+ "-2.9 months | \n",
+ "
\n",
+ "\n",
+ "| Headcount | \n",
+ "50 | \n",
+ "55 | \n",
+ "+10% | \n",
+ "
\n",
+ "\n",
+ "| Engineering % | \n",
+ "50% | \n",
+ "54.5% | \n",
+ "+4.5% | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
One-time costs: ~$165K (recruiting + onboarding)
\n",
+ "
\n",
+ "
Three Scenarios Analyzed
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Scenario | \n",
+ "Mix | \n",
+ "New Monthly Burn | \n",
+ "New Runway | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| A (Recommended) | \n",
+ "3 Senior + 2 Junior | \n",
+ "$575,833 | \n",
+ "17.1 months | \n",
+ "
\n",
+ "\n",
+ "| B | \n",
+ "5 Senior | \n",
+ "$591,665 | \n",
+ "16.6 months | \n",
+ "
\n",
+ "\n",
+ "| C | \n",
+ "2 Senior + 3 Junior | \n",
+ "$567,917 | \n",
+ "17.3 months | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
Why This Works โ
\n",
+ "
\n",
+ "- Strong Revenue Growth - 15% MoM ARR growth ($2.4M โ projected $5.5M by Dec 2024) partially offsets increased burn
\n",
+ "- Healthy Runway Post-Hire - 17+ months exceeds the minimum 12-month threshold
\n",
+ "- Series B Alignment - Maintains sufficient runway to close Series B (target Q1 2026)
\n",
+ "- Strategic Necessity - Product launch and European expansion require engineering capacity
\n",
+ "
\n",
+ "
Key Risks โ ๏ธ
\n",
+ "
\n",
+ "- Runway Compression - 2.9-month reduction leaves less buffer
\n",
+ "- Series B Dependency - Must close by Q1 2026 (reduced negotiating flexibility)
\n",
+ "- Revenue Assumptions - Projections depend on continued 15% MoM growth
\n",
+ "
\n",
+ "
\n",
+ "
Recommended Implementation Plan
\n",
+ "
Phase 1 (Immediate): Hire 3 engineers (2 Senior + 1 Junior)
\n",
+ "Phase 2 (30-60 days later): Hire remaining 2 engineers (1 Senior + 1 Junior)
\n",
+ "
Financial Guardrails:
\n",
+ "- Maintain minimum 15-month runway at all times
\n",
+ "- Pause hiring if revenue growth drops below 12% MoM
\n",
+ "- Begin Series B conversations by August 2024
\n",
+ "
\n",
+ "
Bottom Line
\n",
+ "
The hire is financially prudent given your strong growth trajectory and healthy runway buffer. However, it's critical to:
\n",
+ "1. Execute staged hiring to de-risk
\n",
+ "2. Maintain revenue momentum
\n",
+ "3. Stay on track with Series B timeline
\n",
+ "
Confidence Level: 75% โ Proceed, but with active monitoring and clear guardrails.
โ
Complete
Turns: 2 Tokens: 1,173 Cost: $0.28 Duration: 164.9s
\n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"visualize_conversation(messages)"
]
@@ -588,17 +2463,30 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
+ "๐ค Thinking...\n",
+ "๐ค Using: Glob()\n",
+ "๐ค Using: Glob()\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
+ "๐ค Using: Read()\n",
+ "๐ค Using: Read()\n",
+ "๐ค Using: Read()\n",
+ "๐ค Using: Read()\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
"๐ค Thinking...\n",
"๐ค Using: Task()\n",
- "๐ค Using: Bash()\n",
"โ Tool completed\n",
+ "๐ค Using: Bash()\n",
"๐ค Using: Read()\n",
"๐ค Using: Read()\n",
"๐ค Using: Read()\n",
@@ -606,6 +2494,7 @@
"โ Tool completed\n",
"โ Tool completed\n",
"โ Tool completed\n",
+ "โ Tool completed\n",
"๐ค Thinking...\n",
"๐ค Using: Write()\n",
"โ Tool completed\n",
@@ -616,6 +2505,8 @@
"source": [
"from chief_of_staff_agent.agent import send_query\n",
"\n",
+ "reset_activity_context()\n",
+ "\n",
"result, messages = await send_query(\n",
" \"/budget-impact hiring 3 senior engineers. Save your insights by updating the 'hiring_decision.md' file in /output_reports or creating a new file there\",\n",
" # permission_mode=\"plan\", # Enable this to use planning mode\n",
@@ -625,9 +2516,175 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
โ๏ธ System
Initialized (cea37adf...)
๐ค Assistant
I'll analyze the budget impact of hiring 3 senior engineers. Let me first check the current financial data and existing reports, then delegate to the financial analyst for a comprehensive analysis.
๐ง Tools
GlobGlobRead: hiring_costs.csvRead: burn_rate.csvRead: revenue_forecast.jsonRead: hiring_decision.md
๐ค Assistant
I see there's already an existing analysis. Let me delegate to the financial analyst subagent to provide an updated, comprehensive analysis with the latest data and additional insights.
๐ Subagent: financial-analyst
Analyze hiring 3 senior engineers budget impact
๐ง Tools
BashRead: hiring_costs.csvRead: burn_rate.csvRead: revenue_forecast.json
๐ค Assistant
Excellent! The financial analyst has provided a comprehensive analysis. Let me now update the hiring decision report with this enhanced analysis.
๐ค Assistant
I've completed the comprehensive budget impact analysis for hiring 3 senior engineers. Here's a summary of the key findings:
\n",
+ "
\n",
+ "
Budget Impact Analysis Summary
\n",
+ "
Recommendation: CONDITIONAL APPROVAL WITH STAGGERED APPROACH
\n",
+ "
Key Financial Impact
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Metric | \n",
+ "Current | \n",
+ "Post-Hiring | \n",
+ "Change | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Monthly Gross Burn | \n",
+ "$525K | \n",
+ "$590K | \n",
+ "+$65K (+12.4%) | \n",
+ "
\n",
+ "\n",
+ "| Monthly Net Burn | \n",
+ "$235K | \n",
+ "$300K | \n",
+ "+$65K (+27.7%) | \n",
+ "
\n",
+ "\n",
+ "| Cash Runway | \n",
+ "42.6 months | \n",
+ "32.9 months | \n",
+ "-9.7 months | \n",
+ "
\n",
+ "\n",
+ "| Break-Even | \n",
+ "- | \n",
+ "November 2024 | \n",
+ "5 months | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Total Investment
\n",
+ "
\n",
+ "- One-time costs: $129,000 (recruiting, onboarding, equipment)
\n",
+ "- Annual recurring: $780,000 ($65K/month loaded)
\n",
+ "- First year total: $909,000
\n",
+ "
\n",
+ "
Why Proceed?
\n",
+ "
\n",
+ "- Break-even achievable in 5 months with current 15% MoM revenue growth
\n",
+ "- Strong runway buffer - 32.9 months even without revenue growth
\n",
+ "- Strategic timing aligns with Q2 product launch and market opportunity
\n",
+ "
\n",
+ "
Risk Mitigation Strategy
\n",
+ "
\n",
+ "- Staggered hiring (1 engineer/month) reduces integration risk
\n",
+ "- Milestone gates allow pause/abort if revenue growth slows
\n",
+ "- Series B prep should begin immediately to strengthen position
\n",
+ "
\n",
+ "
Critical Success Factors
\n",
+ "
\n",
+ "- Maintain >12% MoM revenue growth
\n",
+ "- Hire Engineering Manager concurrent with first engineer
\n",
+ "- Begin Series B conversations now
\n",
+ "- Complete stock refresh for key engineer retention
\n",
+ "
\n",
+ "
The full analysis has been saved to /output_reports/hiring_decision.md with detailed sections on ROI analysis, 4 alternative options, 8 risk factors with mitigations, implementation timeline, and success metrics.
โ
Complete
Turns: 9 Tokens: 5,871 Cost: $0.48 Duration: 279.1s
\n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"visualize_conversation(messages)"
]
@@ -636,23 +2693,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Conclusion",
- "",
- "We've demonstrated how the Claude Code SDK enables you to build sophisticated multi-agent systems with enterprise-grade features. Starting from basic script execution with the Bash tool, we progressively introduced advanced capabilities including persistent memory with CLAUDE.md, custom output styles for different audiences, strategic planning mode, slash commands for user convenience, compliance hooks for guardrailing, and subagent coordination for specialized tasks.",
- "",
- "By combining these features, we created an AI Chief of Staff capable of handling complex executive decision-making workflows. The system delegates financial analysis to specialized subagents, maintains audit trails through hooks, adapts communication styles for different stakeholders, and provides actionable insights backed by data-driven analysis.",
- "",
- "This foundation in advanced agentic patterns and multi-agent orchestration prepares you for building production-ready enterprise systems. In the next notebook, we'll explore how to connect our agents to external services through Model Context Protocol (MCP) servers, dramatically expanding their capabilities beyond the built-in tools.",
- "",
+ "## Conclusion\n",
+ "\n",
+ "We've demonstrated how the Claude Agent SDK enables you to build sophisticated multi-agent systems with enterprise-grade features. Starting from basic script execution with the Bash tool, we progressively introduced advanced capabilities including persistent memory with CLAUDE.md, custom output styles for different audiences, strategic planning mode, slash commands for user convenience, compliance hooks for guardrailing, and subagent coordination for specialized tasks.\n",
+ "\n",
+ "By combining these features, we created an AI Chief of Staff capable of handling complex executive decision-making workflows. The system delegates financial analysis to specialized subagents, maintains audit trails through hooks, adapts communication styles for different stakeholders, and provides actionable insights backed by data-driven analysis.\n",
+ "\n",
+ "This foundation in advanced agentic patterns and multi-agent orchestration prepares you for building production-ready enterprise systems. In the next notebook, we'll explore how to connect our agents to external services through Model Context Protocol (MCP) servers, dramatically expanding their capabilities beyond the built-in tools.\n",
+ "\n",
"Next: [02_The_observability_agent.ipynb](02_The_observability_agent.ipynb) - Learn how to extend your agents with custom integrations and external data sources through MCP."
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python (cc-sdk-tutorial)",
+ "display_name": "cc-sdk-tutorial",
"language": "python",
- "name": "cc-sdk-tutorial"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -664,9 +2721,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.13"
+ "version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/claude_agent_sdk/02_The_observability_agent.ipynb b/claude_agent_sdk/02_The_observability_agent.ipynb
index 2fb722e8..371a2e3c 100644
--- a/claude_agent_sdk/02_The_observability_agent.ipynb
+++ b/claude_agent_sdk/02_The_observability_agent.ipynb
@@ -2,16 +2,24 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "7e7958dd",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
+ "import shutil\n",
+ "import subprocess\n",
"from typing import Any\n",
"\n",
"from dotenv import load_dotenv\n",
- "from utils.agent_visualizer import print_activity\n",
+ "from IPython.display import Markdown, display\n",
+ "from utils.agent_visualizer import (\n",
+ " display_agent_response,\n",
+ " print_activity,\n",
+ " reset_activity_context,\n",
+ " visualize_conversation,\n",
+ ")\n",
"\n",
"from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient"
]
@@ -28,7 +36,13 @@
"cell_type": "markdown",
"id": "08cc95b6",
"metadata": {},
- "source": "In the previous notebooks we have built a basic research agent and a Chief of Staff multi-agent framework. While the agents we have built are already powerful, they were still limited in what they could do: the web search agent is limited to searching the internet and our Chief of Staff agent was limited to interacting with its own filesystem.\n\nThis is a serious constraint: real-world agents often need to interact with other systems like databases, APIs, file systems, and other specialized services. [MCP (Model Context Protocol)](https://modelcontextprotocol.io/docs/getting-started/intro) is an open-source standard for AI-tool integrations that allows for an easy connection between our agents and these external systems. In this notebook, we will explore how to connect MCP servers to our agent.\n\n**Need more details on MCP?** For comprehensive setup instructions, configuration best practices, and troubleshooting tips, see the [Claude Code MCP documentation](https://docs.claude.com/en/docs/claude-code/mcp)."
+ "source": [
+ "In the previous notebooks we built a basic research agent and a Chief of Staff multi-agent framework. While these agents are already powerful, they are still limited in what they can do: the web search agent is limited to searching the internet, and our Chief of Staff agent is limited to interacting with its own filesystem.\n",
+ "\n",
+ "This is a serious constraint: real-world agents often need to interact with other systems like databases, APIs, file systems, and other specialized services. [MCP (Model Context Protocol)](https://modelcontextprotocol.io/docs/getting-started/intro) is an open-source standard for AI-tool integrations that allows for an easy connection between our agents and these external systems. In this notebook, we will explore how to connect MCP servers to our agent.\n",
+ "\n",
+ "**Need more details on MCP?** For comprehensive setup instructions, configuration best practices, and troubleshooting tips, see the [Claude Code MCP documentation](https://docs.claude.com/en/docs/claude-code/mcp)."
+ ]
},
{
"cell_type": "markdown",
@@ -43,23 +57,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "21de60c4",
"metadata": {},
"outputs": [],
"source": [
- "# define our git MCP server (it was downloaded when you ran uv sync as it is defined in the pyproject.toml file)\n",
+ "# Get the git repository root (mcp_server_git requires a valid git repo path)\n",
+ "# os.getcwd() may return a subdirectory, so we find the actual repo root\n",
+ "git_executable = shutil.which(\"git\")\n",
+ "if git_executable is None:\n",
+ " raise RuntimeError(\"Git executable not found in PATH\")\n",
+ "\n",
+ "git_repo_root = subprocess.run( # noqa: S603\n",
+ " [git_executable, \"rev-parse\", \"--show-toplevel\"],\n",
+ " capture_output=True,\n",
+ " text=True,\n",
+ " check=True,\n",
+ ").stdout.strip()\n",
+ "\n",
+ "# Define our git MCP server (installed via uv sync from pyproject.toml)\n",
"git_mcp: dict[str, Any] = {\n",
" \"git\": {\n",
" \"command\": \"uv\",\n",
- " \"args\": [\"run\", \"python\", \"-m\", \"mcp_server_git\", \"--repository\", os.getcwd()],\n",
+ " \"args\": [\"run\", \"python\", \"-m\", \"mcp_server_git\", \"--repository\", git_repo_root],\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
"id": "23aa5a3d",
"metadata": {},
"outputs": [
@@ -67,13 +94,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "๐ค Thinking...\n",
- "๐ค Using: mcp__git()\n",
+ "๐ค Using: mcp__git__git_log()\n",
+ "๐ค Using: mcp__git__git_status()\n",
+ "๐ค Using: mcp__git__git_branch()\n",
+ "โ Tool completed\n",
+ "โ Tool completed\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
- "๐ค Using: Bash()\n",
- "๐ค Using: Bash()\n",
- "๐ค Using: Bash()\n",
+ "๐ค Using: mcp__git__git_log()\n",
+ "๐ค Using: mcp__git__git_status()\n",
+ "๐ค Using: mcp__git__git_branch()\n",
"โ Tool completed\n",
"โ Tool completed\n",
"โ Tool completed\n",
@@ -83,20 +113,18 @@
],
"source": [
"messages = []\n",
- "async with (\n",
- " ClaudeSDKClient(\n",
- " options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
- " mcp_servers=git_mcp,\n",
- " allowed_tools=[\n",
- " \"mcp__git\"\n",
- " ], # For MCP tools, in allowed tools we must add the mcp__serverName__toolName format or mcp__serverName to enable all\n",
- " permission_mode=\"acceptEdits\", # auto-accept file edit permissions\n",
- " )\n",
- " ) as agent\n",
- "):\n",
+ "async with ClaudeSDKClient(\n",
+ " options=ClaudeAgentOptions(\n",
+ " model=\"claude-opus-4-5\",\n",
+ " mcp_servers=git_mcp,\n",
+ " allowed_tools=[\"mcp__git\"],\n",
+ " # disallowed_tools ensures the agent ONLY uses MCP tools, not Bash with git commands\n",
+ " disallowed_tools=[\"Bash\", \"Task\", \"WebSearch\", \"WebFetch\"],\n",
+ " permission_mode=\"acceptEdits\",\n",
+ " )\n",
+ ") as agent:\n",
" await agent.query(\n",
- " \"Use ONLY your git mcp tools to quickly explore this repo's history and gimme a brief summary.\"\n",
+ " \"Explore this repo's git history and provide a brief summary of recent activity.\"\n",
" )\n",
" async for msg in agent.receive_response():\n",
" print_activity(msg)\n",
@@ -105,12 +133,62 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"id": "691e0812",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "\n",
+ "Result:\n",
+ "## Git Repository Summary\n",
+ "\n",
+ "### Current Branch\n",
+ "You're on the **`upstream-contribution`** branch (up to date with origin), with `main` also available locally.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "### Recent Commit Activity (Last ~5 Days)\n",
+ "\n",
+ "| Date | Author | Summary |\n",
+ "|------|--------|---------|\n",
+ "| **Nov 27, 2025** | costiash | 3 commits enhancing the **Claude Agent SDK** - improved chief of staff agent, notebooks, observability agent, research agent, documentation, and utilities |\n",
+ "| **Nov 26, 2025** | Pedram Navid | Added GitHub issue templates, `/review-issue` command, `/add-registry` slash command, and new cookbook entries |\n",
+ "| **Nov 25, 2025** | Elie Schoppik | Renamed PTC notebook to `programmatic_tool_calling_ptc.ipynb` for clarity |\n",
+ "| **Nov 24, 2025** | henrykeetay | Added **tool search cookbook** |\n",
+ "| **Nov 24, 2025** | Alex Notov | Multiple merges consolidating cookbooks for Opus 4.5, dependency updates |\n",
+ "| **Nov 23, 2025** | Cal Rueb | Simplified crop tool notebook with Claude Agent SDK section |\n",
+ "| **Nov 23, 2025** | Pedram Navid | PR comment fixes and lint cleanup |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "### Key Themes in Recent Development\n",
+ "1. **Claude Agent SDK enhancements** - Major work on agent implementations (research, chief of staff, observability agents)\n",
+ "2. **New cookbooks** - Tool search, crop tool, programmatic tool calling\n",
+ "3. **CI/CD improvements** - PR review workflows, issue templates, slash commands\n",
+ "4. **Documentation** - Added troubleshooting guides, codebase overviews\n",
+ "\n",
+ "---\n",
+ "\n",
+ "### Working Directory Status\n",
+ "There are **uncommitted changes** in your working directory:\n",
+ "- **22 modified files** (mostly in `claude_agent_sdk/`)\n",
+ "- **4 deleted files** (documentation files in `docs/`)\n",
+ "- **6 untracked files** (new reports, plans, VS Code config)\n",
+ "\n",
+ "These changes appear to be further work on the Claude Agent SDK agents, notebooks, and utilities that haven't been staged or committed yet."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "print(f\"\\nResult:\\n{messages[-1].result}\")"
+ "display(Markdown(f\"\\nResult:\\n{messages[-1].result}\"))"
]
},
{
@@ -133,11 +211,26 @@
"cell_type": "markdown",
"id": "7fdb4aa2",
"metadata": {},
- "source": "#### Step 1: Set up your GitHub Token\n\nYou need a GitHub Personal Access Token. Get one [here](https://github.com/settings/personal-access-tokens/new) and put in the .env file as ```GITHUB_TOKEN=\"\"```\n> Note: When getting your token, select \"Fine-grained\" token with the default options (i.e., public repos, no account permissions), that'll be the easiest way to get this demo working.\n\nAlso, for this example you will have to have [Docker](https://www.docker.com/products/docker-desktop/) running on your machine. Docker is required because the GitHub MCP server runs in a containerized environment for security and isolation.\n\n**Docker Quick Setup:**\n- Install Docker Desktop from [docker.com](https://www.docker.com/products/docker-desktop/)\n- Ensure Docker is running (you'll see the Docker icon in your system tray)\n- Verify with `docker --version` in your terminal\n- **Troubleshooting:** If Docker won't start, check that virtualization is enabled in your BIOS. For detailed setup instructions, see the [Docker documentation](https://docs.docker.com/get-docker/)\n\n#### Step 2: Define the mcp server and start the agent loop!"
+ "source": [
+ "#### Step 1: Set up your GitHub Token\n",
+ "\n",
+ "You need a GitHub Personal Access Token. Get one [here](https://github.com/settings/personal-access-tokens/new) and put it in the .env file as ```GITHUB_TOKEN=\"\"```\n",
+ "> Note: When getting your token, select a \"Fine-grained\" token with the default options (i.e., public repos, no account permissions); that is the easiest way to get this demo working.\n",
+ "\n",
+ "Also, for this example you will need [Docker](https://www.docker.com/products/docker-desktop/) running on your machine. Docker is required because the GitHub MCP server runs in a containerized environment for security and isolation.\n",
+ "\n",
+ "**Docker Quick Setup:**\n",
+ "- Install Docker Desktop from [docker.com](https://www.docker.com/products/docker-desktop/)\n",
+ "- Ensure Docker is running (you'll see the Docker icon in your system tray)\n",
+ "- Verify with `docker --version` in your terminal\n",
+ "- **Troubleshooting:** If Docker won't start, check that virtualization is enabled in your BIOS. For detailed setup instructions, see the [Docker documentation](https://docs.docker.com/get-docker/)\n",
+ "\n",
+ "#### Step 2: Define the mcp server and start the agent loop!"
+ ]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"id": "c1e65281",
"metadata": {},
"outputs": [],
@@ -162,7 +255,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"id": "e4c524c1",
"metadata": {},
"outputs": [
@@ -170,12 +263,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "๐ค Thinking...\n",
"๐ค Using: mcp__github__search_repositories()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: mcp__github__get_file_contents()\n",
- "โ Tool completed\n",
"๐ค Thinking...\n"
]
}
@@ -185,14 +274,16 @@
"messages = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=\"claude-opus-4-5\",\n",
" mcp_servers=github_mcp,\n",
" allowed_tools=[\"mcp__github\"],\n",
- " permission_mode=\"acceptEdits\", # auto-accept permissions\n",
+ " # disallowed_tools ensures the agent ONLY uses MCP tools, not Bash with gh CLI\n",
+ " disallowed_tools=[\"Bash\", \"Task\", \"WebSearch\", \"WebFetch\"],\n",
+ " permission_mode=\"acceptEdits\",\n",
" )\n",
") as agent:\n",
" await agent.query(\n",
- " \"Use ONLY your GitHub MCP tools to search for the anthropics/claude-agent-sdk-python repository and and give me a couple facts about it\"\n",
+ " \"Search for the anthropics/claude-agent-sdk-python repository and give me a few key facts about it.\"\n",
" )\n",
" async for msg in agent.receive_response():\n",
" print_activity(msg)\n",
@@ -201,12 +292,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"id": "4e0ac04f",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "\n",
+ "Result:\n",
+ "Here are the key facts about the **anthropics/claude-agent-sdk-python** repository:\n",
+ "\n",
+ "| Fact | Details |\n",
+ "|------|---------|\n",
+ "| **Full Name** | anthropics/claude-agent-sdk-python |\n",
+ "| **URL** | https://github.com/anthropics/claude-agent-sdk-python |\n",
+ "| **Language** | Python |\n",
+ "| **Stars** | โญ 3,357 |\n",
+ "| **Forks** | ๐ด 435 |\n",
+ "| **Open Issues** | 149 |\n",
+ "| **Created** | June 11, 2025 |\n",
+ "| **Last Updated** | December 4, 2025 |\n",
+ "| **Default Branch** | main |\n",
+ "| **Visibility** | Public |\n",
+ "| **Archived** | No |\n",
+ "\n",
+ "This is the official Python SDK for building Claude agents, maintained by Anthropic. It's quite popular with over 3,300 stars and has an active community with 435 forks. The repository is actively maintained (recently updated) and has a notable number of open issues (149), which suggests active development and community engagement."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "print(f\"\\nResult:\\n{messages[-1].result}\")"
+ "display(Markdown(f\"\\nResult:\\n{messages[-1].result}\"))"
]
},
{
@@ -221,7 +343,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"id": "c8edb208",
"metadata": {},
"outputs": [
@@ -229,49 +351,45 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
+ "๐ค Using: mcp__github__get_file_contents()\n",
+ "๐ค Using: mcp__github__list_commits()\n",
+ "โ Tool completed\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
+ "๐ค Using: mcp__github__get_file_contents()\n",
+ "๐ค Using: mcp__github__get_file_contents()\n",
+ "๐ค Using: mcp__github__list_pull_requests()\n",
"โ Tool completed\n",
- "๐ค Using: mcp__github__list_workflows()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: mcp__github__list_workflow_runs()\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
+ "๐ค Using: mcp__github__get_commit()\n",
+ "๐ค Using: mcp__github__get_commit()\n",
+ "๐ค Using: mcp__github__get_commit()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: mcp__github__get_workflow_run()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
"โ Tool completed\n",
- "๐ค Using: mcp__github__get_job_logs()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: mcp__github__list_workflow_jobs()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: WebFetch()\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
- "๐ค Using: mcp__github__get_job_logs()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
+ "๐ค Using: mcp__github__pull_request_read()\n",
+ "โ Tool completed\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: mcp__github__get_job_logs()\n",
"โ Tool completed\n",
- "๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
"โ Tool completed\n",
- "๐ค Using: mcp__github__get_job_logs()\n",
"โ Tool completed\n",
"๐ค Thinking...\n",
- "๐ค Using: TodoWrite()\n",
+ "๐ค Using: mcp__github__search_issues()\n",
+ "๐ค Using: mcp__github__search_issues()\n",
"โ Tool completed\n",
- "๐ค Using: TodoWrite()\n",
"โ Tool completed\n",
"๐ค Thinking...\n"
]
@@ -280,16 +398,16 @@
"source": [
"load_dotenv(override=True)\n",
"\n",
- "prompt = \"\"\"Monitor the GitHub Actions workflows for facebook/react.\n",
- "Look at the last triggered CI pipeline.\n",
- "1. Analyze the trigger for the pipeline\n",
- "2. Identify whether the pipeline passed or not\n",
- "3. If it failed, explain which test failed\n",
- "4. Identify whether human involvement is required\n",
+ "prompt = \"\"\"Analyze the CI health for facebook/react repository.\n",
"\n",
- "IMPORTANT: Do not raise a PR, issue, or bug on github yet. Just give me a summary of your findings and plan.\n",
+ "Examine the most recent runs of the 'CI' workflow and provide:\n",
+ "1. Current status and what triggered the run (push, PR, schedule, etc.)\n",
+ "2. If failing: identify the specific failing jobs/tests and assess severity\n",
+ "3. If passing: note any concerning patterns (long duration, flaky history)\n",
+ "4. Recommended actions with priority (critical/high/medium/low)\n",
"\n",
- "Focus on the 'CI' workflow specifically. Use your Github MCP server tools!\"\"\"\n",
+ "Provide a concise operational summary suitable for an on-call engineer.\n",
+ "Do not create issues or PRs - this is a read-only analysis.\"\"\"\n",
"\n",
"github_mcp: dict[str, Any] = {\n",
" \"github\": {\n",
@@ -309,9 +427,13 @@
"messages = []\n",
"async with ClaudeSDKClient(\n",
" options=ClaudeAgentOptions(\n",
- " model=\"claude-sonnet-4-5\",\n",
+ " model=\"claude-opus-4-5\",\n",
" mcp_servers=github_mcp,\n",
" allowed_tools=[\"mcp__github\"],\n",
+ " # IMPORTANT: disallowed_tools is required to actually RESTRICT tool usage.\n",
+ " # Without this, allowed_tools only controls permission prompting, not availability.\n",
+ " # The agent would still have access to Bash (and could use `gh` CLI instead of MCP).\n",
+ " disallowed_tools=[\"Bash\", \"Task\", \"WebSearch\", \"WebFetch\"],\n",
" permission_mode=\"acceptEdits\",\n",
" )\n",
") as agent:\n",
@@ -323,12 +445,714 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"id": "49a39ed7",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "\n",
+ "Result:\n",
+ "Based on my comprehensive analysis of the facebook/react repository CI infrastructure, here is the operational summary:\n",
+ "\n",
+ "---\n",
+ "\n",
+ "# CI Health Analysis: facebook/react\n",
+ "\n",
+ "## Executive Summary\n",
+ "**Overall Status: 🟢 HEALTHY**\n",
+ "\n",
+ "The React repository's CI appears to be in good health. Recent commits to `main` have been successfully merged, and active PRs show passing CodeSandbox builds.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 1. CI Infrastructure Overview\n",
+ "\n",
+ "### Primary Workflows\n",
+ "| Workflow | Trigger | Purpose |\n",
+ "|----------|---------|---------|\n",
+ "| `runtime_build_and_test.yml` | Push to main, PRs | Main CI - builds, tests, Flow checks |\n",
+ "| `shared_lint.yml` | Push to main, PRs | Prettier, ESLint, license checks |\n",
+ "| `compiler_typescript.yml` | PRs touching compiler | Compiler-specific tests |\n",
+ "| `devtools_regression_tests.yml` | PRs | DevTools testing |\n",
+ "\n",
+ "### Test Matrix Scale\n",
+ "- **90 test shards** (18 configurations × 5 shards each)\n",
+ "- **50 build jobs** (25 workers × 2 release channels)\n",
+ "- **50 test-build shards** (5 configurations × 10 shards)\n",
+ "- Flow checks across multiple inline configs\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 2. Recent Main Branch Status\n",
+ "\n",
+ "| Commit | Date | Description | Status |\n",
+ "|--------|------|-------------|--------|\n",
+ "| `bf1afad` | Dec 4, 2025 | [react-dom/server] Fix hanging on Deno | ✅ Merged |\n",
+ "| `0526c79` | Dec 3, 2025 | Update changelog with latest releases | ✅ Merged |\n",
+ "| `7dc903c` | Dec 3, 2025 | Patch FlightReplyServer (security fix) | ✅ Merged |\n",
+ "| `36df5e8` | Dec 2, 2025 | Allow building single release channel | ✅ Merged |\n",
+ "\n",
+ "**Last 10 commits:** All successfully merged to main, indicating CI is passing.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 3. Active PR CI Status\n",
+ "\n",
+ "| PR | Title | CodeSandbox Status |\n",
+ "|----|-------|-------------------|\n",
+ "| #35267 | Fix spelling (behaviour → behavior) | 🟡 Pending (building) |\n",
+ "| #35238 | DevTools navigating commits hotkey | ✅ Success |\n",
+ "| #35287 | Compiler: Fix variable name issue | ✅ Success |\n",
+ "| #35278 | Add DevTools console suppress option | ✅ Success |\n",
+ "| #35226 | Fizz: Push stalled use() to ownerStack | ✅ Success |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 4. Risk Assessment\n",
+ "\n",
+ "### ✅ Positive Indicators\n",
+ "- **Main branch stable**: All recent commits merged successfully\n",
+ "- **No open CI failure issues**: Search returned zero CI-related open bugs\n",
+ "- **Active development**: Security patches and features landing regularly\n",
+ "- **PR builds passing**: Most open PRs show successful builds\n",
+ "\n",
+ "### ⚠️ Areas to Monitor\n",
+ "- **Large test matrix**: 190+ parallel jobs mean potential for infrastructure flakiness\n",
+ "- **Playwright-based e2e tests**: Browser-based tests can be flaky (Flight fixtures, DevTools e2e)\n",
+ "- **Cache dependencies**: Multiple cache strategies (v6 keys) - cache misses could slow builds\n",
+ "\n",
+ "### ๐ CI Complexity Metrics\n",
+ "- ~37KB workflow file for main CI (`runtime_build_and_test.yml`)\n",
+ "- Heavy parallelization with matrix strategies\n",
+ "- Multiple artifact upload/download operations\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 5. Recommended Actions\n",
+ "\n",
+ "| Priority | Action | Rationale |\n",
+ "|----------|--------|-----------|\n",
+ "| **LOW** | Monitor PR #35267 | Currently building - verify completion |\n",
+ "| **LOW** | No immediate action required | Main branch healthy, PRs passing |\n",
+ "| **INFO** | Security patch merged Dec 3 | PR #35277 fixed critical security vuln in FlightReplyServer - verify downstream impact |\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 6. On-Call Notes\n",
+ "\n",
+ "**TL;DR for On-Call Engineer:**\n",
+ "- 🟢 **CI is GREEN** - No action required\n",
+ "- Main branch is healthy with successful merges in last 24h\n",
+ "- All checked PRs showing green/passing status\n",
+ "- No open issues flagged for CI failures or flakiness\n",
+ "- Recent security patch (#35277) was successfully merged - monitor for any regressions\n",
+ "\n",
+ "**If issues arise:**\n",
+ "1. Check GitHub Actions tab directly: `https://github.com/facebook/react/actions`\n",
+ "2. Key workflows to monitor: \"(Runtime) Build and Test\", \"(Shared) Lint\"\n",
+ "3. Caches use `v6` key prefix - if widespread failures, consider cache invalidation\n",
+ "\n",
+ "---\n",
+ "\n",
+ "*Analysis performed: December 4, 2025*\n",
+ "*Data sources: GitHub API (commits, PRs, status checks, workflow files)*"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(Markdown(f\"\\nResult:\\n{messages[-1].result}\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "827dc192",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
โ๏ธ System
Initialized (a500627f...)
๐ง Tools
mcp__github__get_file_contentsmcp__github__list_commits
๐ค Assistant
I can see the React repository uses GitHub Actions with several workflows. Let me check the main CI workflows and their recent status. The primary CI workflow appears to be runtime_build_and_test.yml. Let me examine the workflow files and search for any \"CI\" named workflows.
๐ง Tools
mcp__github__get_file_contentsmcp__github__get_file_contentsmcp__github__list_pull_requests
๐ค Assistant
I can see React repository uses several workflows. The main CI workflow is runtime_build_and_test.yml which triggers on push to main and PRs. Let me get the CI status for recent commits to assess the health:
๐ง Tools
mcp__github__get_commitmcp__github__get_commitmcp__github__get_commitmcp__github__pull_request_readmcp__github__pull_request_readmcp__github__pull_request_read
๐ค Assistant
Let me get more detailed CI status information by checking the status checks on recent PRs and the main branch:
๐ง Tools
mcp__github__pull_request_readmcp__github__pull_request_readmcp__github__pull_request_readmcp__github__pull_request_readmcp__github__pull_request_read
๐ค Assistant
The GitHub API status checks only show CodeSandbox status. Let me search for any recent failing CI issues to get a complete picture:
๐ง Tools
mcp__github__search_issuesmcp__github__search_issues
๐ค Assistant
Based on my comprehensive analysis of the facebook/react repository CI infrastructure, here is the operational summary:
\n",
+ "
\n",
+ "
CI Health Analysis: facebook/react
\n",
+ "
Executive Summary
\n",
+ "
Overall Status: ๐ข HEALTHY
\n",
+ "
The React repository's CI appears to be in good health. Recent commits to main have been successfully merged, and active PRs show passing CodeSandbox builds.
\n",
+ "
\n",
+ "
1. CI Infrastructure Overview
\n",
+ "
Primary Workflows
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Workflow | \n",
+ "Trigger | \n",
+ "Purpose | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "runtime_build_and_test.yml | \n",
+ "Push to main, PRs | \n",
+ "Main CI - builds, tests, Flow checks | \n",
+ "
\n",
+ "\n",
+ "shared_lint.yml | \n",
+ "Push to main, PRs | \n",
+ "Prettier, ESLint, license checks | \n",
+ "
\n",
+ "\n",
+ "compiler_typescript.yml | \n",
+ "PRs touching compiler | \n",
+ "Compiler-specific tests | \n",
+ "
\n",
+ "\n",
+ "devtools_regression_tests.yml | \n",
+ "PRs | \n",
+ "DevTools testing | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Test Matrix Scale
\n",
+ "
\n",
+ "- 90 test shards (18 configurations ร 5 shards each)
\n",
+ "- 50 build jobs (25 workers ร 2 release channels)
\n",
+ "- 50 test-build shards (5 configurations ร 10 shards)
\n",
+ "- Flow checks across multiple inline configs
\n",
+ "
\n",
+ "
\n",
+ "
2. Recent Main Branch Status
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Commit | \n",
+ "Date | \n",
+ "Description | \n",
+ "Status | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "bf1afad | \n",
+ "Dec 4, 2025 | \n",
+ "[react-dom/server] Fix hanging on Deno | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "0526c79 | \n",
+ "Dec 3, 2025 | \n",
+ "Update changelog with latest releases | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "7dc903c | \n",
+ "Dec 3, 2025 | \n",
+ "Patch FlightReplyServer (security fix) | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "36df5e8 | \n",
+ "Dec 2, 2025 | \n",
+ "Allow building single release channel | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Last 10 commits: All successfully merged to main, indicating CI is passing.
\n",
+ "
\n",
+ "
3. Active PR CI Status
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| PR | \n",
+ "Title | \n",
+ "CodeSandbox Status | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| #35267 | \n",
+ "Fix spelling (behaviour โ behavior) | \n",
+ "๐ก Pending (building) | \n",
+ "
\n",
+ "\n",
+ "| #35238 | \n",
+ "DevTools navigating commits hotkey | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "| #35287 | \n",
+ "Compiler: Fix variable name issue | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "| #35278 | \n",
+ "Add DevTools console suppress option | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "| #35226 | \n",
+ "Fizz: Push stalled use() to ownerStack | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
4. Risk Assessment
\n",
+ "
โ
Positive Indicators
\n",
+ "
\n",
+ "- Main branch stable: All recent commits merged successfully
\n",
+ "- No open CI failure issues: Search returned zero CI-related open bugs
\n",
+ "- Active development: Security patches and features landing regularly
\n",
+ "- PR builds passing: Most open PRs show successful builds
\n",
+ "
\n",
+ "
โ ๏ธ Areas to Monitor
\n",
+ "
\n",
+ "- Large test matrix: 190+ parallel jobs mean potential for infrastructure flakiness
\n",
+ "- Playwright-based e2e tests: Browser-based tests can be flaky (Flight fixtures, DevTools e2e)
\n",
+ "- Cache dependencies: Multiple cache strategies (v6 keys) - cache misses could slow builds
\n",
+ "
\n",
+ "
๐ CI Complexity Metrics
\n",
+ "
\n",
+ "- ~37KB workflow file for main CI (
runtime_build_and_test.yml) \n",
+ "- Heavy parallelization with matrix strategies
\n",
+ "- Multiple artifact upload/download operations
\n",
+ "
\n",
+ "
\n",
+ "
5. Recommended Actions
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Priority | \n",
+ "Action | \n",
+ "Rationale | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| LOW | \n",
+ "Monitor PR #35267 | \n",
+ "Currently building - verify completion | \n",
+ "
\n",
+ "\n",
+ "| LOW | \n",
+ "No immediate action required | \n",
+ "Main branch healthy, PRs passing | \n",
+ "
\n",
+ "\n",
+ "| INFO | \n",
+ "Security patch merged Dec 3 | \n",
+ "PR #35277 fixed critical security vuln in FlightReplyServer - verify downstream impact | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
6. On-Call Notes
\n",
+ "
TL;DR for On-Call Engineer:
\n",
+ "- ๐ข CI is GREEN - No action required
\n",
+ "- Main branch is healthy with successful merges in last 24h
\n",
+ "- All checked PRs showing green/passing status
\n",
+ "- No open issues flagged for CI failures or flakiness
\n",
+ "- Recent security patch (#35277) was successfully merged - monitor for any regressions
\n",
+ "
If issues arise:
\n",
+ "1. Check GitHub Actions tab directly: https://github.com/facebook/react/actions
\n",
+ "2. Key workflows to monitor: \"(Runtime) Build and Test\", \"(Shared) Lint\"
\n",
+ "3. Caches use v6 key prefix - if widespread failures, consider cache invalidation
\n",
+ "
\n",
+ "
Analysis performed: December 4, 2025
\n",
+ "Data sources: GitHub API (commits, PRs, status checks, workflow files)
โ
Complete
Turns: 19 Tokens: 42,191 Cost: $0.83 Duration: 89.2s
\n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "print(f\"\\nResult:\\n{messages[-1].result}\")"
+ "reset_activity_context()\n",
+ "visualize_conversation(messages)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "9e3ed49f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
Based on my comprehensive analysis of the facebook/react repository CI infrastructure, here is the operational summary:
\n",
+ "
\n",
+ "
CI Health Analysis: facebook/react
\n",
+ "
Executive Summary
\n",
+ "
Overall Status: ๐ข HEALTHY
\n",
+ "
The React repository's CI appears to be in good health. Recent commits to main have been successfully merged, and active PRs show passing CodeSandbox builds.
\n",
+ "
\n",
+ "
1. CI Infrastructure Overview
\n",
+ "
Primary Workflows
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Workflow | \n",
+ "Trigger | \n",
+ "Purpose | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "runtime_build_and_test.yml | \n",
+ "Push to main, PRs | \n",
+ "Main CI - builds, tests, Flow checks | \n",
+ "
\n",
+ "\n",
+ "shared_lint.yml | \n",
+ "Push to main, PRs | \n",
+ "Prettier, ESLint, license checks | \n",
+ "
\n",
+ "\n",
+ "compiler_typescript.yml | \n",
+ "PRs touching compiler | \n",
+ "Compiler-specific tests | \n",
+ "
\n",
+ "\n",
+ "devtools_regression_tests.yml | \n",
+ "PRs | \n",
+ "DevTools testing | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Test Matrix Scale
\n",
+ "
\n",
+ "- 90 test shards (18 configurations ร 5 shards each)
\n",
+ "- 50 build jobs (25 workers ร 2 release channels)
\n",
+ "- 50 test-build shards (5 configurations ร 10 shards)
\n",
+ "- Flow checks across multiple inline configs
\n",
+ "
\n",
+ "
\n",
+ "
2. Recent Main Branch Status
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Commit | \n",
+ "Date | \n",
+ "Description | \n",
+ "Status | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "bf1afad | \n",
+ "Dec 4, 2025 | \n",
+ "[react-dom/server] Fix hanging on Deno | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "0526c79 | \n",
+ "Dec 3, 2025 | \n",
+ "Update changelog with latest releases | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "7dc903c | \n",
+ "Dec 3, 2025 | \n",
+ "Patch FlightReplyServer (security fix) | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "36df5e8 | \n",
+ "Dec 2, 2025 | \n",
+ "Allow building single release channel | \n",
+ "โ
Merged | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Last 10 commits: All successfully merged to main, indicating CI is passing.
\n",
+ "
\n",
+ "
3. Active PR CI Status
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| PR | \n",
+ "Title | \n",
+ "CodeSandbox Status | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| #35267 | \n",
+ "Fix spelling (behaviour โ behavior) | \n",
+ "๐ก Pending (building) | \n",
+ "
\n",
+ "\n",
+ "| #35238 | \n",
+ "DevTools navigating commits hotkey | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "| #35287 | \n",
+ "Compiler: Fix variable name issue | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "| #35278 | \n",
+ "Add DevTools console suppress option | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "| #35226 | \n",
+ "Fizz: Push stalled use() to ownerStack | \n",
+ "โ
Success | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
4. Risk Assessment
\n",
+ "
โ
Positive Indicators
\n",
+ "
\n",
+ "- Main branch stable: All recent commits merged successfully
\n",
+ "- No open CI failure issues: Search returned zero CI-related open bugs
\n",
+ "- Active development: Security patches and features landing regularly
\n",
+ "- PR builds passing: Most open PRs show successful builds
\n",
+ "
\n",
+ "
โ ๏ธ Areas to Monitor
\n",
+ "
\n",
+ "- Large test matrix: 190+ parallel jobs mean potential for infrastructure flakiness
\n",
+ "- Playwright-based e2e tests: Browser-based tests can be flaky (Flight fixtures, DevTools e2e)
\n",
+ "- Cache dependencies: Multiple cache strategies (v6 keys) - cache misses could slow builds
\n",
+ "
\n",
+ "
๐ CI Complexity Metrics
\n",
+ "
\n",
+ "- ~37KB workflow file for main CI (
runtime_build_and_test.yml) \n",
+ "- Heavy parallelization with matrix strategies
\n",
+ "- Multiple artifact upload/download operations
\n",
+ "
\n",
+ "
\n",
+ "
5. Recommended Actions
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Priority | \n",
+ "Action | \n",
+ "Rationale | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| LOW | \n",
+ "Monitor PR #35267 | \n",
+ "Currently building - verify completion | \n",
+ "
\n",
+ "\n",
+ "| LOW | \n",
+ "No immediate action required | \n",
+ "Main branch healthy, PRs passing | \n",
+ "
\n",
+ "\n",
+ "| INFO | \n",
+ "Security patch merged Dec 3 | \n",
+ "PR #35277 fixed critical security vuln in FlightReplyServer - verify downstream impact | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
6. On-Call Notes
\n",
+ "
TL;DR for On-Call Engineer:
\n",
+ "- ๐ข CI is GREEN - No action required
\n",
+ "- Main branch is healthy with successful merges in last 24h
\n",
+ "- All checked PRs showing green/passing status
\n",
+ "- No open issues flagged for CI failures or flakiness
\n",
+ "- Recent security patch (#35277) was successfully merged - monitor for any regressions
\n",
+ "
If issues arise:
\n",
+ "1. Check GitHub Actions tab directly: https://github.com/facebook/react/actions
\n",
+ "2. Key workflows to monitor: \"(Runtime) Build and Test\", \"(Shared) Lint\"
\n",
+ "3. Caches use v6 key prefix - if widespread failures, consider cache invalidation
\n",
+ "
\n",
+ "
Analysis performed: December 4, 2025
\n",
+ "Data sources: GitHub API (commits, PRs, status checks, workflow files)
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "reset_activity_context()\n",
+ "display_agent_response(messages)"
]
},
{
@@ -338,24 +1162,160 @@
"source": [
"### Observability Agent as Module\n",
"\n",
- "The `observability_agent/agent.py` file contains the same minimal helper functions as the research agent or chief of staff agent, just enhanced for GitHub monitoring. \n",
+ "The `observability_agent/agent.py` module wraps the observability pattern into a reusable `send_query` function. It imports and uses the shared visualization utilities from `utils.agent_visualizer` internally:\n",
+ "- **`reset_activity_context()`**: Called automatically at the start of each query\n",
+ "- **`print_activity()`**: Provides real-time feedback during execution\n",
+ "- **`display_agent_response()`**: Renders the final result (controlled by `display_result` parameter)\n",
"\n",
- "As before, to use it as a module in your Python code:"
+ "This means you can use the module with minimal code:"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"id": "97074fe7",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🤖 Using: mcp__github__list_commits()\n",
+ "✓ Tool completed\n",
+ "🤖 Using: mcp__github__get_commit()\n",
+ "🤖 Using: mcp__github__get_commit()\n",
+ "✓ Tool completed\n",
+ "✓ Tool completed\n",
+ "🤖 Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
CI Status Summary for anthropics/claude-agent-sdk-python
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Commit | \n",
+ "Message | \n",
+ "Date | \n",
+ "Status | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "2437035 | \n",
+ "chore: bump bundled CLI version to 2.0.58 | \n",
+ "Dec 3, 2025 20:09 UTC | \n",
+ "โ ๏ธ No CI status available | \n",
+ "
\n",
+ "\n",
+ "9809fb6 | \n",
+ "chore: release v0.1.11 (#383) | \n",
+ "Dec 3, 2025 19:42 UTC | \n",
+ "โ ๏ธ No CI status available | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Note: The GitHub API response doesn't include explicit CI/check status data. This typically means:
\n",
+ "1. CI hasn't been triggered on these commits (both are automated commits from GitHub Actions)
\n",
+ "2. The checks aren't exposed via the commit API endpoint
\n",
+ "
Recommendation: To get detailed CI run information, you'd need to check:
\n",
+ "- GitHub Actions tab directly: https://github.com/anthropics/claude-agent-sdk-python/actions
\n",
+ "- Or query the Checks API specifically for workflow runs
\n",
+ "
Both commits are automated maintenance commits (CLI bump and release version update), so they may intentionally skip full CI runs.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
+ "# Import the reusable send_query helper (restart the kernel to pick up edits to the module)\n",
"from observability_agent.agent import send_query\n",
"\n",
+ "# The module handles activity display, context reset, and result visualization internally\n",
"result = await send_query(\n",
" \"Check the CI status for the last 2 runs in anthropics/claude-agent-sdk-python. Just do 3 tool calls, be efficient.\"\n",
- ")\n",
- "print(f\"Monitoring result: {result}\")"
+ ")"
]
},
{
@@ -363,34 +1323,345 @@
"id": "0c1578dc",
"metadata": {},
"source": [
- "We can do multi-turn conversations with this agent as well:"
+ "Multi-turn conversations work seamlessly - start with a normal query, then pass `continue_conversation=True` on each follow-up:"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"id": "7914f8db",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🤖 Using: mcp__github__list_pull_requests()\n",
+ "🤖 Using: mcp__github__list_commits()\n",
+ "✓ Tool completed\n",
+ "✓ Tool completed\n",
+ "🤖 Thinking...\n",
+ "🤖 Using: mcp__github__pull_request_read()\n",
+ "🤖 Using: mcp__github__pull_request_read()\n",
+ "🤖 Using: mcp__github__pull_request_read()\n",
+ "🤖 Using: mcp__github__get_commit()\n",
+ "✓ Tool completed\n",
+ "✓ Tool completed\n",
+ "✓ Tool completed\n",
+ "✓ Tool completed\n",
+ "🤖 Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
CI Status Summary for facebook/react
\n",
+ "
๐ข Main Branch Status: HEALTHY
\n",
+ "
Latest Commit on main:
\n",
+ "- SHA: bf1afade
\n",
+ "- Message: [react-dom/server] Fix hanging on Deno (#35235)
\n",
+ "- Author: @fraidev
\n",
+ "- Date: Dec 4, 2025 05:50 UTC
\n",
+ "
\n",
+ "
Open Pull Requests CI Status
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| PR | \n",
+ "Title | \n",
+ "Author | \n",
+ "CI Status | \n",
+ "Updated | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| #35287 | \n",
+ "[compiler] Fix JSX variable name issue | \n",
+ "@kostya-gromov | \n",
+ "๐ข Success | \n",
+ "2h ago | \n",
+ "
\n",
+ "\n",
+ "| #35285 | \n",
+ "[compiler][poc] Reuse ValidateExhaustiveDeps | \n",
+ "@josephsavona | \n",
+ "๐ต Draft | \n",
+ "6h ago | \n",
+ "
\n",
+ "\n",
+ "| #35284 | \n",
+ "[compiler] Fix hoisted primitives bug | \n",
+ "@josephsavona | \n",
+ "๐ข Success | \n",
+ "7h ago | \n",
+ "
\n",
+ "\n",
+ "| #35282 | \n",
+ "[compiler] Add effect deps validator | \n",
+ "@jackpope | \n",
+ "๐ข Success | \n",
+ "15h ago | \n",
+ "
\n",
+ "\n",
+ "| #35281 | \n",
+ "Improve legacy context warning | \n",
+ "@Harshrj53 | \n",
+ "๐ข Success | \n",
+ "20h ago | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
Key Observations
\n",
+ "
\n",
+ "- โ
No CI failures detected on recent PRs
\n",
+ "- โ
All CodeSandbox builds passing
\n",
+ "- โ ๏ธ Recent security fix merged:
#35277 addresses a critical security vulnerability in FlightReplyServer \n",
+ "
\n",
+ "
Recent Notable Commits
\n",
+ "
\n",
+ "- Security patch - FlightReplyServer fix for cycles and deferred error handling
\n",
+ "- Deno fix - react-dom/server hanging issue resolved
\n",
+ "- Compiler improvements - Multiple fixes for React Compiler validation
\n",
+ "
\n",
+ "
Assessment: CI is stable. No immediate action required.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"# Example 2: Multi-turn conversation for deeper monitoring\n",
- "result1 = await send_query(\"What's the current CI status for facebook/react?\")\n",
- "print(f\"Initial check: {result1[:250]}...\\n\")"
+ "result1 = await send_query(\"What's the current CI status for facebook/react?\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"id": "8014a701",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🤖 Using: mcp__github__search_issues()\n",
+ "✓ Tool completed\n",
+ "🤖 Thinking...\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "Agent Response
Flaky Test Analysis for facebook/react
\n",
+ "
Result: No known flaky tests tracked
\n",
+ "
The search returned 0 open issues related to flaky tests, test flakes, or intermittent failures in the repository.
\n",
+ "
Assessment
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "| Metric | \n",
+ "Status | \n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "| Open flaky test issues | \n",
+ "0 | \n",
+ "
\n",
+ "\n",
+ "| Recent CI failures | \n",
+ "None detected | \n",
+ "
\n",
+ "\n",
+ "| Test stability | \n",
+ "โ
Stable | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Interpretation
\n",
+ "
\n",
+ "- Good news: No currently tracked flaky test issues in the open issue tracker
\n",
+ "- The React team appears to be on top of test reliability
\n",
+ "- Based on the earlier CI check, all recent PRs show passing builds
\n",
+ "
\n",
+ "
Caveats
\n",
+ "
\n",
+ "- Flaky tests may be tracked internally (Meta's internal systems)
\n",
+ "- Some flakes might be handled via suppression/retry mechanisms
\n",
+ "- The team may use different labeling conventions
\n",
+ "
\n",
+ "
Recommendation: CI appears healthy with no actionable flaky test issues at this time.
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"# Continue the conversation to dig deeper\n",
"result2 = await send_query(\n",
" \"Are there any flaky tests in the recent failures? You can only make one tool call.\",\n",
" continue_conversation=True,\n",
- ")\n",
- "print(f\"Follow-up analysis: {result2[:250]}...\")"
+ ")"
]
},
{
@@ -431,9 +1702,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python (cc-sdk-tutorial)",
+ "display_name": "cc-sdk-tutorial",
"language": "python",
- "name": "cc-sdk-tutorial"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -445,9 +1716,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.13"
+ "version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/claude_agent_sdk/README.md b/claude_agent_sdk/README.md
index a9113577..5bf6ac24 100644
--- a/claude_agent_sdk/README.md
+++ b/claude_agent_sdk/README.md
@@ -94,6 +94,11 @@ Each notebook includes an agent implementation in its respective directory:
- **`chief_of_staff_agent/`** - Multi-agent executive assistant with financial modeling and compliance
- **`observability_agent/`** - DevOps monitoring agent with GitHub integration
+**Running standalone agents:** To import agent modules outside of notebooks, either run from the `claude_agent_sdk/` directory or install the package in editable mode:
+```bash
+uv pip install -e .
+```
+
## Background
### The Evolution of Claude Agent SDK
diff --git a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/budget-impact.md b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/budget-impact.md
index 25bff63e..dacce216 100644
--- a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/budget-impact.md
+++ b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/budget-impact.md
@@ -3,7 +3,7 @@ name: budget-impact
description: Analyze the financial impact of a decision on budget, burn rate, and runway
---
-Use the financial-analyst subagent to analyze the budget impact of: {{args}}
+Use the financial-analyst subagent to analyze the budget impact of: $ARGUMENTS
Provide a comprehensive analysis including:
1. Total cost (one-time and recurring)
diff --git a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/slash-command-test.md b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/slash-command-test.md
index d34639ad..cac40016 100644
--- a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/slash-command-test.md
+++ b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/slash-command-test.md
@@ -3,4 +3,4 @@ name: slash-command-test
description: example of how a slash-command works
---
-Reverse the following sentence word wise: {{args}}
+Reverse the following sentence word wise: $ARGUMENTS
diff --git a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/strategic-brief.md b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/strategic-brief.md
index 85d6a537..f9b123fc 100644
--- a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/strategic-brief.md
+++ b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/strategic-brief.md
@@ -3,7 +3,7 @@ name: strategic-brief
description: Generate a comprehensive strategic brief by coordinating analysis from both financial and talent perspectives
---
-Create a strategic brief on: {{args}}
+Create a strategic brief on: $ARGUMENTS
Coordinate with both the financial-analyst and recruiter subagents to provide:
diff --git a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/talent-scan.md b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/talent-scan.md
index 96add023..5ba93cd4 100644
--- a/claude_agent_sdk/chief_of_staff_agent/.claude/commands/talent-scan.md
+++ b/claude_agent_sdk/chief_of_staff_agent/.claude/commands/talent-scan.md
@@ -3,7 +3,7 @@ name: talent-scan
description: Scan the talent market for specific roles and provide hiring recommendations
---
-Use the recruiter subagent to perform a talent market scan for: {{args}}
+Use the recruiter subagent to perform a talent market scan for: $ARGUMENTS
Analyze and report on:
1. Talent availability in target markets
diff --git a/claude_agent_sdk/chief_of_staff_agent/CLAUDE.md b/claude_agent_sdk/chief_of_staff_agent/CLAUDE.md
index f77cda40..152fc929 100644
--- a/claude_agent_sdk/chief_of_staff_agent/CLAUDE.md
+++ b/claude_agent_sdk/chief_of_staff_agent/CLAUDE.md
@@ -8,7 +8,7 @@
- **HQ**: San Francisco, CA
## Financial Snapshot
-- **Monthly Burn Rate**: $500,000
+- **Monthly Burn Rate**: ~$500,000
- **Current Runway**: 20 months (until September 2025)
- **ARR**: $2.4M (growing 15% MoM)
- **Cash in Bank**: $10M
diff --git a/claude_agent_sdk/chief_of_staff_agent/agent.py b/claude_agent_sdk/chief_of_staff_agent/agent.py
index 3441cefe..36390bff 100644
--- a/claude_agent_sdk/chief_of_staff_agent/agent.py
+++ b/claude_agent_sdk/chief_of_staff_agent/agent.py
@@ -15,7 +15,7 @@
load_dotenv()
-def get_activity_text(msg) -> str | None:
+def get_activity_text(msg: Any) -> str | None:
"""Extract activity text from a message"""
try:
if "Assistant" in msg.__class__.__name__:
@@ -31,7 +31,7 @@ def get_activity_text(msg) -> str | None:
return None
-def print_activity(msg) -> None:
+def print_activity(msg: Any) -> None:
"""Print activity to console"""
activity = get_activity_text(msg)
if activity:
@@ -78,9 +78,13 @@ async def send_query(
"""
# build options with optional output style
- options_dict = {
- "model": "claude-sonnet-4-5",
- "allowed_tools": [
+ settings = None
+ if output_style:
+ settings = json.dumps({"outputStyle": output_style})
+
+ options = ClaudeAgentOptions(
+ model="claude-opus-4-5",
+ allowed_tools=[
"Task", # enables subagent delegation
"Read",
"Write",
@@ -88,17 +92,19 @@ async def send_query(
"Bash",
"WebSearch",
],
- "continue_conversation": continue_conversation,
- "system_prompt": system_prompt,
- "permission_mode": permission_mode,
- "cwd": os.path.dirname(os.path.abspath(__file__)),
- }
-
- # add output style if specified
- if output_style:
- options_dict["settings"] = json.dumps({"outputStyle": output_style})
-
- options = ClaudeAgentOptions(**options_dict)
+ continue_conversation=continue_conversation,
+ system_prompt=system_prompt,
+ permission_mode=permission_mode,
+ cwd=os.path.dirname(os.path.abspath(__file__)),
+ settings=settings,
+ # IMPORTANT: setting_sources must include "project" to load filesystem settings:
+ # - Slash commands from .claude/commands/
+ # - CLAUDE.md project instructions
+ # - Subagent definitions from .claude/agents/
+ # - Hooks from .claude/settings.local.json (read via the "local" source)
+ # Without this, the SDK operates in isolation mode with no filesystem settings loaded.
+ setting_sources=["project", "local"],
+ )
result = None
messages = [] # this is to append the messages ONLY for this agent turn
diff --git a/claude_agent_sdk/chief_of_staff_agent/audit/report_history.json b/claude_agent_sdk/chief_of_staff_agent/audit/report_history.json
index f3d43b85..ac5313e5 100644
--- a/claude_agent_sdk/chief_of_staff_agent/audit/report_history.json
+++ b/claude_agent_sdk/chief_of_staff_agent/audit/report_history.json
@@ -15,6 +15,22 @@
"action": "created",
"word_count": 720,
"tool": "Write"
+ },
+ {
+ "timestamp": "2025-12-04T15:38:47.830538",
+ "file": "Q2_2024_Financial_Forecast_Report.md",
+ "path": "/home/rudycosta3/anthropic-cookbook/claude_agent_sdk/chief_of_staff_agent/output_reports/Q2_2024_Financial_Forecast_Report.md",
+ "action": "created",
+ "word_count": 956,
+ "tool": "Write"
+ },
+ {
+ "timestamp": "2025-12-04T15:47:56.409214",
+ "file": "hiring_decision.md",
+ "path": "/home/rudycosta3/anthropic-cookbook/claude_agent_sdk/chief_of_staff_agent/output_reports/hiring_decision.md",
+ "action": "created",
+ "word_count": 1943,
+ "tool": "Write"
}
]
}
\ No newline at end of file
diff --git a/claude_agent_sdk/chief_of_staff_agent/audit/script_usage_log.json b/claude_agent_sdk/chief_of_staff_agent/audit/script_usage_log.json
index c1a910c2..be2940b6 100644
--- a/claude_agent_sdk/chief_of_staff_agent/audit/script_usage_log.json
+++ b/claude_agent_sdk/chief_of_staff_agent/audit/script_usage_log.json
@@ -31,6 +31,22 @@
"description": "Calculate hiring impact for 3 engineers at $200K",
"tool_used": "Bash",
"success": true
+ },
+ {
+ "timestamp": "2025-12-04T15:38:06.071673",
+ "script": "simple_calculation.py",
+ "command": "python /home/rudycosta3/anthropic-cookbook/claude_agent_sdk/chief_of_staff_agent/scripts/simple_calculation.py 10000000 500000",
+ "description": "Run financial calculation script",
+ "tool_used": "Bash",
+ "success": true
+ },
+ {
+ "timestamp": "2025-12-04T15:44:06.248339",
+ "script": "hiring_impact.py",
+ "command": "python /home/rudycosta3/anthropic-cookbook/claude_agent_sdk/chief_of_staff_agent/scripts/hiring_impact.py 3 200000",
+ "description": "Calculate hiring impact for 3 senior engineers",
+ "tool_used": "Bash",
+ "success": true
}
]
}
\ No newline at end of file
diff --git a/claude_agent_sdk/chief_of_staff_agent/output_reports/hiring_decision.md b/claude_agent_sdk/chief_of_staff_agent/output_reports/hiring_decision.md
index 7a082fa3..b8c67349 100644
--- a/claude_agent_sdk/chief_of_staff_agent/output_reports/hiring_decision.md
+++ b/claude_agent_sdk/chief_of_staff_agent/output_reports/hiring_decision.md
@@ -1,139 +1,394 @@
# Budget Impact Analysis: Hiring 3 Senior Engineers
-**Date**: Q2 2024
-**Analysis Type**: Financial Impact Assessment
-**Decision**: Hiring 3 Senior Engineers
+
+**Date**: December 4, 2024
+**Analysis Type**: Comprehensive Financial Impact Assessment
+**Decision**: Hiring 3 Senior Backend Engineers
+**Prepared By**: Chief of Staff, TechStart Inc
---
## Executive Summary
-**Recommendation**: **PROCEED WITH HIRING** - Moderate risk with high upside potential
+**RECOMMENDATION: CONDITIONAL APPROVAL WITH STAGGERED APPROACH**
+
+The analysis shows that hiring 3 senior engineers is financially viable and strategically sound. With current cash reserves of $10M and 15% MoM revenue growth, the company can achieve break-even by November 2024 (5 months post-hire). However, we recommend a staggered hiring approach with milestone-based gates to manage risk.
-The analysis shows that hiring 3 senior engineers will increase our monthly burn rate by 13% ($65K/month) while reducing runway by 2.3 months. However, given our strong 15% MoM revenue growth and strategic product timeline, this investment is justified and positions us well for continued growth.
+| Metric | Current | Post-Hiring | Impact |
+|--------|---------|-------------|--------|
+| Monthly Gross Burn | $525K | $590K | +$65K (12.4%) |
+| Monthly Net Burn | $235K | $300K | +$65K (27.7%) |
+| Cash Runway | 42.6 months | 32.9 months | -9.7 months |
+| Break-Even Timeline | - | November 2024 | 5 months |
---
## 1. Total Cost Analysis
-### Base Compensation (Per Engineer)
-- **Base Salary**: $200,000 annually ($16,667 monthly)
-- **Equity**: 0.2% (median of 0.1-0.3% range)
-- **Total Compensation**: $220,000 annually
+### One-Time Costs (Per Engineer)
+| Item | Cost |
+|------|------|
+| Recruiting Fee | $30,000 |
+| Onboarding Cost | $5,000 |
+| Equipment & Setup | $8,000 |
+| **Total per Engineer** | **$43,000** |
+
+**Total One-Time Investment (3 Engineers): $129,000**
+
+### Recurring Costs (Per Engineer, Annual)
+| Item | Cost |
+|------|------|
+| Base Salary | $200,000 |
+| Benefits & Taxes (30%) | $60,000 |
+| Equity Value (0.2% @ $10M) | $20,000 |
+| **Annual Fully Loaded** | **$280,000** |
+| **Monthly Loaded (cash only, excludes equity)** | **$21,667** |
-### Fully Loaded Costs (3 Engineers)
-- **Annual Base Salaries**: $600,000
-- **Monthly Base Salaries**: $50,000
-- **Benefits & Overhead** (30%): $180,000 annually / $15,000 monthly
-- **Total Annual Loaded Cost**: $780,000
-- **Total Monthly Loaded Cost**: $65,000
+**Total Recurring Costs (3 Engineers):**
+- Annual: $780,000
+- Monthly: $65,000
-### One-Time Costs
-- **Recruiting Fees**: $90,000 (3 x $30,000)
-- **Equipment & Onboarding**: $15,000 (3 x $5,000)
-- **Total One-Time Investment**: $105,000
+### First Year Total Investment: $909,000
---
## 2. Burn Rate Impact
-- **Current Monthly Burn**: $500,000
-- **New Monthly Burn**: $565,000
-- **Percentage Increase**: 13%
-- **Quarterly Impact**: $195,000 additional burn per quarter
+### Current State (June 2024)
+- Monthly Gross Burn: $525,000
+- Monthly Revenue: $290,000
+- Net Burn Rate: $235,000
+- Headcount: 53
+
+### Post-Hiring State
+- Monthly Gross Burn: $590,000 (+$65,000)
+- Monthly Revenue: $290,000 (initial)
+- Net Burn Rate: $300,000 (+$65,000)
+- Headcount: 56 (+3)
+
+### Impact Metrics
+| Metric | Value |
+|--------|-------|
+| Gross Burn Increase | 12.4% |
+| Net Burn Increase | 27.7% |
+| Quarterly Burn Increase | $195,000 |
+| Annual Burn Increase | $780,000 |
+
+**Per-Employee Burn Rate Analysis:**
+- Jan 2024: $10,000/employee (45 HC)
+- Jun 2024: $9,906/employee (53 HC)
+- Post-hire: $10,536/employee (56 HC)
+
+The per-employee burn rate remains relatively stable, indicating controlled growth.
---
## 3. Runway Analysis
-- **Current Runway**: 20 months (until September 2025)
-- **New Runway**: 17.7 months (until February 2025)
-- **Runway Reduction**: 2.3 months
-- **Break-even Timeline**: With 15% MoM revenue growth, break-even moves from ~18 months to ~15 months
+### Current Runway (Net Burn Basis)
+- Cash in Bank: $10,000,000
+- Current Net Burn: $235,000/month
+- **Current Runway: 42.6 months** (until June 2028)
+
+### Post-Hiring Runway (Conservative - No Revenue Growth)
+- Remaining Cash: $9,871,000 (after one-time costs)
+- New Net Burn: $300,000/month
+- **New Runway: 32.9 months** (until September 2027)
+- **Runway Reduction: 9.7 months (23%)**
+
+### Post-Hiring Runway (With 15% MoM Revenue Growth)
+
+| Month | Revenue | Net Burn | Cumulative Cash |
+|-------|---------|----------|-----------------|
+| Jul 2024 | $333,500 | $256,500 | $9,614,500 |
+| Aug 2024 | $383,525 | $206,475 | $9,408,025 |
+| Sep 2024 | $441,054 | $148,946 | $9,259,079 |
+| Oct 2024 | $507,212 | $82,788 | $9,176,291 |
+| Nov 2024 | $583,294 | $6,706 | $9,169,585 |
+| **Dec 2024** | **$670,788** | **-$80,788** | **$9,250,373** |
+
+**Break-Even Point: November 2024 (5 months post-hire)** — net burn falls to roughly $7K in November; December 2024 is the first cash-flow-positive month.
+
+After break-even, the company becomes cash flow positive with infinite runway.
---
-## 4. ROI Considerations
+## 4. ROI Analysis
### Engineering Capacity Impact
-- **Current Engineering Team**: 25 engineers
-- **New Team Size**: 28 engineers (+12% capacity)
-- **Estimated Velocity Increase**: 9% (from hiring tool calculation)
-
-### Revenue Impact Potential
-Based on revenue forecast data:
-- Current ARR: $2.4M (growing 15% MoM)
-- Projected ARR by Dec 2024: $5.55M
-- **If new engineers accelerate growth by 2-3%**: Additional $300K-500K ARR by year-end
-- **Time to Productivity**: 2-3 months for senior engineers
-
-### Financial Returns
-- **Monthly ARR increase needed to justify**: $65,000 / 12 = $5,417
-- **Annual ARR increase needed**: $780,000
-- **Current trajectory already supports this** with strong 15% MoM growth
+- Current Engineering Team: 25 engineers
+- New Team Size: 28 engineers
+- Capacity Increase: 12%
+- Senior Productivity Multiplier: 1.5x vs junior
+- **Effective Capacity Increase: ~18%**
+
+### Revenue Impact Scenarios
+
+**Scenario A: Accelerated Product Development**
+- Feature velocity: 3 to 4 major features/quarter (+33%)
+- Revenue impact: +$12,000/month
+- Annual Revenue Lift: $144,000
+
+**Scenario B: Technical Debt Reduction**
+- 20% reduction enables 10-20% faster future development
+- Long-term Revenue Impact: $300K+ annually
+
+**Scenario C: Quality Improvements**
+- Churn reduction: 2.5% to 2.0%
+- At $2.4M ARR: $144,000 saved annually
+
+### Payback Period Analysis
+| Case | Annual Impact | Payback Period |
+|------|--------------|----------------|
+| Conservative (A only) | $144K | 6.3 years |
+| Moderate (A + C) | $288K | 3.2 years |
+| Optimistic (All) | $444K | 2.0 years |
+
+### Productivity Metrics
+- Engineering Cost per $1 ARR: $0.54 to $0.61 (13% increase)
+- Revenue per Employee: $45,283 to $42,857 (5% decrease initially)
+- At Break-Even: $104,166 revenue/employee (130% improvement)
---
-## 5. Alternative Options Considered
+## 5. Alternative Options Analysis
+
+### Option A: Staggered Hiring (RECOMMENDED)
+**Approach:** Hire 1 engineer/month over 3 months
+
+| Factor | Impact |
+|--------|--------|
+| One-time costs | Spread to $43K/month |
+| Initial burn increase | $22K/month (vs $65K) |
+| Integration quality | Higher |
+| Risk profile | Lower |
+| Cash savings (2 mo) | $43K |
+
+**Recommendation Score: 9/10** - Best risk-adjusted approach
+
+### Option B: Contractors vs Full-Time
+
+| Factor | Full-Time | Contractors |
+|--------|-----------|-------------|
+| Monthly Cost | $65,000 | $72,000 |
+| One-Time Costs | $129,000 | $0 |
+| Equity Dilution | 0.6% | 0% |
+| First 6 Months | $519,000 | $432,000 |
+| First 12 Months | $909,000 | $864,000 |
+
+**Recommendation Score: 6/10** - Good for short-term only
-### Option A: Staggered Hiring
-- Hire 1 engineer now, 2 in Q3
-- Reduces immediate burn impact by $43K monthly
-- Allows validation of productivity gains
+### Option C: Mixed Seniority (2 Senior + 2 Junior)
-### Option B: Contract vs Full-Time
-- 3 senior contractors at ~$150/hour (65% premium)
-- Monthly cost: ~$78K (20% higher)
-- More flexibility but higher cost and less commitment
+| Factor | 3 Senior | 2 Sr + 2 Jr |
+|--------|----------|-------------|
+| Monthly Cost | $65,000 | $64,168 |
+| Headcount | 3 | 4 |
+| Effective Capacity | 3.0 FTE | 2.6 FTE |
+| Pipeline Building | No | Yes |
-### Option C: Mixed Seniority
-- 2 senior engineers + 1 junior engineer
-- Reduces monthly cost by ~$7K
-- May slow initial velocity but builds pipeline
+**Recommendation Score: 7/10** - Good for sustainable team building
+
+### Option D: Offshore/Nearshore Team
+
+| Factor | US Hiring | Offshore |
+|--------|-----------|----------|
+| Monthly Cost | $65,000 | $35,000 |
+| Annual Savings | - | $360,000 |
+| Execution Risk | Low | High |
+| Timezone Overlap | 100% | 40-60% |
+
+**Recommendation Score: 7/10** - Strong financial case, higher execution risk
+
+### Cost-Benefit Summary
+
+| Option | Monthly Cost | 12-Mo Total | Runway Impact | Velocity | Risk |
+|--------|--------------|-------------|---------------|----------|------|
+| 3 Senior (US) | $65K | $909K | -9.7 mo | 100% | Medium |
+| Staggered | $65K* | $909K | -7.2 mo | 85%** | Low |
+| Contractors | $72K | $864K | -10.8 mo | 90% | Medium |
+| 2 Sr + 2 Jr | $64K | $899K | -9.5 mo | 87% | Low |
+| Offshore | $35K | $505K | -4.2 mo | 80% | High |
+
+*Averaged over 3 months | **First 6 months, ramps to 100%
---
## 6. Risk Factors
### Financial Risks
-- **Fundraising Pressure**: Runway reduction puts more pressure on Series B timing
-- **Market Conditions**: Economic downturn could affect enterprise sales growth
-- **Revenue Concentration**: Top 5 customers represent 30% of revenue
+
+**Risk 1: Fundraising Pressure (Impact: HIGH, Probability: 40%)**
+- If runway drops below 12 months, Series B becomes urgent vs strategic
+- **Mitigation:** Begin Series B conversations now; establish $2M credit line
+
+**Risk 2: Revenue Growth Stalls (Impact: HIGH, Probability: 35%)**
+- 10% MoM growth delays break-even from Nov 2024 to Mar 2025
+- **Mitigation:** Tie hiring to revenue milestones; monthly reviews
+
+**Risk 3: Economic Downturn (Impact: CRITICAL, Probability: 30%)**
+- Enterprise budget freezes could reduce growth to 5% or less
+- **Mitigation:** Diversify customer base; maintain $8M minimum cash
### Operational Risks
-- **Integration Time**: 2-3 months before full productivity
-- **Management Bandwidth**: 28 engineers may stress current EM capacity
-- **Technical Debt**: May slow feature development despite more engineers
+
+**Risk 4: Management Bandwidth (Impact: MEDIUM, Probability: 60%)**
+- VPE managing 28 engineers creates scaling challenges
+- **Mitigation:** Hire Engineering Manager before/concurrent with engineers
+
+**Risk 5: Integration Issues (Impact: MEDIUM, Probability: 30%)**
+- 3 simultaneous senior hires can disrupt team dynamics
+- **Mitigation:** Stagger start dates; structured onboarding program
+
+**Risk 6: Hiring Timeline Slips (Impact: LOW, Probability: 65%)**
+- Senior hiring typically takes 8-12 weeks
+- **Mitigation:** Engage multiple recruiting agencies; competitive packages
+
+### Market Risks
+
+**Risk 7: Competitor Acceleration (Impact: MEDIUM, Probability: 40%)**
+- DevTools AI or CodeAssist Pro could launch competing features
+- **Mitigation:** Focus on differentiated features; maintain R&D flexibility
+
+**Risk 8: Key Engineer Attrition (Impact: HIGH, Probability: 20%)**
+- Loss of critical team members negates new hire value
+- **Mitigation:** Stock option refresh; stay interviews; documentation
+
+### Risk Mitigation Dashboard
+
+| Risk Category | Overall Level | Priority Actions | Owner |
+|---------------|--------------|------------------|-------|
+| Financial | MEDIUM-HIGH | Revenue tracking, Series B prep | CFO |
+| Operational | MEDIUM | Management hiring, onboarding | VPE |
+| Market | MEDIUM | Competitive analysis, retention | CEO/VPE |
+
+---
+
+## 7. Final Recommendation
+
+### Decision: PROCEED with STAGGERED APPROACH and MILESTONE GATES
+
+### Must-Have Conditions
+1. **Revenue Validation:** July & August 2024 growth >12% MoM
+2. **Management Infrastructure:** Hire Engineering Manager by August 2024
+3. **Financial Safeguards:** Maintain minimum $8M cash balance
+4. **Retention Program:** Complete stock refresh for critical engineers
+
+### Implementation Timeline
+
+**Phase 1: Preparation (Weeks 1-2)**
+- Finalize job descriptions and compensation
+- Engage recruiting partners
+- Build onboarding curriculum
+- Begin Series B fundraising prep
+
+**Phase 2: First Hire (Weeks 3-10)**
+- Target start: August 15, 2024
+- Focus: Technical architecture
+- Gate: Validate 15% MoM revenue continues
+
+**Phase 3: Second Hire (Weeks 11-14)**
+- Target start: September 15, 2024
+- Focus: Feature development
+- Gate: Engineer #1 fully productive
+
+**Phase 4: Third Hire (Weeks 15-18)**
+- Target start: October 15, 2024
+- Focus: Code quality/tech debt
+- Gate: Revenue exceeds $550K/month
+
+### Success Metrics
+
+**Financial (Monthly Review):**
+- Net burn rate: <$300K/month by Dec 2024
+- Revenue growth: >12% MoM minimum
+- Cash runway: >15 months always
+- Burn multiple: <2.0
+
+**Engineering (Bi-Weekly Review):**
+- Feature delivery: 4+ major features/quarter
+- Code quality: 20% bug rate reduction by Q4
+- Velocity: 15% increase in story points
+- Tech debt: 10% reduction by Q4
+
+### Decision Framework
+
+**GO if:**
+- July/Aug revenue growth >12% MoM
+- Engineering Manager hired
+- Series B conversations initiated
+- Key engineer retention secured
+- Cash remains >$9M
+
+**PAUSE if:**
+- Revenue growth <10% for 2 consecutive months
+- Cash runway falls below 15 months
+- Major customer churn (>5% single month)
+
+**ABORT if:**
+- Revenue growth turns negative
+- Cash runway falls below 12 months
+- Critical engineer resignation
+
+---
+
+## Expected Outcomes by Quarter
+
+### Q3 2024 (Jul-Sep)
+- 2 of 3 engineers hired and ramping
+- Monthly burn: $550K-570K
+- Revenue: $333K to $441K/month
+- Runway: 18-19 months
+- Feature velocity: +10%
+
+### Q4 2024 (Oct-Dec)
+- All 3 engineers fully productive
+- Monthly burn: $590K
+- Revenue: $507K to $671K/month
+- **Break-even achieved: November 2024**
+- Feature velocity: +25%
+
+### Q1 2025 (Jan-Mar)
+- Cash flow positive
+- Revenue: $771K to $1.02M/month ($12.2M ARR)
+- Series B close: $30M at $120M valuation
+- Runway: Infinite (self-sustaining)
---
-## Final Recommendation & Action Plan
+## Immediate Action Items
-### Key Supporting Factors:
-1. **Strong Revenue Growth**: 15% MoM growth provides confidence in ROI
-2. **Healthy Runway Buffer**: 17.7 months still provides adequate runway
-3. **Strategic Timing**: Q2 product launch timing aligns with hiring needs
-4. **Market Opportunity**: $5B growing market supports aggressive investment
+### This Week
+1. CEO review and approve recommendation
+2. CFO confirm Q2 revenue numbers and growth rate
+3. VPE draft Engineering Manager job description
+4. CEO begin Series B outreach to 5 target firms
-### Recommended Approach:
-1. **Hire all 3 engineers immediately** to maximize Q2 product impact
-2. **Accelerate Series B conversations** to Q4 2024 (vs. original timeline)
-3. **Set aggressive revenue targets**: 18% MoM growth to justify investment
-4. **Monitor closely**: Weekly burn rate tracking for first 3 months
+### Next Week
+1. Post senior engineer job descriptions
+2. Engage 2 recruiting firms
+3. Schedule all-hands to announce hiring plans
+4. Establish monthly financial review cadence
-### Success Metrics:
-- Achieve 18% MoM revenue growth by Q4 2024
-- Launch AI code review feature on schedule
-- Maintain runway above 15 months through year-end
-- Begin Series B fundraising by October 2024
+### Within 30 Days
+1. Hire Engineering Manager
+2. Complete stock refresh for critical engineers
+3. Close first senior engineer candidate
+4. Complete 3 Series B partner meetings
---
-## Next Steps
+## Key Takeaways for Leadership
+
+1. **Financially Viable:** With $10M cash and 15% MoM growth, break-even by November 2024
+2. **Timing is Critical:** Market opportunity exists now; waiting risks competitive position
+3. **Risk is Manageable:** Staggered approach with milestone gates contains downside
+4. **Revenue Growth is Key:** 15% MoM growth is the critical success factor
+5. **Management Matters:** Engineering Manager hire is essential before scaling
+6. **Series B is Non-Negotiable:** Begin fundraising NOW while runway is strong
+7. **Consider Alternative:** 2 senior + 2 junior offers similar cost with more headcount
-1. **Immediate**: Begin recruitment process for 3 senior backend engineers
-2. **Week 1**: Update board on hiring decision and runway impact
-3. **Month 1**: Establish weekly burn rate monitoring dashboard
-4. **Month 3**: Evaluate productivity impact and adjust revenue targets
-5. **Month 6**: Initiate Series B fundraising conversations
+---
-This hiring decision aligns with TechStart's growth trajectory and positions the company to capitalize on market opportunity while maintaining financial discipline.
\ No newline at end of file
+**Analysis Completed By:** Chief of Staff Financial Analysis Team
+**Data Sources:** hiring_costs.csv, burn_rate.csv, revenue_forecast.json
+**Next Review:** After July 2024 revenue close
diff --git a/claude_agent_sdk/chief_of_staff_agent/scripts/decision_matrix.py b/claude_agent_sdk/chief_of_staff_agent/scripts/decision_matrix.py
index 0a6eed6c..ed720129 100755
--- a/claude_agent_sdk/chief_of_staff_agent/scripts/decision_matrix.py
+++ b/claude_agent_sdk/chief_of_staff_agent/scripts/decision_matrix.py
@@ -6,15 +6,55 @@
import argparse
import json
+from typing import Any, TypedDict
-def create_decision_matrix(options: list[dict], criteria: list[dict]) -> dict:
+class OptionScore(TypedDict):
+ name: str
+ scores: dict[str, float]
+ weighted_scores: dict[str, float]
+ total: float
+ pros: list[str]
+ cons: list[str]
+ verdict: str
+
+
+class Analysis(TypedDict):
+ clear_winner: bool
+ margin: float
+ recommendation: str
+ key_differentiators: list[str]
+ risks: list[str]
+
+
+class DecisionMatrix(TypedDict):
+ options: list[OptionScore]
+ winner: str | None
+ analysis: Analysis
+
+
+def create_decision_matrix(
+ options: list[dict[str, Any]], criteria: list[dict[str, Any]]
+) -> DecisionMatrix:
"""Create a weighted decision matrix for strategic choices"""
- results = {"options": [], "winner": None, "analysis": {}}
+ # Initialize analysis separately with explicit type for proper type checking
+ initial_analysis: Analysis = {
+ "clear_winner": False,
+ "margin": 0.0,
+ "recommendation": "",
+ "key_differentiators": [],
+ "risks": [],
+ }
+
+ results: DecisionMatrix = {
+ "options": [],
+ "winner": None,
+ "analysis": initial_analysis,
+ }
for option in options:
- option_scores = {
+ option_scores: OptionScore = {
"name": option["name"],
"scores": {},
"weighted_scores": {},
@@ -71,10 +111,10 @@ def create_decision_matrix(options: list[dict], criteria: list[dict]) -> dict:
return results
-def generate_analysis(options: list[dict]) -> dict:
+def generate_analysis(options: list[OptionScore]) -> Analysis:
"""Generate strategic analysis of the decision"""
- analysis = {
+ analysis: Analysis = {
"clear_winner": False,
"margin": 0,
"recommendation": "",
@@ -115,7 +155,7 @@ def generate_analysis(options: list[dict]) -> dict:
return analysis
-def main():
+def main() -> None:
parser = argparse.ArgumentParser(description="Strategic decision matrix tool")
parser.add_argument("--scenario", type=str, help="Predefined scenario")
parser.add_argument("--input", type=str, help="JSON file with options and criteria")
diff --git a/claude_agent_sdk/chief_of_staff_agent/scripts/financial_forecast.py b/claude_agent_sdk/chief_of_staff_agent/scripts/financial_forecast.py
index 1d582464..083adaed 100755
--- a/claude_agent_sdk/chief_of_staff_agent/scripts/financial_forecast.py
+++ b/claude_agent_sdk/chief_of_staff_agent/scripts/financial_forecast.py
@@ -6,12 +6,20 @@
import argparse
import json
+from typing import Any
-def forecast_financials(current_arr, growth_rate, months, burn_rate):
+def forecast_financials(
+ current_arr: float, growth_rate: float, months: int, burn_rate: float
+) -> dict[str, Any]:
"""Generate financial forecast with multiple scenarios"""
- forecasts = {"base_case": [], "optimistic": [], "pessimistic": [], "metrics": {}}
+ forecasts: dict[str, Any] = {
+ "base_case": [],
+ "optimistic": [],
+ "pessimistic": [],
+ "metrics": {},
+ }
# Base case
arr = current_arr
@@ -54,15 +62,15 @@ def forecast_financials(current_arr, growth_rate, months, burn_rate):
return forecasts
-def calculate_profitability_date(forecast):
+def calculate_profitability_date(forecast: list[dict[str, Any]]) -> int:
"""Find when company becomes profitable"""
for entry in forecast:
if entry["net_burn"] <= 0:
- return entry["month"]
+ return int(entry["month"])
return -1 # Not profitable in forecast period
-def calculate_cash_needed(forecast):
+def calculate_cash_needed(forecast: list[dict[str, Any]]) -> int:
"""Calculate total cash needed until profitability"""
total_burn = 0
for entry in forecast:
@@ -73,7 +81,7 @@ def calculate_cash_needed(forecast):
return round(total_burn)
-def main():
+def main() -> None:
parser = argparse.ArgumentParser(description="Financial forecasting tool")
parser.add_argument("--arr", type=float, default=2400000, help="Current ARR")
parser.add_argument("--growth", type=float, default=0.15, help="Monthly growth rate")
diff --git a/claude_agent_sdk/chief_of_staff_agent/scripts/hiring_impact.py b/claude_agent_sdk/chief_of_staff_agent/scripts/hiring_impact.py
index 41d5a32e..e3f14b29 100755
--- a/claude_agent_sdk/chief_of_staff_agent/scripts/hiring_impact.py
+++ b/claude_agent_sdk/chief_of_staff_agent/scripts/hiring_impact.py
@@ -8,7 +8,9 @@
import sys
-def calculate_hiring_impact(num_engineers, salary_per_engineer=200000):
+def calculate_hiring_impact(
+ num_engineers: int, salary_per_engineer: int = 200000
+) -> dict[str, int | float | str]:
"""
Calculate the financial impact of hiring engineers.
@@ -64,7 +66,7 @@ def calculate_hiring_impact(num_engineers, salary_per_engineer=200000):
}
-def main():
+def main() -> None:
# Parse command line arguments
if len(sys.argv) < 2:
print("Usage: python hiring_impact.py [salary_per_engineer]")
diff --git a/claude_agent_sdk/chief_of_staff_agent/scripts/simple_calculation.py b/claude_agent_sdk/chief_of_staff_agent/scripts/simple_calculation.py
index ac553f03..df301186 100644
--- a/claude_agent_sdk/chief_of_staff_agent/scripts/simple_calculation.py
+++ b/claude_agent_sdk/chief_of_staff_agent/scripts/simple_calculation.py
@@ -8,7 +8,7 @@
import sys
-def calculate_metrics(total_runway, monthly_burn):
+def calculate_metrics(total_runway: float, monthly_burn: float) -> dict[str, float]:
"""Calculate key financial metrics."""
runway_months = total_runway / monthly_burn
quarterly_burn = monthly_burn * 3
diff --git a/claude_agent_sdk/chief_of_staff_agent/scripts/talent_scorer.py b/claude_agent_sdk/chief_of_staff_agent/scripts/talent_scorer.py
index 8d47b8e5..0b5cc667 100755
--- a/claude_agent_sdk/chief_of_staff_agent/scripts/talent_scorer.py
+++ b/claude_agent_sdk/chief_of_staff_agent/scripts/talent_scorer.py
@@ -108,7 +108,7 @@ def rank_candidates(candidates: list[dict]) -> list[dict]:
return sorted(scored, key=lambda x: x["total_score"], reverse=True)
-def main():
+def main() -> None:
parser = argparse.ArgumentParser(description="Candidate scoring tool")
parser.add_argument("--input", type=str, help="JSON file with candidate data")
parser.add_argument("--name", type=str, help="Candidate name")
diff --git a/claude_agent_sdk/observability_agent/agent.py b/claude_agent_sdk/observability_agent/agent.py
index 68e2f3f7..d62aaec2 100644
--- a/claude_agent_sdk/observability_agent/agent.py
+++ b/claude_agent_sdk/observability_agent/agent.py
@@ -1,6 +1,14 @@
"""
-Observability Agent - GitHub monitoring with MCP servers
-Built on top of the research agent pattern
+Observability Agent - GitHub monitoring with MCP servers.
+
+This agent demonstrates MCP (Model Context Protocol) integration for GitHub
+monitoring and CI/CD workflow analysis. It uses the official GitHub MCP server
+to interact with the GitHub API.
+
+Key design decisions:
+- Uses disallowed_tools to ensure MCP tools are used (not Bash with gh CLI)
+- Focused on read-only GitHub operations for observability
+- Supports multi-turn conversations for deep-dive analysis
"""
import asyncio
@@ -9,94 +17,117 @@
from typing import Any
from dotenv import load_dotenv
+from utils.agent_visualizer import (
+ display_agent_response,
+ print_activity,
+ reset_activity_context,
+)
-from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
+from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient, McpServerConfig
load_dotenv()
+# Default model for the observability agent
+DEFAULT_MODEL = "claude-opus-4-5"
-def get_activity_text(msg) -> str | None:
- """Extract activity text from a message"""
- try:
- if "Assistant" in msg.__class__.__name__:
- if hasattr(msg, "content") and msg.content:
- first_content = msg.content[0] if isinstance(msg.content, list) else msg.content
- if hasattr(first_content, "name"):
- return f"๐ค Using: {first_content.name}()"
- return "๐ค Thinking..."
- elif "User" in msg.__class__.__name__:
- return "โ Tool completed"
- except (AttributeError, IndexError):
- pass
- return None
-
-
-def print_activity(msg) -> None:
- """Print activity to console"""
- activity = get_activity_text(msg)
- if activity:
- print(activity)
-
-
-# Pre-configured GitHub MCP server
-GITHUB_MCP_SERVER = {
- "github": {
- "command": "docker",
- "args": [
- "run",
- "-i",
- "--rm",
- "-e",
- "GITHUB_PERSONAL_ACCESS_TOKEN",
- "ghcr.io/github/github-mcp-server",
- ],
- "env": {"GITHUB_PERSONAL_ACCESS_TOKEN": os.environ.get("GITHUB_TOKEN")},
+# System prompt optimized for observability tasks
+DEFAULT_SYSTEM_PROMPT = """You are an observability agent specialized in monitoring \
+GitHub repositories and CI/CD workflows. Provide concise, actionable insights \
+suitable for on-call engineers. Focus on identifying issues, assessing severity, \
+and recommending next steps."""
+
+
+def get_github_mcp_server() -> dict[str, McpServerConfig]:
+ """
+ Get the GitHub MCP server configuration.
+
+ Returns:
+ MCP server config dict, or empty dict if GITHUB_TOKEN not set.
+ """
+ token = os.environ.get("GITHUB_TOKEN")
+ if not token:
+ return {}
+
+ return {
+ "github": {
+ "command": "docker",
+ "args": [
+ "run",
+ "-i",
+ "--rm",
+ "-e",
+ "GITHUB_PERSONAL_ACCESS_TOKEN",
+ "ghcr.io/github/github-mcp-server",
+ ],
+ "env": {"GITHUB_PERSONAL_ACCESS_TOKEN": token},
+ }
}
-}
async def send_query(
prompt: str,
activity_handler: Callable[[Any], None | Any] = print_activity,
continue_conversation: bool = False,
- mcp_servers: dict[str, Any] | None = None,
+ mcp_servers: dict[str, McpServerConfig] | None = None,
use_github: bool = True,
+ model: str = DEFAULT_MODEL,
+ restrict_to_mcp: bool = True,
+ display_result: bool = True,
) -> str | None:
"""
Send a query to the observability agent with MCP server support.
Args:
prompt: The query to send
- activity_handler: Callback for activity updates
+ activity_handler: Callback for activity updates (default: print_activity)
continue_conversation: Continue the previous conversation if True
- mcp_servers: Custom MCP servers configuration
+ mcp_servers: Custom MCP servers configuration (merged with GitHub if enabled)
use_github: Include GitHub MCP server (default: True)
+ model: Model to use (default: claude-opus-4-5)
+ restrict_to_mcp: If True, disallow Bash, Task, WebSearch, and WebFetch so MCP tools are used.
+ Set to False if you want the agent to have fallback options.
+ display_result: If True, display the response using display_agent_response()
+ after completion. Set to False for programmatic use.
Returns:
- The final result text or None if no result
+ The final result text or None if no result.
"""
+ # Only reset activity context for new conversations, not continuations
+ if not continue_conversation:
+ reset_activity_context()
+
# Build MCP servers config
- servers = {}
- if use_github and os.environ.get("GITHUB_TOKEN"):
- servers.update(GITHUB_MCP_SERVER)
+ servers: dict[str, McpServerConfig] = {}
+ if use_github:
+ servers.update(get_github_mcp_server())
if mcp_servers:
servers.update(mcp_servers)
+ # Build allowed tools list based on configured MCP servers
+ allowed_tools = [f"mcp__{name}" for name in servers]
+
+ # Configure disallowed tools to ensure MCP usage
+ # Without this, the agent could bypass MCP by using Bash with gh CLI
+ disallowed_tools = ["Bash", "Task", "WebSearch", "WebFetch"] if restrict_to_mcp else []
+
options = ClaudeAgentOptions(
- model="claude-sonnet-4-5",
- allowed_tools=["mcp__github", "WebSearch", "Read"],
+ model=model,
+ allowed_tools=allowed_tools,
+ disallowed_tools=disallowed_tools,
continue_conversation=continue_conversation,
- system_prompt="You are an observability agent specialized in monitoring GitHub repositories and CI/CD workflows",
- mcp_servers=servers if servers else None,
+ system_prompt=DEFAULT_SYSTEM_PROMPT,
+ mcp_servers=servers, # Empty dict is valid, no need for None
permission_mode="acceptEdits",
)
result = None
+ messages: list[Any] = []
try:
async with ClaudeSDKClient(options=options) as agent:
await agent.query(prompt=prompt)
async for msg in agent.receive_response():
+ messages.append(msg)
if asyncio.iscoroutinefunction(activity_handler):
await activity_handler(msg)
else:
@@ -108,4 +139,8 @@ async def send_query(
print(f"โ Query error: {e}")
raise
+ # Display the result using the shared visualization utility
+ if display_result and messages:
+ display_agent_response(messages)
+
return result
diff --git a/claude_agent_sdk/pyproject.toml b/claude_agent_sdk/pyproject.toml
index 35a21ebb..2cb84454 100644
--- a/claude_agent_sdk/pyproject.toml
+++ b/claude_agent_sdk/pyproject.toml
@@ -7,6 +7,8 @@ requires-python = ">=3.11"
dependencies = [
"claude-agent-sdk>=0.0.20",
"ipykernel>=6.29.5",
+ "markdown>=3.4",
"mcp-server-git>=2025.1.14",
+ "pandas>=2.2.0",
"python-dotenv>=1.1.1",
]
diff --git a/claude_agent_sdk/research_agent/agent.py b/claude_agent_sdk/research_agent/agent.py
index 87ea16bb..251a0d69 100644
--- a/claude_agent_sdk/research_agent/agent.py
+++ b/claude_agent_sdk/research_agent/agent.py
@@ -1,5 +1,14 @@
"""
-Research Agent - Using Claude SDK with built-in session management
+Research Agent - Using Claude SDK with built-in session management.
+
+This agent demonstrates web search and multimodal research capabilities
+using the Claude Agent SDK. It uses WebSearch for information gathering
+and Read for analyzing images and documents.
+
+Key design decisions:
+- Uses shared visualization utilities for consistent display
+- Includes citation requirements in system prompt for verifiable research
+- Supports multi-turn conversations for iterative research
"""
import asyncio
@@ -7,14 +16,41 @@
from typing import Any
from dotenv import load_dotenv
+from utils.agent_visualizer import (
+ display_agent_response,
+ print_activity,
+ reset_activity_context,
+)
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
load_dotenv()
+# Default model for the research agent
+DEFAULT_MODEL = "claude-opus-4-5"
+
+# System prompt with citation requirements for research quality
+RESEARCH_SYSTEM_PROMPT = """You are a research agent specialized in AI.
+
+When providing research findings:
+- Always include source URLs as citations
+- Format citations as markdown links: [Source Title](URL)
+- Group sources in a "Sources:" section at the end of your response"""
+
+
+def get_activity_text(msg: Any) -> str | None:
+ """
+ Extract activity text from a message for custom logging/monitoring.
+
+ This function is provided for users who want to implement custom
+ activity handlers (e.g., for logging, WebSocket streaming, etc.)
-def get_activity_text(msg) -> str | None:
- """Extract activity text from a message"""
+ Args:
+ msg: A message object from the agent response stream
+
+ Returns:
+ A formatted activity string, or None if not applicable
+ """
try:
if "Assistant" in msg.__class__.__name__:
# Check if content exists and has items
@@ -30,25 +66,23 @@ def get_activity_text(msg) -> str | None:
return None
-def print_activity(msg) -> None:
- """Print activity to console"""
- activity = get_activity_text(msg)
- if activity:
- print(activity)
-
-
async def send_query(
prompt: str,
activity_handler: Callable[[Any], None | Any] = print_activity,
continue_conversation: bool = False,
+ model: str = DEFAULT_MODEL,
+ display_result: bool = True,
) -> str | None:
"""
- Send a query using the Claude SDK with minimal overhead.
+ Send a query to the research agent with web search and multimodal support.
Args:
prompt: The query to send
- activity_handler: Callback for activity updates
+ activity_handler: Callback for activity updates (default: print_activity)
continue_conversation: Continue the previous conversation if True
+ model: Model to use (default: DEFAULT_MODEL, i.e. claude-opus-4-5)
+ display_result: If True, display the response using display_agent_response()
+ after completion. Set to False for programmatic use.
Note:
For the activity_handler - we support both sync and async handlers
@@ -60,19 +94,26 @@ async def send_query(
Returns:
The final result text or None if no result
"""
+ # Only reset activity context for new conversations, not continuations
+ if not continue_conversation:
+ reset_activity_context()
+
options = ClaudeAgentOptions(
- model="claude-sonnet-4-5",
+ model=model,
allowed_tools=["WebSearch", "Read"],
continue_conversation=continue_conversation,
- system_prompt="You are a research agent specialized in AI",
+ system_prompt=RESEARCH_SYSTEM_PROMPT,
+ max_buffer_size=10 * 1024 * 1024, # 10MB buffer for handling images and large responses
)
result = None
+ messages: list[Any] = []
try:
async with ClaudeSDKClient(options=options) as agent:
await agent.query(prompt=prompt)
async for msg in agent.receive_response():
+ messages.append(msg)
if asyncio.iscoroutinefunction(activity_handler):
await activity_handler(msg)
else:
@@ -84,4 +125,8 @@ async def send_query(
print(f"โ Query error: {e}")
raise
+ # Display the result using the shared visualization utility
+ if display_result and messages:
+ display_agent_response(messages)
+
return result
diff --git a/claude_agent_sdk/utils/agent_visualizer.py b/claude_agent_sdk/utils/agent_visualizer.py
index c6552b45..45ec21c9 100644
--- a/claude_agent_sdk/utils/agent_visualizer.py
+++ b/claude_agent_sdk/utils/agent_visualizer.py
@@ -1,17 +1,250 @@
-def print_activity(msg):
+"""
+Visualization utilities for Claude Agent SDK conversations.
+
+This module is the PUBLIC API for all display functions in notebooks:
+- Real-time activity tracking (print_activity)
+- Conversation timelines (visualize_conversation)
+- Final result display (print_final_result)
+- Styled HTML card display (display_agent_response)
+
+Example usage::
+
+ from utils.agent_visualizer import (
+ print_activity,
+ reset_activity_context,
+ visualize_conversation,
+ display_agent_response,
+ )
+
+ # Track activity during agent execution
+ reset_activity_context()
+ messages = []
+ async for msg in agent.receive_response():
+ print_activity(msg)
+ messages.append(msg)
+
+ # Display results (auto-detects Jupyter vs terminal)
+ visualize_conversation(messages)
+ display_agent_response(messages)
+"""
+
+from typing import Any
+
+from utils.html_renderer import display_agent_response, visualize_conversation_html
+
+__all__ = [
+ "display_agent_response",
+ "print_activity",
+ "print_final_result",
+ "reset_activity_context",
+ "visualize_conversation",
+]
+
+
+def _is_jupyter() -> bool:
+ """
+ Detect if running in a Jupyter notebook environment.
+
+ Returns True for Jupyter notebook/lab, False for terminal/scripts.
+ """
+ try:
+ from IPython import get_ipython
+
+ shell = get_ipython()
+ if shell is None:
+ return False
+ return bool(shell.__class__.__name__ == "ZMQInteractiveShell")
+ except ImportError:
+ return False
+ except Exception:
+ return False
+
+
+# Box-drawing configuration constants
+BOX_WIDTH = 58 # Width for main conversation boxes
+SUBAGENT_WIDTH = 54 # Width for subagent delegation blocks (slightly narrower for visual hierarchy)
+
+# Box-drawing characters for clean visual formatting
+BOX_TOP = "โญ" + "โ" * BOX_WIDTH + "โฎ"
+BOX_BOTTOM = "โฐ" + "โ" * BOX_WIDTH + "โฏ"
+BOX_DIVIDER = "โ" + "โ" * BOX_WIDTH + "โค"
+BOX_SIDE = "โ"
+SUBAGENT_TOP = "โ" + "โ" * SUBAGENT_WIDTH + "โ"
+SUBAGENT_BOTTOM = "โ" + "โ" * SUBAGENT_WIDTH + "โ"
+SUBAGENT_SIDE = "โ"
+
+
+def extract_model_from_messages(messages: list[Any]) -> str | None:
+ """
+ Extract the model identifier from a list of messages.
+
+ Looks for model information in SystemMessage or ResultMessage.
+
+ Args:
+ messages: List of conversation messages
+
+ Returns:
+ Model identifier string or None if not found
+ """
+ for msg in messages:
+ msg_type = msg.__class__.__name__
+
+ # Check SystemMessage for model info
+ if msg_type == "SystemMessage":
+ if hasattr(msg, "data") and isinstance(msg.data, dict):
+ if "model" in msg.data:
+ return str(msg.data["model"])
+
+ # Check ResultMessage for model info
+ if msg_type == "ResultMessage":
+ if hasattr(msg, "model"):
+ return str(msg.model)
+
+ return None
+
+
+# Track subagent state for activity display
+# WARNING: This global state is NOT thread-safe. If using this module in concurrent
+# scenarios (e.g., multiple asyncio tasks processing different conversations simultaneously),
+# each task should call reset_activity_context() before starting and be aware that
+# interleaved operations may produce incorrect subagent tracking. For thread-safe usage,
+# consider passing context explicitly or using contextvars.
+_subagent_context: dict[str, Any] = {
+ "active": False,
+ "name": None,
+ "depth": 0,
+}
+
+
+def print_activity(msg: Any) -> None:
+ """
+ Print activity with enhanced subagent visibility.
+
+ Shows:
+ - Main agent tool usage with ๐ค
+ - Subagent invocations with ๐ and subagent name
+ - Subagent tool usage with indented ๐
+
+ Example::
+
+ async for msg in agent.receive_response():
+ print_activity(msg) # Prints: ๐ค Using: WebSearch()
+ messages.append(msg)
+
+ Args:
+ msg: A message object from the Claude Agent SDK response stream
+ """
+ global _subagent_context
+
if "Assistant" in msg.__class__.__name__:
- print(
- f"๐ค {'Using: ' + msg.content[0].name + '()' if hasattr(msg.content[0], 'name') else 'Thinking...'}"
- )
+ # Check if content exists and has elements
+ if hasattr(msg, "content") and msg.content:
+ first_block = msg.content[0]
+ tool_name = first_block.name if hasattr(first_block, "name") else None
+
+ if tool_name == "Task":
+ # Extract subagent details from the Task tool input
+ if hasattr(first_block, "input") and first_block.input:
+ subagent_type = first_block.input.get("subagent_type", "unknown")
+ description = first_block.input.get("description", "")
+ _subagent_context["active"] = True
+ _subagent_context["name"] = subagent_type
+ _subagent_context["depth"] += 1
+
+ print(f"๐ Delegating to subagent: {subagent_type}")
+ if description:
+ print(f" โโ Task: {description}")
+ else:
+ print("๐ Delegating to subagent...")
+ elif tool_name:
+ # Check if we're inside a subagent context
+ if _subagent_context["active"]:
+ indent = " " * _subagent_context["depth"]
+ print(f"{indent}๐ [{_subagent_context['name']}] Using: {tool_name}()")
+ else:
+ print(f"๐ค Using: {tool_name}()")
+ else:
+ if _subagent_context["active"]:
+ indent = " " * _subagent_context["depth"]
+ print(f"{indent}๐ [{_subagent_context['name']}] Thinking...")
+ else:
+ print("๐ค Thinking...")
+ else:
+ if _subagent_context["active"]:
+ indent = " " * _subagent_context["depth"]
+ print(f"{indent}๐ [{_subagent_context['name']}] Thinking...")
+ else:
+ print("๐ค Thinking...")
+
elif "User" in msg.__class__.__name__:
- print("โ Tool completed")
+ # Check if this is a Task tool result (subagent completed)
+ if hasattr(msg, "content") and msg.content:
+ for result in msg.content if isinstance(msg.content, list) else [msg.content]:
+ if isinstance(result, dict) and result.get("type") == "tool_result":
+ # Try to detect if this was a Task result
+ content = result.get("content", "")
+ if isinstance(content, str) and (
+ "subagent" in content.lower() or _subagent_context["active"]
+ ):
+ if _subagent_context["active"]:
+ indent = " " * _subagent_context["depth"]
+ print(f"{indent}โ
Subagent [{_subagent_context['name']}] completed")
+ _subagent_context["depth"] = max(0, _subagent_context["depth"] - 1)
+ if _subagent_context["depth"] == 0:
+ _subagent_context["active"] = False
+ _subagent_context["name"] = None
+ else:
+ print("โ Task completed")
+ return
+
+ if _subagent_context["active"]:
+ indent = " " * _subagent_context["depth"]
+ print(f"{indent}โ Tool completed")
+ else:
+ print("โ Tool completed")
-def print_final_result(messages):
- """Print the final agent result and cost information"""
+def reset_activity_context() -> None:
+ """
+ Reset the subagent tracking context.
+
+ Call before starting a new query to ensure clean state for subagent tracking.
+
+ Example::
+
+ # Before each new query
+ reset_activity_context()
+ await agent.query("New research question")
+ async for msg in agent.receive_response():
+ print_activity(msg)
+ """
+ global _subagent_context
+ _subagent_context = {
+ "active": False,
+ "name": None,
+ "depth": 0,
+ }
+
+
+def print_final_result(messages: list[Any], model: str | None = None) -> None:
+ """
+ Print the final agent result and cost information.
+
+ Args:
+ messages: List of conversation messages
+ model: Optional model identifier to print alongside the reported cost.
+ If not provided, will attempt to extract from messages.
+ """
+ if not messages:
+ return
+
# Get the result message (last message)
result_msg = messages[-1]
+ # Try to extract model from messages if not provided
+ if model is None:
+ model = extract_model_from_messages(messages)
+
# Find the last assistant message with actual content
for msg in reversed(messages):
if msg.__class__.__name__ == "AssistantMessage" and msg.content:
@@ -22,85 +255,276 @@ def print_final_result(messages):
break
break
- # Print cost if available
- if hasattr(result_msg, "total_cost_usd"):
- print(f"\n๐ Cost: ${result_msg.total_cost_usd:.2f}")
+ # Print cost (use reported cost from SDK - it's authoritative)
+ # Note: total_cost_usd is model-aware and calculated by the API
+ reported_cost = getattr(result_msg, "total_cost_usd", None)
+ num_turns = getattr(result_msg, "num_turns", 1)
+
+ if reported_cost is not None:
+ print(f"\n๐ Cost: ${reported_cost:.2f}")
+ if num_turns and num_turns > 1:
+ avg_cost = reported_cost / num_turns
+ print(f" ({num_turns} turns, avg ${avg_cost:.4f}/turn)")
+
+ # Show model info
+ if model:
+ print(f" Model: {model}")
# Print duration if available
if hasattr(result_msg, "duration_ms"):
print(f"โฑ๏ธ Duration: {result_msg.duration_ms / 1000:.2f}s")
-def visualize_conversation(messages):
- """Create a visual representation of the entire agent conversation"""
- print("\n" + "=" * 60)
- print("๐ค AGENT CONVERSATION TIMELINE")
- print("=" * 60 + "\n")
+def _format_tool_info(tool_name: str, tool_input: dict) -> str:
+ """Format tool information with relevant parameters."""
+ info_parts = [tool_name]
+
+ if tool_input:
+ if tool_name == "WebSearch" and "query" in tool_input:
+ info_parts.append(f'โ "{tool_input["query"]}"')
+ elif tool_name == "Bash" and "command" in tool_input:
+ cmd = tool_input["command"]
+ info_parts.append(f"โ {cmd}")
+ elif tool_name == "Read" and "file_path" in tool_input:
+ path = tool_input["file_path"]
+ # Show just filename for readability
+ filename = path.split("/")[-1] if "/" in path else path
+ info_parts.append(f"โ {filename}")
+ elif tool_name == "Write" and "file_path" in tool_input:
+ path = tool_input["file_path"]
+ filename = path.split("/")[-1] if "/" in path else path
+ info_parts.append(f"โ {filename}")
+
+ return " ".join(info_parts)
- for _i, msg in enumerate(messages):
+
+def _format_subagent_completion_line(subagent_name: str | None) -> str:
+ """
+ Format a subagent completion line with safe handling of None and long names.
+
+ Args:
+ subagent_name: Name of the subagent (may be None)
+
+ Returns:
+ Formatted completion line string
+ """
+ name = (subagent_name or "unknown").upper()
+ # Calculate padding, ensuring it's never negative
+ padding = max(0, 30 - len(name))
+ return f" {SUBAGENT_SIDE} โ
SUBAGENT [{name}] COMPLETE" + " " * padding + SUBAGENT_SIDE
+
+
+def visualize_conversation(messages: list[Any]) -> None:
+ """
+ Create a clean, professional visualization of the agent conversation.
+
+ Auto-detects environment:
+ - Jupyter notebooks: Renders styled HTML timeline with color-coded message blocks
+ - Terminal/scripts: Falls back to box-drawing character visualization
+
+ Features (both modes):
+ - Grouped tool calls
+ - Clear subagent delegation sections
+ - Model-aware cost breakdown
+
+ Example::
+
+ messages = []
+ async for msg in agent.receive_response():
+ messages.append(msg)
+
+ # Renders HTML in Jupyter, box-drawing in terminal
+ visualize_conversation(messages)
+
+ Args:
+ messages: List of message objects from the agent response
+ """
+ # Auto-detect: use HTML in Jupyter, terminal fallback elsewhere
+ if _is_jupyter():
+ visualize_conversation_html(messages)
+ return
+
+ # Terminal fallback: box-drawing visualization
+ # Extract model info for display in the header and the final stats
+ model = extract_model_from_messages(messages)
+
+ # Header
+ print()
+ print(BOX_TOP)
+ print(f"{BOX_SIDE} ๐ค AGENT CONVERSATION TIMELINE" + " " * 25 + BOX_SIDE)
+ print(BOX_BOTTOM)
+ print()
+ print(f"๐ Model: {model or 'unknown'}")
+
+ # Track state
+ in_subagent = False
+ current_subagent: str | None = None
+ pending_tools: list[str] = [] # Collect consecutive tool calls
+
+ def flush_pending_tools(indent: str = "") -> None:
+ """Print accumulated tool calls in a compact format."""
+ nonlocal pending_tools
+ if pending_tools:
+ if len(pending_tools) == 1:
+ print(f"{indent} ๐ง {pending_tools[0]}")
+ else:
+ print(f"{indent} ๐ง Tools: {', '.join(pending_tools)}")
+ pending_tools = []
+
+ for msg in messages:
msg_type = msg.__class__.__name__
if msg_type == "SystemMessage":
- print("โ๏ธ System Initialized")
+ session_id = ""
if hasattr(msg, "data") and "session_id" in msg.data:
- print(f" Session: {msg.data['session_id'][:8]}...")
- print()
+ session_id = f" (Session: {msg.data['session_id'][:8]}...)"
+ print(f"โ๏ธ System Initialized{session_id}")
elif msg_type == "AssistantMessage":
- print("๐ค Assistant:")
- if msg.content:
- for block in msg.content:
- if hasattr(block, "text"):
- # Text response
- text = block.text[:500] + "..." if len(block.text) > 500 else block.text
- print(f" ๐ฌ {text}")
- elif hasattr(block, "name"):
- # Tool use
- tool_name = block.name
- print(f" ๐ง Using tool: {tool_name}")
-
- # Show key parameters for certain tools
- if hasattr(block, "input") and block.input:
- if tool_name == "WebSearch" and "query" in block.input:
- print(f' Query: "{block.input["query"]}"')
- elif tool_name == "TodoWrite" and "todos" in block.input:
- todos = block.input["todos"]
- in_progress = [t for t in todos if t["status"] == "in_progress"]
- completed = [t for t in todos if t["status"] == "completed"]
- print(
- f" ๐ {len(completed)} completed, {len(in_progress)} in progress"
- )
- print()
+ if not msg.content:
+ continue
+
+ for block in msg.content:
+ if hasattr(block, "text"):
+ # Flush any pending tools before text
+ flush_pending_tools(" " if in_subagent else "")
+
+ text = block.text
+
+ if in_subagent:
+ print(f"\n ๐ [{current_subagent}] Response:")
+ # Indent the text nicely
+ for line in text.split("\n"):
+ if line.strip():
+ print(f" {line.strip()}")
+ else:
+ print("\n๐ค Assistant:")
+ # Indent the text nicely
+ for line in text.split("\n"):
+ if line.strip():
+ print(f" {line.strip()}")
+
+ elif hasattr(block, "name"):
+ tool_name = block.name
+ tool_input = block.input if hasattr(block, "input") else {}
+
+ if tool_name == "Task":
+ # Flush pending tools
+ flush_pending_tools(" " if in_subagent else "")
+
+ # Subagent delegation - create clear visual block
+ subagent_type = (
+ tool_input.get("subagent_type", "unknown") if tool_input else "unknown"
+ )
+ description = tool_input.get("description", "") if tool_input else ""
+ prompt = tool_input.get("prompt", "") if tool_input else ""
+
+ print()
+ print(f" {SUBAGENT_TOP}")
+ print(
+ f" {SUBAGENT_SIDE} ๐ DELEGATING TO: {subagent_type.upper():<36} {SUBAGENT_SIDE}"
+ )
+ if description:
+ print(f" {SUBAGENT_SIDE} ๐ {description:<45} {SUBAGENT_SIDE}")
+ print(f" {SUBAGENT_BOTTOM}")
+
+ if prompt:
+ print(f" ๐ Prompt: {prompt}")
+
+ print()
+ in_subagent = True
+ current_subagent = subagent_type
+
+ else:
+ # Regular tool - accumulate for grouped display
+ tool_info = _format_tool_info(tool_name, tool_input)
+ pending_tools.append(tool_info)
elif msg_type == "UserMessage":
- if msg.content and isinstance(msg.content, list):
- for result in msg.content:
- if isinstance(result, dict) and result.get("type") == "tool_result":
- print("๐ค Tool Result Received")
- tool_id = result.get("tool_use_id", "unknown")[:8]
- print(f" ID: {tool_id}...")
-
- # Show result summary
- if "content" in result:
- content = result["content"]
- if isinstance(content, str):
- # Show more of the content
- summary = content[:500] + "..." if len(content) > 500 else content
- print(f" ๐ฅ {summary}")
- print()
+ if not msg.content or not isinstance(msg.content, list):
+ continue
+
+ for result in msg.content:
+ if not isinstance(result, dict) or result.get("type") != "tool_result":
+ continue
+
+ content = result.get("content", "")
+
+ # Detect subagent completion (Task tool result with substantial content)
+ is_subagent_result = in_subagent and isinstance(content, str) and len(content) > 200
+
+ if is_subagent_result:
+ # Flush any pending tools
+ flush_pending_tools(" ")
+
+ # Show subagent completion
+ print()
+ print(f" {SUBAGENT_TOP}")
+ print(_format_subagent_completion_line(current_subagent))
+ print(f" {SUBAGENT_BOTTOM}")
+
+ # Show result summary
+ if content:
+ lines = [line.strip() for line in content.split("\n") if line.strip()]
+ if lines:
+ print(" ๐ Result:")
+ for line in lines:
+ print(f" {line}")
+ print()
+
+ in_subagent = False
+ current_subagent = None
+ else:
+ # Regular tool result - nothing to display individually;
+ # pending tools are flushed once after this loop finishes
+ pass
+
+ # Flush tools after processing user message
+ flush_pending_tools(" " if in_subagent else "")
elif msg_type == "ResultMessage":
- print("โ
Conversation Complete")
- if hasattr(msg, "num_turns"):
- print(f" Turns: {msg.num_turns}")
- if hasattr(msg, "total_cost_usd"):
- print(f" Cost: ${msg.total_cost_usd:.2f}")
- if hasattr(msg, "duration_ms"):
- print(f" Duration: {msg.duration_ms / 1000:.2f}s")
- if hasattr(msg, "usage"):
- usage = msg.usage
- total_tokens = usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
- print(f" Tokens: {total_tokens:,}")
+ # Flush any remaining pending tools
+ flush_pending_tools(" " if in_subagent else "")
+
+ # Close subagent if still open
+ if in_subagent:
+ print()
+ print(f" {SUBAGENT_TOP}")
+ print(_format_subagent_completion_line(current_subagent))
+ print(f" {SUBAGENT_BOTTOM}")
+ in_subagent = False
+
+ # Final stats
print()
+ print("โ" * 60)
+ stats_parts = []
+ num_turns = getattr(msg, "num_turns", 1)
+ if num_turns:
+ stats_parts.append(f"Turns: {num_turns}")
+
+ # Extract token usage (note: this is cumulative across all turns)
+ input_tokens = 0
+ output_tokens = 0
+ if hasattr(msg, "usage") and msg.usage:
+ input_tokens = msg.usage.get("input_tokens", 0)
+ output_tokens = msg.usage.get("output_tokens", 0)
+ total_tokens = input_tokens + output_tokens
+ stats_parts.append(f"Tokens: {total_tokens:,}")
+
+ # Show cost (use reported cost from SDK - it's authoritative)
+ reported_cost = getattr(msg, "total_cost_usd", None)
+ if reported_cost:
+ stats_parts.append(f"Cost: ${reported_cost:.2f}")
+
+ if hasattr(msg, "duration_ms"):
+ stats_parts.append(f"Duration: {msg.duration_ms / 1000:.1f}s")
+
+ print(f"โ
Complete โ {' โ '.join(stats_parts)}")
+
+ # Show model info
+ if model:
+ print(f"๐ Model: {model}")
+
+ print("โ" * 60)
- print("=" * 60 + "\n")
+ print()
diff --git a/claude_agent_sdk/utils/html_renderer.py b/claude_agent_sdk/utils/html_renderer.py
new file mode 100644
index 00000000..0af71a6d
--- /dev/null
+++ b/claude_agent_sdk/utils/html_renderer.py
@@ -0,0 +1,621 @@
+"""
+HTML rendering utilities for Jupyter notebook display.
+
+This module provides styled HTML card rendering for various content types,
+designed for use in Jupyter notebooks with the Claude Agent SDK.
+
+Content types supported:
+- Images (file paths converted to base64)
+- Pandas DataFrames and Series
+- Agent message lists (extracts final assistant response)
+- Generic Python objects (dicts, lists, strings)
+"""
+
+import base64
+import html
+import pprint
+from typing import Any
+
+# Optional dependencies with graceful fallback
+HTML: Any = None
+display: Any = None
+pd: Any = None
+markdown: Any = None
+
+try:
+ import pandas as pd
+except ImportError:
+ pass
+
+try:
+ from IPython.display import HTML, display
+except ImportError:
+
+ def display(obj: Any) -> None:
+ """Fallback display for non-Jupyter environments."""
+ print(obj.data if hasattr(obj, "data") else obj)
+
+ class _HTML:
+ """Fallback HTML wrapper for non-Jupyter environments."""
+
+ def __init__(self, data: Any):
+ self.data = data
+
+ HTML = _HTML
+
+try:
+ import markdown as _markdown
+
+ markdown = _markdown
+except ImportError:
+ pass
+
+
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+# CSS Constants
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+CARD_CSS = """
+
+"""
+
+
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+# Content Type Detection
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+
+def _is_message_list(content: Any) -> bool:
+ """
+ Check if content is a list of SDK message objects.
+
+ Treats an item as a message when its class name contains 'Message' and it
+ has a `content` attribute; only the last three items are inspected.
+ """
+ if not isinstance(content, list) or not content:
+ return False
+
+ # Check if any item looks like a message (has content attribute and message-like class)
+ for item in content[-3:]: # Check last few items for efficiency
+ class_name = getattr(item, "__class__", type(None)).__name__
+ if "Message" in class_name and hasattr(item, "content"):
+ return True
+ return False
+
+
+def _is_dataframe(content: Any) -> bool:
+ """Check if content is a pandas DataFrame."""
+ return pd is not None and isinstance(content, pd.DataFrame)
+
+
+def _is_series(content: Any) -> bool:
+ """Check if content is a pandas Series."""
+ return pd is not None and isinstance(content, pd.Series)
+
+
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+# Content Renderers
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+
+def _image_to_base64(image_path: str) -> str:
+ """Convert an image file to base64 encoded string."""
+ with open(image_path, "rb") as img_file:
+ return base64.b64encode(img_file.read()).decode("utf-8")
+
+
+def _render_image(image_path: str) -> str:
+ """
+ Render an image path as an HTML img tag with base64 encoding.
+
+ Args:
+ image_path: Path to the image file
+
+ Returns:
+ HTML string with embedded base64 image
+ """
+ b64 = _image_to_base64(image_path)
+ return (
+ f'
'
+ )
+
+
+def _render_dataframe(df: Any) -> str:
+ """
+ Render a pandas DataFrame as an HTML table.
+
+ Args:
+ df: pandas DataFrame
+
+ Returns:
+ HTML table string
+ """
+ result: str = df.to_html(classes="pretty-table", index=False, border=0, escape=True)
+ return result
+
+
+def _render_series(series: Any) -> str:
+ """
+ Render a pandas Series as an HTML table.
+
+ Args:
+ series: pandas Series
+
+ Returns:
+ HTML table string
+ """
+ result: str = series.to_frame().to_html(classes="pretty-table", border=0, escape=True)
+ return result
+
+
+def _render_message_list(messages: list[Any]) -> str:
+ """
+ Extract and render the final assistant text from a message list.
+
+ Searches backwards through messages to find the last AssistantMessage
+ with text content, then renders it (with markdown if available).
+
+ Args:
+ messages: List of SDK message objects
+
+ Returns:
+ Rendered HTML string
+ """
+ final_text = None
+
+ for msg in reversed(messages):
+ class_name = msg.__class__.__name__
+ if "Assistant" in class_name and hasattr(msg, "content") and msg.content:
+ for block in msg.content:
+ if hasattr(block, "text"):
+ final_text = block.text
+ break
+ if final_text:
+ break
+
+ if final_text:
+ return _render_markdown_text(final_text)
+
+ # Fallback: format the entire list
+ return _render_code_block(pprint.pformat(messages))
+
+
+def _render_markdown_text(text: str) -> str:
+ """
+ Render text as markdown HTML if the markdown library is available.
+
+ Enables extensions for tables, fenced code blocks, and other common markdown features.
+
+ Args:
+ text: Plain text, potentially containing markdown
+
+ Returns:
+ HTML string
+ """
+ if markdown is not None:
+ result: str = markdown.markdown(
+ text,
+ extensions=["tables", "fenced_code", "nl2br", "sane_lists"],
+ )
+ return result
+ # Fallback: preserve whitespace and escape HTML
+ return f"{html.escape(text)}"
+
+
+def _render_code_block(content: str) -> str:
+ """
+ Render content as a code block.
+
+ Args:
+ content: String to display in code block
+
+ Returns:
+ HTML pre/code block string
+ """
+ return f"{html.escape(content)}
"
+
+
+def _render_generic(content: Any) -> str:
+ """
+ Render generic content (dicts, lists, strings, other objects).
+
+ Args:
+ content: Any Python object
+
+ Returns:
+ HTML string representation
+ """
+ if isinstance(content, (list, dict)):
+ return _render_code_block(pprint.pformat(content))
+ elif isinstance(content, str):
+ return _render_code_block(content)
+ else:
+ return _render_code_block(str(content))
+
+
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+# Public API
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+
+def render_content(content: Any, is_image: bool = False) -> str:
+ """
+ Detect content type and render to HTML.
+
+ This is the main content routing function that dispatches to
+ the appropriate renderer based on content type.
+
+ Args:
+ content: Content to render (image path, DataFrame, messages, etc.)
+ is_image: If True, treat string content as an image path
+
+ Returns:
+ Rendered HTML string
+ """
+ # Image rendering (explicit flag)
+ if is_image and isinstance(content, str):
+ return _render_image(content)
+
+ # Pandas DataFrame
+ if _is_dataframe(content):
+ return _render_dataframe(content)
+
+ # Pandas Series
+ if _is_series(content):
+ return _render_series(content)
+
+ # SDK message list
+ if _is_message_list(content):
+ return _render_message_list(content)
+
+ # Generic fallback
+ return _render_generic(content)
+
+
+def display_card(content: str, title: str | None = None) -> None:
+ """
+ Display rendered HTML content inside a styled card.
+
+ Args:
+ content: Pre-rendered HTML content
+ title: Optional title for the card
+ """
+ title_html = f'{html.escape(title)}
' if title else ""
+ card_html = f'{title_html}{content}
'
+ display(HTML(CARD_CSS + card_html))
+
+
+def display_agent_response(messages: list[Any], title: str = "Agent Response") -> None:
+ """
+ Display the final assistant response from a conversation in a styled card.
+
+ Extracts the last text response from an agent's message history and
+ renders it with markdown formatting in a visually appealing card.
+
+ Args:
+ messages: List of SDK message objects from agent conversation
+ title: Card title (default: "Agent Response")
+
+ Example:
+ >>> async for msg in query(prompt="Research AI trends", ...):
+ ... messages.append(msg)
+ >>> display_agent_response(messages)
+ """
+ if not _is_message_list(messages):
+ raise TypeError(
+ "Expected a list of SDK message objects. "
+ "Use display_card() or render_content() for other content types."
+ )
+ rendered = _render_message_list(messages)
+ display_card(rendered, title)
+
+
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+# Conversation Timeline (HTML)
+# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+TIMELINE_CSS = """
+
+"""
+
+
+def _extract_model_from_messages(messages: list[Any]) -> str | None:
+ """Extract model identifier from messages."""
+ for msg in messages:
+ msg_type = msg.__class__.__name__
+ if msg_type == "SystemMessage":
+ if hasattr(msg, "data") and isinstance(msg.data, dict):
+ if "model" in msg.data:
+ return str(msg.data["model"])
+ if msg_type == "ResultMessage":
+ if hasattr(msg, "model"):
+ return str(msg.model)
+ return None
+
+
+def _format_tool_badge(tool_name: str, tool_input: dict | None = None) -> str:
+ """Format a tool call as an HTML badge."""
+ info = tool_name
+ if tool_input:
+ if tool_name == "WebSearch" and "query" in tool_input:
+ info = f'{tool_name}: "{tool_input["query"][:30]}..."'
+ elif tool_name == "Read" and "file_path" in tool_input:
+ filename = tool_input["file_path"].split("/")[-1]
+ info = f"{tool_name}: {filename}"
+ return f'{html.escape(info)}'
+
+
+ def visualize_conversation_html(messages: list[Any]) -> None:
+ """
+ Render the full conversation as a styled HTML timeline.
+
+ Displays system init, tool calls, assistant responses, and subagent delegations
+ in a visually appealing format for Jupyter notebooks.
+
+ Messages are dispatched on their class name (SystemMessage, AssistantMessage,
+ ResultMessage); any other message type is ignored. Consecutive tool calls are
+ collected and emitted together as one timeline entry. The assembled markup is
+ shown with display(HTML(...)); nothing is returned, and an empty ``messages``
+ list displays nothing.
+
+ Args:
+ messages: List of SDK message objects
+ """
+ # NOTE(review): several string literals below appear to have lost their HTML
+ # markup (they are split across lines and unterminated) - confirm against the
+ # upstream file before relying on this function.
+ if not messages:
+ return
+
+ model = _extract_model_from_messages(messages)
+ # blocks: finished timeline entries, in display order.
+ # pending_tools: badges for tool calls not yet written to blocks.
+ blocks: list[str] = []
+ pending_tools: list[str] = []
+
+ # Emit all queued tool badges as a single entry, then clear the queue.
+ def flush_tools() -> None:
+ nonlocal pending_tools
+ if pending_tools:
+ tools_html = "".join(pending_tools)
+ blocks.append(
+ f'"
+ )
+ pending_tools = []
+
+ for msg in messages:
+ msg_type = msg.__class__.__name__
+
+ if msg_type == "SystemMessage":
+ # Show only the first 8 characters of the session id, when one is present.
+ # NOTE(review): session_id is interpolated without html.escape.
+ session_id = ""
+ if hasattr(msg, "data") and isinstance(msg.data, dict):
+ if "session_id" in msg.data:
+ session_id = f" ({msg.data['session_id'][:8]}...)"
+ blocks.append(
+ f''
+ f'
โ๏ธ System
'
+ f'
Initialized{session_id}
'
+ f"
"
+ )
+
+ elif msg_type == "AssistantMessage":
+ if not msg.content:
+ continue
+
+ for block in msg.content:
+ # Blocks with a ``text`` attribute are assistant prose; queued tool
+ # badges are flushed first so they appear before the text.
+ if hasattr(block, "text"):
+ flush_tools()
+ text_html = _render_markdown_text(block.text)
+ blocks.append(
+ f''
+ f'
๐ค Assistant
'
+ f'
{text_html}
'
+ f"
"
+ )
+ # Blocks with a ``name`` attribute are treated as tool calls.
+ elif hasattr(block, "name"):
+ tool_name = block.name
+ tool_input = block.input if hasattr(block, "input") else {}
+
+ # The "Task" tool gets its own subagent entry (type and description
+ # are HTML-escaped); every other tool is queued as a badge.
+ if tool_name == "Task":
+ flush_tools()
+ subagent_type = (
+ tool_input.get("subagent_type", "unknown") if tool_input else "unknown"
+ )
+ description = tool_input.get("description", "") if tool_input else ""
+ blocks.append(
+ f''
+ f'
๐ Subagent: {html.escape(subagent_type)}
'
+ f'
{html.escape(description)}
'
+ f"
"
+ )
+ else:
+ pending_tools.append(_format_tool_badge(tool_name, tool_input))
+
+ elif msg_type == "ResultMessage":
+ flush_tools()
+
+ # Build stats
+ # Each stat is added only when the attribute exists and is truthy.
+ stats_html = ""
+ stats = []
+ if hasattr(msg, "num_turns") and msg.num_turns:
+ stats.append(
+ f'Turns: {msg.num_turns}'
+ )
+ if hasattr(msg, "usage") and msg.usage:
+ # Token count is input_tokens + output_tokens; missing keys count as 0.
+ total = msg.usage.get("input_tokens", 0) + msg.usage.get("output_tokens", 0)
+ stats.append(
+ f'Tokens: {total:,}'
+ )
+ if hasattr(msg, "total_cost_usd") and msg.total_cost_usd:
+ stats.append(
+ f'Cost: ${msg.total_cost_usd:.2f}'
+ )
+ if hasattr(msg, "duration_ms") and msg.duration_ms:
+ # duration_ms is converted from milliseconds to seconds for display.
+ stats.append(
+ f'Duration: {msg.duration_ms / 1000:.1f}s'
+ )
+
+ if stats:
+ stats_html = f'{" ".join(stats)}
'
+
+ blocks.append(
+ f''
+ f'
โ
Complete
'
+ f"{stats_html}"
+ f"
"
+ )
+
+ # Assemble timeline
+ # NOTE(review): model_text is built here but is not referenced in the visible
+ # template below - presumably it belonged to a header line that was lost; confirm.
+ model_text = f" โข {model}" if model else ""
+ timeline_html = f"""
+ {TIMELINE_CSS}
+
+
+
+ {"".join(blocks)}
+
+
+ """
+ display(HTML(timeline_html))