Skip to content

Commit 7db7152

Browse files
sarth6, claude, and DouweM
authored
Rehaul Tavily search tool (#4158)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Douwe Maan <douwe@pydantic.dev>
1 parent d33d1c3 commit 7db7152

File tree

10 files changed

+639
-10
lines changed

10 files changed

+639
-10
lines changed

docs/common-tools.md

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,39 @@ Feel free to click on the links to dive deeper into each story!
139139
"""
140140
```
141141

142+
### Configuring Parameters
143+
144+
The `tavily_search_tool` factory accepts optional parameters that control search behavior. `max_results` is always developer-controlled and never appears in the LLM tool schema. Other parameters, when provided, are fixed for all searches and hidden from the LLM's tool schema. Parameters left unset remain available for the LLM to set per-call.
145+
146+
For example, you can lock in `max_results` and `include_domains` at tool creation time while still letting the LLM control `exclude_domains`:
147+
148+
```py {title="tavily_domain_filtering.py"}
149+
import os
150+
151+
from pydantic_ai import Agent
152+
from pydantic_ai.common_tools.tavily import tavily_search_tool
153+
154+
api_key = os.getenv('TAVILY_API_KEY')
155+
assert api_key is not None
156+
157+
agent = Agent(
158+
'openai:gpt-5.2',
159+
tools=[tavily_search_tool(api_key, max_results=5, include_domains=['arxiv.org'])],
160+
instructions='Search for information and return the results.',
161+
)
162+
163+
result = agent.run_sync(
164+
'Find recent papers about transformer architectures'
165+
)
166+
print(result.output)
167+
"""
168+
Here are some recent papers about transformer architectures from arxiv.org:
169+
170+
1. "Attention Is All You Need" - The foundational paper on the Transformer model.
171+
2. "FlashAttention: Fast and Memory-Efficient Exact Attention" - Proposes an IO-aware attention algorithm.
172+
"""
173+
```
174+
142175
## Exa Search Tool
143176

144177
!!! info

pydantic_ai_slim/pydantic_ai/common_tools/tavily.py

Lines changed: 110 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
1-
from dataclasses import dataclass
2-
from typing import Literal
1+
from dataclasses import KW_ONLY, dataclass
2+
from functools import partial
3+
from inspect import signature
4+
from typing import Literal, overload
35

46
from pydantic import TypeAdapter
57
from typing_extensions import Any, TypedDict
@@ -16,6 +18,9 @@
1618

1719
__all__ = ('tavily_search_tool',)
1820

21+
_UNSET: Any = object()
22+
"""Sentinel to distinguish "not provided" from None in factory kwargs."""
23+
1924

2025
class TavilySearchResult(TypedDict):
2126
"""A Tavily search result.
@@ -44,38 +49,133 @@ class TavilySearchTool:
4449
client: AsyncTavilyClient
4550
"""The Tavily search client."""
4651

52+
_: KW_ONLY
53+
54+
max_results: int | None = None
55+
"""The maximum number of results. If None, the Tavily default is used."""
56+
4757
async def __call__(
4858
self,
4959
query: str,
50-
search_deep: Literal['basic', 'advanced'] = 'basic',
51-
topic: Literal['general', 'news'] = 'general',
52-
time_range: Literal['day', 'week', 'month', 'year', 'd', 'w', 'm', 'y'] | None = None,
60+
search_depth: Literal['basic', 'advanced', 'fast', 'ultra-fast'] = 'basic',
61+
topic: Literal['general', 'news', 'finance'] = 'general',
62+
time_range: Literal['day', 'week', 'month', 'year'] | None = None,
63+
include_domains: list[str] | None = None,
64+
exclude_domains: list[str] | None = None,
5365
) -> list[TavilySearchResult]:
5466
"""Searches Tavily for the given query and returns the results.
5567
5668
Args:
5769
query: The search query to execute with Tavily.
58-
search_deep: The depth of the search.
70+
search_depth: The depth of the search.
5971
topic: The category of the search.
6072
time_range: The time range back from the current date to filter results.
73+
include_domains: List of domains to specifically include in the search results.
74+
exclude_domains: List of domains to specifically exclude from the search results.
6175
6276
Returns:
6377
A list of search results from Tavily.
6478
"""
65-
results = await self.client.search(query, search_depth=search_deep, topic=topic, time_range=time_range) # type: ignore[reportUnknownMemberType]
79+
results: dict[str, Any] = await self.client.search( # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
80+
query,
81+
search_depth=search_depth,
82+
topic=topic,
83+
time_range=time_range, # pyright: ignore[reportArgumentType]
84+
max_results=self.max_results, # pyright: ignore[reportArgumentType]
85+
include_domains=include_domains, # pyright: ignore[reportArgumentType]
86+
exclude_domains=exclude_domains, # pyright: ignore[reportArgumentType]
87+
)
6688
return tavily_search_ta.validate_python(results['results'])
6789

6890

69-
def tavily_search_tool(api_key: str):
91+
@overload
92+
def tavily_search_tool(
93+
api_key: str,
94+
*,
95+
max_results: int | None = None,
96+
search_depth: Literal['basic', 'advanced', 'fast', 'ultra-fast'] = _UNSET,
97+
topic: Literal['general', 'news', 'finance'] = _UNSET,
98+
time_range: Literal['day', 'week', 'month', 'year'] | None = _UNSET,
99+
include_domains: list[str] | None = _UNSET,
100+
exclude_domains: list[str] | None = _UNSET,
101+
) -> Tool[Any]: ...
102+
103+
104+
@overload
105+
def tavily_search_tool(
106+
*,
107+
client: AsyncTavilyClient,
108+
max_results: int | None = None,
109+
search_depth: Literal['basic', 'advanced', 'fast', 'ultra-fast'] = _UNSET,
110+
topic: Literal['general', 'news', 'finance'] = _UNSET,
111+
time_range: Literal['day', 'week', 'month', 'year'] | None = _UNSET,
112+
include_domains: list[str] | None = _UNSET,
113+
exclude_domains: list[str] | None = _UNSET,
114+
) -> Tool[Any]: ...
115+
116+
117+
def tavily_search_tool(
118+
api_key: str | None = None,
119+
*,
120+
client: AsyncTavilyClient | None = None,
121+
max_results: int | None = None,
122+
search_depth: Literal['basic', 'advanced', 'fast', 'ultra-fast'] = _UNSET,
123+
topic: Literal['general', 'news', 'finance'] = _UNSET,
124+
time_range: Literal['day', 'week', 'month', 'year'] | None = _UNSET,
125+
include_domains: list[str] | None = _UNSET,
126+
exclude_domains: list[str] | None = _UNSET,
127+
) -> Tool[Any]:
70128
"""Creates a Tavily search tool.
71129
130+
`max_results` is always developer-controlled and does not appear in the LLM tool schema.
131+
Other parameters, when provided, are fixed for all searches and hidden from the LLM's
132+
tool schema. Parameters left unset remain available for the LLM to set per-call.
133+
72134
Args:
73-
api_key: The Tavily API key.
135+
api_key: The Tavily API key. Required if `client` is not provided.
74136
75137
You can get one by signing up at [https://app.tavily.com/home](https://app.tavily.com/home).
138+
client: An existing AsyncTavilyClient. If provided, `api_key` is ignored.
139+
This is useful for sharing a client across multiple tool instances.
140+
max_results: The maximum number of results. If None, the Tavily default is used.
141+
search_depth: The depth of the search.
142+
topic: The category of the search.
143+
time_range: The time range back from the current date to filter results.
144+
include_domains: List of domains to specifically include in the search results.
145+
exclude_domains: List of domains to specifically exclude from the search results.
76146
"""
147+
if client is None:
148+
if api_key is None:
149+
raise ValueError('Either api_key or client must be provided')
150+
client = AsyncTavilyClient(api_key)
151+
func = TavilySearchTool(client=client, max_results=max_results).__call__
152+
153+
kwargs: dict[str, Any] = {}
154+
if search_depth is not _UNSET:
155+
kwargs['search_depth'] = search_depth
156+
if topic is not _UNSET:
157+
kwargs['topic'] = topic
158+
if time_range is not _UNSET:
159+
kwargs['time_range'] = time_range
160+
if include_domains is not _UNSET:
161+
kwargs['include_domains'] = include_domains
162+
if exclude_domains is not _UNSET:
163+
kwargs['exclude_domains'] = exclude_domains
164+
165+
if kwargs:
166+
original = func
167+
func = partial(func, **kwargs)
168+
func.__name__ = original.__name__ # type: ignore[union-attr]
169+
func.__qualname__ = original.__qualname__
170+
# partial with keyword args only updates the defaults; it does not remove the params.
171+
# Set __signature__ explicitly to exclude bound params from the tool schema.
172+
orig_sig = signature(original)
173+
func.__signature__ = orig_sig.replace( # type: ignore[attr-defined]
174+
parameters=[p for name, p in orig_sig.parameters.items() if name not in kwargs]
175+
)
176+
77177
return Tool[Any](
78-
TavilySearchTool(client=AsyncTavilyClient(api_key)).__call__,
178+
func, # pyright: ignore[reportArgumentType]
79179
name='tavily_search',
80180
description='Searches Tavily for the given query and returns the results.',
81181
)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,7 @@ dev = [
101101
"dirty-equals>=0.9.0",
102102
"duckduckgo-search>=7.0.0",
103103
"exa-py>=2.0.0",
104+
"tavily-python>=0.5.0",
104105
"inline-snapshot>=0.19.3",
105106
"pytest>=9.0.0",
106107
"pytest-examples>=0.0.18",
Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- '*/*'
6+
accept-encoding:
7+
- gzip, deflate, br, zstd
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '78'
12+
content-type:
13+
- application/json
14+
host:
15+
- api.tavily.com
16+
method: POST
17+
parsed_body:
18+
query: What is Pydantic AI?
19+
search_depth: basic
20+
topic: general
21+
uri: https://api.tavily.com/search
22+
response:
23+
headers:
24+
connection:
25+
- keep-alive
26+
content-length:
27+
- '3705'
28+
content-security-policy:
29+
- default-src 'none'; script-src 'self'; connect-src 'self'; img-src 'self'; style-src 'self';base-uri 'self';form-action
30+
'self'; require-trusted-types-for 'script'; upgrade-insecure-requests;
31+
content-type:
32+
- application/json
33+
parsed_body:
34+
answer: null
35+
follow_up_questions: null
36+
images: []
37+
query: What is Pydantic AI?
38+
request_id: 3b2f7385-b01d-479e-9825-1ce0f8c59b93
39+
response_time: 0.89
40+
results:
41+
- content: '## AI Agent Insider. # Pydantic AI: Agent Framework. Introducing **Pydantic AI**-- a groundbreaking Python
42+
framework specifically designed to simplify the creation of production-grade AI agents. **Pydantic AI** is a Python
43+
framework that acts as a bridge between developers and LLMs, providing tools to create **agents** -- entities that
44+
execute specific tasks based on system prompts, functions, and structured outputs. Here''s a basic example of using
45+
Pydantic AI to create an agent that responds to user queries:. from pydantic_ai import Agentagent = Agent("openai:gpt-4",
46+
system_prompt="Be a helpful assistant.")result = await agent.run("Hello, how are you?")print(result.data) # Outputs
47+
the response. from pydantic_ai import ModelRetry@agent.tooldef validate_data(ctx): if not ctx.input_data: raise
48+
ModelRetry("Data missing, retrying..."). Pydantic AI is transforming how developers build AI agents. Install Pydantic
49+
AI today and build your first agent!**. ### Agent Framework / shim to use Pydantic with LLMs. Contribute to pydantic/pydantic-ai
50+
development by creating an account.... ## GitHub - pydantic/pydantic-ai: Agent Framework / shim to use Pydantic
51+
with LLMs. ## Published in AI Agent Insider.'
52+
raw_content: null
53+
score: 0.9999875
54+
title: 'Pydantic AI: Agent Framework'
55+
url: https://medium.com/ai-agent-insider/pydantic-ai-agent-framework-02b138e8db71
56+
- content: Pydantic AI is a Python agent framework designed to help you quickly, confidently, and painlessly build production
57+
grade applications and workflows with
58+
raw_content: null
59+
score: 0.99997807
60+
title: Pydantic AI - Pydantic AI
61+
url: https://ai.pydantic.dev/
62+
- content: Pydantic AI lets you integrate large language models like GPT-5 **directly into your Python applications**.
63+
raw_content: null
64+
score: 0.9999398
65+
title: Build Production-Ready AI Agents in Python with Pydantic AI
66+
url: https://www.youtube.com/watch?v=-WB0T0XmDrY
67+
- content: '*"Pydantic AI is a Python agent framework designed to help you quickly, confidently, and painlessly build
68+
production grade applications and workflows with Generative AI."*. So they built Pydantic AI with a single, simple
69+
aim, to bring that FastAPI feeling to building applications and workflows with generative AI. A: Pydantic AI is
70+
a Python agent framework from the creators of Pydantic Validation. Q: How is Pydantic AI different from other agent
71+
frameworks like LangChain or LlamaIndex? Q: What LLM providers does Pydantic AI support? Q: Can I use my own custom
72+
models with Pydantic AI? Q: Can I use Pydantic AI with existing FastAPI applications? A: Yes, Pydantic AI is designed
73+
to integrate well with FastAPI and other Python web frameworks. A: Pydantic AI uses Pydantic models to define structured
74+
output types, ensuring LLM responses are validated and type-safe. A: Yes, Pydantic AI is designed specifically for
75+
production-grade applications with features like durable execution, observability integration, and type safety.'
76+
raw_content: null
77+
score: 0.9999125
78+
title: What is Pydantic AI?. Build production-ready AI agents with...
79+
url: https://medium.com/@tahirbalarabe2/what-is-pydantic-ai-15cc81dea3c3
80+
- content: I've been using Pydantic AI to build some basic agents and multi agents and it seems quite straight forward
81+
and I'm quite pleased with it.
82+
raw_content: null
83+
score: 0.9999001
84+
title: 'Pydantic AI : r/LLMDevs'
85+
url: https://www.reddit.com/r/LLMDevs/comments/1iih8az/pydantic_ai/
86+
status:
87+
code: 200
88+
message: OK
89+
version: 1
Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,66 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- '*/*'
6+
accept-encoding:
7+
- gzip, deflate, br, zstd
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '130'
12+
content-type:
13+
- application/json
14+
host:
15+
- api.tavily.com
16+
method: POST
17+
parsed_body:
18+
include_domains:
19+
- arxiv.org
20+
max_results: 2
21+
query: attention mechanisms
22+
search_depth: basic
23+
topic: general
24+
uri: https://api.tavily.com/search
25+
response:
26+
headers:
27+
connection:
28+
- keep-alive
29+
content-length:
30+
- '1668'
31+
content-security-policy:
32+
- default-src 'none'; script-src 'self'; connect-src 'self'; img-src 'self'; style-src 'self';base-uri 'self';form-action
33+
'self'; require-trusted-types-for 'script'; upgrade-insecure-requests;
34+
content-type:
35+
- application/json
36+
parsed_body:
37+
answer: null
38+
follow_up_questions: null
39+
images: []
40+
query: attention mechanisms
41+
request_id: 139680ef-e03d-48c8-a3bc-950081750288
42+
response_time: 1.31
43+
results:
44+
- content: by H Hays · 2026 · Cited by 1 -- Attention mechanisms represent a fundamental paradigm shift in neural network
45+
architectures, enabling models to selectively focus on relevant
46+
raw_content: null
47+
score: 0.81770587
48+
title: '[2601.03329] Attention mechanisms in neural networks'
49+
url: https://arxiv.org/abs/2601.03329
50+
- content: '# Title:A General Survey on Attention Mechanisms in Deep Learning. View a PDF of the paper titled A General
51+
Survey on Attention Mechanisms in Deep Learning, by Gianni Brauwers and Flavius Frasincar. > Abstract:Attention
52+
is an important mechanism that can be employed for a variety of deep learning models across many different domains
53+
and tasks. The various attention mechanisms are explained by means of a framework consisting of a general attention
54+
model, uniform notation, and a comprehensive taxonomy of attention mechanisms. | Subjects: | Machine Learning (cs.LG)
55+
|. | Cite as: | arXiv:2203.14263 [cs.LG] |. | | (or arXiv:2203.14263v1 [cs.LG] for this version) |. View a PDF
56+
of the paper titled A General Survey on Attention Mechanisms in Deep Learning, by Gianni Brauwers and Flavius Frasincar.
57+
### References & Citations. # Bibliographic and Citation Tools. # Recommenders and Search Tools. Have an idea for
58+
a project that will add value for arXiv''s community?'
59+
raw_content: null
60+
score: 0.8138313
61+
title: A General Survey on Attention Mechanisms in Deep ...
62+
url: https://arxiv.org/abs/2203.14263
63+
status:
64+
code: 200
65+
message: OK
66+
version: 1

0 commit comments

Comments
 (0)