Skip to content

Commit 0b7123d

Browse files
committed
Merge branch 'main' into cb/pytest
2 parents 042d52e + 7d4be9e commit 0b7123d

File tree

5 files changed

+484
-12
lines changed

5 files changed

+484
-12
lines changed

python/thirdweb-ai/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ dependencies = [
1919
"pydantic>=2.10.6,<3",
2020
"jsonref>=1.1.0,<2",
2121
"httpx>=0.28.1,<0.29",
22+
"aiohttp>=3.11.14",
2223
]
2324

2425
[project.optional-dependencies]
@@ -51,6 +52,7 @@ dev = [
5152
"pytest-asyncio>=0.23.5,<0.24",
5253
"pytest-mock>=3.12.0,<4",
5354
"pytest-cov>=4.1.0,<5",
55+
"ipython>=8.34.0",
5456
]
5557

5658
[tool.hatch.build.targets.sdist]

python/thirdweb-ai/src/thirdweb_ai/common/utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import importlib.util
22
import re
3+
from typing import Any
34

45

56
def has_module(module_name: str) -> bool:
@@ -33,3 +34,23 @@ def normalize_chain_id(
3334
return [extract_digits(c) for c in in_value]
3435

3536
return extract_digits(in_value)
37+
38+
39+
def is_encoded(encoded_data: str) -> bool:
    """Return True when the string parses as hexadecimal byte data.

    A leading "0x" prefix, if present, is ignored before parsing.
    """
    hex_part = encoded_data.removeprefix("0x")
    try:
        bytes.fromhex(hex_part)
    except ValueError:
        return False
    return True
47+
48+
49+
def clean_resolve(out: dict[str, Any]) -> dict[str, Any]:
    """Strip bulky fields from a resolve-endpoint response.

    Removes hex-encoded calldata ("data") and "logs_bloom" from each
    transaction so the payload stays small for LLM consumption.

    Mutates ``out`` in place and returns it.

    Fix: the original indexed ``out["data"]`` unconditionally, raising
    KeyError on responses without a "data" key (e.g. error payloads);
    those are now passed through unchanged.
    """
    transactions = out.get("data", {}).get("transactions")
    if transactions:
        for transaction in transactions:
            # Drop raw calldata only when it really is hex-encoded bytes.
            if "data" in transaction and is_encoded(transaction["data"]):
                transaction.pop("data")
            transaction.pop("logs_bloom", None)
    return out

python/thirdweb-ai/src/thirdweb_ai/services/insight.py

Lines changed: 97 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Annotated, Any
22

3-
from thirdweb_ai.common.utils import normalize_chain_id
3+
from thirdweb_ai.common.utils import clean_resolve, normalize_chain_id
44
from thirdweb_ai.services.service import Service
55
from thirdweb_ai.tools.tool import tool
66

@@ -12,15 +12,15 @@ def __init__(self, secret_key: str, chain_id: int | str | list[int | str]):
1212
self.chain_ids = normalized if isinstance(normalized, list) else [normalized]
1313

1414
@tool(
15-
description="Retrieve blockchain events with flexible filtering options. Use this to search for specific events or to analyze event patterns across multiple blocks."
15+
description="Retrieve blockchain events with flexible filtering options. Use this to search for specific events or to analyze event patterns across multiple blocks. Do not use this tool to simply look up a single transaction."
1616
)
1717
def get_all_events(
1818
self,
1919
chain: Annotated[
2020
list[int | str] | int | str | None,
2121
"Chain ID(s) to query (e.g., 1 for Ethereum Mainnet, 137 for Polygon). Specify multiple IDs as a list [1, 137] for cross-chain queries (max 5).",
2222
] = None,
23-
address: Annotated[
23+
contract_address: Annotated[
2424
str | None,
2525
"Contract address to filter events by (e.g., '0x1234...'). Only return events emitted by this contract.",
2626
] = None,
@@ -55,8 +55,8 @@ def get_all_events(
5555
normalized_chain = normalize_chain_id(chain) if chain is not None else self.chain_ids
5656
if normalized_chain:
5757
params["chain"] = normalized_chain
58-
if address:
59-
params["filter_address"] = address
58+
if contract_address:
59+
params["filter_address"] = contract_address
6060
if block_number_gte:
6161
params["filter_block_number_gte"] = block_number_gte
6262
if block_number_lt:
@@ -294,13 +294,13 @@ def get_token_prices(
294294
return self._get("tokens/price", params)
295295

296296
@tool(
297-
description="Get contract ABI and metadata about a smart contract, including name, symbol, decimals, and other contract-specific information. This tool also retrieve the Application Binary Interface (ABI) for a smart contract. Essential for decoding contract data and interacting with the contract"
297+
description="Get contract ABI and metadata about a smart contract, including name, symbol, decimals, and other contract-specific information. Use this when asked about a contract's functions, interface, or capabilities. This tool specifically retrieves details about deployed smart contracts (NOT regular wallet addresses or transaction hashes)."
298298
)
299299
def get_contract_metadata(
300300
self,
301301
contract_address: Annotated[
302302
str,
303-
"The contract address to get metadata for (e.g., '0x1234...'). Works for tokens and other contract types.",
303+
"The contract address to get metadata for (e.g., '0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2' for WETH). Must be a deployed smart contract address (not a regular wallet). Use this for queries like 'what functions does this contract have' or 'get the ABI for contract 0x1234...'.",
304304
],
305305
chain: Annotated[
306306
list[int | str] | int | str | None,
@@ -425,13 +425,55 @@ def get_nft_transfers(
425425
return self._get(f"nfts/transfers/{contract_address}", params)
426426

427427
@tool(
    description="Get detailed information about a specific block by its number or hash. Use this when asked about blockchain blocks (e.g., 'What's in block 12345678?' or 'Tell me about this block: 0xabc123...'). This tool is specifically for block data, NOT transactions, addresses, or contracts."
)
def get_block_details(
    self,
    block_identifier: Annotated[
        str,
        "Block number or block hash to look up. Can be either a simple number (e.g., '12345678') or a block hash (e.g., '0xd4e56740f876aef8c010b86a40d5f56745a118d0906a34e69aec8c0db1cb8fa3' for Ethereum block 0). Use for queries like 'what happened in block 14000000' or 'show me block 0xd4e56...'.",
    ],
    chain: Annotated[
        list[int | str] | int | str | None,
        "Chain ID(s) to query (e.g., 1 for Ethereum). Specify the blockchain network where the block exists.",
    ] = None,
) -> dict[str, Any]:
    """Resolve a block by number or hash and return its details, cleaned of bulky fields."""
    # Fall back to the chains configured on the service when none is given.
    chain_filter = self.chain_ids if chain is None else normalize_chain_id(chain)
    params: dict[str, Any] = {"chain": chain_filter} if chain_filter else {}
    return clean_resolve(self._get(f"resolve/{block_identifier}", params))
447+
448+
@tool(
    description="Look up transactions for a wallet or contract address. Use this when asked about a specific Ethereum address (e.g., '0x1234...') to get account details including balance, transaction count, and contract verification status. This tool is specifically for addresses (accounts and contracts), NOT transaction hashes or ENS names."
)
def get_address_transactions(
    self,
    address: Annotated[
        str,
        "Wallet or contract address to look up (e.g., '0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045' for Vitalik's address). Must be a valid blockchain address starting with 0x and 42 characters long.",
    ],
    chain: Annotated[
        list[int | str] | int | str | None,
        "Chain ID(s) to query (e.g., 1 for Ethereum). Specify the blockchain network for the address.",
    ] = None,
) -> dict[str, Any]:
    """Resolve an address and return its transaction/account details, cleaned of bulky fields."""
    # Fall back to the chains configured on the service when none is given.
    chain_filter = self.chain_ids if chain is None else normalize_chain_id(chain)
    params: dict[str, Any] = {"chain": chain_filter} if chain_filter else {}
    return clean_resolve(self._get(f"resolve/{address}", params))
468+
469+
@tool(
470+
description="Look up transactions associated with an ENS domain name (anything ending in .eth like 'vitalik.eth'). This tool is specifically for ENS domains, NOT addresses, transaction hashes, or contract queries."
429471
)
430-
def resolve(
472+
def get_ens_transactions(
431473
self,
432-
input_data: Annotated[
474+
ens_name: Annotated[
433475
str,
434-
"Any blockchain input data: block number, transaction or block hash, address, event signature or function selector",
476+
"ENS name to resolve (e.g., 'vitalik.eth', 'thirdweb.eth'). Must be a valid ENS domain ending with .eth.",
435477
],
436478
chain: Annotated[
437479
list[int | str] | int | str | None,
@@ -442,4 +484,47 @@ def resolve(
442484
normalized_chain = normalize_chain_id(chain) if chain is not None else self.chain_ids
443485
if normalized_chain:
444486
params["chain"] = normalized_chain
445-
return self._get(f"resolve/{input_data}", params)
487+
out = self._get(f"resolve/{ens_name}", params)
488+
return clean_resolve(out)
489+
490+
@tool(
    description="Get detailed information about a specific transaction by its hash. Use this when asked to analyze, look up, check, or get details about a transaction hash (e.g., 'What can you tell me about this transaction: 0x5407ea41...'). This tool specifically deals with transaction hashes (txid/txhash), NOT addresses, contracts, or ENS names."
)
def get_transaction_details(
    self,
    transaction_hash: Annotated[
        str,
        "Transaction hash to look up (e.g., '0x5407ea41de24b7353d70eab42d72c92b42a44e140f930e349973cfc7b8c9c1d7'). Must be a valid transaction hash beginning with 0x and typically 66 characters long. Use this for queries like 'tell me about this transaction' or 'what happened in transaction 0x1234...'.",
    ],
    chain: Annotated[
        list[int | str] | int | str | None,
        "Chain ID(s) to query (e.g., 1 for Ethereum). Specify the blockchain network where the transaction exists.",
    ] = None,
) -> dict[str, Any]:
    """Resolve a transaction hash and return its details, cleaned of bulky fields."""
    # Fall back to the chains configured on the service when none is given.
    chain_filter = self.chain_ids if chain is None else normalize_chain_id(chain)
    params: dict[str, Any] = {"chain": chain_filter} if chain_filter else {}
    return clean_resolve(self._get(f"resolve/{transaction_hash}", params))
510+
511+
@tool(
    description="Decode a function or event signature. Use this when you need to understand what a specific function selector or event signature does and what parameters it accepts."
)
def decode_signature(
    self,
    signature: Annotated[
        str,
        "Function or event signature to decode (e.g., '0x095ea7b3' for the approve function). Usually begins with 0x.",
    ],
    chain: Annotated[
        list[int | str] | int | str | None,
        "Chain ID(s) to query (e.g., 1 for Ethereum). Specify to improve signature lookup accuracy.",
    ] = None,
) -> dict[str, Any]:
    """Resolve a function selector or event signature and return the decoded details."""
    # Fall back to the chains configured on the service when none is given.
    chain_filter = self.chain_ids if chain is None else normalize_chain_id(chain)
    params: dict[str, Any] = {"chain": chain_filter} if chain_filter else {}
    return clean_resolve(self._get(f"resolve/{signature}", params))
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import asyncio
2+
import hashlib
3+
import json
4+
import mimetypes
5+
import os
6+
from collections.abc import AsyncGenerator
7+
from dataclasses import asdict, is_dataclass
8+
from io import BytesIO
9+
from pathlib import Path
10+
from typing import Annotated, Any
11+
12+
import httpx
13+
from pydantic import BaseModel
14+
15+
from thirdweb_ai.services.service import Service
16+
from thirdweb_ai.tools.tool import tool
17+
18+
19+
async def async_read_file_chunks(file_path: str | Path, chunk_size: int = 8192) -> AsyncGenerator[bytes, None]:
20+
"""Read file in chunks asynchronously to avoid loading entire file into memory."""
21+
async with asyncio.Lock():
22+
path_obj = Path(file_path) if isinstance(file_path, str) else file_path
23+
with path_obj.open("rb") as f:
24+
while chunk := f.read(chunk_size):
25+
yield chunk
26+
27+
28+
class Storage(Service):
29+
def __init__(self, secret_key: str):
30+
super().__init__(base_url="https://storage.thirdweb.com", secret_key=secret_key)
31+
self.gateway_url = self._get_gateway_url()
32+
self.gateway_hostname = "ipfscdn.io"
33+
34+
def _get_gateway_url(self) -> str:
35+
return hashlib.sha256(self.secret_key.encode()).hexdigest()[:32]
36+
37+
@tool(description="Fetch content from IPFS by hash. Retrieves data stored on IPFS using the thirdweb gateway.")
38+
def fetch_ipfs_content(
39+
self,
40+
ipfs_hash: Annotated[
41+
str, "The IPFS hash/URI to fetch content from (e.g., 'ipfs://QmXyZ...'). Must start with 'ipfs://'."
42+
],
43+
) -> dict[str, Any]:
44+
if not ipfs_hash.startswith("ipfs://"):
45+
return {"error": "Invalid IPFS hash"}
46+
47+
ipfs_hash = ipfs_hash.removeprefix("ipfs://")
48+
path = f"https://{self.gateway_url}.{self.gateway_hostname}.ipfscdn.io/ipfs/{ipfs_hash}"
49+
return self._get(path)
50+
51+
async def _async_post_file(self, url: str, files: dict[str, Any]) -> dict[str, Any]:
52+
"""Post files to a URL using async client with proper authorization headers."""
53+
headers = self._make_headers()
54+
# Remove the Content-Type as httpx will set it correctly for multipart/form-data
55+
headers.pop("Content-Type", None)
56+
57+
async with httpx.AsyncClient() as client:
58+
response = await client.post(url, files=files, headers=headers)
59+
response.raise_for_status()
60+
return response.json()
61+
62+
def _is_json_serializable(self, data: Any) -> bool:
63+
"""Check if data is JSON serializable (dict, dataclass, or BaseModel)."""
64+
return isinstance(data, dict) or is_dataclass(data) or isinstance(data, BaseModel)
65+
66+
def _convert_to_json(self, data: Any) -> str:
67+
"""Convert data to JSON string."""
68+
if isinstance(data, dict):
69+
return json.dumps(data)
70+
if is_dataclass(data):
71+
# Handle dataclass properly
72+
if isinstance(data, type):
73+
raise ValueError(f"Expected dataclass instance, got dataclass type: {data}")
74+
return json.dumps(asdict(data))
75+
if isinstance(data, BaseModel):
76+
return data.model_dump_json()
77+
raise ValueError(f"Cannot convert {type(data)} to JSON")
78+
79+
def _is_valid_path(self, path: str) -> bool:
80+
"""Check if the string is a valid file or directory path."""
81+
return Path(path).exists()
82+
83+
async def _prepare_directory_files(
84+
self, directory_path: Path, chunk_size: int = 8192
85+
) -> list[tuple[str, BytesIO, str]]:
86+
"""
87+
Prepare files from a directory for upload, preserving directory structure.
88+
Returns a list of tuples (relative_path, file_buffer, content_type).
89+
"""
90+
files_data = []
91+
92+
for root, _, files in os.walk(directory_path):
93+
for file in files:
94+
file_path = Path(root) / file
95+
# Preserve the directory structure in the relative path
96+
relative_path = str(file_path.relative_to(directory_path))
97+
content_type = mimetypes.guess_type(str(file_path))[0] or "application/octet-stream"
98+
99+
# Create a buffer and read the file in chunks
100+
buffer = BytesIO()
101+
async for chunk in async_read_file_chunks(file_path, chunk_size):
102+
buffer.write(chunk)
103+
buffer.seek(0) # Reset buffer position
104+
105+
files_data.append((relative_path, buffer, content_type))
106+
107+
return files_data
108+
109+
@tool(
110+
description="Upload a file, directory, or JSON data to IPFS. Stores any type on decentralized storage and returns an IPFS URI."
111+
)
112+
async def upload_to_ipfs(
113+
self,
114+
data: Annotated[
115+
Any, "Data to upload: can be a file path, directory path, dict, dataclass, or BaseModel instance."
116+
],
117+
) -> str:
118+
"""
119+
Upload data to IPFS and return the IPFS hash.
120+
121+
Supports:
122+
- File paths (streams content)
123+
- Directory paths (preserves directory structure)
124+
- Dict objects (converted to JSON)
125+
- Dataclass instances (converted to JSON)
126+
- Pydantic BaseModel instances (converted to JSON)
127+
128+
Always uses streaming for file uploads to handle large files efficiently.
129+
"""
130+
storage_url = f"{self.base_url}/ipfs/upload"
131+
132+
# Handle JSON-serializable data types
133+
if self._is_json_serializable(data):
134+
json_content = self._convert_to_json(data)
135+
files = {"file": ("data.json", BytesIO(json_content.encode()), "application/json")}
136+
body = await self._async_post_file(storage_url, files)
137+
return f"ipfs://{body['IpfsHash']}"
138+
139+
# Handle string paths to files or directories
140+
if isinstance(data, str) and self._is_valid_path(data):
141+
path = Path(data)
142+
143+
# Single file upload with streaming
144+
if path.is_file():
145+
content_type = mimetypes.guess_type(str(path))[0] or "application/octet-stream"
146+
147+
# Create a buffer to hold chunks for streaming upload
148+
buffer = BytesIO()
149+
async for chunk in async_read_file_chunks(path):
150+
buffer.write(chunk)
151+
152+
buffer.seek(0) # Reset buffer position
153+
files = {"file": (path.name, buffer, content_type)}
154+
body = await self._async_post_file(storage_url, files)
155+
return f"ipfs://{body['IpfsHash']}"
156+
157+
# Directory upload - preserve directory structure
158+
if path.is_dir():
159+
# Prepare all files from the directory with preserved structure
160+
files_data = await self._prepare_directory_files(path)
161+
162+
if not files_data:
163+
raise ValueError(f"Directory is empty: {data}")
164+
165+
files_dict = {
166+
f"file{i}": (relative_path, buffer, content_type)
167+
for i, (relative_path, buffer, content_type) in enumerate(files_data)
168+
}
169+
body = await self._async_post_file(storage_url, files_dict)
170+
return f"ipfs://{body['IpfsHash']}"
171+
172+
raise ValueError(f"Path exists but is neither a file nor a directory: {data}")
173+
174+
try:
175+
content_type = mimetypes.guess_type(data)[0] or "application/octet-stream"
176+
files = {"file": ("data.txt", BytesIO(data.encode()), content_type)}
177+
body = await self._async_post_file(storage_url, files)
178+
return f"ipfs://{body['IpfsHash']}"
179+
except TypeError as e:
180+
raise TypeError(
181+
f"Unsupported data type: {type(data)}. Must be a valid file/directory path, dict, dataclass, or BaseModel."
182+
) from e

0 commit comments

Comments
 (0)