14 changes: 14 additions & 0 deletions deepsearcher/agent/chain_of_rag.py
@@ -4,6 +4,7 @@
from deepsearcher.agent.collection_router import CollectionRouter
from deepsearcher.embedding.base import BaseEmbedding
from deepsearcher.llm.base import BaseLLM
from deepsearcher.llm_tracer import lazy_traceable
from deepsearcher.utils import log
from deepsearcher.vector_db import RetrievalResult
from deepsearcher.vector_db.base import BaseVectorDB, deduplicate_results
@@ -119,6 +120,7 @@ def __init__(
)
self.text_window_splitter = text_window_splitter

@lazy_traceable(run_type="llm", name="reflect_get_subquery")
def _reflect_get_subquery(self, query: str, intermediate_context: List[str]) -> Tuple[str, int]:
chat_response = self.llm.chat(
[
@@ -133,6 +135,7 @@ def _reflect_get_subquery(self, query: str, intermediate_context: List[str]) ->
)
return self.llm.remove_think(chat_response.content), chat_response.total_tokens

@lazy_traceable(run_type="retriever", name="retrieve_and_answer")
def _retrieve_and_answer(self, query: str) -> Tuple[str, List[RetrievalResult], int]:
consume_tokens = 0
if self.route_collection:
@@ -169,6 +172,7 @@ def _retrieve_and_answer(self, query: str) -> Tuple[str, List[RetrievalResult], int]:
consume_tokens + chat_response.total_tokens,
)

@lazy_traceable(run_type="parser", name="get_supported_docs")
def _get_supported_docs(
self,
retrieved_results: List[RetrievalResult],
@@ -199,6 +203,7 @@ def _get_supported_docs(
token_usage = chat_response.total_tokens
return supported_retrieved_results, token_usage

@lazy_traceable(run_type="llm", name="check_has_enough_info")
def _check_has_enough_info(
self, query: str, intermediate_contexts: List[str]
) -> Tuple[bool, int]:
@@ -219,6 +224,9 @@ def _check_has_enough_info(
has_enough_info = self.llm.remove_think(chat_response.content).strip().lower() == "yes"
return has_enough_info, chat_response.total_tokens

@lazy_traceable(
run_type="retriever", name="chain_of_thought_retrieve", tags=["rag", "retrieval"]
)
def retrieve(self, query: str, **kwargs) -> Tuple[List[RetrievalResult], int, dict]:
"""
Retrieves relevant documents based on the input query and iteratively refines the search.
@@ -274,6 +282,12 @@ def retrieve(self, query: str, **kwargs) -> Tuple[List[RetrievalResult], int, dict]:
additional_info = {"intermediate_context": intermediate_contexts}
return all_retrieved_results, token_usage, additional_info

@lazy_traceable(
run_type="chain",
name="chain_of_rag_query",
tags=["rag", "reasoning"],
metadata={"description": "Multi-hop reasoning query agent"},
)
def query(self, query: str, **kwargs) -> Tuple[str, List[RetrievalResult], int]:
"""
Executes a query and returns the final answer along with all retrieved results and total token usage.
6 changes: 6 additions & 0 deletions deepsearcher/agent/collection_router.py
@@ -2,6 +2,7 @@

from deepsearcher.agent.base import BaseAgent
from deepsearcher.llm.base import BaseLLM
from deepsearcher.llm_tracer import lazy_traceable
from deepsearcher.utils import log
from deepsearcher.vector_db.base import BaseVectorDB

@@ -39,6 +40,11 @@ def __init__(self, llm: BaseLLM, vector_db: BaseVectorDB, dim: int, **kwargs):
for collection_info in self.vector_db.list_collections(dim=dim)
]

@lazy_traceable(
run_type="tool",
name="collection_router",
metadata={"description": "Routes queries to appropriate vector collections"},
)
def invoke(self, query: str, dim: int, **kwargs) -> Tuple[List[str], int]:
"""
Determine which collections are relevant for the given query.
12 changes: 12 additions & 0 deletions deepsearcher/agent/deep_search.py
@@ -5,6 +5,7 @@
from deepsearcher.agent.collection_router import CollectionRouter
from deepsearcher.embedding.base import BaseEmbedding
from deepsearcher.llm.base import BaseLLM
from deepsearcher.llm_tracer import lazy_traceable
from deepsearcher.utils import log
from deepsearcher.vector_db import RetrievalResult
from deepsearcher.vector_db.base import BaseVectorDB, deduplicate_results
@@ -108,6 +109,7 @@ def __init__(
)
self.text_window_splitter = text_window_splitter

@lazy_traceable(run_type="llm", name="generate_sub_queries")
def _generate_sub_queries(self, original_query: str) -> Tuple[List[str], int]:
chat_response = self.llm.chat(
messages=[
@@ -117,6 +119,7 @@ def _generate_sub_queries(self, original_query: str) -> Tuple[List[str], int]:
response_content = self.llm.remove_think(chat_response.content)
return self.llm.literal_eval(response_content), chat_response.total_tokens

@lazy_traceable(run_type="retriever", name="search_chunks_from_vectordb")
async def _search_chunks_from_vectordb(self, query: str, sub_queries: List[str]):
consume_tokens = 0
if self.route_collection:
@@ -170,6 +173,7 @@ async def _search_chunks_from_vectordb(self, query: str, sub_queries: List[str]):
)
return all_retrieved_results, consume_tokens

@lazy_traceable(run_type="llm", name="generate_gap_queries")
def _generate_gap_queries(
self, original_query: str, all_sub_queries: List[str], all_chunks: List[RetrievalResult]
) -> Tuple[List[str], int]:
@@ -184,6 +188,7 @@ def _generate_gap_queries(
response_content = self.llm.remove_think(chat_response.content)
return self.llm.literal_eval(response_content), chat_response.total_tokens

@lazy_traceable(run_type="retriever", name="retrieve_documents")
def retrieve(self, original_query: str, **kwargs) -> Tuple[List[RetrievalResult], int, dict]:
"""
Retrieve relevant documents from the knowledge base for the given query.
@@ -203,6 +208,7 @@ def retrieve(self, original_query: str, **kwargs) -> Tuple[List[RetrievalResult], int, dict]:
"""
return asyncio.run(self.async_retrieve(original_query, **kwargs))

@lazy_traceable(run_type="retriever", name="async_retrieve_documents")
async def async_retrieve(
self, original_query: str, **kwargs
) -> Tuple[List[RetrievalResult], int, dict]:
@@ -268,6 +274,12 @@ async def async_retrieve(
additional_info = {"all_sub_queries": all_sub_queries}
return all_search_res, total_tokens, additional_info

@lazy_traceable(
run_type="chain",
name="deep_search_query",
tags=["search", "rag"],
metadata={"description": "Comprehensive search agent query"},
)
def query(self, query: str, **kwargs) -> Tuple[str, List[RetrievalResult], int]:
"""
Query the agent and generate an answer based on retrieved documents.
7 changes: 7 additions & 0 deletions deepsearcher/agent/naive_rag.py
@@ -4,6 +4,7 @@
from deepsearcher.agent.collection_router import CollectionRouter
from deepsearcher.embedding.base import BaseEmbedding
from deepsearcher.llm.base import BaseLLM
from deepsearcher.llm_tracer import lazy_traceable
from deepsearcher.utils import log
from deepsearcher.vector_db.base import BaseVectorDB, RetrievalResult, deduplicate_results

@@ -92,6 +93,12 @@ def retrieve(self, query: str, **kwargs) -> Tuple[List[RetrievalResult], int, dict]:
all_retrieved_results = deduplicate_results(all_retrieved_results)
return all_retrieved_results, consume_tokens, {}

@lazy_traceable(
run_type="chain",
name="naive_rag_query",
tags=["rag", "simple"],
metadata={"description": "Simple RAG query agent"},
)
def query(self, query: str, **kwargs) -> Tuple[str, List[RetrievalResult], int]:
"""
Query the agent and generate an answer based on retrieved documents.
9 changes: 9 additions & 0 deletions deepsearcher/agent/rag_router.py
@@ -2,6 +2,7 @@

from deepsearcher.agent import RAGAgent
from deepsearcher.llm.base import BaseLLM
from deepsearcher.llm_tracer import lazy_traceable
from deepsearcher.utils import log
from deepsearcher.vector_db import RetrievalResult

@@ -53,6 +54,7 @@ def __init__(
"Please provide agent descriptions or set __description__ attribute for each agent class."
)

@lazy_traceable(run_type="tool", name="rag_agent_router")
def _route(self, query: str) -> Tuple[RAGAgent, int]:
description_str = "\n".join(
[f"[{i + 1}]: {description}" for i, description in enumerate(self.agent_descriptions)]
@@ -76,11 +78,18 @@ def _route(self, query: str) -> Tuple[RAGAgent, int]:
)
return self.rag_agents[selected_agent_index], chat_response.total_tokens

@lazy_traceable(run_type="retriever", name="router_retrieve")
def retrieve(self, query: str, **kwargs) -> Tuple[List[RetrievalResult], int, dict]:
agent, n_token_router = self._route(query)
retrieved_results, n_token_retrieval, metadata = agent.retrieve(query, **kwargs)
return retrieved_results, n_token_router + n_token_retrieval, metadata

@lazy_traceable(
run_type="chain",
name="router_query",
tags=["router", "rag"],
metadata={"description": "Query router to appropriate RAG agent"},
)
def query(self, query: str, **kwargs) -> Tuple[str, List[RetrievalResult], int]:
agent, n_token_router = self._route(query)
answer, retrieved_results, n_token_retrieval = agent.query(query, **kwargs)
15 changes: 14 additions & 1 deletion deepsearcher/cli.py
@@ -36,7 +36,6 @@ def main():
sys.exit(1)

config = Configuration() # Customize your config here
init_config(config=config)

parser = argparse.ArgumentParser(prog="deepsearcher", description="Deep Searcher.")
subparsers = parser.add_subparsers(dest="subcommand", title="subcommands")
@@ -50,6 +49,11 @@
default=3,
help="Max iterations of reflection. Default is 3.",
)
query_parser.add_argument(
"--llm_tracing",
action="store_true",
help="Enable LangSmith tracing for LLM calls. Requires LANGSMITH_API_KEY and LANGSMITH_PROJECT environment variables.",
)

## Arguments of loading
load_parser = subparsers.add_parser(
@@ -87,14 +91,23 @@
)

args = parser.parse_args()

if args.subcommand == "query":
# Only enable LLM tracing for query subcommand if requested
if hasattr(args, "llm_tracing") and args.llm_tracing:
config.llm_tracing = True

init_config(config=config)

final_answer, refs, consumed_tokens = query(args.query, max_iter=args.max_iter)
log.color_print("\n==== FINAL ANSWER====\n")
log.color_print(final_answer)
log.color_print("\n### References\n")
for i, ref in enumerate(refs):
log.color_print(f"{i + 1}. {ref.text[:60]}… {ref.reference}")
elif args.subcommand == "load":
init_config(config=config)

urls = [url for url in args.load_path if url.startswith("http")]
local_files = [file for file in args.load_path if not file.startswith("http")]
kwargs = {}
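Note on the cli.py change: the init_config(config=config) call moves from module setup into the subcommand branches so that arguments are parsed before configuration is initialized, letting the new --llm_tracing flag take effect. Tracing is therefore opt-in per run, e.g. deepsearcher query "..." --llm_tracing (assuming the console script is installed under the prog name registered above).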
1 change: 1 addition & 0 deletions deepsearcher/config.yaml
@@ -165,6 +165,7 @@ provide_settings:

query_settings:
max_iter: 3
llm_tracing: false # Set to true to enable LangSmith tracing for LLM calls (requires LANGSMITH_API_KEY and LANGSMITH_PROJECT environment variables)

load_settings:
chunk_size: 1500
26 changes: 26 additions & 0 deletions deepsearcher/configuration.py
@@ -38,6 +38,8 @@ def __init__(self, config_path: str = DEFAULT_CONFIG_YAML_PATH):
self.provide_settings = config_data["provide_settings"]
self.query_settings = config_data["query_settings"]
self.load_settings = config_data["load_settings"]
# Load LLM tracing setting from query_settings if available
self._llm_tracing_enabled = self.query_settings.get("llm_tracing", False)

def load_config_from_yaml(self, config_path: str):
"""
@@ -88,6 +90,24 @@ def get_provider_config(self, feature: FeatureType):

return self.provide_settings[feature]

@property
def llm_tracing(self) -> bool:
"""Get the current LLM tracing status"""
return self._llm_tracing_enabled

@llm_tracing.setter
def llm_tracing(self, enabled: bool):
"""
Enable or disable LLM tracing.

When enabled, LangSmith tracing will automatically use LANGSMITH_API_KEY
and LANGSMITH_PROJECT environment variables.

Args:
enabled: Whether to enable LLM tracing
"""
self._llm_tracing_enabled = enabled


class ModuleFactory:
"""
@@ -202,6 +222,12 @@ def init_config(config: Configuration):
web_crawler, \
default_searcher, \
naive_rag

if config.llm_tracing:
from deepsearcher.llm_tracer import configure_langsmith

configure_langsmith()

module_factory = ModuleFactory(config)
llm = module_factory.create_llm()
embedding_model = module_factory.create_embedding()
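Usage sketch (hypothetical, not part of this diff): enabling tracing programmatically through the new llm_tracing property, equivalent to passing --llm_tracing on the CLI. Assumes Configuration and init_config are both importable from deepsearcher.configuration, as the diff above indicates.

from deepsearcher.configuration import Configuration, init_config

config = Configuration()
config.llm_tracing = True  # read by init_config, which then calls configure_langsmith()
init_config(config=config)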
4 changes: 3 additions & 1 deletion deepsearcher/llm/aliyun.py
@@ -2,6 +2,7 @@
from typing import Dict, List

from deepsearcher.llm.base import BaseLLM, ChatResponse
from deepsearcher.llm_tracer import wrap_client


class Aliyun(BaseLLM):
@@ -42,7 +43,8 @@ def __init__(self, model: str = "deepseek-r1", **kwargs):
else:
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"

self.client = OpenAI_(api_key=api_key, base_url=base_url, **kwargs)
aliyun_client = OpenAI_(api_key=api_key, base_url=base_url, **kwargs)
self.client = wrap_client(aliyun_client, client_type="aliyun")

def chat(self, messages: List[Dict]) -> ChatResponse:
"""
5 changes: 4 additions & 1 deletion deepsearcher/llm/anthropic_llm.py
@@ -2,6 +2,7 @@
from typing import Dict, List

from deepsearcher.llm.base import BaseLLM, ChatResponse
from deepsearcher.llm_tracer import wrap_client


class Anthropic(BaseLLM):
@@ -40,7 +41,9 @@ def __init__(self, model: str = "claude-3-7-sonnet-latest", max_tokens: int = 81
base_url = kwargs.pop("base_url")
else:
base_url = None
self.client = anthropic.Anthropic(api_key=api_key, base_url=base_url, **kwargs)

anthropic_client = anthropic.Anthropic(api_key=api_key, base_url=base_url, **kwargs)
self.client = wrap_client(anthropic_client, client_type="anthropic")

def chat(self, messages: List[Dict]) -> ChatResponse:
"""
6 changes: 4 additions & 2 deletions deepsearcher/llm/azure_openai.py
@@ -1,6 +1,7 @@
from typing import Dict, List

from deepsearcher.llm.base import BaseLLM, ChatResponse
from deepsearcher.llm_tracer import wrap_client


class AzureOpenAI(BaseLLM):
Expand Down Expand Up @@ -32,18 +33,19 @@ def __init__(
self.model = model
import os

from openai import AzureOpenAI
from openai import AzureOpenAI as AzureOpenAI_

if azure_endpoint is None:
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
if api_key is None:
api_key = os.getenv("AZURE_OPENAI_KEY")
self.client = AzureOpenAI(
azure_openai_client = AzureOpenAI_(
azure_endpoint=azure_endpoint,
api_key=api_key,
api_version=api_version,
**kwargs,
)
self.client = wrap_client(azure_openai_client, client_type="azure_openai")

def chat(self, messages: List[Dict]) -> ChatResponse:
"""
3 changes: 3 additions & 0 deletions deepsearcher/llm/base.py
@@ -3,6 +3,8 @@
from abc import ABC
from typing import Dict, List

from deepsearcher.llm_tracer import lazy_traceable


class ChatResponse(ABC):
"""
@@ -51,6 +53,7 @@ def __init__(self):
"""
pass

@lazy_traceable(run_type="llm", name="base_llm_chat")
def chat(self, messages: List[Dict]) -> ChatResponse:
"""
Send a chat message to the language model and get a response.
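The deepsearcher/llm_tracer.py module imported throughout is not included in this view. Below is a minimal sketch of the interface the call sites assume (configure_langsmith, lazy_traceable, wrap_client), built on the langsmith SDK's traceable decorator, whose run_type/name/tags/metadata parameters match the keyword arguments used above, and its client wrappers. This is an illustration under those assumptions, not the PR's actual implementation.

import functools
import os

_TRACING_ENABLED = False


def configure_langsmith() -> None:
    # LangSmith reads LANGSMITH_API_KEY and LANGSMITH_PROJECT from the
    # environment; this only flips tracing on for the helpers below.
    global _TRACING_ENABLED
    _TRACING_ENABLED = True
    os.environ.setdefault("LANGSMITH_TRACING", "true")


def lazy_traceable(**trace_kwargs):
    # Decorator that stays a no-op until configure_langsmith() has run, so
    # langsmith is only imported (and runs only recorded) when tracing is on.
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            if not _TRACING_ENABLED:
                return fn(*args, **kwargs)
            from langsmith import traceable  # lazy import

            return traceable(**trace_kwargs)(fn)(*args, **kwargs)

        return wrapper

    return decorator


def wrap_client(client, client_type: str):
    # Wrap an SDK client so its chat calls appear as traced runs; return the
    # raw client when tracing is off or no wrapper applies.
    if not _TRACING_ENABLED:
        return client
    if client_type in ("openai", "azure_openai", "aliyun"):  # OpenAI-compatible
        from langsmith.wrappers import wrap_openai

        return wrap_openai(client)
    if client_type == "anthropic":
        from langsmith.wrappers import wrap_anthropic

        return wrap_anthropic(client)
    return client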