 # functionality
 # https://github.com/langchain-ai/langchain/blob/master/libs/text-splitters/langchain_text_splitters/character.py

-import asyncio
 import logging
 import os

⋮
 import re
 from typing import List, Optional, Tuple

-# import litellm
+import litellm
 import numpy as np
 import psycopg2
 import tiktoken
⋮

 logger = logging.getLogger(__name__)

-# logs all litellm requests to langsmith
-# litellm.success_callback = ["langsmith"]
+# logs all litellm requests to langfuse
+litellm.callbacks = ["langfuse"]


 def split_text_with_regex(
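Registering `litellm.callbacks = ["langfuse"]` routes every request (successes and failures) to Langfuse. The callback reads its credentials from the environment, so a minimal setup sketch looks like this; the key values are placeholders, and the host line is an assumption that only matters off Langfuse Cloud:

import os

# Placeholder credentials, taken from a Langfuse project's settings page.
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
# Optional: defaults to Langfuse Cloud; set only for self-hosted deployments.
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"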
@@ -492,69 +491,42 @@ def get_topn_similar_docs( |
     raise ValueError("No valid vector store client provided")


-async def async_get_completion_from_messages(
-    messages, model=OPENAI_MODEL, temperature=0, max_tokens=1000
+def get_completion_from_messages(
+    messages,
+    model=OPENAI_MODEL,
+    temperature=0,
+    max_tokens=1000,
+    tracing_tags: List[str] = [],
 ):
-    """Asynchronous version of get_completion_from_messages.
+    """Generates a completion response from the given messages using the specified model.

     Args:
         messages (list): The list of messages to generate a completion from.
         model (str, optional): The model to use for generating the completion. Defaults to OPENAI_MODEL.
-        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1000.
+        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
+        max_tokens (int, optional): The maximum number of tokens to generate.
+            Defaults to 1000.
+        tracing_tags (List[str], optional): The tags to use for tracing the completion.
+            Defaults to an empty list.

     Returns:
         str: The content of the completion response.
     """
-    import litellm
-
-    litellm.success_callback = ["langsmith"]
-
     model = MODEL_NAME_MAP.get(model, model)
     completion_response = litellm.completion(
         model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
         api_key=get_openai_api_key(),
+        metadata={
+            "project": "llm-complete-guide-rag",
+            "tags": tracing_tags,
+        },
     )
     return completion_response.choices[0].message.content

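Since the synchronous wrapper below is removed, callers now hit this function directly; litellm passes the `metadata` dict through to the Langfuse callback, so the `tracing_tags` end up as tags on the recorded trace. A hypothetical call, with message contents and tag names that are purely illustrative:

reply = get_completion_from_messages(
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is an artifact store?"},
    ],
    tracing_tags=["rag", "local-dev"],  # illustrative tags, not from the repo
)
print(reply)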

-def get_completion_from_messages(
-    messages, model=OPENAI_MODEL, temperature=0, max_tokens=1000
-):
-    """Synchronous wrapper for async_get_completion_from_messages.
-
-    Args:
-        messages (list): The list of messages to generate a completion from.
-        model (str, optional): The model to use for generating the completion. Defaults to OPENAI_MODEL.
-        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1000.
-
-    Returns:
-        str: The content of the completion response.
-    """
-    try:
-        loop = asyncio.get_running_loop()
-    except RuntimeError:  # No running event loop
-        return asyncio.run(
-            async_get_completion_from_messages(
-                messages, model, temperature, max_tokens
-            )
-        )
-    else:
-        # If we're already in an event loop, create a new one in a thread
-        import nest_asyncio
-
-        nest_asyncio.apply()
-        return asyncio.run(
-            async_get_completion_from_messages(
-                messages, model, temperature, max_tokens
-            )
-        )
-
-
 def get_embeddings(text):
     """Generates embeddings for the given text using a SentenceTransformer model.

@@ -620,6 +592,7 @@ def process_input_with_retrieval( |
     model: str = OPENAI_MODEL,
     n_items_retrieved: int = 20,
     use_reranking: bool = False,
+    tracing_tags: List[str] = [],
 ) -> str:
     """Process the input with retrieval.

@@ -704,4 +677,8 @@ def process_input_with_retrieval( |
         },
     ]
     logger.debug("CONTEXT USED\n\n", messages[2]["content"], "\n\n")
-    return get_completion_from_messages(messages, model=model)
+    return get_completion_from_messages(
+        messages,
+        model=model,
+        tracing_tags=tracing_tags,
+    )
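With the new parameter threaded through, tags set at the retrieval entry point propagate down to the completion call and onto the Langfuse trace. A hypothetical invocation, assuming the first parameter of `process_input_with_retrieval` (not shown in this hunk) is the user query:

answer = process_input_with_retrieval(
    "How do I configure an artifact store?",  # illustrative question
    use_reranking=True,
    tracing_tags=["rag-qa", "manual-test"],  # illustrative tags
)
print(answer)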