
Commit effbaa3

Instrument langsmith tracing
1 parent 44c3055 commit effbaa3

3 files changed: +30 -52 lines changed

llm-complete-guide/requirements.txt

Lines changed: 1 addition & 3 deletions

@@ -22,10 +22,8 @@ gradio
 huggingface-hub
 elasticsearch
 tenacity
-langsmith
+langfuse
 pinecone
-nest_asyncio
-asyncio
 
 # optional requirements for S3 artifact store
 # s3fs>2022.3.0
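Side note on this swap: litellm's langfuse callback imports the langfuse SDK at runtime, which is why the package moves into requirements.txt; the credentials themselves come from environment variables. A minimal sketch of the configuration this assumes (key values are placeholders, and LANGFUSE_HOST is only needed for non-default hosts):

```python
import os

import litellm

# Langfuse credentials -- placeholder values; in practice these are set
# in the deployment environment, not in code.
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"  # default cloud host

# With credentials in place, registering the callback (as llm_utils.py
# does below) routes every litellm request to Langfuse.
litellm.callbacks = ["langfuse"]
```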

llm-complete-guide/run.py

Lines changed: 4 additions & 1 deletion

@@ -233,7 +233,10 @@ def main(
             "--query-text is required when using 'query' command"
         )
         response = process_input_with_retrieval(
-            query_text, model=model, use_reranking=use_reranker
+            query_text,
+            model=model,
+            use_reranking=use_reranker,
+            tracing_tags=["cli", "dev"],
         )
         console = Console()
         md = Markdown(response)
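With the CLI tagging its runs as ["cli", "dev"], traces from the terminal can be filtered separately in Langfuse from those produced by other entry points. Calling the same code path programmatically would look roughly like this (a sketch: the import path, model name, and query are inferred from the diff, not confirmed by it):

```python
from utils.llm_utils import process_input_with_retrieval

# Same entry point the CLI uses, tagged for a different origin so the
# traces are distinguishable in the Langfuse UI.
response = process_input_with_retrieval(
    "How do I configure an S3 artifact store?",  # hypothetical query
    model="gpt-4",  # resolved through MODEL_NAME_MAP inside llm_utils
    use_reranking=True,
    tracing_tags=["notebook", "dev"],
)
print(response)
```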

llm-complete-guide/utils/llm_utils.py

Lines changed: 25 additions & 48 deletions

@@ -19,7 +19,6 @@
 # functionality
 # https://github.com/langchain-ai/langchain/blob/master/libs/text-splitters/langchain_text_splitters/character.py
 
-import asyncio
 import logging
 import os
 
@@ -42,7 +41,7 @@
 import re
 from typing import List, Optional, Tuple
 
-# import litellm
+import litellm
 import numpy as np
 import psycopg2
 import tiktoken
@@ -64,8 +63,8 @@
 
 logger = logging.getLogger(__name__)
 
-# logs all litellm requests to langsmith
-# litellm.success_callback = ["langsmith"]
+# logs all litellm requests to langfuse
+litellm.callbacks = ["langfuse"]
 
 
 def split_text_with_regex(
@@ -492,69 +491,42 @@ def get_topn_similar_docs(
         raise ValueError("No valid vector store client provided")
 
 
-async def async_get_completion_from_messages(
-    messages, model=OPENAI_MODEL, temperature=0, max_tokens=1000
+def get_completion_from_messages(
+    messages,
+    model=OPENAI_MODEL,
+    temperature=0,
+    max_tokens=1000,
+    tracing_tags: List[str] = [],
 ):
-    """Asynchronous version of get_completion_from_messages.
+    """Generates a completion response from the given messages using the specified model.
 
     Args:
         messages (list): The list of messages to generate a completion from.
         model (str, optional): The model to use for generating the completion. Defaults to OPENAI_MODEL.
-        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1000.
+        temperature (float, optional): The temperature to use for the completion. Defaults to 0.4.
+        max_tokens (int, optional): The maximum number of tokens to generate.
+            Defaults to 1000.
+        tracing_tags (List[str], optional): The tags to use for tracing the completion.
+            Defaults to an empty list.
 
     Returns:
         str: The content of the completion response.
     """
-    import litellm
-
-    litellm.success_callback = ["langsmith"]
-
     model = MODEL_NAME_MAP.get(model, model)
     completion_response = litellm.completion(
         model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
         api_key=get_openai_api_key(),
+        metadata={
+            "project": "llm-complete-guide-rag",
+            "tags": tracing_tags,
+        },
     )
     return completion_response.choices[0].message.content
 
 
-def get_completion_from_messages(
-    messages, model=OPENAI_MODEL, temperature=0, max_tokens=1000
-):
-    """Synchronous wrapper for async_get_completion_from_messages.
-
-    Args:
-        messages (list): The list of messages to generate a completion from.
-        model (str, optional): The model to use for generating the completion. Defaults to OPENAI_MODEL.
-        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1000.
-
-    Returns:
-        str: The content of the completion response.
-    """
-    try:
-        loop = asyncio.get_running_loop()
-    except RuntimeError:  # No running event loop
-        return asyncio.run(
-            async_get_completion_from_messages(
-                messages, model, temperature, max_tokens
-            )
-        )
-    else:
-        # If we're already in an event loop, create a new one in a thread
-        import nest_asyncio
-
-        nest_asyncio.apply()
-        return asyncio.run(
-            async_get_completion_from_messages(
-                messages, model, temperature, max_tokens
-            )
-        )
-
-
 def get_embeddings(text):
     """Generates embeddings for the given text using a SentenceTransformer model.
 
@@ -620,6 +592,7 @@ def process_input_with_retrieval(
     model: str = OPENAI_MODEL,
     n_items_retrieved: int = 20,
    use_reranking: bool = False,
+    tracing_tags: List[str] = [],
 ) -> str:
     """Process the input with retrieval.
 
@@ -704,4 +677,8 @@ def process_input_with_retrieval(
         },
     ]
     logger.debug("CONTEXT USED\n\n", messages[2]["content"], "\n\n")
-    return get_completion_from_messages(messages, model=model)
+    return get_completion_from_messages(
+        messages,
+        model=model,
+        tracing_tags=tracing_tags,
+    )
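The net effect of this file: the async wrapper and its nest_asyncio workaround disappear, the langfuse callback is registered once at module import instead of on every call, and per-request context now travels through litellm's metadata argument. Stripped of the surrounding module, the pattern looks like this (model name and tag values are illustrative; Langfuse credentials are assumed to be in the environment):

```python
import litellm

# Registered once at import time, mirroring the top of llm_utils.py:
# every litellm request from this process is logged to Langfuse.
litellm.callbacks = ["langfuse"]


def traced_completion(messages: list, tags: list) -> str:
    """Hypothetical stand-in for get_completion_from_messages."""
    response = litellm.completion(
        model="gpt-4",  # the real code first maps aliases via MODEL_NAME_MAP
        messages=messages,
        temperature=0,
        max_tokens=1000,
        # Per-request context that Langfuse attaches to the trace.
        metadata={"project": "llm-complete-guide-rag", "tags": tags},
    )
    return response.choices[0].message.content


print(traced_completion([{"role": "user", "content": "Say hello."}], ["cli", "dev"]))
```

One detail worth flagging in the new signature: `tracing_tags: List[str] = []` is a mutable default shared across calls, which is safe only as long as the list is never mutated, as is the case here.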
