 # functionality
 # https://github.com/langchain-ai/langchain/blob/master/libs/text-splitters/langchain_text_splitters/character.py

-import asyncio
 import logging
 import os

⋮
 import re
 from typing import List, Optional, Tuple

-# import litellm
+import litellm
 import numpy as np
 import psycopg2
 import tiktoken
⋮

 logger = logging.getLogger(__name__)

-# logs all litellm requests to langsmith
-# litellm.success_callback = ["langsmith"]
+# logs all litellm requests to langfuse
+litellm.callbacks = ["langfuse"]


 def split_text_with_regex(
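Registering `litellm.callbacks = ["langfuse"]` routes every request (successes and failures) to Langfuse. The callback reads its credentials from the environment, so a minimal setup sketch looks like this; the key values are placeholders, and the host line is an assumption that only matters off Langfuse Cloud:

import os

# Placeholder credentials, taken from a Langfuse project's settings page.
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
# Optional: defaults to Langfuse Cloud; set only for self-hosted deployments.
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"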
@@ -492,69 +491,42 @@ def get_topn_similar_docs( |
     raise ValueError("No valid vector store client provided")


-async def async_get_completion_from_messages(
-    messages, model=OPENAI_MODEL, temperature=0, max_tokens=1000
+def get_completion_from_messages(
+    messages,
+    model=OPENAI_MODEL,
+    temperature=0,
+    max_tokens=1000,
+    tracing_tags: List[str] = [],
 ):
-    """Asynchronous version of get_completion_from_messages.
+    """Generates a completion response from the given messages using the specified model.

     Args:
         messages (list): The list of messages to generate a completion from.
         model (str, optional): The model to use for generating the completion. Defaults to OPENAI_MODEL.
-        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1000.
+        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
+        max_tokens (int, optional): The maximum number of tokens to generate.
+            Defaults to 1000.
+        tracing_tags (List[str], optional): The tags to use for tracing the completion.
+            Defaults to an empty list.

     Returns:
         str: The content of the completion response.
     """
-    import litellm
-
-    litellm.success_callback = ["langsmith"]
-
     model = MODEL_NAME_MAP.get(model, model)
     completion_response = litellm.completion(
         model=model,
         messages=messages,
         temperature=temperature,
         max_tokens=max_tokens,
         api_key=get_openai_api_key(),
+        metadata={
+            "project": "llm-complete-guide-rag",
+            "tags": tracing_tags,
+        },
     )
     return completion_response.choices[0].message.content

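Since the synchronous wrapper below is removed, callers now hit this function directly; litellm passes the `metadata` dict through to the Langfuse callback, so the `tracing_tags` end up as tags on the recorded trace. A hypothetical call, with message contents and tag names that are purely illustrative:

reply = get_completion_from_messages(
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is an artifact store?"},
    ],
    tracing_tags=["rag", "local-dev"],  # illustrative tags, not from the repo
)
print(reply)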

-def get_completion_from_messages(
-    messages, model=OPENAI_MODEL, temperature=0, max_tokens=1000
-):
-    """Synchronous wrapper for async_get_completion_from_messages.
-
-    Args:
-        messages (list): The list of messages to generate a completion from.
-        model (str, optional): The model to use for generating the completion. Defaults to OPENAI_MODEL.
-        temperature (float, optional): The temperature to use for the completion. Defaults to 0.
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1000.
-
-    Returns:
-        str: The content of the completion response.
-    """
-    try:
-        loop = asyncio.get_running_loop()
-    except RuntimeError:  # No running event loop
-        return asyncio.run(
-            async_get_completion_from_messages(
-                messages, model, temperature, max_tokens
-            )
-        )
-    else:
-        # If we're already in an event loop, create a new one in a thread
-        import nest_asyncio
-
-        nest_asyncio.apply()
-        return asyncio.run(
-            async_get_completion_from_messages(
-                messages, model, temperature, max_tokens
-            )
-        )
-
-
 def get_embeddings(text):
     """Generates embeddings for the given text using a SentenceTransformer model.

@@ -620,6 +592,7 @@ def process_input_with_retrieval( |
     model: str = OPENAI_MODEL,
     n_items_retrieved: int = 20,
     use_reranking: bool = False,
+    tracing_tags: List[str] = [],
 ) -> str:
     """Process the input with retrieval.

@@ -704,4 +677,8 @@ def process_input_with_retrieval( |
         },
     ]
     logger.debug("CONTEXT USED\n\n", messages[2]["content"], "\n\n")
-    return get_completion_from_messages(messages, model=model)
+    return get_completion_from_messages(
+        messages,
+        model=model,
+        tracing_tags=tracing_tags,
+    )
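With the new parameter threaded through, tags set at the retrieval entry point propagate down to the completion call and onto the Langfuse trace. A hypothetical invocation, assuming the first parameter of `process_input_with_retrieval` (not shown in this hunk) is the user query:

answer = process_input_with_retrieval(
    "How do I configure an artifact store?",  # illustrative question
    use_reranking=True,
    tracing_tags=["rag-qa", "manual-test"],  # illustrative tags
)
print(answer)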