Skip to content

Commit b3dbcfa

Browse files
committed
litellm kickoff
1 parent 43b12ea commit b3dbcfa

File tree

8 files changed

+308
-5
lines changed

8 files changed

+308
-5
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from langtrace_python_sdk import with_langtrace_root_span, langtrace
2+
from dotenv import load_dotenv
3+
from litellm import completion, acompletion
4+
import litellm
5+
6+
# Demo setup: quiet litellm's own debug output, load API keys from .env,
# and initialize langtrace so spans are echoed to the console.
litellm.set_verbose = False
load_dotenv()
langtrace.init(write_spans_to_console=True)
9+
10+
11+
@with_langtrace_root_span("Litellm Example OpenAI")
def openAI():
    """Run a simple non-streaming chat completion against gpt-3.5-turbo."""
    chat_history = [{"content": "Hello, how are you?", "role": "user"}]
    return completion(model="gpt-3.5-turbo", messages=chat_history)
18+
19+
20+
@with_langtrace_root_span("Litellm Example Anthropic Completion")
def anthropic():
    """Run a non-streaming completion against claude-2, print it, return it."""
    chat_history = [{"content": "Hello, how are you?", "role": "user"}]
    sampling = {"temperature": 0.5, "top_p": 0.5, "n": 1}
    response = completion(model="claude-2", messages=chat_history, **sampling)
    print(response)
    return response
31+
32+
33+
@with_langtrace_root_span("Litellm Example Anthropic Streaming")
def anthropic_streaming():
    """Run a streaming completion against claude-2 and drain the stream."""
    response = completion(
        model="claude-2",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        stream=True,
        temperature=0.5,
        top_p=0.5,
        n=1,
    )
    # Drain every chunk; the demo only cares that the stream completes.
    for _ in response:
        pass

    return response
51+
52+
53+
# Fix: the root-span label said "OpenAI Async Streaming" but this function
# streams from claude-2 (Anthropic) — copy-paste mislabel corrected.
@with_langtrace_root_span("Litellm Example Anthropic Async Streaming")
async def async_anthropic_streaming():
    """Run an async streaming completion against claude-2 and drain the stream."""
    response = await acompletion(
        model="claude-2",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        stream=True,
        temperature=0.5,
        top_p=0.5,
        n=1,
    )
    # Drain every chunk; the demo only cares that the stream completes.
    async for _ in response:
        pass

src/langtrace_python_sdk/constants/instrumentation/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"QDRANT": "Qdrant",
2727
"WEAVIATE": "Weaviate",
2828
"OLLAMA": "Ollama",
29+
"LITELLM": "Litellm",
2930
}
3031

3132
LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY = "langtrace_additional_attributes"

src/langtrace_python_sdk/instrumentation/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from .weaviate import WeaviateInstrumentation
1515
from .ollama import OllamaInstrumentor
1616
from .dspy import DspyInstrumentation
17+
from .litellm import LiteLLMInstrumentation
1718

1819
__all__ = [
1920
"AnthropicInstrumentation",
@@ -32,4 +33,5 @@
3233
"WeaviateInstrumentation",
3334
"OllamaInstrumentor",
3435
"DspyInstrumentation",
36+
"LiteLLMInstrumentation",
3537
]
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .instrumentation import LiteLLMInstrumentation
2+
3+
4+
__all__ = ["LiteLLMInstrumentation"]
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""
2+
Copyright (c) 2024 Scale3 Labs
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
from importlib_metadata import version as v
18+
import logging
19+
from typing import Collection
20+
21+
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
22+
from opentelemetry.trace import get_tracer
23+
from wrapt import wrap_function_wrapper as _W
24+
from .patch import litellm_patch, async_litellm_patch
25+
26+
# Configure the root logger to pass only FATAL-level records, silencing
# chatter from this module and its dependencies.
logging.basicConfig(level=logging.FATAL)
27+
28+
29+
class LiteLLMInstrumentation(BaseInstrumentor):
    """Wraps litellm's Anthropic completion entry points with langtrace spans."""

    def instrumentation_dependencies(self) -> Collection[str]:
        # Minimum litellm version this instrumentation has been written against.
        return ["litellm >= 1.0.0"]

    def _instrument(self, **kwargs):
        """Install sync and async wrappers around the targeted litellm methods."""
        sync_targets = [
            ("litellm.llms.anthropic_text", "AnthropicTextCompletion.completion"),
            ("litellm.llms.anthropic", "AnthropicChatCompletion.completion"),
        ]
        async_targets = [
            ("litellm.llms.anthropic_text", "AnthropicTextCompletion.async_completion"),
            ("litellm.llms.anthropic_text", "AnthropicTextCompletion.async_streaming"),
        ]

        tracer = get_tracer(__name__, "", kwargs.get("tracer_provider"))
        litellm_version = v("litellm")

        for target_module, target_name in async_targets:
            _W(
                module=target_module,
                name=target_name,
                wrapper=async_litellm_patch(target_name, tracer, litellm_version),
            )

        for target_module, target_name in sync_targets:
            _W(
                module=target_module,
                name=target_name,
                wrapper=litellm_patch(target_name, tracer, litellm_version),
            )

    def _instrument_module(self, module_name):
        # Per-module instrumentation is not supported; everything happens in _instrument.
        pass

    def _uninstrument(self, **kwargs):
        # No unwrap logic implemented yet.
        pass
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
from langtrace_python_sdk.constants.instrumentation.common import SERVICE_PROVIDERS
2+
from langtrace_python_sdk.instrumentation.openai.patch import StreamWrapper
3+
from langtrace_python_sdk.utils import set_span_attribute
4+
from langtrace_python_sdk.utils.llm import (
5+
calculate_prompt_tokens,
6+
get_extra_attributes,
7+
get_llm_request_attributes,
8+
get_langtrace_attributes,
9+
get_streaming_tokens,
10+
is_streaming,
11+
set_usage_attributes,
12+
)
13+
from langtrace.trace_attributes import LLMSpanAttributes, SpanAttributes, Event
14+
from langtrace_python_sdk.utils.silently_fail import silently_fail
15+
from openai import NOT_GIVEN
16+
from opentelemetry.trace import SpanKind, StatusCode, Status
17+
from opentelemetry import trace
18+
from opentelemetry.trace.propagation import set_span_in_context
19+
20+
import json
21+
22+
23+
def litellm_patch(name, tracer, version):
    """Build a wrapt wrapper that traces a synchronous litellm completion call.

    Args:
        name: dotted method name being wrapped; used as the span name.
        tracer: OpenTelemetry tracer used to create the span.
        version: installed litellm version, recorded in span attributes.

    Fixes over the original:
    - removed leftover debug ``print`` statements;
    - the span created here is now passed to ``StreamWrapper`` (previously
      ``trace.get_current_span()`` — an unrelated span — was passed);
    - the span is no longer managed by a ``with`` block, which ended it as
      soon as the wrapper returned — before a streaming response was
      consumed. The span is now ended explicitly in the non-streaming and
      error paths, and ownership is handed to ``StreamWrapper`` when
      streaming.
    """

    def traced_method(wrapped, instance, args, kwargs):
        service_provider = SERVICE_PROVIDERS["LITELLM"]
        prompts = kwargs.get("messages")
        # litellm nests provider-specific options under "optional_params";
        # merge them so request attributes see the effective settings.
        optional_params = kwargs.get("optional_params", {})
        options = {**kwargs, **optional_params}

        span_attributes = {
            **get_langtrace_attributes(version, service_provider),
            **get_llm_request_attributes(options, prompts=prompts),
            SpanAttributes.LLM_URL: kwargs.get("api_base"),
            SpanAttributes.LLM_PATH: "completion",
            **get_extra_attributes(),
        }

        attributes = LLMSpanAttributes(**span_attributes)

        span = tracer.start_span(
            name=name,
            kind=SpanKind.CLIENT,
            context=set_span_in_context(trace.get_current_span()),
        )
        try:
            set_input_attributes(span, attributes)
            result = wrapped(*args, **kwargs)
            if is_streaming(kwargs):
                # StreamWrapper takes ownership of the span and is expected
                # to end it once the stream is exhausted.
                return StreamWrapper(
                    stream=result,
                    span=span,
                    prompt_tokens=get_streaming_tokens(kwargs),
                    function_call=kwargs.get("functions") is not None,
                    tool_calls=kwargs.get("tools") is not None,
                )
            set_response_attributes(span, result)
            span.end()
            return result
        except Exception as err:
            span.record_exception(err)
            span.set_status(Status(StatusCode.ERROR, str(err)))
            span.end()
            raise

    return traced_method
71+
72+
73+
def async_litellm_patch(name, tracer, version):
    """Build a wrapt wrapper that traces an async litellm completion call.

    Args:
        name: dotted method name being wrapped; used as the span name.
        tracer: OpenTelemetry tracer used to create the span.
        version: installed litellm version, recorded in span attributes.

    Fixes over the original:
    - removed a first ``tracer.start_span(...)`` whose span was immediately
      shadowed by a second ``with tracer.start_span(...)`` and therefore
      leaked (never ended);
    - removed a leftover debug ``print``;
    - the span is no longer managed by a ``with`` block, which ended it as
      soon as the wrapper returned — before a streaming response was
      consumed. It is now ended explicitly in the non-streaming and error
      paths, and ownership is handed to ``StreamWrapper`` when streaming.
    """

    async def traced_method(wrapped, instance, args, kwargs):
        service_provider = SERVICE_PROVIDERS["LITELLM"]
        prompts = kwargs.get("messages")
        # litellm nests provider-specific options under "optional_params";
        # merge them so request attributes see the effective settings.
        optional_params = kwargs.get("optional_params", {})
        options = {**kwargs, **optional_params}

        span_attributes = {
            **get_langtrace_attributes(version, service_provider),
            **get_llm_request_attributes(options, prompts=prompts),
            SpanAttributes.LLM_URL: kwargs.get("api_base"),
            SpanAttributes.LLM_PATH: "completion",
            **get_extra_attributes(),
        }

        attributes = LLMSpanAttributes(**span_attributes)

        span = tracer.start_span(
            name=name,
            kind=SpanKind.CLIENT,
            context=set_span_in_context(trace.get_current_span()),
        )
        try:
            set_input_attributes(span, attributes)
            result = await wrapped(*args, **kwargs)
            if is_streaming(kwargs):
                # StreamWrapper takes ownership of the span and is expected
                # to end it once the stream is exhausted.
                return StreamWrapper(
                    stream=result,
                    span=span,
                    prompt_tokens=get_streaming_tokens(kwargs),
                    function_call=kwargs.get("functions") is not None,
                    tool_calls=kwargs.get("tools") is not None,
                )
            set_response_attributes(span, result)
            span.end()
            return result
        except Exception as err:
            span.record_exception(err)
            span.set_status(Status(StatusCode.ERROR, str(err)))
            span.end()
            raise

    return traced_method
124+
125+
126+
@silently_fail
def set_input_attributes(span, attributes):
    """Copy every serialized request attribute onto the span."""
    dumped = attributes.model_dump(by_alias=True)
    for key, value in dumped.items():
        set_span_attribute(span, key, value)
130+
131+
132+
@silently_fail
def set_response_attributes(span, result):
    """Record the response's model, id, fingerprint, usage and per-choice data.

    Fix: removed a leftover debug ``print("Choice", span.is_recording())``.
    """
    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, result.model)
    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_ID, result.id)
    set_span_attribute(
        span, SpanAttributes.LLM_SYSTEM_FINGERPRINT, result.system_fingerprint
    )
    set_usage_attributes(span, result.usage)

    # NOTE(review): with multiple choices, each iteration overwrites the same
    # two attributes, leaving only the last choice — confirm this is intended.
    for choice in result.choices:
        set_span_attribute(
            span, SpanAttributes.LLM_RESPONSE_FINISH_REASON, choice.finish_reason
        )
        set_span_attribute(
            span, SpanAttributes.LLM_COMPLETIONS, json.dumps(choice.message)
        )

src/langtrace_python_sdk/langtrace.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
WeaviateInstrumentation,
5454
OllamaInstrumentor,
5555
DspyInstrumentation,
56+
LiteLLMInstrumentation,
5657
)
5758
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
5859
from colorama import Fore
@@ -112,6 +113,7 @@ def init(
112113
"ollama": OllamaInstrumentor(),
113114
"dspy": DspyInstrumentation(),
114115
"crewai": CrewAIInstrumentation(),
116+
"litellm": LiteLLMInstrumentation(),
115117
}
116118

117119
init_instrumentations(disable_instrumentations, all_instrumentations)

src/langtrace_python_sdk/utils/llm.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,21 @@ def calculate_prompt_tokens(prompt_content, model):
5757
return estimate_tokens(prompt_content) # Fallback method
5858

5959

60+
def get_streaming_tokens(kwargs):
    """Estimate the total prompt token count for a (streaming) request.

    Sums per-message token estimates and, when function definitions are
    supplied, the estimates for each function's JSON form.

    Fix: the ``messages`` fallback default was ``{}`` (a dict) for what is a
    list of message dicts; it is now ``[]`` and also guards against an
    explicit ``messages=None``.
    """
    model = kwargs.get("model")
    prompt_tokens = 0
    for message in kwargs.get("messages") or []:
        prompt_tokens += calculate_prompt_tokens(json.dumps(message), model)

    functions = kwargs.get("functions")
    if functions is not None and functions != NOT_GIVEN:
        for function in functions:
            prompt_tokens += calculate_prompt_tokens(json.dumps(function), model)
    return prompt_tokens
73+
74+
6075
def calculate_price_from_usage(model, usage):
6176
"""
6277
Calculate the price of a model based on its usage."""
@@ -137,11 +152,12 @@ def get_base_url(instance):
137152

138153

139154
def is_streaming(kwargs):
    """Return True when the request explicitly enables streaming.

    When the kwargs carry an ``optional_params`` entry (litellm nests
    provider options there), its ``stream`` flag takes precedence over the
    top-level one.

    Fix: guard against ``optional_params`` being present but ``None``, which
    previously raised ``AttributeError`` on ``.get``.
    """
    streaming = kwargs.get("stream", None)

    if "optional_params" in kwargs:
        streaming = (kwargs.get("optional_params") or {}).get("stream", None)

    return not (streaming is False or streaming is None or streaming == NOT_GIVEN)
145161

146162

147163
def set_usage_attributes(span, usage):

0 commit comments

Comments
 (0)