
Commit f77f5e8: Cerebras support
1 parent: e67d055
File tree: 10 files changed, +318 -21 lines
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+class CerebrasRunner:
+    def run(self):
+        from .main import completion_example, completion_with_tools_example
+
+        completion_with_tools_example()
+        completion_example()
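
The runner defers the example imports to run() time, so the cerebras-cloud-sdk dependency is only needed when the example actually executes. A minimal sketch of driving it, assuming langtrace.init() has been called elsewhere (the import in main.py suggests it, but the call is not part of this diff):

# Hypothetical driver; only the runner class is added by this commit.
from langtrace_python_sdk import langtrace

langtrace.init()  # assumes the usual langtrace setup (API key or local exporter) is configured

runner = CerebrasRunner()
runner.run()  # runs completion_with_tools_example() then completion_example()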
Lines changed: 151 additions & 2 deletions
@@ -1,6 +1,8 @@
 from langtrace_python_sdk import langtrace
 from cerebras.cloud.sdk import Cerebras
 from dotenv import load_dotenv
+import re
+import json

 load_dotenv()

@@ -9,7 +11,7 @@
 client = Cerebras()


-def completion_example():
+def completion_example(stream=False):
     completion = client.chat.completions.create(
         messages=[
             {
@@ -18,5 +20,152 @@ def completion_example():
             }
         ],
         model="llama3.1-8b",
+        stream=stream,
     )
-    return completion
+
+    if stream:
+        for chunk in completion:
+            print(chunk)
+    else:
+        return completion
+
+
+def completion_with_tools_example(stream=False):
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant with access to a calculator. Use the calculator tool to compute mathematical expressions when needed.",
+        },
+        {"role": "user", "content": "What's the result of 15 multiplied by 7?"},
+    ]
+
+    response = client.chat.completions.create(
+        model="llama3.1-8b",
+        messages=messages,
+        tools=tools,
+        stream=stream,
+    )
+
+    if stream:
+        # Handle streaming response
+        full_content = ""
+        for chunk in response:
+            if chunk.choices[0].delta.tool_calls:
+                tool_call = chunk.choices[0].delta.tool_calls[0]
+                if hasattr(tool_call, "function"):
+                    if tool_call.function.name == "calculate":
+                        arguments = json.loads(tool_call.function.arguments)
+                        result = calculate(arguments["expression"])
+                        print(f"Calculation result: {result}")
+
+                        # Get final response with calculation result
+                        messages.append(
+                            {
+                                "role": "assistant",
+                                "content": None,
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "name": "calculate",
+                                            "arguments": tool_call.function.arguments,
+                                        },
+                                        "id": tool_call.id,
+                                        "type": "function",
+                                    }
+                                ],
+                            }
+                        )
+                        messages.append(
+                            {
+                                "role": "tool",
+                                "content": str(result),
+                                "tool_call_id": tool_call.id,
+                            }
+                        )
+
+                        final_response = client.chat.completions.create(
+                            model="llama3.1-70b", messages=messages, stream=True
+                        )
+
+                        for final_chunk in final_response:
+                            if final_chunk.choices[0].delta.content:
+                                print(final_chunk.choices[0].delta.content, end="")
+            elif chunk.choices[0].delta.content:
+                print(chunk.choices[0].delta.content, end="")
+                full_content += chunk.choices[0].delta.content
+    else:
+        # Handle non-streaming response
+        choice = response.choices[0].message
+        if choice.tool_calls:
+            function_call = choice.tool_calls[0].function
+            if function_call.name == "calculate":
+                arguments = json.loads(function_call.arguments)
+                result = calculate(arguments["expression"])
+                print(f"Calculation result: {result}")
+
+                messages.append(
+                    {
+                        "role": "assistant",
+                        "content": None,
+                        "tool_calls": [
+                            {
+                                "function": {
+                                    "name": "calculate",
+                                    "arguments": function_call.arguments,
+                                },
+                                "id": choice.tool_calls[0].id,
+                                "type": "function",
+                            }
+                        ],
+                    }
+                )
+                messages.append(
+                    {
+                        "role": "tool",
+                        "content": str(result),
+                        "tool_call_id": choice.tool_calls[0].id,
+                    }
+                )
+
+                final_response = client.chat.completions.create(
+                    model="llama3.1-70b",
+                    messages=messages,
+                )
+
+                if final_response:
+                    print(final_response.choices[0].message.content)
+                else:
+                    print("No final response received")
+        else:
+            print("Unexpected response from the model")
+
+
+def calculate(expression):
+    expression = re.sub(r"[^0-9+\-*/().]", "", expression)
+
+    try:
+        result = eval(expression)
+        return str(result)
+    except (SyntaxError, ZeroDivisionError, NameError, TypeError, OverflowError):
+        return "Error: Invalid expression"
+
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "calculate",
+            "description": "A calculator tool that can perform basic arithmetic operations. Use this when you need to compute mathematical expressions or solve numerical problems.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "expression": {
+                        "type": "string",
+                        "description": "The mathematical expression to evaluate",
+                    }
+                },
+                "required": ["expression"],
+            },
+        },
+    }
+]
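
A note on running this example: Cerebras() with no arguments relies on the API key being available in the environment (load_dotenv() above is there to populate it from a .env file), and the stream flag switches both functions between the streaming and non-streaming code paths. A small driver sketch, assuming the key is already set:

# Hypothetical entry point; not part of the committed example.
if __name__ == "__main__":
    completion_example()                        # non-streaming: returns the completion object
    completion_with_tools_example(stream=True)  # exercises the streaming tool-call branch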

src/langtrace_python_sdk/constants/instrumentation/common.py

Lines changed: 1 addition & 0 deletions
@@ -35,6 +35,7 @@
     "AUTOGEN": "Autogen",
     "XAI": "XAI",
     "AWS_BEDROCK": "AWS Bedrock",
+    "CEREBRAS": "Cerebras",
 }

 LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY = "langtrace_additional_attributes"

src/langtrace_python_sdk/instrumentation/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,7 @@
 from .aws_bedrock import AWSBedrockInstrumentation
 from .embedchain import EmbedchainInstrumentation
 from .litellm import LiteLLMInstrumentation
+from .cerebras import CerebrasInstrumentation

 __all__ = [
     "AnthropicInstrumentation",
@@ -46,4 +47,5 @@
     "GeminiInstrumentation",
     "MistralInstrumentation",
     "AWSBedrockInstrumentation",
+    "CerebrasInstrumentation",
 ]
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from .instrumentation import CerebrasInstrumentation
+
+__all__ = ["CerebrasInstrumentation"]

src/langtrace_python_sdk/instrumentation/cerebras/instrumentation.py

Lines changed: 7 additions & 1 deletion
@@ -17,6 +17,7 @@
 from typing import Collection
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.trace import get_tracer
+from opentelemetry.semconv.schemas import Schemas
 from wrapt import wrap_function_wrapper
 from importlib_metadata import version as v
 from .patch import chat_completions_create, async_chat_completions_create
@@ -32,7 +33,9 @@ def instrumentation_dependencies(self) -> Collection[str]:

     def _instrument(self, **kwargs):
         tracer_provider = kwargs.get("tracer_provider")
-        tracer = get_tracer(__name__, "", tracer_provider)
+        tracer = get_tracer(
+            __name__, "", tracer_provider, schema_url=Schemas.V1_27_0.value
+        )
         version = v("cerebras-cloud-sdk")

         wrap_function_wrapper(
@@ -46,3 +49,6 @@ def _instrument(self, **kwargs):
             name="resources.chat.completions.AsyncCompletionsResource.create",
             wrapper=async_chat_completions_create(version, tracer),
         )
+
+    def _uninstrument(self, **kwargs):
+        pass
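
CerebrasInstrumentation follows the standard OpenTelemetry BaseInstrumentor contract, so it is normally activated by langtrace's init path along with the other instrumentations registered in __init__.py. A manual-activation sketch (this usage is an assumption; the commit only adds the instrumentor itself):

# Hypothetical manual use of the instrumentor added above.
from langtrace_python_sdk.instrumentation import CerebrasInstrumentation

instrumentor = CerebrasInstrumentation()
instrumentor.instrument()    # wraps the Cerebras SDK chat-completion create methods
# ... make Cerebras chat completion calls; each one is traced via the patch module ...
instrumentor.uninstrument()  # currently a no-op, since _uninstrument() just passes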
Lines changed: 126 additions & 17 deletions
@@ -1,29 +1,138 @@
-"""
-Copyright (c) 2024 Scale3 Labs
+from langtrace_python_sdk.instrumentation.groq.patch import extract_content
+from opentelemetry.trace import SpanKind
+from langtrace_python_sdk.utils.llm import (
+    get_llm_request_attributes,
+    get_langtrace_attributes,
+    get_extra_attributes,
+    get_llm_url,
+    is_streaming,
+    set_event_completion,
+    set_span_attributes,
+    StreamWrapper,
+)
+from langtrace_python_sdk.utils.silently_fail import silently_fail
+from langtrace_python_sdk.constants.instrumentation.common import SERVICE_PROVIDERS
+from langtrace.trace_attributes import SpanAttributes
+from langtrace_python_sdk.utils import handle_span_error, set_span_attribute

-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at

-    http://www.apache.org/licenses/LICENSE-2.0
+def chat_completions_create(version: str, tracer):
+    def traced_method(wrapped, instance, args, kwargs):
+        llm_prompts = []
+        for message in kwargs.get("messages", []):
+            llm_prompts.append(message)

-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
+        span_attributes = {
+            **get_langtrace_attributes(version, SERVICE_PROVIDERS["CEREBRAS"]),
+            **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+            **get_llm_url(instance),
+            **get_extra_attributes(),
+        }

+        span_name = f"{span_attributes[SpanAttributes.LLM_OPERATION_NAME]} {span_attributes[SpanAttributes.LLM_REQUEST_MODEL]}"
+        with tracer.start_as_current_span(
+            name=span_name,
+            kind=SpanKind.CLIENT,
+            attributes=span_attributes,
+            end_on_exit=False,
+        ) as span:

-def chat_completions_create(version: str, tracer):
-    def traced_method(wrapped, instance, args, kwargs):
-        return wrapped(*args, **kwargs)
+            try:
+                _set_input_attributes(span, kwargs, span_attributes)
+                result = wrapped(*args, **kwargs)
+                if is_streaming(kwargs):
+                    return StreamWrapper(result, span)
+
+                if span.is_recording():
+                    _set_response_attributes(span, result)
+                span.end()
+                return result
+
+            except Exception as error:
+                handle_span_error(span, error)
+                raise

     return traced_method


 def async_chat_completions_create(version: str, tracer):
-    def traced_method(wrapped, instance, args, kwargs):
-        return wrapped(*args, **kwargs)
+    async def traced_method(wrapped, instance, args, kwargs):
+        llm_prompts = []
+        for message in kwargs.get("messages", []):
+            llm_prompts.append(message)
+
+        span_attributes = {
+            **get_langtrace_attributes(version, SERVICE_PROVIDERS["CEREBRAS"]),
+            **get_llm_request_attributes(kwargs, prompts=llm_prompts),
+            **get_llm_url(instance),
+            **get_extra_attributes(),
+        }
+
+        span_name = f"{span_attributes[SpanAttributes.LLM_OPERATION_NAME]} {span_attributes[SpanAttributes.LLM_REQUEST_MODEL]}"
+        with tracer.start_as_current_span(
+            name=span_name,
+            kind=SpanKind.CLIENT,
+            attributes=span_attributes,
+            end_on_exit=False,
+        ) as span:
+
+            try:
+                _set_input_attributes(span, kwargs, span_attributes)
+                result = await wrapped(*args, **kwargs)
+                if is_streaming(kwargs):
+                    return StreamWrapper(result, span)
+
+                if span.is_recording():
+                    _set_response_attributes(span, result)
+                span.end()
+                return result
+
+            except Exception as error:
+                handle_span_error(span, error)
+                raise

     return traced_method
+
+
+@silently_fail
+def _set_response_attributes(span, result):
+    set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, result.model)
+
+    if getattr(result, "id", None):
+        set_span_attribute(span, SpanAttributes.LLM_RESPONSE_ID, result.id)
+
+    if getattr(result, "choices", None):
+        responses = [
+            {
+                "role": (
+                    choice.message.role
+                    if choice.message and choice.message.role
+                    else "assistant"
+                ),
+                "content": extract_content(choice),
+                **(
+                    {"content_filter_results": choice.content_filter_results}
+                    if hasattr(choice, "content_filter_results")
+                    else {}
+                ),
+            }
+            for choice in result.choices
+        ]
+        set_event_completion(span, responses)
+    # Get the usage
+    if getattr(result, "usage", None):
+        set_span_attribute(
+            span,
+            SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+            result.usage.prompt_tokens,
+        )
+        set_span_attribute(
+            span,
+            SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+            result.usage.completion_tokens,
+        )
+
+
+@silently_fail
+def _set_input_attributes(span, kwargs, attributes):
+    set_span_attributes(span, attributes)
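
Both patch functions use the wrapt convention: the callable installed by wrap_function_wrapper receives (wrapped, instance, args, kwargs) and decides when to call through to the original method, which is where the span is opened and closed around the Cerebras SDK call. A toy sketch of that mechanism, independent of any langtrace internals (Greeter and greet are made-up names):

# Illustrative wrapt example only.
from wrapt import wrap_function_wrapper


class Greeter:
    def greet(self, name):
        return f"hello {name}"


def wrapper(wrapped, instance, args, kwargs):
    # pre-call hook: the real patch starts a span and sets request attributes here
    print(f"calling {wrapped.__name__} on {type(instance).__name__}")
    result = wrapped(*args, **kwargs)  # call through to the original method
    # post-call hook: the real patch sets response attributes and ends the span here
    return result


wrap_function_wrapper(__name__, "Greeter.greet", wrapper)
print(Greeter().greet("world"))  # prints the hook line, then "hello world"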
