Remove handle_streaming_response from the Anthropic instrumentation; return StreamWrapper for streaming responses

alizenhom · alizenhom · commit 8fa1b28782fa · 2024-08-19T17:41:35.000+03:00
diff --git a/src/langtrace_python_sdk/instrumentation/anthropic/patch.py b/src/langtrace_python_sdk/instrumentation/anthropic/patch.py
@@ -17,16 +17,18 @@
 import json
 
 from langtrace.trace_attributes import Event, LLMSpanAttributes
-from langtrace_python_sdk.utils import set_span_attribute, silently_fail
+from langtrace_python_sdk.utils import set_span_attribute
+from langtrace_python_sdk.utils.silently_fail import silently_fail
+
 from langtrace_python_sdk.utils.llm import (
+    StreamWrapper,
     get_extra_attributes,
     get_langtrace_attributes,
     get_llm_request_attributes,
     get_llm_url,
     get_span_name,
     is_streaming,
     set_event_completion,
-    set_event_completion_chunk,
     set_usage_attributes,
 )
 from opentelemetry.trace import SpanKind
@@ -83,61 +85,7 @@ def traced_method(wrapped, instance, args, kwargs):
             span.end()
             raise
 
-    def handle_streaming_response(result, span):
-        """Process and yield streaming response chunks."""
-        result_content = []
-        span.add_event(Event.STREAM_START.value)
-        input_tokens = 0
-        output_tokens = 0
-        try:
-            for chunk in result:
-                if (
-                    hasattr(chunk, "message")
-                    and chunk.message is not None
-                    and hasattr(chunk.message, "model")
-                    and chunk.message.model is not None
-                ):
-                    span.set_attribute(
-                        SpanAttributes.LLM_RESPONSE_MODEL, chunk.message.model
-                    )
-                content = ""
-                if hasattr(chunk, "delta") and chunk.delta is not None:
-                    content = chunk.delta.text if hasattr(chunk.delta, "text") else ""
-                # Assuming content needs to be aggregated before processing
-                result_content.append(content if len(content) > 0 else "")
-
-                if hasattr(chunk, "message") and hasattr(chunk.message, "usage"):
-                    input_tokens += (
-                        chunk.message.usage.input_tokens
-                        if hasattr(chunk.message.usage, "input_tokens")
-                        else 0
-                    )
-                    output_tokens += (
-                        chunk.message.usage.output_tokens
-                        if hasattr(chunk.message.usage, "output_tokens")
-                        else 0
-                    )
-
-                # Assuming span.add_event is part of a larger logging or event system
-                # Add event for each chunk of content
-                if content:
-                    set_event_completion_chunk(span, "".join(content))
-
-                # Assuming this is part of a generator, yield chunk or aggregated content
-                yield content
-        finally:
-
-            # Finalize span after processing all chunks
-            span.add_event(Event.STREAM_END.value)
-            set_usage_attributes(
-                span, {"input_tokens": input_tokens, "output_tokens": output_tokens}
-            )
-            completion = [{"role": "assistant", "content": "".join(result_content)}]
-            set_event_completion(span, completion)
-
-            span.set_status(StatusCode.OK)
-            span.end()
-
+    @silently_fail
     def set_response_attributes(result, span, kwargs):
         if not is_streaming(kwargs):
             if hasattr(result, "content") and result.content is not None:
@@ -174,7 +122,7 @@ def set_response_attributes(result, span, kwargs):
             span.end()
             return result
         else:
-            return handle_streaming_response(result, span)
+            return StreamWrapper(result, span)
 
     # return the wrapped method
     return traced_method