checkout parts from streaming squash me

aabmass · aabmass · commit 9cd53600acee · 2025-03-18T21:03:42.000Z
diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/__init__.py
@@ -39,42 +39,50 @@
 ---
 """
 
+from __future__ import annotations
+
 from typing import Any, Collection
 
 from wrapt import (
-    wrap_function_wrapper,  # type: ignore[reportUnknownVariableType]
+    wrap_function_wrapper,  # pyright: ignore[reportUnknownVariableType]
 )
 
 from opentelemetry._events import get_event_logger
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import unwrap
 from opentelemetry.instrumentation.vertexai.package import _instruments
-from opentelemetry.instrumentation.vertexai.patch import (
-    generate_content_create,
-)
+from opentelemetry.instrumentation.vertexai.patch import MethodWrappers
 from opentelemetry.instrumentation.vertexai.utils import is_content_enabled
 from opentelemetry.semconv.schemas import Schemas
 from opentelemetry.trace import get_tracer
 
 
-def _client_classes():
+def _methods_to_wrap(
+    method_wrappers: MethodWrappers,
+):
     # This import is very slow, do it lazily in case instrument() is not called
-
     # pylint: disable=import-outside-toplevel
-    from google.cloud.aiplatform_v1.services.prediction_service import (
-        client,
-    )
+    from google.cloud.aiplatform_v1.services.prediction_service import client
     from google.cloud.aiplatform_v1beta1.services.prediction_service import (
         client as client_v1beta1,
     )
 
-    return (
+    for client_class in (
         client.PredictionServiceClient,
         client_v1beta1.PredictionServiceClient,
-    )
+    ):
+        yield (
+            client_class,
+            client_class.generate_content.__name__,  # pyright: ignore[reportUnknownMemberType]
+            method_wrappers.generate_content,
+        )
 
 
 class VertexAIInstrumentor(BaseInstrumentor):
+    def __init__(self) -> None:
+        super().__init__()
+        self._methods_to_unwrap: list[tuple[Any, str]] = []
+
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
@@ -95,15 +103,19 @@ def _instrument(self, **kwargs: Any):
             event_logger_provider=event_logger_provider,
         )
 
-        for client_class in _client_classes():
+        method_wrappers = MethodWrappers(
+            tracer, event_logger, is_content_enabled()
+        )
+        for client_class, method_name, wrapper in _methods_to_wrap(
+            method_wrappers
+        ):
             wrap_function_wrapper(
                 client_class,
-                name="generate_content",
-                wrapper=generate_content_create(
-                    tracer, event_logger, is_content_enabled()
-                ),
+                name=method_name,
+                wrapper=wrapper,
             )
+            self._methods_to_unwrap.append((client_class, method_name))
 
     def _uninstrument(self, **kwargs: Any) -> None:
-        for client_class in _client_classes():
-            unwrap(client_class, "generate_content")
+        for client_class, method_name in self._methods_to_unwrap:
+            unwrap(client_class, method_name)
diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/patch.py b/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/patch.py
@@ -14,10 +14,12 @@
 
 from __future__ import annotations
 
+from contextlib import contextmanager
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
+    Iterable,
     MutableSequence,
 )
 
@@ -87,17 +89,17 @@ def _extract_params(
     )
 
 
-def generate_content_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
-):
-    """Wrap the `generate_content` method of the `GenerativeModel` class to trace it."""
+class MethodWrappers:
+    def __init__(
+        self, tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    ) -> None:
+        self.tracer = tracer
+        self.event_logger = event_logger
+        self.capture_content = capture_content
 
-    def traced_method(
-        wrapped: Callable[
-            ...,
-            prediction_service.GenerateContentResponse
-            | prediction_service_v1beta1.GenerateContentResponse,
-        ],
+    @contextmanager
+    def _with_instrumentation(
+        self,
         instance: client.PredictionServiceClient
         | client_v1beta1.PredictionServiceClient,
         args: Any,
@@ -111,32 +113,58 @@ def traced_method(
         }
 
         span_name = get_span_name(span_attributes)
-        with tracer.start_as_current_span(
+
+        with self.tracer.start_as_current_span(
             name=span_name,
             kind=SpanKind.CLIENT,
             attributes=span_attributes,
         ) as span:
             for event in request_to_events(
-                params=params, capture_content=capture_content
+                params=params, capture_content=self.capture_content
             ):
-                event_logger.emit(event)
+                self.event_logger.emit(event)
 
             # TODO: set error.type attribute
             # https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md
-            response = wrapped(*args, **kwargs)
-            # TODO: handle streaming
-            # if is_streaming(kwargs):
-            #     return StreamWrapper(
-            #         result, span, event_logger, capture_content
-            #     )
-
-            if span.is_recording():
-                span.set_attributes(get_genai_response_attributes(response))
-            for event in response_to_events(
-                response=response, capture_content=capture_content
-            ):
-                event_logger.emit(event)
 
+            def handle_response(
+                response: prediction_service.GenerateContentResponse
+                | prediction_service_v1beta1.GenerateContentResponse,
+            ) -> None:
+                if span.is_recording():
+                    # When streaming, this is called multiple times so attributes would be
+                    # overwritten. In practice, it looks the API only returns the interesting
+                    # attributes on the last streamed response. However, I couldn't find
+                    # documentation for this and setting attributes shouldn't be too expensive.
+                    span.set_attributes(
+                        get_genai_response_attributes(response)
+                    )
+
+                for event in response_to_events(
+                    response=response, capture_content=self.capture_content
+                ):
+                    self.event_logger.emit(event)
+
+            yield handle_response
+
+    def generate_content(
+        self,
+        wrapped: Callable[
+            ...,
+            prediction_service.GenerateContentResponse
+            | prediction_service_v1beta1.GenerateContentResponse,
+        ],
+        instance: client.PredictionServiceClient
+        | client_v1beta1.PredictionServiceClient,
+        args: Any,
+        kwargs: Any,
+    ) -> (
+        prediction_service.GenerateContentResponse
+        | prediction_service_v1beta1.GenerateContentResponse
+    ):
+        with self._with_instrumentation(
+            instance, args, kwargs
+        ) as handle_response:
+            response = wrapped(*args, **kwargs)
+            handle_response(response)
             return response
-
-    return traced_method