diff --git a/src/langtrace_python_sdk/constants/instrumentation/common.py b/src/langtrace_python_sdk/constants/instrumentation/common.py
index 19195655..b9721819 100644
--- a/src/langtrace_python_sdk/constants/instrumentation/common.py
+++ b/src/langtrace_python_sdk/constants/instrumentation/common.py
@@ -30,7 +30,7 @@
     "QDRANT": "Qdrant",
     "WEAVIATE": "Weaviate",
     "OLLAMA": "Ollama",
-    "VERTEXAI": "VertexAI",
+    "VERTEXAI": "Vertex AI",
     "GEMINI": "Gemini",
     "MISTRAL": "Mistral",
     "EMBEDCHAIN": "Embedchain",
diff --git a/src/langtrace_python_sdk/constants/instrumentation/vertexai.py b/src/langtrace_python_sdk/constants/instrumentation/vertexai.py
index 8ea90ae7..26ee1e94 100644
--- a/src/langtrace_python_sdk/constants/instrumentation/vertexai.py
+++ b/src/langtrace_python_sdk/constants/instrumentation/vertexai.py
@@ -39,4 +39,24 @@
         "method": "ChatSession",
         "operation": "send_message_streaming",
     },
+    "PREDICTION_SERVICE_BETA_GENERATE_CONTENT": {
+        "module": "google.cloud.aiplatform_v1beta1.services.prediction_service.client",
+        "method": "PredictionServiceClient",
+        "operation": "generate_content",
+    },
+    "PREDICTION_SERVICE_GENERATE_CONTENT": {
+        "module": "google.cloud.aiplatform_v1.services.prediction_service.client",
+        "method": "PredictionServiceClient",
+        "operation": "generate_content",
+    },
+    "PREDICTION_SERVICE_BETA_STREAM_GENERATE_CONTENT": {
+        "module": "google.cloud.aiplatform_v1beta1.services.prediction_service.client",
+        "method": "PredictionServiceClient",
+        "operation": "stream_generate_content",
+    },
+    "PREDICTION_SERVICE_STREAM_GENERATE_CONTENT": {
+        "module": "google.cloud.aiplatform_v1.services.prediction_service.client",
+        "method": "PredictionServiceClient",
+        "operation": "stream_generate_content",
+    },
 }
diff --git a/src/langtrace_python_sdk/instrumentation/vertexai/patch.py b/src/langtrace_python_sdk/instrumentation/vertexai/patch.py
index 8ca55cfe..46134d2b 100644
--- a/src/langtrace_python_sdk/instrumentation/vertexai/patch.py
+++ b/src/langtrace_python_sdk/instrumentation/vertexai/patch.py
@@ -27,12 +27,13 @@ def patch_vertexai(name, version, tracer: Tracer):
     def traced_method(wrapped, instance, args, kwargs):
         service_provider = SERVICE_PROVIDERS["VERTEXAI"]
         prompts = serialize_prompts(args, kwargs)
+
         span_attributes = {
             **get_langtrace_attributes(version, service_provider),
             **get_llm_request_attributes(
                 kwargs,
                 prompts=prompts,
-                model=get_llm_model(instance),
+                model=get_llm_model(instance, kwargs),
             ),
             **get_llm_url(instance),
             SpanAttributes.LLM_PATH: "",
@@ -77,6 +78,10 @@ def set_response_attributes(span: Span, result):
     if hasattr(result, "text"):
         set_event_completion(span, [{"role": "assistant", "content": result.text}])

+    if hasattr(result, "candidates"):
+        parts = result.candidates[0].content.parts
+        set_event_completion(span, [{"role": "assistant", "content": parts[0].text}])
+
     if hasattr(result, "usage_metadata") and result.usage_metadata is not None:
         usage = result.usage_metadata
         input_tokens = usage.prompt_token_count
@@ -96,17 +101,23 @@ def set_response_attributes(span: Span, result):


 def is_streaming_response(response):
-    return isinstance(response, types.GeneratorType) or isinstance(
-        response, types.AsyncGeneratorType
+    return (
+        isinstance(response, types.GeneratorType)
+        or isinstance(response, types.AsyncGeneratorType)
+        or str(type(response).__name__) == "_StreamingResponseIterator"
     )


-def get_llm_model(instance):
+def get_llm_model(instance, kwargs):
+    if "request" in kwargs:
+        return kwargs.get("request").model.split("/")[-1]
+
     if hasattr(instance, "_model_name"):
         return instance._model_name.replace("publishers/google/models/", "")
     return getattr(instance, "_model_id", "unknown")


+@silently_fail
 def serialize_prompts(args, kwargs):
     if args and len(args) > 0:
         prompt_parts = []
@@ -122,5 +133,24 @@ def serialize_prompts(args, kwargs):

         return [{"role": "user", "content": "\n".join(prompt_parts)}]
     else:
-        content = kwargs.get("prompt") or kwargs.get("message")
-        return [{"role": "user", "content": content}] if content else []
+        # Handle PredictionServiceClient for google-cloud-aiplatform.
+        if "request" in kwargs:
+            prompt = []
+            prompt_body = kwargs.get("request")
+            if prompt_body.system_instruction:
+                for part in prompt_body.system_instruction.parts:
+                    prompt.append({"role": "system", "content": part.text})
+
+            contents = prompt_body.contents
+
+            if not contents:
+                return []
+
+            for c in contents:
+                role = c.role if c.role else "user"
+                content = c.parts[0].text if c.parts else ""
+                prompt.append({"role": role, "content": content})
+            return prompt
+        else:
+            content = kwargs.get("prompt") or kwargs.get("message")
+            return [{"role": "user", "content": content}] if content else []
diff --git a/src/langtrace_python_sdk/utils/llm.py b/src/langtrace_python_sdk/utils/llm.py
index 86cdac3f..b72700e0 100644
--- a/src/langtrace_python_sdk/utils/llm.py
+++ b/src/langtrace_python_sdk/utils/llm.py
@@ -395,18 +395,30 @@ def build_streaming_response(self, chunk):
             content = [chunk.text]

         # CohereV2
-        if (hasattr(chunk, "delta") and
-            chunk.delta is not None and
-            hasattr(chunk.delta, "message") and
-            chunk.delta.message is not None and
-            hasattr(chunk.delta.message, "content") and
-            chunk.delta.message.content is not None and
-            hasattr(chunk.delta.message.content, "text") and
-            chunk.delta.message.content.text is not None):
+        if (
+            hasattr(chunk, "delta")
+            and chunk.delta is not None
+            and hasattr(chunk.delta, "message")
+            and chunk.delta.message is not None
+            and hasattr(chunk.delta.message, "content")
+            and chunk.delta.message.content is not None
+            and hasattr(chunk.delta.message.content, "text")
+            and chunk.delta.message.content.text is not None
+        ):
             content = [chunk.delta.message.content.text]
-
+        # google-cloud-aiplatform
+        if hasattr(chunk, "candidates") and chunk.candidates is not None:
+            for candidate in chunk.candidates:
+                if hasattr(candidate, "content") and candidate.content is not None:
+                    for part in candidate.content.parts:
+                        if hasattr(part, "text") and part.text is not None:
+                            content.append(part.text)
         # Anthropic
-        if hasattr(chunk, "delta") and chunk.delta is not None and not hasattr(chunk.delta, "message"):
+        if (
+            hasattr(chunk, "delta")
+            and chunk.delta is not None
+            and not hasattr(chunk.delta, "message")
+        ):
             content = [chunk.delta.text] if hasattr(chunk.delta, "text") else []

         if isinstance(chunk, dict):
@@ -425,9 +437,14 @@ def set_usage_attributes(self, chunk):

         # CohereV2
         if hasattr(chunk, "type") and chunk.type == "message-end":
-            if (hasattr(chunk, "delta") and chunk.delta is not None and
-                hasattr(chunk.delta, "usage") and chunk.delta.usage is not None and
-                hasattr(chunk.delta.usage, "billed_units") and chunk.delta.usage.billed_units is not None):
+            if (
+                hasattr(chunk, "delta")
+                and chunk.delta is not None
+                and hasattr(chunk.delta, "usage")
+                and chunk.delta.usage is not None
+                and hasattr(chunk.delta.usage, "billed_units")
+                and chunk.delta.usage.billed_units is not None
+            ):
                 usage = chunk.delta.usage.billed_units
                 self.completion_tokens = int(usage.output_tokens)
                 self.prompt_tokens = int(usage.input_tokens)
diff --git a/src/langtrace_python_sdk/version.py b/src/langtrace_python_sdk/version.py
index 903a158a..dcbfb52f 100644
--- a/src/langtrace_python_sdk/version.py
+++ b/src/langtrace_python_sdk/version.py
@@ -1 +1 @@
-__version__ = "3.4.0"
+__version__ = "3.5.0"
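
For reviewers: a minimal, untested sketch of the google-cloud-aiplatform call path this change instruments. The project, location, endpoint, and model values are placeholders, and the comments describe what the patched helpers above are expected to do, derived from the diff rather than from an observed trace.

```python
# Hypothetical usage of the newly wrapped PredictionServiceClient methods.
# Requires google-cloud-aiplatform and valid GCP credentials; all resource
# names below are placeholders.
from google.cloud.aiplatform_v1beta1.services.prediction_service import (
    PredictionServiceClient,
)
from google.cloud.aiplatform_v1beta1.types import (
    Content,
    GenerateContentRequest,
    Part,
)

client = PredictionServiceClient(
    client_options={"api_endpoint": "us-central1-aiplatform.googleapis.com"}
)

# serialize_prompts() reads kwargs["request"]: system_instruction parts become
# "system" messages and each Content in `contents` becomes one message, so the
# request below should serialize to:
#   [{"role": "system", "content": "You are a helpful assistant."},
#    {"role": "user", "content": "Say hello."}]
# get_llm_model() takes the last segment of request.model ("gemini-1.5-pro").
request = GenerateContentRequest(
    model=(
        "projects/my-project/locations/us-central1/"
        "publishers/google/models/gemini-1.5-pro"
    ),
    system_instruction=Content(parts=[Part(text="You are a helpful assistant.")]),
    contents=[Content(role="user", parts=[Part(text="Say hello.")])],
)

# Non-streaming: set_response_attributes() pulls the completion text from
# result.candidates[0].content.parts.
response = client.generate_content(request=request)

# Streaming: the returned _StreamingResponseIterator is now recognized by
# is_streaming_response(), and build_streaming_response() appends each
# chunk's candidate part texts.
for chunk in client.stream_generate_content(request=request):
    pass
```

Note that the keyword form `request=...` matters here: get_llm_model() and serialize_prompts() only look at kwargs["request"], so a request passed positionally would fall through to the legacy branches.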