Skip to content

Commit 954e4ae

Browse files
authored
Merge pull request #537 from m1kl0sh/aws-embeddings-support
Added support for embeddings via AWS Bedrock
2 parents cc95a5a + 1871128 commit 954e4ae

File tree

7 files changed

+359
-8
lines changed

7 files changed

+359
-8
lines changed

src/langtrace_python_sdk/constants/instrumentation/aws_bedrock.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
"METHOD": "aws_bedrock.invoke_model",
66
"ENDPOINT": "/invoke-model",
77
},
8+
"INVOKE_MODEL_WITH_RESPONSE_STREAM": {
9+
"METHOD": "aws_bedrock.invoke_model_with_response_stream",
10+
"ENDPOINT": "/invoke-model-with-response-stream",
11+
},
812
"CONVERSE": {
913
"METHOD": AWSBedrockMethods.CONVERSE.value,
1014
"ENDPOINT": "/converse",

src/langtrace_python_sdk/instrumentation/aws_bedrock/patch.py

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""
1616

1717
import json
18+
import io
1819

1920
from wrapt import ObjectProxy
2021
from itertools import tee
@@ -44,6 +45,7 @@
4445
set_span_attributes,
4546
set_usage_attributes,
4647
)
48+
from langtrace_python_sdk.utils import set_event_prompt
4749

4850

4951
def converse_stream(original_method, version, tracer):
@@ -170,12 +172,29 @@ def traced_method(*args, **kwargs):
170172
return traced_method
171173

172174

175+
def parse_vendor_and_model_name_from_model_id(model_id):
    """Split a Bedrock model identifier into ``(vendor, model_name)``.

    Supported identifier shapes:
      * ARN forms:
          ``arn:aws:bedrock:region:account-id:foundation-model/vendor.model-name``
          ``arn:aws:bedrock:region:account-id:custom-model/vendor.model-name/model-id``
      * Cross-region / plain ids: ``us.anthropic.claude-...`` or
        ``anthropic.claude-...``
      * Bare ids with no dot (e.g. ``mistral``): vendor and model name are
        the same string.

    Returns:
        tuple[str, str]: the ``(vendor, model_name)`` pair.
    """
    if model_id.startswith("arn:aws:bedrock:"):
        # The resource segment after the first "/" is "vendor.model-name".
        resource = model_id.split("/")[1]
        # maxsplit=1 keeps dots that belong to the model name itself;
        # the original split(".") truncated such names at the second dot.
        identifiers = resource.split(".", 1)
        if len(identifiers) == 1:
            # No vendor prefix in the ARN resource. The original raised
            # IndexError here; mirror the bare-id behavior below instead.
            return identifiers[0], identifiers[0]
        return identifiers[0], identifiers[1]
    parts = model_id.split(".")
    if len(parts) == 1:
        # e.g. "mistral" — no vendor prefix at all.
        return parts[0], parts[0]
    # e.g. "us.anthropic.claude-..." -> ("anthropic", "claude-...");
    # the region prefix (if any) is intentionally discarded.
    return parts[-2], parts[-1]
188+
189+
173190
def patch_invoke_model(original_method, tracer, version):
174191
def traced_method(*args, **kwargs):
175192
modelId = kwargs.get("modelId")
176-
(vendor, _) = modelId.split(".")
193+
vendor, _ = parse_vendor_and_model_name_from_model_id(modelId)
177194
span_attributes = {
178195
**get_langtrace_attributes(version, vendor, vendor_type="framework"),
196+
SpanAttributes.LLM_PATH: APIS["INVOKE_MODEL"]["ENDPOINT"],
197+
SpanAttributes.LLM_IS_STREAMING: False,
179198
**get_extra_attributes(),
180199
}
181200
with tracer.start_as_current_span(
@@ -196,9 +215,11 @@ def patch_invoke_model_with_response_stream(original_method, tracer, version):
196215
@wraps(original_method)
197216
def traced_method(*args, **kwargs):
198217
modelId = kwargs.get("modelId")
199-
(vendor, _) = modelId.split(".")
218+
vendor, _ = parse_vendor_and_model_name_from_model_id(modelId)
200219
span_attributes = {
201220
**get_langtrace_attributes(version, vendor, vendor_type="framework"),
221+
SpanAttributes.LLM_PATH: APIS["INVOKE_MODEL_WITH_RESPONSE_STREAM"]["ENDPOINT"],
222+
SpanAttributes.LLM_IS_STREAMING: True,
202223
**get_extra_attributes(),
203224
}
204225
span = tracer.start_span(
@@ -220,7 +241,7 @@ def handle_streaming_call(span, kwargs, response):
220241
def stream_finished(response_body):
221242
request_body = json.loads(kwargs.get("body"))
222243

223-
(vendor, model) = kwargs.get("modelId").split(".")
244+
vendor, model = parse_vendor_and_model_name_from_model_id(kwargs.get("modelId"))
224245

225246
set_span_attribute(span, SpanAttributes.LLM_REQUEST_MODEL, model)
226247
set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, model)
@@ -244,18 +265,22 @@ def stream_finished(response_body):
244265

245266
def handle_call(span, kwargs, response):
246267
modelId = kwargs.get("modelId")
247-
(vendor, model_name) = modelId.split(".")
268+
vendor, model_name = parse_vendor_and_model_name_from_model_id(modelId)
269+
read_response_body = response.get("body").read()
270+
request_body = json.loads(kwargs.get("body"))
271+
response_body = json.loads(read_response_body)
248272
response["body"] = BufferedStreamBody(
249-
response["body"]._raw_stream, response["body"]._content_length
273+
io.BytesIO(read_response_body), len(read_response_body)
250274
)
251-
request_body = json.loads(kwargs.get("body"))
252-
response_body = json.loads(response.get("body").read())
253275

254276
set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, modelId)
255277
set_span_attribute(span, SpanAttributes.LLM_REQUEST_MODEL, modelId)
256278

257279
if vendor == "amazon":
258-
set_amazon_attributes(span, request_body, response_body)
280+
if model_name.startswith("titan-embed-text"):
281+
set_amazon_embedding_attributes(span, request_body, response_body)
282+
else:
283+
set_amazon_attributes(span, request_body, response_body)
259284

260285
if vendor == "anthropic":
261286
if "prompt" in request_body:
@@ -359,6 +384,27 @@ def set_amazon_attributes(span, request_body, response_body):
359384
set_event_completion(span, completions)
360385

361386

387+
def set_amazon_embedding_attributes(span, request_body, response_body, model_id=None):
    """Record prompt, usage, and model attributes for an Amazon Titan
    embedding call (e.g. ``amazon.titan-embed-text-v1``).

    Args:
        span: the OpenTelemetry span being annotated.
        request_body: parsed JSON request payload; expects ``inputText``.
        response_body: parsed JSON response payload; expects ``embedding``
            (list of floats) and ``inputTextTokenCount``.
        model_id: optional model identifier. The invoke-model request body
            does not carry ``modelId`` — it is a boto3 kwarg — so callers
            should pass it explicitly. The old ``request_body.get("modelId")``
            lookup is kept only as a fallback for backward compatibility.
    """
    input_text = request_body.get("inputText")
    set_event_prompt(span, input_text)

    embeddings = response_body.get("embedding", [])
    input_tokens = response_body.get("inputTextTokenCount")
    set_usage_attributes(
        span,
        {
            "input_tokens": input_tokens,
            # Titan returns a single vector; report its dimensionality.
            "output": len(embeddings),
        },
    )
    # BUG in original: "modelId" is never present in the request body, so
    # these attributes were always set to None. Prefer the explicit arg and
    # skip the write entirely when no value is available (handle_call has
    # already set both attributes from the kwargs-level modelId).
    resolved_model = model_id or request_body.get("modelId")
    if resolved_model is not None:
        set_span_attribute(span, SpanAttributes.LLM_REQUEST_MODEL, resolved_model)
        set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, resolved_model)
)
406+
407+
362408
def set_anthropic_completions_attributes(span, request_body, response_body):
363409
set_span_attribute(
364410
span,
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
interactions:
2+
- request:
3+
body: '{"messages": [{"role": "user", "content": "Say this is a test three times"}],
4+
"anthropic_version": "bedrock-2023-05-31", "max_tokens": 100}'
5+
headers:
6+
Accept:
7+
- !!binary |
8+
YXBwbGljYXRpb24vanNvbg==
9+
Content-Length:
10+
- '139'
11+
Content-Type:
12+
- !!binary |
13+
YXBwbGljYXRpb24vanNvbg==
14+
User-Agent:
15+
- !!binary |
16+
Qm90bzMvMS4zOC4xOCBtZC9Cb3RvY29yZSMxLjM4LjE4IHVhLzIuMSBvcy9tYWNvcyMyNC40LjAg
17+
bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEzLjEgbWQvcHlpbXBsI0NQeXRob24gbS9aLGIg
18+
Y2ZnL3JldHJ5LW1vZGUjc3RhbmRhcmQgQm90b2NvcmUvMS4zOC4xOA==
19+
method: POST
20+
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-3-7-sonnet-20250219-v1%3A0/invoke
21+
response:
22+
body:
23+
string: '{"id":"msg_bdrk_01NJB1bDTLkFh6pgfoAD5hkb","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"text","text":"This
24+
is a test.\nThis is a test.\nThis is a test."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":14,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":20}}'
25+
headers:
26+
Connection:
27+
- keep-alive
28+
Content-Length:
29+
- '355'
30+
Content-Type:
31+
- application/json
32+
Date:
33+
- Mon, 19 May 2025 16:42:05 GMT
34+
X-Amzn-Bedrock-Input-Token-Count:
35+
- '14'
36+
X-Amzn-Bedrock-Invocation-Latency:
37+
- '926'
38+
X-Amzn-Bedrock-Output-Token-Count:
39+
- '20'
40+
x-amzn-RequestId:
41+
- c0a92363-ec28-4a8b-9c09-571131d946b0
42+
status:
43+
code: 200
44+
message: OK
45+
version: 1

src/tests/aws_bedrock/cassettes/test_generate_embedding.yaml

Lines changed: 41 additions & 0 deletions
Large diffs are not rendered by default.

src/tests/aws_bedrock/conftest.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Unit tests configuration module."""
2+
3+
import pytest
4+
import os
5+
6+
from boto3.session import Session
7+
from botocore.config import Config
8+
9+
from langtrace_python_sdk.instrumentation.aws_bedrock.instrumentation import (
10+
AWSBedrockInstrumentation,
11+
)
12+
13+
14+
@pytest.fixture(autouse=True)
def environment():
    """Ensure AWS_ACCESS_KEY_ID is populated so boto3 client creation works."""
    existing = os.getenv("AWS_ACCESS_KEY_ID")
    if not existing:
        os.environ["AWS_ACCESS_KEY_ID"] = "test_api_key"
18+
19+
20+
@pytest.fixture
def aws_bedrock_client():
    """Build a bedrock-runtime client with generous timeouts for cassette replay."""
    client_config = Config(
        region_name="us-east-1",
        connect_timeout=300,
        read_timeout=300,
        retries={"total_max_attempts": 2, "mode": "standard"},
    )
    session = Session()
    return session.client("bedrock-runtime", config=client_config)
29+
30+
31+
@pytest.fixture(scope="module")
def vcr_config():
    """Strip AWS auth material from recorded VCR cassettes."""
    sensitive_headers = [
        "authorization",
        "X-Amz-Date",
        "X-Amz-Security-Token",
        "amz-sdk-invocation-id",
        "amz-sdk-request",
    ]
    return {"filter_headers": sensitive_headers}
42+
43+
44+
@pytest.fixture(scope="session", autouse=True)
def instrument():
    """Install the AWS Bedrock instrumentation once for the whole test session."""
    AWSBedrockInstrumentation().instrument()
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import pytest
2+
import json
3+
from tests.utils import (
4+
assert_completion_in_events,
5+
assert_prompt_in_events,
6+
assert_token_count,
7+
)
8+
from importlib_metadata import version as v
9+
10+
from langtrace.trace_attributes import SpanAttributes
11+
from langtrace_python_sdk.constants.instrumentation.aws_bedrock import APIS
12+
13+
ANTHROPIC_VERSION = "bedrock-2023-05-31"
14+
15+
16+
@pytest.mark.vcr()
def test_chat_completion(exporter, aws_bedrock_client):
    """invoke_model with an Anthropic chat payload emits a fully-attributed span."""
    model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
    payload = {
        "messages": [{"role": "user", "content": "Say this is a test three times"}],
        "anthropic_version": ANTHROPIC_VERSION,
        "max_tokens": 100,
    }

    aws_bedrock_client.invoke_model(
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
        body=json.dumps(payload),
    )

    span = exporter.get_finished_spans()[-1]
    assert span.name == "aws_bedrock.invoke_model"

    attrs = span.attributes
    assert attrs.get(SpanAttributes.LANGTRACE_SDK_NAME) == "langtrace-python-sdk"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_NAME) == "anthropic"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_TYPE) == "framework"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_VERSION) == v("boto3")
    assert attrs.get(SpanAttributes.LANGTRACE_VERSION) == v("langtrace-python-sdk")
    assert attrs.get(SpanAttributes.LLM_PATH) == APIS["INVOKE_MODEL"]["ENDPOINT"]
    assert attrs.get(SpanAttributes.LLM_RESPONSE_MODEL) == model_id
    assert attrs.get(SpanAttributes.LLM_IS_STREAMING) is False
    assert_prompt_in_events(span.events)
    assert_completion_in_events(span.events)
    assert_token_count(attrs)
52+
53+
54+
@pytest.mark.skip(reason="Skipping streaming test due to no streaming support in vcrpy")
def test_chat_completion_streaming(exporter, aws_bedrock_client):
    """invoke_model_with_response_stream emits a streaming span once consumed."""
    model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
    payload = {
        "messages": [{"role": "user", "content": "Say this is a test three times"}],
        "anthropic_version": ANTHROPIC_VERSION,
        "max_tokens": 100,
    }

    response = aws_bedrock_client.invoke_model_with_response_stream(
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
        body=json.dumps(payload),
    )

    # Drain the stream so the instrumentation can finish the span.
    chunk_count = sum(1 for chunk in response["body"] if chunk)

    span = exporter.get_finished_spans()[-1]
    assert span.name == "aws_bedrock.invoke_model_with_response_stream"

    attrs = span.attributes
    assert attrs.get(SpanAttributes.LANGTRACE_SDK_NAME) == "langtrace-python-sdk"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_NAME) == "anthropic"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_TYPE) == "framework"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_VERSION) == v("boto3")
    assert attrs.get(SpanAttributes.LANGTRACE_VERSION) == v("langtrace-python-sdk")
    assert (
        attrs.get(SpanAttributes.LLM_PATH)
        == APIS["INVOKE_MODEL_WITH_RESPONSE_STREAM"]["ENDPOINT"]
    )
    # stream_finished records the parsed model name, not the full modelId.
    assert (
        attrs.get(SpanAttributes.LLM_RESPONSE_MODEL)
        == "claude-3-7-sonnet-20250219-v1:0"
    )
    assert attrs.get(SpanAttributes.LLM_IS_STREAMING) is True
    assert_prompt_in_events(span.events)
    assert_completion_in_events(span.events)
    assert_token_count(attrs)
102+
103+
104+
@pytest.mark.vcr()
def test_generate_embedding(exporter, aws_bedrock_client):
    """invoke_model with a Titan embedding payload emits an embedding span."""
    model_id = "amazon.titan-embed-text-v1"
    payload = {"inputText": "Say this is a test three times"}

    aws_bedrock_client.invoke_model(
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
        body=json.dumps(payload),
    )

    span = exporter.get_finished_spans()[-1]
    assert span.name == "aws_bedrock.invoke_model"

    attrs = span.attributes
    assert attrs.get(SpanAttributes.LANGTRACE_SDK_NAME) == "langtrace-python-sdk"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_NAME) == "amazon"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_TYPE) == "framework"
    assert attrs.get(SpanAttributes.LANGTRACE_SERVICE_VERSION) == v("boto3")
    assert attrs.get(SpanAttributes.LANGTRACE_VERSION) == v("langtrace-python-sdk")
    assert attrs.get(SpanAttributes.LLM_PATH) == APIS["INVOKE_MODEL"]["ENDPOINT"]
    assert attrs.get(SpanAttributes.LLM_RESPONSE_MODEL) == model_id
    assert attrs.get(SpanAttributes.LLM_IS_STREAMING) is False
    assert_prompt_in_events(span.events)
    assert_token_count(attrs)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import pytest
2+
from langtrace_python_sdk.instrumentation.aws_bedrock.patch import (
3+
parse_vendor_and_model_name_from_model_id,
4+
)
5+
6+
7+
def test_model_id_parsing():
    """A plain vendor.model id splits on the dot."""
    assert parse_vendor_and_model_name_from_model_id(
        "anthropic.claude-3-opus-20240229"
    ) == ("anthropic", "claude-3-opus-20240229")
12+
13+
14+
def test_model_id_parsing_cross_region_inference():
    """A cross-region id keeps the last two segments and drops the region prefix."""
    assert parse_vendor_and_model_name_from_model_id(
        "us.anthropic.claude-3-opus-20240229"
    ) == ("anthropic", "claude-3-opus-20240229")
19+
20+
21+
def test_model_id_parsing_arn_custom_model_inference():
    """A custom-model ARN yields vendor/model from its resource segment."""
    arn = "arn:aws:bedrock:us-east-1:123456789012:custom-model/amazon.my-model/abc123"
    assert parse_vendor_and_model_name_from_model_id(arn) == ("amazon", "my-model")
28+
29+
30+
def test_model_id_parsing_arn_foundation_model_inference():
    """A foundation-model ARN yields vendor/model from its resource segment."""
    arn = (
        "arn:aws:bedrock:us-east-1:123456789012:"
        "foundation-model/anthropic.claude-3-opus-20240229"
    )
    assert parse_vendor_and_model_name_from_model_id(arn) == (
        "anthropic",
        "claude-3-opus-20240229",
    )

0 commit comments

Comments
 (0)