test: add additional tests for async embeddings generation

constantinius · constantinius · commit 3587c5eac05b · 2025-11-20T14:49:35.000+01:00
diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py
@@ -1191,3 +1191,229 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien
     assert event["contexts"]["trace"]["origin"] == "manual"
     for span in event["spans"]:
         assert span["origin"] == "auto.ai.google_genai"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "send_default_pii, include_prompts",
+    [
+        (True, True),
+        (True, False),
+        (False, True),
+        (False, False),
+    ],
+)
+async def test_async_embed_content(
+    sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client
+):
+    """Test async embed_content span data for each send_default_pii/include_prompts pair."""
+    sentry_init(
+        integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)],
+        traces_sample_rate=1.0,
+        send_default_pii=send_default_pii,
+    )
+    events = capture_events()
+
+    # Canned response returned by the patched async_request below
+    mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON)
+
+    with mock.patch.object(
+        mock_genai_client._api_client,
+        "async_request",
+        return_value=mock_http_response,
+    ):
+        with start_transaction(name="google_genai_embeddings_async"):
+            await mock_genai_client.aio.models.embed_content(
+                model="text-embedding-004",
+                contents=[
+                    "What is your name?",
+                    "What is your favorite color?",
+                ],
+            )
+
+    assert len(events) == 1
+    (event,) = events
+
+    assert event["type"] == "transaction"
+    assert event["transaction"] == "google_genai_embeddings_async"
+
+    # Exactly one span is expected: the embeddings span
+    assert len(event["spans"]) == 1
+    (embed_span,) = event["spans"]
+
+    # Check the embeddings span's op, description and gen_ai attributes
+    assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS
+    assert embed_span["description"] == "embeddings text-embedding-004"
+    assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
+    assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini"
+    assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004"
+
+    # Input texts must be present only when both send_default_pii and include_prompts are set
+    if send_default_pii and include_prompts:
+        input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT])
+        assert input_texts == [
+            "What is your name?",
+            "What is your favorite color?",
+        ]
+    else:
+        assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"]
+
+    # Check usage data (sum of token counts from statistics: 10 + 15 = 25)
+    # Note: Only available in newer versions with ContentEmbeddingStatistics, hence the guard
+    if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]:
+        assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25
+
+
+@pytest.mark.asyncio
+async def test_async_embed_content_string_input(
+    sentry_init, capture_events, mock_genai_client
+):
+    """Test async embed_content when contents is a single string rather than a list."""
+    sentry_init(
+        integrations=[GoogleGenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    # Mock response containing exactly one embedding, matching the single input
+    single_embed_response = {
+        "embeddings": [
+            {
+                "values": [0.1, 0.2, 0.3],
+                "statistics": {
+                    "tokenCount": 5,
+                    "truncated": False,
+                },
+            },
+        ],
+        "metadata": {
+            "billableCharacterCount": 10,
+        },
+    }
+    mock_http_response = create_mock_http_response(single_embed_response)
+
+    with mock.patch.object(
+        mock_genai_client._api_client, "async_request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai_embeddings_async"):
+            await mock_genai_client.aio.models.embed_content(
+                model="text-embedding-004",
+                contents="Single text input",
+            )
+
+    (event,) = events
+    (embed_span,) = event["spans"]
+
+    # A single string input must be recorded as a one-element list
+    input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT])
+    assert input_texts == ["Single text input"]
+    # Should use token_count from statistics (5), not billable_character_count (10)
+    # Note: Only available in newer versions with ContentEmbeddingStatistics, hence the guard
+    if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]:
+        assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
+
+
+@pytest.mark.asyncio
+async def test_async_embed_content_error_handling(
+    sentry_init, capture_events, mock_genai_client
+):
+    """Test that an exception raised during async embed_content is captured as an error event."""
+    sentry_init(
+        integrations=[GoogleGenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    # Make the patched async_request raise to simulate an error at the HTTP level
+    with mock.patch.object(
+        mock_genai_client._api_client,
+        "async_request",
+        side_effect=Exception("Async Embedding API Error"),
+    ):
+        with start_transaction(name="google_genai_embeddings_async"):
+            with pytest.raises(Exception, match="Async Embedding API Error"):
+                await mock_genai_client.aio.models.embed_content(
+                    model="text-embedding-004",
+                    contents=["This will fail"],
+                )
+
+    # Should have both transaction and error events; the error event is unpacked first
+    assert len(events) == 2
+    error_event, _ = events
+
+    assert error_event["level"] == "error"
+    assert error_event["exception"]["values"][0]["type"] == "Exception"
+    assert error_event["exception"]["values"][0]["value"] == "Async Embedding API Error"
+    assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai"
+
+
+@pytest.mark.asyncio
+async def test_async_embed_content_without_statistics(
+    sentry_init, capture_events, mock_genai_client
+):
+    """Test async embed_content when the response has no statistics (older package versions)."""
+    sentry_init(
+        integrations=[GoogleGenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    # Response without statistics (typical for older google-genai versions)
+    # Each embedding carries only "values"; there is no "statistics" field
+    old_version_response = {
+        "embeddings": [
+            {
+                "values": [0.1, 0.2, 0.3],
+            },
+            {
+                "values": [0.2, 0.3, 0.4],
+            },
+        ],
+    }
+    mock_http_response = create_mock_http_response(old_version_response)
+
+    with mock.patch.object(
+        mock_genai_client._api_client, "async_request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai_embeddings_async"):
+            await mock_genai_client.aio.models.embed_content(
+                model="text-embedding-004",
+                contents=["Test without statistics", "Another test"],
+            )
+
+    (event,) = events
+    (embed_span,) = event["spans"]
+
+    # Without statistics in the response, no input token usage must be recorded
+    # The span is still produced, so the integration handles the missing field gracefully
+    assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"]
+
+
+@pytest.mark.asyncio
+async def test_async_embed_content_span_origin(
+    sentry_init, capture_events, mock_genai_client
+):
+    """Test that async embed_content spans get the auto.ai.google_genai origin."""
+    sentry_init(
+        integrations=[GoogleGenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON)
+
+    with mock.patch.object(
+        mock_genai_client._api_client, "async_request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai_embeddings_async"):
+            await mock_genai_client.aio.models.embed_content(
+                model="text-embedding-004",
+                contents=["Test origin"],
+            )
+
+    (event,) = events
+
+    assert event["contexts"]["trace"]["origin"] == "manual"
+    for span in event["spans"]:
+        assert span["origin"] == "auto.ai.google_genai"