Update code to validate log id and name

DylanRussell · DylanRussell · commit 26d416d946a3 · 2025-10-03T14:28:27.000Z
diff --git a/opentelemetry-exporter-gcp-logging/src/opentelemetry/exporter/cloud_logging/__init__.py b/opentelemetry-exporter-gcp-logging/src/opentelemetry/exporter/cloud_logging/__init__.py
@@ -17,6 +17,7 @@
 import datetime
 import json
 import logging
+import re
 import urllib.parse
 from typing import Any, Mapping, MutableMapping, Optional, Sequence
 
@@ -102,6 +103,8 @@
     24: LogSeverity.EMERGENCY,
 }
 
+INVALID_LOG_NAME_MESSAGE = "%s is not a valid log name. log name must be <512 characters and only contain characters: A-Za-z0-9/-_."  # %s receives the rejected name; the final "." in the list is itself an allowed character
+
 
 def _convert_any_value_to_string(value: Any) -> str:
     if isinstance(value, bool):
@@ -176,6 +179,12 @@ def _set_payload_in_log_entry(log_entry: LogEntry, body: AnyValue):
         log_entry.text_payload = _convert_any_value_to_string(body)
 
 
+def is_log_id_valid(log_id: str) -> bool:
+    """Return True when log_id is 1-511 characters drawn from A-Za-z0-9, '-', '_', '/' and '.'."""
+    # An empty ID is rejected as well: it cannot name a log, so callers should fall back.
+    return 0 < len(log_id) < 512 and re.search(r"[^A-Za-z0-9\-_/\.]", log_id) is None
+
+
 class CloudLoggingExporter(LogExporter):
     def __init__(
         self,
@@ -201,25 +210,28 @@ def __init__(
             )
         )
 
+    def pick_log_id(self, log_name_attr: Any, event_name: str | None) -> str:
+        """Pick the log ID: a valid log-name attribute, else a valid event name, else the default; "/" becomes "%2F"."""
+        if log_name_attr and isinstance(log_name_attr, str):
+            if is_log_id_valid(log_name_attr):
+                return log_name_attr.replace("/", "%2F")
+            logging.warning(INVALID_LOG_NAME_MESSAGE, log_name_attr)
+        event_name_usable = bool(event_name) and is_log_id_valid(event_name)
+        return event_name.replace("/", "%2F") if event_name_usable else self.default_log_name
+
     def export(self, batch: Sequence[LogData]):
         now = datetime.datetime.now()
         log_entries = []
         for log_data in batch:
             log_entry = LogEntry()
             log_record = log_data.log_record
             attributes = log_record.attributes or {}
+            if log_record.event_name:
+                attributes["event_name"] = log_record.event_name
             project_id = str(
                 attributes.get(PROJECT_ID_ATTRIBUTE_KEY, self.project_id)
             )
-            log_suffix = self.default_log_name
-            log_name_attr = attributes.get(LOG_NAME_ATTRIBUTE_KEY)
-            if log_name_attr and isinstance(log_name_attr, str):
-                log_suffix = urllib.parse.quote_plus(log_name_attr)
-            elif log_record.event_name:
-                log_suffix = urllib.parse.quote_plus(
-                    log_record.event_name.replace(".", "_")
-                )
-            log_entry.log_name = f"projects/{project_id}/logs/{log_suffix}"
+            log_entry.log_name = f"projects/{project_id}/logs/{self.pick_log_id(attributes.get(LOG_NAME_ATTRIBUTE_KEY), log_record.event_name)}"
             # If timestamp is unset fall back to observed_time_unix_nano as recommended,
             # see https://github.com/open-telemetry/opentelemetry-proto/blob/4abbb78/opentelemetry/proto/logs/v1/logs.proto#L176-L179
             ts = Timestamp()
diff --git a/opentelemetry-exporter-gcp-logging/tests/__snapshots__/test_cloud_logging/test_convert_gen_ai_body.json b/opentelemetry-exporter-gcp-logging/tests/__snapshots__/test_cloud_logging/test_convert_gen_ai_body.json
@@ -0,0 +1,93 @@
+[
+  {
+    "entries": [
+      {
+        "jsonPayload": {
+          "gen_ai.input.messages": [
+            {
+              "parts": [
+                {
+                  "content": "Get weather details in New Delhi and San Francisco?",
+                  "type": "text"
+                }
+              ],
+              "role": "user"
+            },
+            {
+              "parts": [
+                {
+                  "arguments": {
+                    "location": "New Delhi"
+                  },
+                  "id": "get_current_weather_0",
+                  "name": "get_current_weather",
+                  "type": "tool_call"
+                },
+                {
+                  "arguments": {
+                    "location": "San Francisco"
+                  },
+                  "id": "get_current_weather_1",
+                  "name": "get_current_weather",
+                  "type": "tool_call"
+                }
+              ],
+              "role": "model"
+            },
+            {
+              "parts": [
+                {
+                  "id": "get_current_weather_0",
+                  "response": {
+                    "content": "{\"temperature\": 35, \"unit\": \"C\"}"
+                  },
+                  "type": "tool_call_response"
+                },
+                {
+                  "id": "get_current_weather_1",
+                  "response": {
+                    "content": "{\"temperature\": 25, \"unit\": \"C\"}"
+                  },
+                  "type": "tool_call_response"
+                }
+              ],
+              "role": "user"
+            }
+          ],
+          "gen_ai.output.messages": [
+            {
+              "finish_reason": "stop",
+              "parts": [
+                {
+                  "content": "The current temperature in New Delhi is 35°C, and in San Francisco, it is 25°C.",
+                  "type": "text"
+                }
+              ],
+              "role": "model"
+            }
+          ],
+          "gen_ai.system_instructions": [
+            {
+              "content": "You are a clever language model",
+              "type": "text"
+            }
+          ]
+        },
+        "labels": {
+          "event_name": "gen_ai.client.inference.operation.details"
+        },
+        "logName": "projects/fakeproject/logs/gen_ai.client.inference.operation.details",
+        "resource": {
+          "labels": {
+            "location": "global",
+            "namespace": "",
+            "node_id": ""
+          },
+          "type": "generic_node"
+        },
+        "timestamp": "2025-01-15T21:25:10.997977393Z"
+      }
+    ],
+    "partialSuccess": true
+  }
+]
diff --git a/opentelemetry-exporter-gcp-logging/tests/__snapshots__/test_cloud_logging/test_convert_otlp_dict_body.json b/opentelemetry-exporter-gcp-logging/tests/__snapshots__/test_cloud_logging/test_convert_otlp_dict_body.json
@@ -21,11 +21,11 @@
           }
         },
         "labels": {
-          "event.name": "gen_ai.system.message",
+          "event_name": "random.genai.event",
           "gen_ai.system": "true",
           "test": "23"
         },
-        "logName": "projects/fakeproject/logs/random_genai_event",
+        "logName": "projects/fakeproject/logs/random.genai.event",
         "resource": {
           "labels": {
             "location": "global",
diff --git a/opentelemetry-exporter-gcp-logging/tests/test_cloud_logging.py b/opentelemetry-exporter-gcp-logging/tests/test_cloud_logging.py
@@ -26,16 +26,25 @@
 Be sure to review the changes.
 """
 import re
+from functools import partial
 from typing import List, Mapping, Union
+from unittest.mock import patch
 
 import pytest
 from fixtures.cloud_logging_fake import CloudLoggingFake, WriteLogEntriesCall
 from google.auth.credentials import AnonymousCredentials
 from google.cloud.logging_v2.services.logging_service_v2 import (
     LoggingServiceV2Client,
 )
+from google.cloud.logging_v2.services.logging_service_v2.transports.grpc import (
+    LoggingServiceV2GrpcTransport,
+)
+from grpc import insecure_channel
 from opentelemetry._logs.severity import SeverityNumber
-from opentelemetry.exporter.cloud_logging import CloudLoggingExporter
+from opentelemetry.exporter.cloud_logging import (
+    CloudLoggingExporter,
+    is_log_id_valid,
+)
 from opentelemetry.sdk._logs import LogData
 from opentelemetry.sdk._logs._internal import LogRecord
 from opentelemetry.sdk.resources import Resource
@@ -82,7 +91,6 @@ def test_convert_otlp_dict_body(
                 attributes={
                     "gen_ai.system": True,
                     "test": 23,
-                    "event.name": "gen_ai.system.message",
                 },
                 body={
                     "kvlistValue": {
@@ -154,6 +162,119 @@ def test_convert_non_json_dict_bytes(
     assert cloudloggingfake.get_calls() == snapshot_writelogentrycalls
 
 
+def test_convert_gen_ai_body(
+    cloudloggingfake: CloudLoggingFake,
+    snapshot_writelogentrycalls: List[WriteLogEntriesCall],
+) -> None:
+    log_data = [
+        LogData(
+            log_record=LogRecord(
+                event_name="gen_ai.client.inference.operation.details",  # expected as the log ID and an "event_name" label in the snapshot
+                timestamp=1736976310997977393,  # 2025-01-15T21:25:10.997977393Z in the snapshot
+                body={  # expected as jsonPayload in the snapshot
+                    "gen_ai.input.messages": (
+                        {
+                            "role": "user",
+                            "parts": (
+                                {
+                                    "type": "text",
+                                    "content": "Get weather details in New Delhi and San Francisco?",
+                                },
+                            ),
+                        },
+                        {
+                            "role": "model",
+                            "parts": (
+                                {
+                                    "type": "tool_call",
+                                    "arguments": {"location": "New Delhi"},
+                                    "name": "get_current_weather",
+                                    "id": "get_current_weather_0",
+                                },
+                                {
+                                    "type": "tool_call",
+                                    "arguments": {"location": "San Francisco"},
+                                    "name": "get_current_weather",
+                                    "id": "get_current_weather_1",
+                                },
+                            ),
+                        },
+                        {
+                            "role": "user",
+                            "parts": (
+                                {
+                                    "type": "tool_call_response",
+                                    "response": {
+                                        "content": '{"temperature": 35, "unit": "C"}'
+                                    },
+                                    "id": "get_current_weather_0",
+                                },
+                                {
+                                    "type": "tool_call_response",
+                                    "response": {
+                                        "content": '{"temperature": 25, "unit": "C"}'
+                                    },
+                                    "id": "get_current_weather_1",
+                                },
+                            ),
+                        },
+                    ),
+                    "gen_ai.system_instructions": (
+                        {
+                            "type": "text",
+                            "content": "You are a clever language model",
+                        },
+                    ),
+                    "gen_ai.output.messages": (
+                        {
+                            "role": "model",
+                            "parts": (
+                                {
+                                    "type": "text",
+                                    "content": "The current temperature in New Delhi is 35°C, and in San Francisco, it is 25°C.",
+                                },
+                            ),
+                            "finish_reason": "stop",
+                        },
+                    ),
+                },
+            ),
+            instrumentation_scope=InstrumentationScope("test"),
+        )
+    ]
+    cloudloggingfake.exporter.export(log_data)  # run the batch through the fixture's exporter
+    assert cloudloggingfake.get_calls() == snapshot_writelogentrycalls  # recorded calls must equal the stored snapshot
+
+
+def test_is_log_id_valid():
+    """Reject illegal characters and 512-character IDs; accept IDs built from the allowed characters."""
+    for bad_id in (";", "a" * 512, "abc1212**"):
+        assert is_log_id_valid(bad_id) is False
+    for good_id in ("aB12//..--__", "gen_ai.client.inference.operation.details"):
+        assert is_log_id_valid(good_id) is True
+
+
+def test_pick_log_id() -> None:
+    """pick_log_id prefers a valid log-name attribute, then a valid event name, then the default."""
+    exporter = CloudLoggingExporter(
+        project_id=PROJECT_ID,
+        default_log_name="test",
+    )
+    fallback = exporter.default_log_name
+    # Each case is (log-name attribute, event name, expected log ID).
+    cases = (
+        ("valid_log_name_attr", "event_name_str", "valid_log_name_attr"),
+        # An attribute with illegal characters is passed over for the event name.
+        ("invalid_attr**2", "event_name_str", "event_name_str"),
+        (None, "event_name_str", "event_name_str"),
+        # With nothing usable, the exporter's default log name is returned.
+        (None, None, fallback),
+        (None, "invalid_event_name_id24$", fallback),
+    )
+    for log_name_attr, event_name, expected in cases:
+        assert exporter.pick_log_id(log_name_attr, event_name) == expected
+
+
 @pytest.mark.parametrize(
     "body",
     [