diff --git a/CHANGELOG.md b/CHANGELOG.md index 01a840094e..4a95525734 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `opentelemetry-util-genai` Add a utility to parse the `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` environment variable. + Add `gen_ai_latest_experimental` as a new value to the Sem Conv stability flag ([#3716](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3716)). - `opentelemetry-instrumentation-confluent-kafka` Add support for confluent-kafka <=2.11.0 ([#3685](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3685)) - `opentelemetry-instrumentation-system-metrics`: Add `cpython.gc.collected_objects` and `cpython.gc.uncollectable_objects` metrics diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_semconv.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_semconv.py index c443fcbfdd..1b1748e206 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_semconv.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/_semconv.py @@ -162,9 +162,10 @@ OTEL_SEMCONV_STABILITY_OPT_IN = "OTEL_SEMCONV_STABILITY_OPT_IN" -class _OpenTelemetryStabilitySignalType: +class _OpenTelemetryStabilitySignalType(Enum): HTTP = "http" DATABASE = "database" + GEN_AI = "gen_ai" class _StabilityMode(Enum): @@ -173,6 +174,7 @@ class _StabilityMode(Enum): HTTP_DUP = "http/dup" DATABASE = "database" DATABASE_DUP = "database/dup" + GEN_AI_LATEST_EXPERIMENTAL = "gen_ai_latest_experimental" def _report_new(mode: _StabilityMode): @@ -195,7 +197,7 @@ def _initialize(cls): return # Users can pass in comma delimited string for opt-in options - # Only values for http and database stability are supported for now + # Only values for http, gen ai, and database stability are supported for now opt_in = os.environ.get(OTEL_SEMCONV_STABILITY_OPT_IN) if not 
opt_in: @@ -203,6 +205,7 @@ def _initialize(cls): cls._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING = { _OpenTelemetryStabilitySignalType.HTTP: _StabilityMode.DEFAULT, _OpenTelemetryStabilitySignalType.DATABASE: _StabilityMode.DEFAULT, + _OpenTelemetryStabilitySignalType.GEN_AI: _StabilityMode.DEFAULT, } cls._initialized = True return @@ -215,6 +218,14 @@ def _initialize(cls): opt_in_list, _StabilityMode.HTTP, _StabilityMode.HTTP_DUP ) + cls._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING[ + _OpenTelemetryStabilitySignalType.GEN_AI + ] = cls._filter_mode( + opt_in_list, + _StabilityMode.DEFAULT, + _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL, + ) + cls._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING[ _OpenTelemetryStabilitySignalType.DATABASE ] = cls._filter_mode( @@ -222,7 +233,6 @@ def _initialize(cls): _StabilityMode.DATABASE, _StabilityMode.DATABASE_DUP, ) - cls._initialized = True @staticmethod diff --git a/opentelemetry-instrumentation/tests/test_semconv.py b/opentelemetry-instrumentation/tests/test_semconv.py index 6a56efcc37..98befb32b7 100644 --- a/opentelemetry-instrumentation/tests/test_semconv.py +++ b/opentelemetry-instrumentation/tests/test_semconv.py @@ -54,6 +54,12 @@ def test_default_mode(self): ), _StabilityMode.DEFAULT, ) + self.assertEqual( + _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode( + _OpenTelemetryStabilitySignalType.GEN_AI + ), + _StabilityMode.DEFAULT, + ) @stability_mode("http") def test_http_stable_mode(self): @@ -91,7 +97,16 @@ def test_database_dup_mode(self): _StabilityMode.DATABASE_DUP, ) - @stability_mode("database,http") + @stability_mode("gen_ai_latest_experimental") + def test_genai_latest_experimental(self): + self.assertEqual( + _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode( + _OpenTelemetryStabilitySignalType.GEN_AI + ), + _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL, + ) + + @stability_mode("database,http,gen_ai_latest_experimental") def 
test_multiple_stability_database_http_modes(self): self.assertEqual( _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode( @@ -105,6 +120,12 @@ def test_multiple_stability_database_http_modes(self): ), _StabilityMode.HTTP, ) + self.assertEqual( + _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode( + _OpenTelemetryStabilitySignalType.GEN_AI + ), + _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL, + ) @stability_mode("database,http/dup") def test_multiple_stability_database_http_dup_modes(self): diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 6209a70d6f..8a6b7ec6df 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -5,4 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased \ No newline at end of file +## Unreleased + +Repurpose the `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` environment variable when GEN AI stability mode is set to `gen_ai_latest_experimental`, +to take on an enum (`NO_CONTENT/SPAN_ONLY/EVENT_ONLY/SPAN_AND_EVENT`) instead of a boolean. Add a utility function to help parse this environment variable. \ No newline at end of file diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/environment_variables.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/environment_variables.py new file mode 100644 index 0000000000..2f939772c6 --- /dev/null +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/environment_variables.py @@ -0,0 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT = ( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT" +) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index 44ce90ce5e..569e7e7e00 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -14,22 +14,34 @@ from dataclasses import dataclass +from enum import Enum from typing import Any, Literal, Optional, Union +class ContentCapturingMode(Enum): + # Do not capture content (default). + NO_CONTENT = 0 + # Only capture content in spans. + SPAN_ONLY = 1 + # Only capture content in events. + EVENT_ONLY = 2 + # Capture content in both spans and events. 
+ SPAN_AND_EVENT = 3 + + @dataclass() class ToolCall: - type: Literal["tool_call"] = "tool_call" arguments: Any name: str id: Optional[str] + type: Literal["tool_call"] = "tool_call" @dataclass() class ToolCallResponse: - type: Literal["tool_call_response"] = "tool_call_response" response: Any id: Optional[str] + type: Literal["tool_call_response"] = "tool_call_response" FinishReason = Literal[ @@ -39,8 +51,8 @@ class ToolCallResponse: @dataclass() class Text: - type: Literal["text"] = "text" content: str + type: Literal["text"] = "text" MessagePart = Union[Text, ToolCall, ToolCallResponse, Any] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/utils.py new file mode 100644 index 0000000000..91cb9221f1 --- /dev/null +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/utils.py @@ -0,0 +1,56 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def get_content_capturing_mode() -> ContentCapturingMode:
    """Return the content capturing mode configured through the environment.

    The value of ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`` is
    matched against the member names of ``ContentCapturingMode``. Matching is
    case-insensitive and ignores surrounding whitespace, so ``" span_only "``
    resolves to ``ContentCapturingMode.SPAN_ONLY``.

    Returns:
        The matching ``ContentCapturingMode`` member.
        ``ContentCapturingMode.NO_CONTENT`` is returned when the variable is
        unset or blank, and also (after logging a warning) when it holds a
        value that is not a member name.

    Raises:
        ValueError: If the GEN_AI semantic convention stability mode is
            ``_StabilityMode.DEFAULT``. This function must not be called in
            that mode.
    """
    gen_ai_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
        _OpenTelemetryStabilitySignalType.GEN_AI,
    )
    # Fail fast before touching the environment variable: callers are
    # expected to check the stability mode themselves.
    if gen_ai_mode == _StabilityMode.DEFAULT:
        raise ValueError(
            "This function should never be called when StabilityMode is default."
        )

    # Environment values frequently carry stray whitespace (quoting in shell
    # scripts, trailing newlines in env files); trim it so an otherwise valid
    # value is not rejected.
    envvar = os.environ.get(
        OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, ""
    ).strip()
    if not envvar:
        return ContentCapturingMode.NO_CONTENT

    try:
        # Lookup is by member *name* (e.g. "SPAN_ONLY"), not by numeric value.
        return ContentCapturingMode[envvar.upper()]
    except KeyError:
        # A misconfigured value must not break instrumentation; fall back to
        # the most conservative mode and tell the user which values are valid.
        logger.warning(
            "%s is not a valid option for `%s` environment variable. Must be one of %s. Defaulting to `NO_CONTENT`.",
            envvar,
            OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
            ", ".join(e.name for e in ContentCapturingMode),
        )
        return ContentCapturingMode.NO_CONTENT
def patch_env_vars(stability_mode, content_capturing):
    """Build a decorator that runs a test under patched environment variables.

    Args:
        stability_mode: Value assigned to ``OTEL_SEMCONV_STABILITY_OPT_IN``
            for the duration of the decorated test.
        content_capturing: Value assigned to
            ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`` for the
            duration of the decorated test.

    Returns:
        A decorator. The decorated test runs with both variables patched into
        ``os.environ`` and with the semantic convention stability state
        re-initialized from that patched environment.
    """
    # Function-scope import keeps this helper self-contained.
    import functools  # pylint: disable=import-outside-toplevel

    def decorator(test_case):
        @patch.dict(
            os.environ,
            {
                OTEL_SEMCONV_STABILITY_OPT_IN: stability_mode,
                OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: content_capturing,
            },
        )
        @functools.wraps(test_case)  # keep the test's name/docstring in reports
        def wrapper(*args, **kwargs):
            # Reset state: clear the initialized flag so ``_initialize``
            # re-reads the (patched) environment instead of returning early.
            _OpenTelemetrySemanticConventionStability._initialized = False
            _OpenTelemetrySemanticConventionStability._initialize()
            return test_case(*args, **kwargs)

        return wrapper

    return decorator


class TestVersion(unittest.TestCase):
    """Tests for ``get_content_capturing_mode``."""

    @patch_env_vars(
        stability_mode="gen_ai_latest_experimental",
        content_capturing="SPAN_ONLY",
    )
    def test_get_content_capturing_mode_parses_valid_envvar(self):
        self.assertEqual(
            get_content_capturing_mode(), ContentCapturingMode.SPAN_ONLY
        )

    @patch_env_vars(
        stability_mode="gen_ai_latest_experimental", content_capturing=""
    )
    def test_empty_content_capturing_envvar(self):
        self.assertEqual(
            get_content_capturing_mode(), ContentCapturingMode.NO_CONTENT
        )

    @patch_env_vars(stability_mode="default", content_capturing="True")
    def test_get_content_capturing_mode_raises_exception_when_semconv_stability_default(
        self,
    ):
        with self.assertRaises(ValueError):
            get_content_capturing_mode()

    @patch_env_vars(
        stability_mode="gen_ai_latest_experimental",
        content_capturing="INVALID_VALUE",
    )
    def test_get_content_capturing_mode_raises_exception_on_invalid_envvar(
        self,
    ):
        # Despite the test name, an invalid value does not raise: the
        # function logs exactly one warning and falls back to NO_CONTENT.
        with self.assertLogs(level="WARNING") as cm:
            self.assertEqual(
                get_content_capturing_mode(), ContentCapturingMode.NO_CONTENT
            )
        self.assertEqual(len(cm.output), 1)
        self.assertIn("INVALID_VALUE is not a valid option for ", cm.output[0])