diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
index 4644ee3dc5..ed27904e63 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add example to `opentelemetry-instrumentation-openai-v2`
([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
- Support for `AsyncOpenAI/AsyncCompletions` ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
+- Add metrics ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))
## Version 2.0b0 (2024-11-08)
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
index d2cb0b5724..c402b30bc0 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
@@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
:target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/
This library allows tracing LLM requests and logging of messages made by the
-`OpenAI Python API library <https://pypi.org/project/openai/>`_.
+`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
+the duration of operations and the number of tokens used as metrics.
Installation
@@ -74,6 +75,48 @@ To uninstrument clients, call the uninstrument method:
# Uninstrument all clients
OpenAIInstrumentor().uninstrument()
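+
+The duration and token usage metrics are exported through the configured
+``MeterProvider``. To quickly inspect them on the console, you can wire up a
+console exporter (a minimal sketch, assuming only the ``opentelemetry-sdk``
+package; swap in the exporter for your backend):
+
+.. code-block:: python
+
+    from opentelemetry.metrics import set_meter_provider
+    from opentelemetry.sdk.metrics import MeterProvider
+    from opentelemetry.sdk.metrics.export import (
+        ConsoleMetricExporter,
+        PeriodicExportingMetricReader,
+    )
+
+    # Periodically print collected metrics to stdout.
+    reader = PeriodicExportingMetricReader(ConsoleMetricExporter())
+    set_meter_provider(MeterProvider(metric_readers=[reader]))
+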
+Bucket Boundaries
+-----------------
+
+This section describes the explicit bucket boundaries that the GenAI semantic
+conventions recommend for the token usage and operation duration metrics, and
+shows how to apply them by configuring Views in your OpenTelemetry SDK setup.
+
+The bucket boundaries are defined as follows:
+
+- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
+- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]
+
+To implement these bucket boundaries, you can create Views in your OpenTelemetry SDK setup. Here is an example:
+
+.. code-block:: python
+
+    from opentelemetry.metrics import set_meter_provider
+    from opentelemetry.sdk.metrics import MeterProvider
+    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+    from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation, View
+
+ views = [
+ View(
+ instrument_name="gen_ai.client.token.usage",
+ aggregation=ExplicitBucketHistogramAggregation([1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]),
+ ),
+ View(
+ instrument_name="gen_ai.client.operation.duration",
+ aggregation=ExplicitBucketHistogramAggregation([0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]),
+ ),
+ ]
+
+ metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
+ metric_reader = PeriodicExportingMetricReader(metric_exporter)
+ provider = MeterProvider(
+ metric_readers=[metric_reader],
+ views=views
+ )
+
+    set_meter_provider(provider)
+
+For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.
+
References
----------
* `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/openai/openai.html>`_
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
index ee3bbfdb73..ab4b6f9d7b 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
@@ -49,13 +49,18 @@
from opentelemetry.instrumentation.openai_v2.package import _instruments
from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
from opentelemetry.instrumentation.utils import unwrap
+from opentelemetry.metrics import get_meter
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace import get_tracer
+from .instruments import Instruments
from .patch import async_chat_completions_create, chat_completions_create
class OpenAIInstrumentor(BaseInstrumentor):
+ def __init__(self):
+ self._meter = None
+
def instrumentation_dependencies(self) -> Collection[str]:
return _instruments
@@ -75,12 +80,21 @@ def _instrument(self, **kwargs):
schema_url=Schemas.V1_28_0.value,
event_logger_provider=event_logger_provider,
)
+ meter_provider = kwargs.get("meter_provider")
+ self._meter = get_meter(
+ __name__,
+ "",
+ meter_provider,
+ schema_url=Schemas.V1_28_0.value,
+ )
+
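+        # One set of shared metric instruments is reused by both the sync and async wrappers.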
+ instruments = Instruments(self._meter)
wrap_function_wrapper(
module="openai.resources.chat.completions",
name="Completions.create",
wrapper=chat_completions_create(
- tracer, event_logger, is_content_enabled()
+ tracer, event_logger, instruments, is_content_enabled()
),
)
@@ -88,7 +102,7 @@ def _instrument(self, **kwargs):
module="openai.resources.chat.completions",
name="AsyncCompletions.create",
wrapper=async_chat_completions_create(
- tracer, event_logger, is_content_enabled()
+ tracer, event_logger, instruments, is_content_enabled()
),
)
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py
new file mode 100644
index 0000000000..d1e184ac84
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py
@@ -0,0 +1,11 @@
+from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
+
+
+class Instruments:
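+    """GenAI client metric instruments: operation duration and token usage histograms."""
+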
+ def __init__(self, meter):
+ self.operation_duration_histogram = (
+ gen_ai_metrics.create_gen_ai_client_operation_duration(meter)
+ )
+ self.token_usage_histogram = (
+ gen_ai_metrics.create_gen_ai_client_token_usage(meter)
+ )
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
index cd284473ce..307b312fca 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
@@ -13,6 +13,7 @@
# limitations under the License.
+from timeit import default_timer
from typing import Optional
from openai import Stream
@@ -21,8 +22,12 @@
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
+from opentelemetry.semconv._incubating.attributes import (
+ server_attributes as ServerAttributes,
+)
from opentelemetry.trace import Span, SpanKind, Tracer
+from .instruments import Instruments
from .utils import (
choice_to_event,
get_llm_request_attributes,
@@ -34,7 +39,10 @@
def chat_completions_create(
- tracer: Tracer, event_logger: EventLogger, capture_content: bool
+ tracer: Tracer,
+ event_logger: EventLogger,
+ instruments: Instruments,
+ capture_content: bool,
):
"""Wrap the `create` method of the `ChatCompletion` class to trace it."""
@@ -54,6 +62,9 @@ def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)
+ start = default_timer()
+ result = None
+ error_type = None
try:
result = wrapped(*args, **kwargs)
if is_streaming(kwargs):
@@ -69,14 +80,27 @@ def traced_method(wrapped, instance, args, kwargs):
return result
except Exception as error:
+ error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
+ finally:
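+        # Always record the duration; _record_metrics skips token usage when unavailable.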
+ duration = max((default_timer() - start), 0)
+ _record_metrics(
+ instruments,
+ duration,
+ result,
+ span_attributes,
+ error_type,
+ )
return traced_method
def async_chat_completions_create(
- tracer: Tracer, event_logger: EventLogger, capture_content: bool
+ tracer: Tracer,
+ event_logger: EventLogger,
+ instruments: Instruments,
+ capture_content: bool,
):
"""Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""
@@ -96,6 +120,9 @@ async def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)
+ start = default_timer()
+ result = None
+ error_type = None
try:
result = await wrapped(*args, **kwargs)
if is_streaming(kwargs):
@@ -111,12 +138,88 @@ async def traced_method(wrapped, instance, args, kwargs):
return result
except Exception as error:
+ error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
+ finally:
+ duration = max((default_timer() - start), 0)
+ _record_metrics(
+ instruments,
+ duration,
+ result,
+ span_attributes,
+ error_type,
+ )
return traced_method
+def _record_metrics(
+ instruments: Instruments,
+ duration: float,
+ result,
+ span_attributes: dict,
+ error_type: Optional[str],
+):
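+    # Attributes shared by the duration and token usage histograms, per the GenAI semconv.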
+ common_attributes = {
+ GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
+ GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
+ GenAIAttributes.GEN_AI_REQUEST_MODEL: span_attributes[
+ GenAIAttributes.GEN_AI_REQUEST_MODEL
+ ],
+ }
+
+ if error_type:
+ common_attributes["error.type"] = error_type
+
+ if result and getattr(result, "model", None):
+ common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model
+
+ if result and getattr(result, "service_tier", None):
+ common_attributes[
+ GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
+ ] = result.service_tier
+
+ if result and getattr(result, "system_fingerprint", None):
+ common_attributes["gen_ai.openai.response.system_fingerprint"] = (
+ result.system_fingerprint
+ )
+
+ if ServerAttributes.SERVER_ADDRESS in span_attributes:
+ common_attributes[ServerAttributes.SERVER_ADDRESS] = span_attributes[
+ ServerAttributes.SERVER_ADDRESS
+ ]
+
+ if ServerAttributes.SERVER_PORT in span_attributes:
+ common_attributes[ServerAttributes.SERVER_PORT] = span_attributes[
+ ServerAttributes.SERVER_PORT
+ ]
+
+ instruments.operation_duration_histogram.record(
+ duration,
+ attributes=common_attributes,
+ )
+
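+    # Token usage is only recorded when the (non-streaming) response carries usage data.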
+ if result and getattr(result, "usage", None):
+ input_attributes = {
+ **common_attributes,
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
+ }
+ instruments.token_usage_histogram.record(
+ result.usage.prompt_tokens,
+ attributes=input_attributes,
+ )
+
+ completion_attributes = {
+ **common_attributes,
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
+ }
+ instruments.token_usage_histogram.record(
+ result.usage.completion_tokens,
+ attributes=completion_attributes,
+ )
+
+
def _set_response_attributes(
span, result, event_logger: EventLogger, capture_content: bool
):
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_async_chat_completion_metrics.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_async_chat_completion_metrics.yaml
new file mode 100644
index 0000000000..e771e93cbe
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_async_chat_completion_metrics.yaml
@@ -0,0 +1,133 @@
+interactions:
+- request:
+ body: |-
+ {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Say this is a test"
+ }
+ ],
+ "model": "gpt-4o-mini",
+ "stream": false
+ }
+ headers:
+ accept:
+ - application/json
+ accept-encoding:
+ - gzip, deflate
+ authorization:
+ - Bearer test_openai_api_key
+ connection:
+ - keep-alive
+ content-length:
+ - '106'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ user-agent:
+ - AsyncOpenAI/Python 1.26.0
+ x-stainless-arch:
+ - arm64
+ x-stainless-async:
+ - async:asyncio
+ x-stainless-lang:
+ - python
+ x-stainless-os:
+ - MacOS
+ x-stainless-package-version:
+ - 1.26.0
+ x-stainless-runtime:
+ - CPython
+ x-stainless-runtime-version:
+ - 3.12.5
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: |-
+ {
+ "id": "chatcmpl-ASv9R2E7Yhb2e7bj4Xl0qm9s3J42Y",
+ "object": "chat.completion",
+ "created": 1731456237,
+ "model": "gpt-4o-mini-2024-07-18",
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "This is a test. How can I assist you further?",
+ "refusal": null
+ },
+ "logprobs": null,
+ "finish_reason": "stop"
+ }
+ ],
+ "service_tier": "default",
+ "usage": {
+ "prompt_tokens": 12,
+ "completion_tokens": 12,
+ "total_tokens": 24,
+ "prompt_tokens_details": {
+ "cached_tokens": 0,
+ "audio_tokens": 0
+ },
+ "completion_tokens_details": {
+ "reasoning_tokens": 0,
+ "audio_tokens": 0,
+ "accepted_prediction_tokens": 0,
+ "rejected_prediction_tokens": 0
+ }
+ },
+ "system_fingerprint": "fp_0ba0d124f1"
+ }
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-RAY:
+ - 8e1a80679a8311a6-MRS
+ Connection:
+ - keep-alive
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 13 Nov 2024 00:03:58 GMT
+ Server:
+ - cloudflare
+ Set-Cookie: test_set_cookie
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ content-length:
+ - '796'
+ openai-organization: test_openai_org_id
+ openai-processing-ms:
+ - '359'
+ openai-version:
+ - '2020-10-01'
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains; preload
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999978'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_41ea134c1fc450d4ca4cf8d0c6a7c53a
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_metrics.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_metrics.yaml
new file mode 100644
index 0000000000..1c6c11c858
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_metrics.yaml
@@ -0,0 +1,135 @@
+interactions:
+- request:
+ body: |-
+ {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Say this is a test"
+ }
+ ],
+ "model": "gpt-4o-mini",
+ "stream": false
+ }
+ headers:
+ accept:
+ - application/json
+ accept-encoding:
+ - gzip, deflate
+ authorization:
+ - Bearer test_openai_api_key
+ connection:
+ - keep-alive
+ content-length:
+ - '106'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ user-agent:
+ - OpenAI/Python 1.54.3
+ x-stainless-arch:
+ - arm64
+ x-stainless-async:
+ - 'false'
+ x-stainless-lang:
+ - python
+ x-stainless-os:
+ - MacOS
+ x-stainless-package-version:
+ - 1.54.3
+ x-stainless-retry-count:
+ - '0'
+ x-stainless-runtime:
+ - CPython
+ x-stainless-runtime-version:
+ - 3.12.6
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: |-
+ {
+ "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
+ "object": "chat.completion",
+ "created": 1731368630,
+ "model": "gpt-4o-mini-2024-07-18",
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "This is a test.",
+ "refusal": null
+ },
+ "logprobs": null,
+ "finish_reason": "stop"
+ }
+ ],
+ "service_tier": "default",
+ "usage": {
+ "prompt_tokens": 12,
+ "completion_tokens": 5,
+ "total_tokens": 17,
+ "prompt_tokens_details": {
+ "cached_tokens": 0,
+ "audio_tokens": 0
+ },
+ "completion_tokens_details": {
+ "reasoning_tokens": 0,
+ "audio_tokens": 0,
+ "accepted_prediction_tokens": 0,
+ "rejected_prediction_tokens": 0
+ }
+ },
+ "system_fingerprint": "fp_0ba0d124f1"
+ }
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-RAY:
+ - 8e122593ff368bc8-SIN
+ Connection:
+ - keep-alive
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 11 Nov 2024 23:43:50 GMT
+ Server:
+ - cloudflare
+ Set-Cookie: test_set_cookie
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ content-length:
+ - '765'
+ openai-organization: test_openai_org_id
+ openai-processing-ms:
+ - '287'
+ openai-version:
+ - '2020-10-01'
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains; preload
+ x-ratelimit-limit-requests:
+ - '10000'
+ x-ratelimit-limit-tokens:
+ - '200000'
+ x-ratelimit-remaining-requests:
+ - '9999'
+ x-ratelimit-remaining-tokens:
+ - '199977'
+ x-ratelimit-reset-requests:
+ - 8.64s
+ x-ratelimit-reset-tokens:
+ - 6ms
+ x-request-id:
+ - req_58cff97afd0e7c0bba910ccf0b044a6f
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py
index 18e6582dff..51521dbadd 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py
@@ -17,6 +17,17 @@
InMemoryLogExporter,
SimpleLogRecordProcessor,
)
+from opentelemetry.sdk.metrics import (
+ Histogram,
+ MeterProvider,
+)
+from opentelemetry.sdk.metrics.export import (
+ InMemoryMetricReader,
+)
+from opentelemetry.sdk.metrics.view import (
+ ExplicitBucketHistogramAggregation,
+ View,
+)
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
@@ -36,6 +47,12 @@ def fixture_log_exporter():
yield exporter
+@pytest.fixture(scope="function", name="metric_reader")
+def fixture_metric_reader():
+    reader = InMemoryMetricReader()
+    yield reader
+
+
@pytest.fixture(scope="function", name="tracer_provider")
def fixture_tracer_provider(span_exporter):
provider = TracerProvider()
@@ -52,6 +69,62 @@ def fixture_event_logger_provider(log_exporter):
return event_logger_provider
+@pytest.fixture(scope="function", name="meter_provider")
+def fixture_meter_provider(metric_reader):
+ token_usage_histogram_view = View(
+ instrument_type=Histogram,
+ instrument_name="gen_ai.client.token.usage",
+ aggregation=ExplicitBucketHistogramAggregation(
+ boundaries=[
+ 1,
+ 4,
+ 16,
+ 64,
+ 256,
+ 1024,
+ 4096,
+ 16384,
+ 65536,
+ 262144,
+ 1048576,
+ 4194304,
+ 16777216,
+ 67108864,
+ ]
+ ),
+ )
+
+ duration_histogram_view = View(
+ instrument_type=Histogram,
+ instrument_name="gen_ai.client.operation.duration",
+ aggregation=ExplicitBucketHistogramAggregation(
+ boundaries=[
+ 0.01,
+ 0.02,
+ 0.04,
+ 0.08,
+ 0.16,
+ 0.32,
+ 0.64,
+ 1.28,
+ 2.56,
+ 5.12,
+ 10.24,
+ 20.48,
+ 40.96,
+ 81.92,
+ ]
+ ),
+ )
+
+ meter_provider = MeterProvider(
+ metric_readers=[metric_reader],
+ views=[token_usage_histogram_view, duration_histogram_view],
+ )
+
+ return meter_provider
+
+
@pytest.fixture(autouse=True)
def environment():
if not os.getenv("OPENAI_API_KEY"):
@@ -83,7 +156,9 @@ def vcr_config():
@pytest.fixture(scope="function")
-def instrument_no_content(tracer_provider, event_logger_provider):
+def instrument_no_content(
+ tracer_provider, event_logger_provider, meter_provider
+):
os.environ.update(
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"}
)
@@ -92,6 +167,7 @@ def instrument_no_content(tracer_provider, event_logger_provider):
instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
+ meter_provider=meter_provider,
)
yield instrumentor
@@ -100,7 +176,9 @@ def instrument_no_content(tracer_provider, event_logger_provider):
@pytest.fixture(scope="function")
-def instrument_with_content(tracer_provider, event_logger_provider):
+def instrument_with_content(
+ tracer_provider, event_logger_provider, meter_provider
+):
os.environ.update(
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"}
)
@@ -108,6 +186,7 @@ def instrument_with_content(tracer_provider, event_logger_provider):
instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
+ meter_provider=meter_provider,
)
yield instrumentor
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py
index 4677b7cb95..9685903603 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py
@@ -32,6 +32,7 @@
from opentelemetry.semconv._incubating.attributes import (
server_attributes as ServerAttributes,
)
+from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
@pytest.mark.vcr()
@@ -94,7 +95,9 @@ def test_chat_completion_no_content(
assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0])
-def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content):
+def test_chat_completion_bad_endpoint(
+ span_exporter, metric_reader, instrument_no_content
+):
llm_model_value = "gpt-4o-mini"
messages_value = [{"role": "user", "content": "Say this is a test"}]
@@ -116,10 +119,31 @@ def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content):
"APIConnectionError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE]
)
+ metrics = metric_reader.get_metrics_data().resource_metrics
+ assert len(metrics) == 1
+
+ metric_data = metrics[0].scope_metrics[0].metrics
+ duration_metric = next(
+ (
+ m
+ for m in metric_data
+ if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
+ ),
+ None,
+ )
+ assert duration_metric is not None
+ assert duration_metric.data.data_points[0].sum > 0
+ assert (
+ duration_metric.data.data_points[0].attributes[
+ ErrorAttributes.ERROR_TYPE
+ ]
+ == "APIConnectionError"
+ )
+
@pytest.mark.vcr()
def test_chat_completion_404(
- span_exporter, openai_client, instrument_no_content
+ span_exporter, openai_client, metric_reader, instrument_no_content
):
llm_model_value = "this-model-does-not-exist"
messages_value = [{"role": "user", "content": "Say this is a test"}]
@@ -135,6 +159,27 @@ def test_chat_completion_404(
assert_all_attributes(spans[0], llm_model_value)
assert "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE]
+ metrics = metric_reader.get_metrics_data().resource_metrics
+ assert len(metrics) == 1
+
+ metric_data = metrics[0].scope_metrics[0].metrics
+ duration_metric = next(
+ (
+ m
+ for m in metric_data
+ if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
+ ),
+ None,
+ )
+ assert duration_metric is not None
+ assert duration_metric.data.data_points[0].sum > 0
+ assert (
+ duration_metric.data.data_points[0].attributes[
+ ErrorAttributes.ERROR_TYPE
+ ]
+ == "NotFoundError"
+ )
+
@pytest.mark.vcr()
def test_chat_completion_extra_params(
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py
new file mode 100644
index 0000000000..d0f7c5a596
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py
@@ -0,0 +1,190 @@
+import pytest
+
+from opentelemetry.semconv._incubating.attributes import (
+ gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv._incubating.attributes import (
+ server_attributes as ServerAttributes,
+)
+from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
+
+
+def assert_all_metric_attributes(data_point):
+ assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
+ assert (
+ data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+ == GenAIAttributes.GenAiOperationNameValues.CHAT.value
+ )
+ assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
+ assert (
+ data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
+ == GenAIAttributes.GenAiSystemValues.OPENAI.value
+ )
+ assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
+ assert (
+ data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+ == "gpt-4o-mini"
+ )
+ assert GenAIAttributes.GEN_AI_RESPONSE_MODEL in data_point.attributes
+ assert (
+ data_point.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL]
+ == "gpt-4o-mini-2024-07-18"
+ )
+ assert "gen_ai.openai.response.system_fingerprint" in data_point.attributes
+ assert (
+ data_point.attributes["gen_ai.openai.response.system_fingerprint"]
+ == "fp_0ba0d124f1"
+ )
+ assert (
+ GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
+ in data_point.attributes
+ )
+ assert (
+ data_point.attributes[
+ GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
+ ]
+ == "default"
+ )
+ assert (
+ data_point.attributes[ServerAttributes.SERVER_ADDRESS]
+ == "api.openai.com"
+ )
+
+
+@pytest.mark.vcr()
+def test_chat_completion_metrics(
+ metric_reader, openai_client, instrument_with_content
+):
+ llm_model_value = "gpt-4o-mini"
+ messages_value = [{"role": "user", "content": "Say this is a test"}]
+
+ openai_client.chat.completions.create(
+ messages=messages_value, model=llm_model_value, stream=False
+ )
+
+ metrics = metric_reader.get_metrics_data().resource_metrics
+ assert len(metrics) == 1
+
+ metric_data = metrics[0].scope_metrics[0].metrics
+ assert len(metric_data) == 2
+
+ duration_metric = next(
+ (
+ m
+ for m in metric_data
+ if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
+ ),
+ None,
+ )
+ assert duration_metric is not None
+ assert duration_metric.data.data_points[0].sum > 0
+ assert_all_metric_attributes(duration_metric.data.data_points[0])
+
+ token_usage_metric = next(
+ (
+ m
+ for m in metric_data
+ if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
+ ),
+ None,
+ )
+ assert token_usage_metric is not None
+
+ input_token_usage = next(
+ (
+ d
+ for d in token_usage_metric.data.data_points
+ if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+ == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
+ ),
+ None,
+ )
+ assert input_token_usage is not None
+ assert input_token_usage.sum == 12
+    # with boundaries [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864],
+    # the 12 input tokens fall into bucket index 2, i.e. (4, 16]
+    assert input_token_usage.bucket_counts[2] == 1
+ assert_all_metric_attributes(input_token_usage)
+
+ output_token_usage = next(
+ (
+ d
+ for d in token_usage_metric.data.data_points
+ if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+ == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
+ ),
+ None,
+ )
+ assert output_token_usage is not None
+ assert output_token_usage.sum == 5
+    # the 5 output tokens also fall into bucket index 2, i.e. (4, 16]
+    assert output_token_usage.bucket_counts[2] == 1
+ assert_all_metric_attributes(output_token_usage)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio()
+async def test_async_chat_completion_metrics(
+ metric_reader, async_openai_client, instrument_with_content
+):
+ llm_model_value = "gpt-4o-mini"
+ messages_value = [{"role": "user", "content": "Say this is a test"}]
+
+ await async_openai_client.chat.completions.create(
+ messages=messages_value, model=llm_model_value, stream=False
+ )
+
+ metrics = metric_reader.get_metrics_data().resource_metrics
+ assert len(metrics) == 1
+
+ metric_data = metrics[0].scope_metrics[0].metrics
+ assert len(metric_data) == 2
+
+ duration_metric = next(
+ (
+ m
+ for m in metric_data
+ if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
+ ),
+ None,
+ )
+ assert duration_metric is not None
+ assert duration_metric.data.data_points[0].sum > 0
+ assert_all_metric_attributes(duration_metric.data.data_points[0])
+
+ token_usage_metric = next(
+ (
+ m
+ for m in metric_data
+ if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
+ ),
+ None,
+ )
+ assert token_usage_metric is not None
+
+ input_token_usage = next(
+ (
+ d
+ for d in token_usage_metric.data.data_points
+ if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+ == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
+ ),
+ None,
+ )
+
+ assert input_token_usage is not None
+ assert input_token_usage.sum == 12
+ assert_all_metric_attributes(input_token_usage)
+
+ output_token_usage = next(
+ (
+ d
+ for d in token_usage_metric.data.data_points
+ if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
+ == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
+ ),
+ None,
+ )
+
+ assert output_token_usage is not None
+ assert output_token_usage.sum == 12
+ assert_all_metric_attributes(output_token_usage)