Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add example to `opentelemetry-instrumentation-openai-v2`
([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
- Support for `AsyncOpenAI/AsyncCompletions` ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
- Add metrics ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))

## Version 2.0b0 (2024-11-08)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
:target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/

This library allows tracing LLM requests and logging of messages made by the
`OpenAI Python API library <https://pypi.org/project/openai/>`_.
`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
the duration of the operations and the number of tokens used as metrics.


Installation
Expand Down Expand Up @@ -74,6 +75,48 @@ To uninstrument clients, call the uninstrument method:
# Uninstrument all clients
OpenAIInstrumentor().uninstrument()

Bucket Boundaries
-----------------

This section lists the explicit bucket boundaries recommended by the GenAI semantic conventions for the token usage and operation duration metrics, and shows how to configure SDK Views that apply them.

The bucket boundaries are defined as follows:

- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]

To implement these bucket boundaries, you can create Views in your OpenTelemetry SDK setup. Here is an example:

.. code-block:: python

from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation, View
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter

views = [
View(
instrument_name="gen_ai.client.token.usage",
aggregation=ExplicitBucketHistogramAggregation([1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]),
),
View(
instrument_name="gen_ai.client.operation.duration",
aggregation=ExplicitBucketHistogramAggregation([0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]),
),
]

metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
metric_reader = PeriodicExportingMetricReader(metric_exporter)
provider = MeterProvider(
metric_readers=[metric_reader],
views=views
)

from opentelemetry.metrics import set_meter_provider
set_meter_provider(provider)

For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.

References
----------
* `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/openai/openai.html>`_
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,18 @@
from opentelemetry.instrumentation.openai_v2.package import _instruments
from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
from opentelemetry.instrumentation.utils import unwrap
from opentelemetry.metrics import get_meter
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace import get_tracer

from .instruments import Instruments
from .patch import async_chat_completions_create, chat_completions_create


class OpenAIInstrumentor(BaseInstrumentor):
def __init__(self):
self._meter = None

def instrumentation_dependencies(self) -> Collection[str]:
return _instruments

Expand All @@ -75,20 +80,29 @@ def _instrument(self, **kwargs):
schema_url=Schemas.V1_28_0.value,
event_logger_provider=event_logger_provider,
)
meter_provider = kwargs.get("meter_provider")
self._meter = get_meter(
__name__,
"",
meter_provider,
schema_url=Schemas.V1_28_0.value,
)

instruments = Instruments(self._meter)

wrap_function_wrapper(
module="openai.resources.chat.completions",
name="Completions.create",
wrapper=chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)

wrap_function_wrapper(
module="openai.resources.chat.completions",
name="AsyncCompletions.create",
wrapper=async_chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics


class Instruments:
    """Holds the GenAI client metric instruments created from a meter."""

    def __init__(self, meter):
        create_duration = gen_ai_metrics.create_gen_ai_client_operation_duration
        create_usage = gen_ai_metrics.create_gen_ai_client_token_usage
        # Histogram of client operation durations, in seconds.
        self.operation_duration_histogram = create_duration(meter)
        # Histogram of token counts (input/output) per operation.
        self.token_usage_histogram = create_usage(meter)
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.


from timeit import default_timer
from typing import Optional

from openai import Stream
Expand All @@ -21,8 +22,12 @@
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
server_attributes as ServerAttributes,
)
from opentelemetry.trace import Span, SpanKind, Tracer

from .instruments import Instruments
from .utils import (
choice_to_event,
get_llm_request_attributes,
Expand All @@ -34,7 +39,10 @@


def chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `ChatCompletion` class to trace it."""

Expand All @@ -54,6 +62,9 @@ def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)

start = default_timer()
result = None
error_type = None
try:
result = wrapped(*args, **kwargs)
if is_streaming(kwargs):
Expand All @@ -69,14 +80,27 @@ def traced_method(wrapped, instance, args, kwargs):
return result

except Exception as error:
error_type = type(error).__name__
handle_span_exception(span, error)
raise
finally:
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)

return traced_method


def async_chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""

Expand All @@ -96,6 +120,9 @@ async def traced_method(wrapped, instance, args, kwargs):
message_to_event(message, capture_content)
)

start = default_timer()
result = None
error_type = None
try:
result = await wrapped(*args, **kwargs)
if is_streaming(kwargs):
Expand All @@ -111,12 +138,88 @@ async def traced_method(wrapped, instance, args, kwargs):
return result

except Exception as error:
error_type = type(error).__name__
handle_span_exception(span, error)
raise
finally:
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)

return traced_method


def _record_metrics(
    instruments: Instruments,
    duration: float,
    result,
    span_attributes: dict,
    error_type: Optional[str],
):
    """Record duration and token-usage metrics for one chat-completions call.

    Invoked from the ``finally`` block of the sync/async wrappers, so it runs
    for both successful and failed calls.

    Args:
        instruments: Pre-created histogram instruments.
        duration: Elapsed wall-clock time of the operation, in seconds.
        result: Response object returned by OpenAI, or ``None`` when the call
            raised before producing a response.
        span_attributes: Request attributes already computed for the span.
        error_type: Exception class name when the call failed, else ``None``.
    """
    common_attributes = {
        GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
        GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
    }

    # The request model can be absent from span_attributes (e.g. when the
    # caller omitted the "model" kwarg and None values were filtered out).
    # Use .get() instead of indexing so a KeyError cannot be raised from the
    # wrappers' `finally` block, where it would mask the original result or
    # exception.
    request_model = span_attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)
    if request_model is not None:
        common_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = request_model

    if error_type:
        common_attributes["error.type"] = error_type

    # Enrich with response-level attributes when a response was produced.
    if result and getattr(result, "model", None):
        common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model

    if result and getattr(result, "service_tier", None):
        common_attributes[
            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        ] = result.service_tier

    if result and getattr(result, "system_fingerprint", None):
        common_attributes["gen_ai.openai.response.system_fingerprint"] = (
            result.system_fingerprint
        )

    if ServerAttributes.SERVER_ADDRESS in span_attributes:
        common_attributes[ServerAttributes.SERVER_ADDRESS] = span_attributes[
            ServerAttributes.SERVER_ADDRESS
        ]

    if ServerAttributes.SERVER_PORT in span_attributes:
        common_attributes[ServerAttributes.SERVER_PORT] = span_attributes[
            ServerAttributes.SERVER_PORT
        ]

    instruments.operation_duration_histogram.record(
        duration,
        attributes=common_attributes,
    )

    # Token usage is only present on non-streaming responses that carry a
    # `usage` payload; record one histogram point per token type.
    if result and getattr(result, "usage", None):
        input_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.prompt_tokens,
            attributes=input_attributes,
        )

        completion_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.completion_tokens,
            attributes=completion_attributes,
        )


def _set_response_attributes(
span, result, event_logger: EventLogger, capture_content: bool
):
Expand Down
Loading
Loading