Skip to content
Closed
Show file tree
Hide file tree
Changes from 57 commits
Commits
Show all changes
88 commits
Select commit Hold shift + click to select a range
00c2091
WIP initial code import
keith-decker Aug 15, 2025
40e6c48
remove references to tool types
keith-decker Aug 15, 2025
43526d6
add a simple unit test
keith-decker Aug 17, 2025
76dbd57
rename exporter to emitter.
keith-decker Aug 18, 2025
567e0a4
rename api file to client
keith-decker Aug 18, 2025
fd41dfb
Merge branch 'main' into util-genai-inference
keith-decker Aug 20, 2025
4bd72aa
WIP gen_ai chat refactor
keith-decker Aug 25, 2025
59414fa
Add provider.name, rename client to handler
keith-decker Aug 25, 2025
5127c39
add message to log functions
keith-decker Aug 25, 2025
66e0c26
remove events, emit structured logs
keith-decker Aug 26, 2025
4346580
update documentation
keith-decker Aug 27, 2025
4be03c4
first round of cleanup for lintcheck
keith-decker Aug 27, 2025
c19e4e6
typecheck fixes
keith-decker Aug 27, 2025
11d937b
typecheck fixes
keith-decker Aug 27, 2025
fdc71a3
linting fixes
keith-decker Aug 27, 2025
04d6e97
linting fixes, refactor for complexity
keith-decker Aug 27, 2025
a2e017a
typecheck fixes
keith-decker Aug 27, 2025
92cd5c8
update documentation
keith-decker Aug 28, 2025
770f878
rename context, update _to_semconv_dict name
keith-decker Sep 2, 2025
78bc6bf
refactor: rename emitters to generators and update method names for c…
keith-decker Sep 2, 2025
b7360f8
refactor: convert API LogRecord to SDK LogRecord, add unit test
keith-decker Sep 2, 2025
86152df
added changelog
keith-decker Sep 3, 2025
c0e03c4
Merge branch 'main' into util-genai-inference
keith-decker Sep 4, 2025
a5b04be
Merge branch 'main' into util-genai-inference
keith-decker Sep 5, 2025
3c94ecd
Wip Convert input messages
keith-decker Sep 5, 2025
a59ca1e
wip, refactor chat generation to output message
keith-decker Sep 5, 2025
a8bb486
cleanup
keith-decker Sep 5, 2025
f64bf69
merge test files
keith-decker Sep 5, 2025
4304819
lint updates
keith-decker Sep 8, 2025
f689ad1
remove logging, lower PR size
keith-decker Sep 8, 2025
b3bb75c
lint update
keith-decker Sep 8, 2025
fb30d45
check env for content capture
keith-decker Sep 8, 2025
fd45c43
check env for content capture on output
keith-decker Sep 8, 2025
aeaa319
lint updates
keith-decker Sep 8, 2025
6a19e04
remove metrics to reduce PR size
keith-decker Sep 8, 2025
ad062e3
remove metrics file
keith-decker Sep 8, 2025
fc0cde0
cleanup docs, remove meter/logger provider
keith-decker Sep 8, 2025
5cea4a6
remove overly complicated env check, add messages to test
keith-decker Sep 8, 2025
8e5c6ab
update file doc strings
keith-decker Sep 8, 2025
16f20b8
lint updates
keith-decker Sep 8, 2025
5ba84ef
remove unused properties
keith-decker Sep 9, 2025
381d38d
move utility function to utils
keith-decker Sep 9, 2025
b983dcd
Merge branch 'main' into util-genai-inference
keith-decker Sep 9, 2025
3a37f43
Additional span attributes
keith-decker Sep 9, 2025
b769ed6
Merge branch 'util-genai-inference' of github.com:zhirafovod/opentele…
keith-decker Sep 9, 2025
0776a7f
update span name
keith-decker Sep 9, 2025
7071d0f
Revert "Additional span attributes"
keith-decker Sep 9, 2025
bb184da
remove unused thread lock
keith-decker Sep 9, 2025
0b18708
update span name
keith-decker Sep 9, 2025
4f8e860
Merge branch 'main' into util-genai-inference
keith-decker Sep 9, 2025
e72a320
Update documentation, rename system to provider
keith-decker Sep 10, 2025
a704fc6
cleanup docs and unclear code
keith-decker Sep 10, 2025
54d753b
Merge branch 'main' into util-genai-inference
keith-decker Sep 11, 2025
0d4f204
add unit test for parent/child relationship
keith-decker Sep 11, 2025
82e4bff
cleanup some duplicated code
keith-decker Sep 11, 2025
d7c6187
cleanup duplicated start span code, move parent check into context ma…
keith-decker Sep 11, 2025
b12f865
remove unneccesary helper function
keith-decker Sep 11, 2025
b61437f
First commit for langchain instrumentation
wrisa Jun 4, 2025
1a11630
removed env
wrisa Jul 14, 2025
4f0e86c
added tool support and modified llm accordingly
wrisa Aug 10, 2025
4e07998
Added evaluation span and event. Added log api for event
wrisa Aug 12, 2025
0847f3b
added deepeval metric measure
wrisa Aug 12, 2025
b86d793
Fixing tests and adding tool call tests for langchain instrumentation
Aug 15, 2025
89388e1
Adding readme for test_langchain_llm.py
Aug 15, 2025
b26952d
WIP initial code import
keith-decker Aug 15, 2025
8578d76
remove references to tool types
keith-decker Aug 15, 2025
765758e
add a simple unit test
keith-decker Aug 17, 2025
3dbe121
rename exporter to emitter.
keith-decker Aug 18, 2025
2743109
rename api file to client
keith-decker Aug 18, 2025
5c009b3
WIP gen_ai chat refactor
keith-decker Aug 25, 2025
2b4d96c
Add provider.name, rename client to handler
keith-decker Aug 25, 2025
5e723c4
add message to log functions
keith-decker Aug 25, 2025
4404c20
remove events, emit structured logs
keith-decker Aug 26, 2025
a9f0f64
update documentation
keith-decker Aug 27, 2025
9eb69bf
first round of cleanup for lintcheck
keith-decker Aug 27, 2025
c92fdec
typecheck fixes
keith-decker Aug 27, 2025
91b18ad
typecheck fixes
keith-decker Aug 27, 2025
09d6f4c
linting fixes
keith-decker Aug 27, 2025
840e9fd
linting fixes, refactor for complexity
keith-decker Aug 27, 2025
70e8d1d
typecheck fixes
keith-decker Aug 27, 2025
a1485d2
update documentation
keith-decker Aug 28, 2025
a734de7
rename context, update _to_semconv_dict name
keith-decker Sep 2, 2025
c5bc930
refactor: rename emitters to generators and update method names for c…
keith-decker Sep 2, 2025
ab9f170
refactor: convert API LogRecord to SDK LogRecord, add unit test
keith-decker Sep 2, 2025
521824f
added changelog
keith-decker Sep 3, 2025
7c017c4
Merge branch 'main' into util-genai-inference
keith-decker Sep 16, 2025
cab5ca0
clean up extra functions
keith-decker Sep 16, 2025
d97fdca
Merge branch 'genai-utils-e2e-dev' into util-genai-inference
zhirafovod Sep 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions util/opentelemetry-util-genai/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased
## [Unreleased]

Repurpose the `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` environment variable when GEN AI stability mode is set to `gen_ai_latest_experimental`,
to take on an enum (`NO_CONTENT/SPAN_ONLY/EVENT_ONLY/SPAN_AND_EVENT`) instead of a boolean. Add a utility function to help parse this environment variable.

### Added

- Generate Spans for LLM invocations
- Helper functions for starting and finishing LLM invocations
18 changes: 18 additions & 0 deletions util/opentelemetry-util-genai/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@ The GenAI Utils package will include boilerplate and helpers to standardize inst
This package will provide APIs and decorators to minimize the work needed to instrument genai libraries,
while providing standardization for generating both types of otel, "spans and metrics" and "spans, metrics and events"

This package relies on environment variables to configure capturing of message content.
By default, message content will not be captured.
Set the environment variable `OTEL_SEMCONV_STABILITY_OPT_IN` to `gen_ai_latest_experimental` to enable experimental features.
And set the environment variable `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to `SPAN_ONLY` or `SPAN_AND_EVENT` to capture message content in spans.

This package provides the following span attributes:
-> gen_ai.provider.name: Str(openai)
-> gen_ai.operation.name: Str(chat)
-> gen_ai.request.model: Str(gpt-3.5-turbo)
-> gen_ai.response.finish_reasons: Slice(["stop"])
-> gen_ai.response.model: Str(gpt-3.5-turbo-0125)
-> gen_ai.response.id: Str(chatcmpl-Bz8yrvPnydD9pObv625n2CGBPHS13)
-> gen_ai.usage.input_tokens: Int(24)
-> gen_ai.usage.output_tokens: Int(7)
-> gen_ai.input.messages: Str('[{"role": "Human", "parts": [{"content": "hello world", "type": "text"}]}]')
-> gen_ai.output.messages: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]')


Installation
------------

Expand Down
4 changes: 2 additions & 2 deletions util/opentelemetry-util-genai/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"opentelemetry-instrumentation ~= 0.51b0",
"opentelemetry-semantic-conventions ~= 0.51b0",
"opentelemetry-instrumentation ~= 0.57b0",
"opentelemetry-semantic-conventions ~= 0.57b0",
"opentelemetry-api>=1.31.0",
]

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Span generation utilities for GenAI telemetry.

This module maps GenAI (Generative AI) invocations to OpenTelemetry spans and
applies GenAI semantic convention attributes.

Classes:
- BaseTelemetryGenerator: Abstract base for GenAI telemetry emitters.
- SpanGenerator: Concrete implementation that creates and finalizes spans
for LLM operations (e.g., chat) and records input/output messages when
experimental mode and content capture settings allow.

Usage:
See `opentelemetry/util/genai/handler.py` for `TelemetryHandler`, which
constructs `LLMInvocation` objects and delegates to `SpanGenerator.start`,
`SpanGenerator.finish`, and `SpanGenerator.error` to produce spans that
follow the GenAI semantic conventions.
"""

import json
from contextlib import contextmanager
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional
from uuid import UUID

from opentelemetry import trace
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAI,
)
from opentelemetry.semconv.attributes import (
error_attributes as ErrorAttributes,
)
from opentelemetry.trace import (
Span,
SpanKind,
Tracer,
set_span_in_context,
use_span,
)
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.genai.utils import (
ContentCapturingMode,
get_content_capturing_mode,
is_experimental_mode,
)
from opentelemetry.util.types import AttributeValue

from .types import Error, InputMessage, LLMInvocation, OutputMessage


@dataclass
class _SpanState:
    # The live span for one invocation; SpanGenerator keys these by run_id.
    span: Span
    # run_ids of child invocations; their spans are ended when this one ends.
    children: List[UUID] = field(default_factory=list)


def _apply_common_span_attributes(
    span: Span, invocation: LLMInvocation
) -> None:
    """Apply span attributes shared by the finish() and error() paths.

    Reads request/response metadata and token usage out of
    ``invocation.attributes`` and records them on ``span`` following the
    GenAI semantic conventions. Missing values are simply skipped.
    """
    request_model = invocation.attributes.get("request_model")
    provider = invocation.attributes.get("provider")

    _set_initial_span_attributes(span, request_model, provider)

    finish_reasons = _collect_finish_reasons(invocation.chat_generations)
    if finish_reasons:
        span.set_attribute(
            GenAI.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons
        )

    response_model = invocation.attributes.get("response_model_name")
    response_id = invocation.attributes.get("response_id")
    prompt_tokens = invocation.attributes.get("input_tokens")
    completion_tokens = invocation.attributes.get("output_tokens")
    _set_response_and_usage_attributes(
        span,
        response_model,
        response_id,
        prompt_tokens,
        completion_tokens,
    )


def _set_initial_span_attributes(
    span: Span,
    request_model: Optional[str],
    provider: Optional[str],
) -> None:
    """Set operation name, request model, and provider name on ``span``.

    The operation name (``chat``) is always recorded; the model and
    provider are only recorded when known.
    """
    span.set_attribute(
        GenAI.GEN_AI_OPERATION_NAME, GenAI.GenAiOperationNameValues.CHAT.value
    )
    if request_model:
        span.set_attribute(GenAI.GEN_AI_REQUEST_MODEL, request_model)
    if provider is not None:
        # TODO: clean provider name to match GenAiProviderNameValues?
        span.set_attribute(GenAI.GEN_AI_PROVIDER_NAME, provider)


def _set_response_and_usage_attributes(
    span: Span,
    response_model: Optional[str],
    response_id: Optional[str],
    prompt_tokens: Optional[AttributeValue],
    completion_tokens: Optional[AttributeValue],
) -> None:
    """Record response identity and token-usage attributes when present.

    Token counts are only recorded when they are plain numbers; any other
    attribute value type is silently skipped.
    """
    for attr_key, value in (
        (GenAI.GEN_AI_RESPONSE_MODEL, response_model),
        (GenAI.GEN_AI_RESPONSE_ID, response_id),
    ):
        if value is not None:
            span.set_attribute(attr_key, value)
    for attr_key, tokens in (
        (GenAI.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens),
        (GenAI.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens),
    ):
        if isinstance(tokens, (int, float)):
            span.set_attribute(attr_key, tokens)


def _collect_finish_reasons(generations: List[OutputMessage]) -> List[str]:
    """Return the finish reason of each generation, preserving order."""
    return [generation.finish_reason for generation in generations]


def _maybe_set_span_messages(
    span: Span,
    input_messages: List[InputMessage],
    output_messages: List[OutputMessage],
) -> None:
    """Serialize input/output messages onto ``span`` when capture allows it.

    Messages are recorded only in experimental mode with a content
    capturing mode that includes spans; otherwise this is a no-op.
    """
    if not is_experimental_mode():
        return
    if get_content_capturing_mode() not in (
        ContentCapturingMode.SPAN_ONLY,
        ContentCapturingMode.SPAN_AND_EVENT,
    ):
        return

    serialized_inputs: List[Dict[str, Any]] = [
        asdict(msg) for msg in input_messages
    ]
    if serialized_inputs:
        span.set_attribute(
            "gen_ai.input.messages", json.dumps(serialized_inputs)
        )

    serialized_outputs: List[Dict[str, Any]] = [
        asdict(msg) for msg in output_messages
    ]
    if serialized_outputs:
        span.set_attribute(
            "gen_ai.output.messages", json.dumps(serialized_outputs)
        )


def _apply_finish_attributes(span: Span, invocation: LLMInvocation) -> None:
    """Apply attributes/messages common to finish() paths.

    Combines the shared request/response attributes with the (optional,
    capture-mode-gated) recording of input/output message content.
    """
    _apply_common_span_attributes(span, invocation)
    _maybe_set_span_messages(
        span, invocation.messages, invocation.chat_generations
    )


def _apply_error_attributes(span: Span, error: Error) -> None:
    """Mark ``span`` as failed and record the error type.

    The error status is always set; the ``error.type`` attribute is only
    added while the span is still recording.
    """
    span.set_status(Status(StatusCode.ERROR, error.message))
    if not span.is_recording():
        return
    span.set_attribute(ErrorAttributes.ERROR_TYPE, error.type.__qualname__)


class BaseTelemetryGenerator:
    """
    Abstract base for emitters mapping GenAI types -> OpenTelemetry.

    Subclasses (e.g. ``SpanGenerator``) implement the three lifecycle
    hooks below; each receives the ``LLMInvocation`` being tracked.
    """

    def start(self, invocation: LLMInvocation) -> None:
        """Begin telemetry for *invocation* (e.g. open a span)."""
        raise NotImplementedError

    def finish(self, invocation: LLMInvocation) -> None:
        """Finalize telemetry for a successful *invocation*."""
        raise NotImplementedError

    def error(self, error: Error, invocation: LLMInvocation) -> None:
        """Finalize telemetry for a failed *invocation* with *error*."""
        raise NotImplementedError


class SpanGenerator(BaseTelemetryGenerator):
    """
    Generates only spans.

    Tracks in-flight spans keyed by invocation ``run_id`` so that
    frameworks with parent/child invocations (e.g. langchain) produce
    properly nested spans. Single-call instrumentations (no parent_run_id)
    simply get one top-level span per invocation.
    """

    def __init__(
        self,
        tracer: Optional[Tracer] = None,
    ):
        self._tracer: Tracer = tracer or trace.get_tracer(__name__)

        # Map from run_id -> _SpanState, to keep track of spans and
        # parent/child relationships.
        self.spans: Dict[UUID, _SpanState] = {}

    def _start_span(
        self,
        name: str,
        kind: SpanKind,
        parent_run_id: Optional[UUID] = None,
    ) -> Span:
        """Start a span, parented to the span of ``parent_run_id`` if known."""
        parent_state = (
            self.spans.get(parent_run_id)
            if parent_run_id is not None
            else None
        )
        if parent_state is not None:
            ctx = set_span_in_context(parent_state.span)
            return self._tracer.start_span(name=name, kind=kind, context=ctx)
        # Top-level invocation, or the parent span is unknown: start the
        # span against the current context.
        return self._tracer.start_span(name=name, kind=kind)

    def _end_span(self, run_id: UUID):
        """End the span for ``run_id`` and any still-open child spans."""
        state = self.spans[run_id]
        for child_id in state.children:
            # Pop (rather than get) so children that were never explicitly
            # finished do not accumulate in ``self.spans``.
            child_state = self.spans.pop(child_id, None)
            if child_state:
                child_state.span.end()
        state.span.end()
        del self.spans[run_id]

    def start(self, invocation: LLMInvocation):
        """Create/register the span; keep it active but do not end it here."""
        with self._start_span_for_invocation(invocation):
            pass

    @contextmanager
    def _start_span_for_invocation(self, invocation: LLMInvocation):
        """Create/register a span for the invocation and yield it.

        The span is not ended automatically on exiting the context; callers
        must finalize via ``_end_span``.
        """
        # Establish parent/child relationship if a parent span exists.
        parent_state = (
            self.spans.get(invocation.parent_run_id)
            if invocation.parent_run_id is not None
            else None
        )
        if parent_state is not None:
            parent_state.children.append(invocation.run_id)
        span = self._start_span(
            name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}",
            kind=SpanKind.CLIENT,
            parent_run_id=invocation.parent_run_id,
        )
        with use_span(span, end_on_exit=False) as span:
            self.spans[invocation.run_id] = _SpanState(span=span)
            yield span

    def finish(self, invocation: LLMInvocation):
        """Apply finish attributes to the invocation's span and end it.

        If ``start`` was never called for this invocation, a span is
        created on the fly so the telemetry is not lost.
        """
        state = self.spans.get(invocation.run_id)
        if state is None:
            with self._start_span_for_invocation(invocation) as span:
                _apply_finish_attributes(span, invocation)
            self._end_span(invocation.run_id)
            return

        _apply_finish_attributes(state.span, invocation)
        self._end_span(invocation.run_id)

    def error(self, error: Error, invocation: LLMInvocation):
        """Record ``error`` on the invocation's span and end it.

        As with ``finish``, a span is created on demand when ``start``
        was not previously called.
        """
        state = self.spans.get(invocation.run_id)
        if state is None:
            with self._start_span_for_invocation(invocation) as span:
                _apply_error_attributes(span, error)
            self._end_span(invocation.run_id)
            return

        _apply_error_attributes(state.span, error)
        self._end_span(invocation.run_id)
Loading