Skip to content

Commit 1966c7a

Browse files
committed
Add input and output events for embeddings
1 parent ced260a commit 1966c7a

File tree

3 files changed

+198
-24
lines changed

3 files changed

+198
-24
lines changed

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py

Lines changed: 45 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -442,16 +442,53 @@ def _set_embeddings_response_attributes(
442442
result.usage.total_tokens,
443443
)
444444

445-
# Emit event for embeddings if content capture is enabled
445+
# Emit events for embeddings if content capture is enabled
446446
if capture_content:
447-
input_event = Event(
448-
name="gen_ai.embeddings",
449-
attributes={
450-
GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value
451-
},
452-
body={"content": input_text, "role": "user"},
447+
from opentelemetry.semconv._incubating.attributes import (
448+
event_attributes as EventAttributes,
453449
)
454-
event_logger.emit(input_event)
450+
451+
# Emit input event
452+
input_event_attributes = {
453+
GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
454+
EventAttributes.EVENT_NAME: "gen_ai.embeddings.input",
455+
}
456+
event_logger.emit(
457+
Event(
458+
name="gen_ai.embeddings.input",
459+
attributes=input_event_attributes,
460+
body={"content": input_text, "role": "user"},
461+
)
462+
)
463+
464+
# Emit output event with embeddings data
465+
if getattr(result, "data", None) and len(result.data) > 0:
466+
embedding_data = []
467+
for item in result.data:
468+
if getattr(item, "embedding", None):
469+
embedding_data.append(
470+
{
471+
"index": item.index
472+
if hasattr(item, "index")
473+
else None,
474+
"embedding": item.embedding,
475+
"object": item.object
476+
if hasattr(item, "object")
477+
else "embedding",
478+
}
479+
)
480+
481+
output_event_attributes = {
482+
GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
483+
EventAttributes.EVENT_NAME: "gen_ai.embeddings.output",
484+
}
485+
event_logger.emit(
486+
Event(
487+
name="gen_ai.embeddings.output",
488+
attributes=output_event_attributes,
489+
body={"embeddings": embedding_data},
490+
)
491+
)
455492

456493

457494
class ToolCallBuffer:

instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_embeddings.py

Lines changed: 76 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@
2323
from opentelemetry.semconv._incubating.attributes import (
2424
error_attributes as ErrorAttributes,
2525
)
26+
from opentelemetry.semconv._incubating.attributes import (
27+
event_attributes as EventAttributes,
28+
)
2629
from opentelemetry.semconv._incubating.attributes import (
2730
gen_ai_attributes as GenAIAttributes,
2831
)
@@ -53,17 +56,48 @@ async def test_async_embeddings_with_content(
5356

5457
# Verify logs
5558
logs = log_exporter.get_finished_logs()
56-
assert len(logs) == 1 # Should contain the embeddings event
57-
58-
# Verify the content of the embeddings event
5959
assert (
60-
logs[0].log_record.attributes[GenAIAttributes.GEN_AI_SYSTEM]
61-
== GenAIAttributes.GenAiSystemValues.OPENAI.value
60+
len(logs) == 2
61+
) # Should contain both input and output embeddings events
62+
63+
# Find input and output events
64+
input_event = None
65+
output_event = None
66+
for log in logs:
67+
if (
68+
log.log_record.attributes.get(EventAttributes.EVENT_NAME)
69+
== "gen_ai.embeddings.input"
70+
):
71+
input_event = log
72+
elif (
73+
log.log_record.attributes.get(EventAttributes.EVENT_NAME)
74+
== "gen_ai.embeddings.output"
75+
):
76+
output_event = log
77+
78+
# Verify both events exist
79+
assert input_event is not None, "Input embeddings event not found"
80+
assert output_event is not None, "Output embeddings event not found"
81+
82+
# Verify input event content
83+
input_content = {"content": input_text, "role": "user"}
84+
assert_message_in_logs(
85+
input_event, "gen_ai.embeddings.input", input_content, spans[0]
6286
)
6387

64-
# Verify the input text is included in the event body
65-
assert dict(logs[0].log_record.body)["content"] == input_text
66-
assert_log_parent(logs[0], spans[0])
88+
# Verify output event content
89+
output_content = {
90+
"embeddings": [
91+
{
92+
"index": 0,
93+
"embedding": response.data[0].embedding,
94+
"object": "embedding",
95+
}
96+
]
97+
}
98+
assert_message_in_logs(
99+
output_event, "gen_ai.embeddings.output", output_content, spans[0]
100+
)
67101

68102

69103
@pytest.mark.asyncio
@@ -335,3 +369,37 @@ def assert_log_parent(log, span):
335369
assert (
336370
log.log_record.trace_flags == span.get_span_context().trace_flags
337371
)
372+
373+
374+
def assert_message_in_logs(log, event_name, expected_content, parent_span):
375+
assert log.log_record.attributes[EventAttributes.EVENT_NAME] == event_name
376+
assert (
377+
log.log_record.attributes[GenAIAttributes.GEN_AI_SYSTEM]
378+
== GenAIAttributes.GenAiSystemValues.OPENAI.value
379+
)
380+
381+
if not expected_content:
382+
assert not log.log_record.body
383+
else:
384+
assert log.log_record.body
385+
assert dict(log.log_record.body) == remove_none_values(
386+
expected_content
387+
)
388+
assert_log_parent(log, parent_span)
389+
390+
391+
def remove_none_values(body):
392+
result = {}
393+
for key, value in body.items():
394+
if value is None:
395+
continue
396+
if isinstance(value, dict):
397+
result[key] = remove_none_values(value)
398+
elif isinstance(value, list):
399+
result[key] = [
400+
remove_none_values(i) if isinstance(i, dict) else i
401+
for i in value
402+
]
403+
else:
404+
result[key] = value
405+
return result

instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_embeddings.py

Lines changed: 77 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@
2323
from opentelemetry.semconv._incubating.attributes import (
2424
error_attributes as ErrorAttributes,
2525
)
26+
from opentelemetry.semconv._incubating.attributes import (
27+
event_attributes as EventAttributes,
28+
)
2629
from opentelemetry.semconv._incubating.attributes import (
2730
gen_ai_attributes as GenAIAttributes,
2831
)
@@ -52,17 +55,49 @@ def test_embeddings_with_content(
5255

5356
# Verify logs
5457
logs = log_exporter.get_finished_logs()
55-
assert len(logs) == 1 # Should contain the embeddings event
56-
57-
# Verify the content of the embeddings event
5858
assert (
59-
logs[0].log_record.attributes[GenAIAttributes.GEN_AI_SYSTEM]
60-
== GenAIAttributes.GenAiSystemValues.OPENAI.value
59+
len(logs) == 2
60+
) # Should contain both input and output embeddings events
61+
62+
# Find input and output events
63+
input_event = None
64+
output_event = None
65+
for log in logs:
66+
if (
67+
log.log_record.attributes.get(EventAttributes.EVENT_NAME)
68+
== "gen_ai.embeddings.input"
69+
):
70+
input_event = log
71+
elif (
72+
log.log_record.attributes.get(EventAttributes.EVENT_NAME)
73+
== "gen_ai.embeddings.output"
74+
):
75+
output_event = log
76+
77+
# Verify both events exist
78+
assert input_event is not None, "Input embeddings event not found"
79+
assert output_event is not None, "Output embeddings event not found"
80+
81+
# Verify input event content
82+
input_content = {"content": input_text, "role": "user"}
83+
assert_message_in_logs(
84+
input_event, "gen_ai.embeddings.input", input_content, spans[0]
6185
)
6286

63-
# Verify the input text is included in the event body
64-
assert dict(logs[0].log_record.body)["content"] == input_text
65-
assert_log_parent(logs[0], spans[0])
87+
# Verify output event content
88+
# Expected body mirrors the first embedding returned in the response; assert_message_in_logs compares it against the emitted event body
89+
output_content = {
90+
"embeddings": [
91+
{
92+
"index": 0,
93+
"embedding": response.data[0].embedding,
94+
"object": "embedding",
95+
}
96+
]
97+
}
98+
assert_message_in_logs(
99+
output_event, "gen_ai.embeddings.output", output_content, spans[0]
100+
)
66101

67102

68103
@pytest.mark.vcr()
@@ -466,3 +501,37 @@ def assert_log_parent(log, span):
466501
assert (
467502
log.log_record.trace_flags == span.get_span_context().trace_flags
468503
)
504+
505+
506+
def assert_message_in_logs(log, event_name, expected_content, parent_span):
507+
assert log.log_record.attributes[EventAttributes.EVENT_NAME] == event_name
508+
assert (
509+
log.log_record.attributes[GenAIAttributes.GEN_AI_SYSTEM]
510+
== GenAIAttributes.GenAiSystemValues.OPENAI.value
511+
)
512+
513+
if not expected_content:
514+
assert not log.log_record.body
515+
else:
516+
assert log.log_record.body
517+
assert dict(log.log_record.body) == remove_none_values(
518+
expected_content
519+
)
520+
assert_log_parent(log, parent_span)
521+
522+
523+
def remove_none_values(body):
524+
result = {}
525+
for key, value in body.items():
526+
if value is None:
527+
continue
528+
if isinstance(value, dict):
529+
result[key] = remove_none_values(value)
530+
elif isinstance(value, list):
531+
result[key] = [
532+
remove_none_values(i) if isinstance(i, dict) else i
533+
for i in value
534+
]
535+
else:
536+
result[key] = value
537+
return result

0 commit comments

Comments
 (0)