Skip to content

Commit 3cb78c7

Browse files
authored
feat: Add time to first token attribute for opentelemetry-util-genai (#113)
1 parent c4fc439 commit 3cb78c7

25 files changed

+609
-2022
lines changed

instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/_wrapper.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from __future__ import annotations
1818

1919
import logging
20+
import timeit
2021
from functools import wraps
2122
from typing import Any, AsyncGenerator
2223

@@ -57,7 +58,13 @@ async def _wrap_streaming_response(
5758
"""Wrap streaming response to update invocation when done."""
5859
try:
5960
last_chunk = None
61+
first_token_received = False
6062
async for chunk in generator:
63+
# Record time when first token is received
64+
if not first_token_received:
65+
first_token_received = True
66+
invocation.monotonic_first_token_s = timeit.default_timer()
67+
6168
last_chunk = chunk
6269
yield chunk
6370

instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_model.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def _assert_chat_span_attributes(
3333
temperature: float = None,
3434
max_tokens: int = None,
3535
top_p: float = None,
36+
expect_time_to_first_token: bool = False,
3637
):
3738
"""Assert common chat model span attributes"""
3839
# Span name format: "chat {model}"
@@ -146,6 +147,24 @@ def _assert_chat_span_attributes(
146147
)
147148
assert span.attributes["gen_ai.request.top_p"] == top_p
148149

150+
# Assert time to first token for streaming responses (in nanoseconds)
151+
if expect_time_to_first_token:
152+
assert "gen_ai.response.time_to_first_token" in span.attributes, (
153+
"Missing gen_ai.response.time_to_first_token"
154+
)
155+
ttft_ns = span.attributes["gen_ai.response.time_to_first_token"]
156+
assert isinstance(ttft_ns, int), (
157+
f"time_to_first_token should be an integer (nanoseconds), got {type(ttft_ns)}"
158+
)
159+
assert ttft_ns > 0, (
160+
f"time_to_first_token should be positive, got {ttft_ns}"
161+
)
162+
else:
163+
# For non-streaming responses, TTFT should not be present
164+
assert "gen_ai.response.time_to_first_token" not in span.attributes, (
165+
"gen_ai.response.time_to_first_token should not be present for non-streaming"
166+
)
167+
149168

150169
@pytest.mark.vcr()
151170
def test_model_call_basic(instrument_no_content, span_exporter, request):
@@ -192,6 +211,7 @@ async def call_model():
192211
request_model="qwen-max",
193212
expect_input_messages=False, # Do not capture content by default
194213
expect_output_messages=False, # Do not capture content by default
214+
expect_time_to_first_token=True,
195215
)
196216

197217
print("✓ Model call (basic) completed successfully")
@@ -237,6 +257,7 @@ async def call_model():
237257
request_model="qwen-max",
238258
expect_input_messages=False,
239259
expect_output_messages=False,
260+
expect_time_to_first_token=True,
240261
)
241262

242263
print("✓ Model call (with messages) completed successfully")
@@ -274,6 +295,7 @@ async def test_model_call_async(instrument_no_content, span_exporter, request):
274295
request_model="qwen-max",
275296
expect_input_messages=False,
276297
expect_output_messages=False,
298+
expect_time_to_first_token=True,
277299
)
278300

279301
print("✓ Model call (async) completed successfully")
@@ -321,6 +343,7 @@ async def call_model():
321343
request_model="qwen-max",
322344
expect_input_messages=False,
323345
expect_output_messages=False,
346+
expect_time_to_first_token=True,
324347
)
325348

326349
print("✓ Model call (streaming) completed successfully")
@@ -369,6 +392,7 @@ async def call_model():
369392
request_model="qwen-max",
370393
expect_input_messages=False,
371394
expect_output_messages=False,
395+
expect_time_to_first_token=True,
372396
)
373397

374398
print("✓ Model call (with parameters) completed successfully")
@@ -417,6 +441,7 @@ async def call_model():
417441
request_model="qwen-max",
418442
expect_input_messages=True, # Content capture enabled
419443
expect_output_messages=True, # Content capture enabled
444+
expect_time_to_first_token=False,
420445
)
421446

422447
print("✓ Model call (with content capture) completed successfully")
@@ -459,6 +484,7 @@ async def call_model():
459484
request_model="qwen-max",
460485
expect_input_messages=False, # Content capture disabled
461486
expect_output_messages=False, # Content capture disabled
487+
expect_time_to_first_token=True,
462488
)
463489

464490
print("✓ Model call (no content capture) completed successfully")
@@ -505,6 +531,7 @@ async def call_model(content: str):
505531
request_model="qwen-max",
506532
expect_input_messages=False,
507533
expect_output_messages=False,
534+
expect_time_to_first_token=True,
508535
)
509536

510537
print("✓ Model call (multiple sequential) completed successfully")

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ readme = "README.rst"
1010
license = "Apache-2.0"
1111
requires-python = ">=3.9"
1212
authors = [
13-
{ name = "LoongSuite Python Agent Authors", email = "qp467389@alibaba-inc.com" },
13+
{ name = "Minghui Zhang", email = "zmh1625lumian@gmail.com" },
1414
]
1515
classifiers = [
1616
"Development Status :: 4 - Beta",

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/patch/generation.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from __future__ import annotations
1818

1919
import logging
20+
import timeit
2021

2122
from opentelemetry.util.genai.types import Error
2223

@@ -169,9 +170,15 @@ def _wrap_sync_generator(
169170
"""
170171
last_response = None
171172
accumulated_text = ""
173+
first_token_received = False
172174

173175
try:
174176
for chunk in generator:
177+
# Record time when first token is received
178+
if not first_token_received:
179+
first_token_received = True
180+
invocation.monotonic_first_token_s = timeit.default_timer()
181+
175182
last_response = chunk
176183

177184
# If incremental_output is True, accumulate text from each chunk
@@ -224,9 +231,15 @@ async def _wrap_async_generator(
224231
"""
225232
last_response = None
226233
accumulated_text = ""
234+
first_token_received = False
227235

228236
try:
229237
async for chunk in generator:
238+
# Record time when first token is received
239+
if not first_token_received:
240+
first_token_received = True
241+
invocation.monotonic_first_token_s = timeit.default_timer()
242+
230243
last_response = chunk
231244

232245
# If incremental_output is True, accumulate text from each chunk

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/patch/image_synthesis.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ def wrap_image_synthesis_async_call(
126126
try:
127127
# Create invocation object
128128
invocation = _create_invocation_from_image_synthesis(kwargs, model)
129-
invocation.attributes["gen_ai.request.async"] = True
130129

131130
# Start LLM invocation (creates span)
132131
handler.start_llm(invocation)
@@ -196,7 +195,6 @@ def wrap_image_synthesis_wait(wrapped, instance, args, kwargs, handler=None):
196195
invocation = _create_invocation_from_image_synthesis({}, "unknown")
197196
# TODO: Add semantic conventions for wait operations
198197
invocation.operation_name = "wait generate_content"
199-
invocation.attributes["gen_ai.request.async"] = True
200198
# Note: response_id will be set from response.output.task_id in _update_invocation_from_image_synthesis_response
201199
# We set task_id here as a fallback
202200
invocation.response_id = task_id

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/patch/multimodal_conversation.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from __future__ import annotations
1818

1919
import logging
20+
import timeit
2021

2122
from opentelemetry.util.genai.types import Error
2223

@@ -111,9 +112,15 @@ def _wrap_multimodal_sync_generator(
111112
"""
112113
last_response = None
113114
accumulated_text = ""
115+
first_token_received = False
114116

115117
try:
116118
for chunk in generator:
119+
# Record time when first token is received
120+
if not first_token_received:
121+
first_token_received = True
122+
invocation.monotonic_first_token_s = timeit.default_timer()
123+
117124
last_response = chunk
118125

119126
# If incremental_output is True, accumulate text

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/patch/video_synthesis.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ def wrap_video_synthesis_async_call(
128128
try:
129129
# Create invocation object
130130
invocation = _create_invocation_from_video_synthesis(kwargs, model)
131-
invocation.attributes["gen_ai.request.async"] = True
132131

133132
# Start LLM invocation (creates span)
134133
handler.start_llm(invocation)
@@ -191,7 +190,6 @@ def wrap_video_synthesis_wait(wrapped, instance, args, kwargs, handler=None):
191190
# Create invocation object
192191
invocation = _create_invocation_from_video_synthesis({}, "unknown")
193192
invocation.operation_name = "wait generate_content"
194-
invocation.attributes["gen_ai.request.async"] = True
195193
invocation.response_id = task_id
196194

197195
# Start LLM invocation (creates span)

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/utils/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def _extract_task_id(task: Any) -> Optional[str]:
110110
if task_id:
111111
return task_id
112112
except (KeyError, AttributeError) as e:
113-
logger.debug("Failed to extract task_id from task: %s", e)
113+
logger.debug("Failed to extract task_id from task parameter: %s", e)
114114

115115
return None
116116

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/utils/generation.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,10 @@ def _extract_tool_definitions(kwargs: dict) -> list[ToolDefinition]:
253253
if isinstance(plugins, list):
254254
tools = plugins
255255
except (json.JSONDecodeError, TypeError, AttributeError) as e:
256+
# If parsing fails, return empty list
256257
logger.debug(
257-
"Failed to extract tool definitions from plugins: %s", e
258+
"Failed to parse tool definitions from response: %s", e
258259
)
259-
# If parsing fails, return empty list
260260
return tool_definitions
261261

262262
# Convert tool definitions to FunctionToolDefinition objects
@@ -421,8 +421,8 @@ def _extract_output_messages(response: Any) -> List[OutputMessage]:
421421
)
422422
)
423423
except (KeyError, AttributeError) as e:
424-
logger.debug("Failed to extract output messages from response: %s", e)
425424
# If any attribute access fails, return empty list
425+
logger.debug("Failed to extract output messages from response: %s", e)
426426
return output_messages
427427

428428
return output_messages
@@ -554,7 +554,8 @@ def _update_invocation_from_response(
554554
invocation.response_model_name = response_model
555555
except (KeyError, AttributeError) as e:
556556
logger.debug(
557-
"Failed to extract response model name from response: %s", e
557+
"Failed to extract response model from Generation response: %s",
558+
e,
558559
)
559560

560561
# Extract request ID (if available)
@@ -563,7 +564,9 @@ def _update_invocation_from_response(
563564
if request_id:
564565
invocation.response_id = request_id
565566
except (KeyError, AttributeError) as e:
566-
logger.debug("Failed to extract request id from response: %s", e)
567+
logger.debug(
568+
"Failed to extract request_id from Generation response: %s", e
569+
)
567570
except (KeyError, AttributeError) as e:
568571
# If any attribute access fails, silently continue with available data
569572
logger.debug(
@@ -591,7 +594,10 @@ def _create_accumulated_response(original_response, accumulated_text):
591594
return original_response
592595
except (AttributeError, TypeError) as e:
593596
# If we can't modify, create a wrapper object
594-
logger.debug("Failed to modify output text: %s", e)
597+
logger.debug(
598+
"Failed to modify output.text directly, creating wrapper: %s",
599+
e,
600+
)
595601

596602
# Create wrapper objects with accumulated text
597603
class AccumulatedOutput:
@@ -622,5 +628,7 @@ def __init__(self, original_response, accumulated_output):
622628
return AccumulatedResponse(original_response, accumulated_output)
623629
except (KeyError, AttributeError) as e:
624630
# If modification fails, return original response
625-
logger.debug("Failed to create accumulated response: %s", e)
631+
logger.debug(
632+
"Failed to create accumulated response, returning original: %s", e
633+
)
626634
return original_response

0 commit comments

Comments
 (0)