Skip to content

Commit ebb55fc

Browse files
committed
Add assertions in tests
1 parent 58ba8aa commit ebb55fc

File tree

4 files changed

+206
-5
lines changed

4 files changed

+206
-5
lines changed

instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -446,12 +446,31 @@ def _invoke_model_on_success(
446446
if original_body is not None:
447447
original_body.close()
448448

449-
def _on_stream_error_callback(self, span: Span, exception):
449+
def _on_stream_error_callback(
450+
self,
451+
span: Span,
452+
exception,
453+
instrumentor_context: _BotocoreInstrumentorContext,
454+
):
450455
span.set_status(Status(StatusCode.ERROR, str(exception)))
451456
if span.is_recording():
452457
span.set_attribute(ERROR_TYPE, type(exception).__qualname__)
453458
span.end()
454459

460+
metrics = instrumentor_context.metrics
461+
metrics_attributes = {
462+
**self._extract_metrics_attributes(),
463+
ERROR_TYPE: type(exception).__qualname__,
464+
}
465+
if operation_duration_histogram := metrics.get(
466+
GEN_AI_CLIENT_OPERATION_DURATION
467+
):
468+
duration = max((default_timer() - self._operation_start), 0)
469+
operation_duration_histogram.record(
470+
duration,
471+
attributes=metrics_attributes,
472+
)
473+
455474
def on_success(
456475
self,
457476
span: Span,
@@ -475,7 +494,9 @@ def stream_done_callback(response):
475494
span.end()
476495

477496
def stream_error_callback(exception):
478-
self._on_stream_error_callback(span, exception)
497+
self._on_stream_error_callback(
498+
span, exception, instrumentor_context
499+
)
479500

480501
result["stream"] = ConverseStreamWrapper(
481502
result["stream"],
@@ -513,7 +534,9 @@ def invoke_model_stream_done_callback(response):
513534
span.end()
514535

515536
def invoke_model_stream_error_callback(exception):
516-
self._on_stream_error_callback(span, exception)
537+
self._on_stream_error_callback(
538+
span, exception, instrumentor_context
539+
)
517540

518541
result["body"] = InvokeModelWithResponseStreamWrapper(
519542
result["body"],
@@ -716,3 +739,17 @@ def on_error(
716739

717740
if not self.should_end_span_on_exit():
718741
span.end()
742+
743+
metrics = instrumentor_context.metrics
744+
metrics_attributes = {
745+
**self._extract_metrics_attributes(),
746+
ERROR_TYPE: type(exception).__qualname__,
747+
}
748+
if operation_duration_histogram := metrics.get(
749+
GEN_AI_CLIENT_OPERATION_DURATION
750+
):
751+
duration = max((default_timer() - self._operation_start), 0)
752+
operation_duration_histogram.record(
753+
duration,
754+
attributes=metrics_attributes,
755+
)

instrumentation/opentelemetry-instrumentation-botocore/tests/bedrock_utils.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,22 @@
1919

2020
from botocore.response import StreamingBody
2121

22+
from opentelemetry.instrumentation.botocore.extensions.bedrock import (
23+
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
24+
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
25+
)
26+
from opentelemetry.sdk.metrics._internal.point import ResourceMetrics
2227
from opentelemetry.sdk.trace import ReadableSpan
2328
from opentelemetry.semconv._incubating.attributes import (
2429
event_attributes as EventAttributes,
2530
)
2631
from opentelemetry.semconv._incubating.attributes import (
2732
gen_ai_attributes as GenAIAttributes,
2833
)
34+
from opentelemetry.semconv._incubating.metrics.gen_ai_metrics import (
35+
GEN_AI_CLIENT_OPERATION_DURATION,
36+
GEN_AI_CLIENT_TOKEN_USAGE,
37+
)
2938

3039

3140
# pylint: disable=too-many-branches, too-many-locals
@@ -259,3 +268,96 @@ def assert_message_in_logs(log, event_name, expected_content, parent_span):
259268
expected_content
260269
), dict(log.log_record.body)
261270
assert_log_parent(log, parent_span)
271+
272+
273+
def assert_all_metric_attributes(data_point, operation_name, model):
274+
assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
275+
assert (
276+
data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
277+
== operation_name
278+
)
279+
assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
280+
assert (
281+
data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
282+
== GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value
283+
)
284+
assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
285+
assert data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == model
286+
287+
288+
def assert_metrics(
289+
resource_metrics: ResourceMetrics,
290+
operation_name: str,
291+
model: str,
292+
input_tokens: float | None = None,
293+
output_tokens: float | None = None,
294+
):
295+
assert len(resource_metrics) == 1
296+
297+
metric_data = resource_metrics[0].scope_metrics[0].metrics
298+
if input_tokens is not None or output_tokens is not None:
299+
expected_metrics_data_len = 2
300+
else:
301+
expected_metrics_data_len = 1
302+
assert len(metric_data) == expected_metrics_data_len
303+
304+
duration_metric = next(
305+
(m for m in metric_data if m.name == GEN_AI_CLIENT_OPERATION_DURATION),
306+
None,
307+
)
308+
assert duration_metric is not None
309+
310+
duration_point = duration_metric.data.data_points[0]
311+
assert duration_point.sum > 0
312+
assert_all_metric_attributes(duration_point, operation_name, model)
313+
assert duration_point.explicit_bounds == tuple(
314+
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
315+
)
316+
317+
if input_tokens is not None:
318+
token_usage_metric = next(
319+
(m for m in metric_data if m.name == GEN_AI_CLIENT_TOKEN_USAGE),
320+
None,
321+
)
322+
assert token_usage_metric is not None
323+
324+
input_token_usage = next(
325+
(
326+
d
327+
for d in token_usage_metric.data.data_points
328+
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
329+
== GenAIAttributes.GenAiTokenTypeValues.INPUT.value
330+
),
331+
None,
332+
)
333+
assert input_token_usage is not None
334+
assert input_token_usage.sum == input_tokens
335+
336+
assert input_token_usage.explicit_bounds == tuple(
337+
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS
338+
)
339+
assert_all_metric_attributes(input_token_usage, operation_name, model)
340+
341+
if output_tokens is not None:
342+
token_usage_metric = next(
343+
(m for m in metric_data if m.name == GEN_AI_CLIENT_TOKEN_USAGE),
344+
None,
345+
)
346+
assert token_usage_metric is not None
347+
348+
output_token_usage = next(
349+
(
350+
d
351+
for d in token_usage_metric.data.data_points
352+
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
353+
== GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
354+
),
355+
None,
356+
)
357+
assert output_token_usage is not None
358+
assert output_token_usage.sum == output_tokens
359+
360+
assert output_token_usage.explicit_bounds == tuple(
361+
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS
362+
)
363+
assert_all_metric_attributes(output_token_usage, operation_name, model)

instrumentation/opentelemetry-instrumentation-botocore/tests/conftest.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
InMemoryLogExporter,
1818
SimpleLogRecordProcessor,
1919
)
20+
from opentelemetry.sdk.metrics import (
21+
MeterProvider,
22+
)
23+
from opentelemetry.sdk.metrics.export import (
24+
InMemoryMetricReader,
25+
)
2026
from opentelemetry.sdk.trace import TracerProvider
2127
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
2228
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
@@ -36,6 +42,12 @@ def fixture_log_exporter():
3642
yield exporter
3743

3844

45+
@pytest.fixture(scope="function", name="metric_reader")
46+
def fixture_metric_reader():
47+
reader = InMemoryMetricReader()
48+
yield reader
49+
50+
3951
@pytest.fixture(scope="function", name="tracer_provider")
4052
def fixture_tracer_provider(span_exporter):
4153
provider = TracerProvider()
@@ -52,6 +64,15 @@ def fixture_event_logger_provider(log_exporter):
5264
return event_logger_provider
5365

5466

67+
@pytest.fixture(scope="function", name="meter_provider")
68+
def fixture_meter_provider(metric_reader):
69+
meter_provider = MeterProvider(
70+
metric_readers=[metric_reader],
71+
)
72+
73+
return meter_provider
74+
75+
5576
@pytest.fixture
5677
def bedrock_runtime_client():
5778
return boto3.client("bedrock-runtime")
@@ -81,7 +102,9 @@ def vcr_config():
81102

82103

83104
@pytest.fixture(scope="function")
84-
def instrument_no_content(tracer_provider, event_logger_provider):
105+
def instrument_no_content(
106+
tracer_provider, event_logger_provider, meter_provider
107+
):
85108
os.environ.update(
86109
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"}
87110
)
@@ -90,6 +113,7 @@ def instrument_no_content(tracer_provider, event_logger_provider):
90113
instrumentor.instrument(
91114
tracer_provider=tracer_provider,
92115
event_logger_provider=event_logger_provider,
116+
meter_provider=meter_provider,
93117
)
94118

95119
yield instrumentor
@@ -98,14 +122,17 @@ def instrument_no_content(tracer_provider, event_logger_provider):
98122

99123

100124
@pytest.fixture(scope="function")
101-
def instrument_with_content(tracer_provider, event_logger_provider):
125+
def instrument_with_content(
126+
tracer_provider, event_logger_provider, meter_provider
127+
):
102128
os.environ.update(
103129
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"}
104130
)
105131
instrumentor = BotocoreInstrumentor()
106132
instrumentor.instrument(
107133
tracer_provider=tracer_provider,
108134
event_logger_provider=event_logger_provider,
135+
meter_provider=meter_provider,
109136
)
110137

111138
yield instrumentor

instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
assert_completion_attributes_from_streaming_body,
3535
assert_converse_completion_attributes,
3636
assert_message_in_logs,
37+
assert_metrics,
3738
assert_stream_completion_attributes,
3839
)
3940

@@ -51,6 +52,7 @@ def filter_message_keys(message, keys):
5152
def test_converse_with_content(
5253
span_exporter,
5354
log_exporter,
55+
metric_reader,
5456
bedrock_runtime_client,
5557
instrument_with_content,
5658
):
@@ -95,6 +97,13 @@ def test_converse_with_content(
9597
}
9698
assert_message_in_logs(logs[1], "gen_ai.choice", choice_body, span)
9799

100+
input_tokens = response["usage"]["inputTokens"]
101+
output_tokens = response["usage"]["outputTokens"]
102+
metrics = metric_reader.get_metrics_data().resource_metrics
103+
assert_metrics(
104+
metrics, "chat", llm_model_value, input_tokens, output_tokens
105+
)
106+
98107

99108
@pytest.mark.skipif(
100109
BOTO3_VERSION < (1, 35, 56), reason="Converse API not available"
@@ -103,6 +112,7 @@ def test_converse_with_content(
103112
def test_converse_with_content_different_events(
104113
span_exporter,
105114
log_exporter,
115+
metric_reader,
106116
bedrock_runtime_client,
107117
instrument_with_content,
108118
):
@@ -150,6 +160,13 @@ def test_converse_with_content_different_events(
150160
}
151161
assert_message_in_logs(logs[4], "gen_ai.choice", choice_body, span)
152162

163+
input_tokens = response["usage"]["inputTokens"]
164+
output_tokens = response["usage"]["outputTokens"]
165+
metrics = metric_reader.get_metrics_data().resource_metrics
166+
assert_metrics(
167+
metrics, "chat", llm_model_value, input_tokens, output_tokens
168+
)
169+
153170

154171
def converse_tool_call(
155172
span_exporter, log_exporter, bedrock_runtime_client, expect_content
@@ -452,6 +469,7 @@ def test_converse_tool_call_no_content(
452469
def test_converse_with_invalid_model(
453470
span_exporter,
454471
log_exporter,
472+
metric_reader,
455473
bedrock_runtime_client,
456474
instrument_with_content,
457475
):
@@ -479,6 +497,9 @@ def test_converse_with_invalid_model(
479497
user_content = filter_message_keys(messages[0], ["content"])
480498
assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span)
481499

500+
metrics = metric_reader.get_metrics_data().resource_metrics
501+
assert_metrics(metrics, "chat", llm_model_value)
502+
482503

483504
@pytest.mark.skipif(
484505
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"
@@ -487,6 +508,7 @@ def test_converse_with_invalid_model(
487508
def test_converse_stream_with_content(
488509
span_exporter,
489510
log_exporter,
511+
metric_reader,
490512
bedrock_runtime_client,
491513
instrument_with_content,
492514
):
@@ -553,6 +575,11 @@ def test_converse_stream_with_content(
553575
}
554576
assert_message_in_logs(logs[1], "gen_ai.choice", choice_body, span)
555577

578+
metrics = metric_reader.get_metrics_data().resource_metrics
579+
assert_metrics(
580+
metrics, "chat", llm_model_value, input_tokens, output_tokens
581+
)
582+
556583

557584
@pytest.mark.skipif(
558585
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"
@@ -561,6 +588,7 @@ def test_converse_stream_with_content(
561588
def test_converse_stream_with_content_different_events(
562589
span_exporter,
563590
log_exporter,
591+
metric_reader,
564592
bedrock_runtime_client,
565593
instrument_with_content,
566594
):
@@ -614,6 +642,9 @@ def test_converse_stream_with_content_different_events(
614642
}
615643
assert_message_in_logs(logs[4], "gen_ai.choice", choice_body, span)
616644

645+
metrics = metric_reader.get_metrics_data().resource_metrics
646+
assert_metrics(metrics, "chat", llm_model_value, mock.ANY, mock.ANY)
647+
617648

618649
def _rebuild_stream_message(response):
619650
message = {"content": []}
@@ -986,6 +1017,7 @@ def test_converse_stream_no_content_tool_call(
9861017
def test_converse_stream_handles_event_stream_error(
9871018
span_exporter,
9881019
log_exporter,
1020+
metric_reader,
9891021
bedrock_runtime_client,
9901022
instrument_with_content,
9911023
):
@@ -1039,6 +1071,9 @@ def test_converse_stream_handles_event_stream_error(
10391071
user_content = filter_message_keys(messages[0], ["content"])
10401072
assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span)
10411073

1074+
metrics = metric_reader.get_metrics_data().resource_metrics
1075+
assert_metrics(metrics, "chat", llm_model_value)
1076+
10421077

10431078
@pytest.mark.skipif(
10441079
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"

0 commit comments

Comments
 (0)