Skip to content

Commit 2756c1e

Browse files
botocore: add basic handling for bedrock invoke.model (#3200)
* Add basic handling for invoke.model * Add changelog and please pylint * Record converse cassettes against us-east-1 * Avoid double copy of streaming body --------- Co-authored-by: Adrian Cole <[email protected]>
1 parent ec3c51d commit 2756c1e

File tree

12 files changed

+664
-157
lines changed

12 files changed

+664
-157
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4343
([#3186](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3186))
4444
- `opentelemetry-opentelemetry-botocore` Add basic support for GenAI attributes for AWS Bedrock Converse API
4545
([#3161](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3161))
46+
- `opentelemetry-instrumentation-botocore` Add basic support for GenAI attributes for AWS Bedrock InvokeModel API
47+
([#3200](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3200))
4648

4749
### Fixed
4850

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Minimal example: call Amazon Bedrock InvokeModel and print the completion."""

import json
import os


import boto3


def main():
    """Send a single Titan text-generation request and print the output text."""
    # Region and credentials are resolved from the standard AWS environment.
    client = boto3.client("bedrock-runtime")

    model_id = os.getenv("CHAT_MODEL", "amazon.titan-text-lite-v1")
    payload = {
        "inputText": "Write a short poem on OpenTelemetry.",
        "textGenerationConfig": {},
    }
    response = client.invoke_model(modelId=model_id, body=json.dumps(payload))

    # The response body is a StreamingBody; read and decode it once.
    result = json.loads(response["body"].read().decode("utf-8"))
    print(result["results"][0]["outputText"])


if __name__ == "__main__":
    main()

instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py

Lines changed: 189 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,13 @@
1818

1919
from __future__ import annotations
2020

21+
import io
22+
import json
2123
import logging
2224
from typing import Any
2325

26+
from botocore.response import StreamingBody
27+
2428
from opentelemetry.instrumentation.botocore.extensions.types import (
2529
_AttributeMapT,
2630
_AwsSdkExtension,
@@ -58,7 +62,7 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
5862
Amazon Bedrock Runtime</a>.
5963
"""
6064

61-
_HANDLED_OPERATIONS = {"Converse"}
65+
_HANDLED_OPERATIONS = {"Converse", "InvokeModel"}
6266

6367
def extract_attributes(self, attributes: _AttributeMapT):
6468
if self._call_context.operation not in self._HANDLED_OPERATIONS:
@@ -73,6 +77,7 @@ def extract_attributes(self, attributes: _AttributeMapT):
7377
GenAiOperationNameValues.CHAT.value
7478
)
7579

80+
# Converse
7681
if inference_config := self._call_context.params.get(
7782
"inferenceConfig"
7883
):
@@ -97,6 +102,84 @@ def extract_attributes(self, attributes: _AttributeMapT):
97102
inference_config.get("stopSequences"),
98103
)
99104

105+
# InvokeModel
106+
# Get the request body if it exists
107+
body = self._call_context.params.get("body")
108+
if body:
109+
try:
110+
request_body = json.loads(body)
111+
112+
if "amazon.titan" in model_id:
113+
# titan interface is a text completion one
114+
attributes[GEN_AI_OPERATION_NAME] = (
115+
GenAiOperationNameValues.TEXT_COMPLETION.value
116+
)
117+
self._extract_titan_attributes(
118+
attributes, request_body
119+
)
120+
elif "amazon.nova" in model_id:
121+
self._extract_nova_attributes(attributes, request_body)
122+
elif "anthropic.claude" in model_id:
123+
self._extract_claude_attributes(
124+
attributes, request_body
125+
)
126+
except json.JSONDecodeError:
127+
_logger.debug("Error: Unable to parse the body as JSON")
128+
129+
def _extract_titan_attributes(self, attributes, request_body):
    """Map Titan ``textGenerationConfig`` request fields onto GenAI attributes.

    Only keys present (and non-None) in the config are recorded.
    """
    config = request_body.get("textGenerationConfig", {})
    # Titan nests its generation settings under textGenerationConfig.
    for attr_key, config_key in (
        (GEN_AI_REQUEST_TEMPERATURE, "temperature"),
        (GEN_AI_REQUEST_TOP_P, "topP"),
        (GEN_AI_REQUEST_MAX_TOKENS, "maxTokenCount"),
        (GEN_AI_REQUEST_STOP_SEQUENCES, "stopSequences"),
    ):
        self._set_if_not_none(attributes, attr_key, config.get(config_key))
145+
146+
def _extract_nova_attributes(self, attributes, request_body):
    """Map Nova ``inferenceConfig`` request fields onto GenAI attributes.

    Note that Nova uses snake_case ``max_new_tokens`` for the token limit
    while the other keys are camelCase.
    """
    config = request_body.get("inferenceConfig", {})
    for attr_key, config_key in (
        (GEN_AI_REQUEST_TEMPERATURE, "temperature"),
        (GEN_AI_REQUEST_TOP_P, "topP"),
        (GEN_AI_REQUEST_MAX_TOKENS, "max_new_tokens"),
        (GEN_AI_REQUEST_STOP_SEQUENCES, "stopSequences"),
    ):
        self._set_if_not_none(attributes, attr_key, config.get(config_key))
)
162+
163+
def _extract_claude_attributes(self, attributes, request_body):
    """Map Anthropic Claude request fields onto GenAI attributes.

    Claude places its parameters at the top level of the request body,
    using snake_case keys.
    """
    for attr_key, body_key in (
        (GEN_AI_REQUEST_MAX_TOKENS, "max_tokens"),
        (GEN_AI_REQUEST_TEMPERATURE, "temperature"),
        (GEN_AI_REQUEST_TOP_P, "top_p"),
        (GEN_AI_REQUEST_STOP_SEQUENCES, "stop_sequences"),
    ):
        self._set_if_not_none(attributes, attr_key, request_body.get(body_key))
182+
100183
@staticmethod
101184
def _set_if_not_none(attributes, key, value):
102185
if value is not None:
@@ -115,13 +198,8 @@ def before_service_call(self, span: Span):
115198
if operation_name and request_model:
116199
span.update_name(f"{operation_name} {request_model}")
117200

118-
def on_success(self, span: Span, result: dict[str, Any]):
119-
if self._call_context.operation not in self._HANDLED_OPERATIONS:
120-
return
121-
122-
if not span.is_recording():
123-
return
124-
201+
# pylint: disable=no-self-use
202+
def _converse_on_success(self, span: Span, result: dict[str, Any]):
125203
if usage := result.get("usage"):
126204
if input_tokens := usage.get("inputTokens"):
127205
span.set_attribute(
@@ -140,6 +218,109 @@ def on_success(self, span: Span, result: dict[str, Any]):
140218
[stop_reason],
141219
)
142220

221+
def _invoke_model_on_success(
    self, span: Span, result: dict[str, Any], model_id: str
):
    """Parse an InvokeModel response body and record GenAI response attributes.

    Reading the body consumes the botocore StreamingBody, so a fresh
    StreamingBody wrapping the same bytes is put back into ``result`` for
    the application to consume. Parse/processing errors are logged at
    debug level and never propagate to the instrumented call.
    """
    streaming_body = None
    try:
        streaming_body = result["body"]
        raw_bytes = streaming_body.read()

        # Replenish stream for downstream application use
        result["body"] = StreamingBody(io.BytesIO(raw_bytes), len(raw_bytes))

        payload = json.loads(raw_bytes.decode("utf-8"))
        if "amazon.titan" in model_id:
            self._handle_amazon_titan_response(span, payload)
        elif "amazon.nova" in model_id:
            self._handle_amazon_nova_response(span, payload)
        elif "anthropic.claude" in model_id:
            self._handle_anthropic_claude_response(span, payload)

    except json.JSONDecodeError:
        _logger.debug("Error: Unable to parse the response body as JSON")
    except Exception as exc:  # pylint: disable=broad-exception-caught
        _logger.debug("Error processing response: %s", exc)
    finally:
        # Close the original (now-consumed) stream; the caller holds the copy.
        if streaming_body is not None:
            streaming_body.close()
248+
249+
def on_success(self, span: Span, result: dict[str, Any]):
    """Record GenAI response attributes for handled Bedrock operations."""
    if self._call_context.operation not in self._HANDLED_OPERATIONS:
        return

    if not span.is_recording():
        return

    # Converse responses carry usage/stopReason directly in the result dict.
    self._converse_on_success(span, result)

    model_id = self._call_context.params.get(_MODEL_ID_KEY)
    if not model_id:
        return

    # InvokeModel responses wrap the payload in a StreamingBody.
    body = result.get("body")
    if isinstance(body, StreamingBody):
        self._invoke_model_on_success(span, result, model_id)
266+
267+
# pylint: disable=no-self-use
def _handle_amazon_titan_response(
    self, span: Span, response_body: dict[str, Any]
):
    """Record token usage and finish reason from a Titan response body."""
    if "inputTextTokenCount" in response_body:
        span.set_attribute(
            GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"]
        )
    # Titan returns a list of results; only the first is inspected.
    results = response_body.get("results")
    if results:
        first = results[0]
        if "tokenCount" in first:
            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, first["tokenCount"])
        if "completionReason" in first:
            span.set_attribute(
                GEN_AI_RESPONSE_FINISH_REASONS,
                [first["completionReason"]],
            )
286+
287+
# pylint: disable=no-self-use
def _handle_amazon_nova_response(
    self, span: Span, response_body: dict[str, Any]
):
    """Record token usage and finish reason from an Amazon Nova response body."""
    if "usage" in response_body:
        usage = response_body["usage"]
        for attr_key, usage_key in (
            (GEN_AI_USAGE_INPUT_TOKENS, "inputTokens"),
            (GEN_AI_USAGE_OUTPUT_TOKENS, "outputTokens"),
        ):
            if usage_key in usage:
                span.set_attribute(attr_key, usage[usage_key])
    if "stopReason" in response_body:
        span.set_attribute(
            GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]]
        )
)
305+
306+
# pylint: disable=no-self-use
def _handle_anthropic_claude_response(
    self, span: Span, response_body: dict[str, Any]
):
    """Record token usage and finish reason from a Claude response body."""
    usage = response_body.get("usage")
    if usage:
        if "input_tokens" in usage:
            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"])
        if "output_tokens" in usage:
            span.set_attribute(
                GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"]
            )
    if "stop_reason" in response_body:
        span.set_attribute(
            GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]
        )
323+
143324
def on_error(self, span: Span, exception: _BotoClientErrorT):
144325
if self._call_context.operation not in self._HANDLED_OPERATIONS:
145326
return

instrumentation/opentelemetry-instrumentation-botocore/tests/bedrock_utils.py

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,83 @@
1414

1515
from __future__ import annotations
1616

17+
import json
1718
from typing import Any
1819

20+
from botocore.response import StreamingBody
21+
1922
from opentelemetry.sdk.trace import ReadableSpan
2023
from opentelemetry.semconv._incubating.attributes import (
2124
gen_ai_attributes as GenAIAttributes,
2225
)
2326

2427

28+
# pylint: disable=too-many-branches, too-many-locals
def assert_completion_attributes_from_streaming_body(
    span: ReadableSpan,
    request_model: str,
    response: dict[str, Any] | None,
    operation_name: str = "chat",
    request_top_p: int | None = None,
    request_temperature: int | None = None,
    request_max_tokens: int | None = None,
    request_stop_sequences: list[str] | None = None,
):
    """Assert GenAI span attributes for an InvokeModel call.

    ``response`` is the full botocore response dict whose ``"body"`` entry is a
    StreamingBody (the previous ``StreamingBody | None`` annotation was wrong —
    the function indexes ``response["body"]``), or None when no response
    attributes are expected. The body is read here, so it is consumed.
    """
    input_tokens = None
    output_tokens = None
    finish_reason = None
    if response is not None:
        original_body = response["body"]
        body_content = original_body.read()
        response = json.loads(body_content.decode("utf-8"))
        assert response

        if "amazon.titan" in request_model:
            input_tokens = response.get("inputTextTokenCount")
            results = response.get("results")
            if results:
                first_result = results[0]
                output_tokens = first_result.get("tokenCount")
                # Guard the lookup: a Titan result without completionReason
                # previously raised KeyError here.
                if "completionReason" in first_result:
                    finish_reason = (first_result["completionReason"],)
        elif "amazon.nova" in request_model:
            if usage := response.get("usage"):
                input_tokens = usage["inputTokens"]
                output_tokens = usage["outputTokens"]
            if "stopReason" in response:
                finish_reason = (response["stopReason"],)
        elif "anthropic.claude" in request_model:
            if usage := response.get("usage"):
                input_tokens = usage["input_tokens"]
                output_tokens = usage["output_tokens"]
            if "stop_reason" in response:
                finish_reason = (response["stop_reason"],)

    return assert_all_attributes(
        span,
        request_model,
        input_tokens,
        output_tokens,
        finish_reason,
        operation_name,
        request_top_p,
        request_temperature,
        request_max_tokens,
        tuple(request_stop_sequences)
        if request_stop_sequences is not None
        else request_stop_sequences,
    )
92+
93+
2594
def assert_completion_attributes(
2695
span: ReadableSpan,
2796
request_model: str,
@@ -38,7 +107,7 @@ def assert_completion_attributes(
38107
else:
39108
input_tokens, output_tokens = None, None
40109

41-
if response:
110+
if response and "stopReason" in response:
42111
finish_reason = (response["stopReason"],)
43112
else:
44113
finish_reason = None
@@ -60,10 +129,10 @@ def assert_completion_attributes(
60129

61130

62131
def assert_equal_or_not_present(value, attribute_name, span):
63-
if value:
132+
if value is not None:
64133
assert value == span.attributes[attribute_name]
65134
else:
66-
assert attribute_name not in span.attributes
135+
assert attribute_name not in span.attributes, attribute_name
67136

68137

69138
def assert_all_attributes(

0 commit comments

Comments
 (0)