
Commit 42b2917

botocore: add extension for bedrock runtime api
1 parent 5488287 commit 42b2917

File tree

2 files changed, +385 −0 lines
  • instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions


instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -32,6 +32,7 @@ def loader():
 
 
 _KNOWN_EXTENSIONS = {
+    "bedrock-runtime": _lazy_load(".bedrock", "_BedrockRuntimeExtension"),
     "dynamodb": _lazy_load(".dynamodb", "_DynamoDbExtension"),
     "lambda": _lazy_load(".lmbd", "_LambdaExtension"),
     "sns": _lazy_load(".sns", "_SnsExtension"),
instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py

Lines changed: 384 additions & 0 deletions

@@ -0,0 +1,384 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Includes work from:
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import io
import json
import logging
import math
from typing import Any

from botocore.response import StreamingBody

from opentelemetry.instrumentation.botocore.extensions.types import (
    _AttributeMapT,
    _AwsSdkExtension,
)
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
    GEN_AI_OPERATION_NAME,
    GEN_AI_REQUEST_MAX_TOKENS,
    GEN_AI_REQUEST_MODEL,
    GEN_AI_REQUEST_TEMPERATURE,
    GEN_AI_REQUEST_TOP_P,
    GEN_AI_RESPONSE_FINISH_REASONS,
    GEN_AI_SYSTEM,
    GEN_AI_USAGE_INPUT_TOKENS,
    GEN_AI_USAGE_OUTPUT_TOKENS,
    GenAiOperationNameValues,
    GenAiSystemValues,
)
from opentelemetry.trace.span import Span

_logger = logging.getLogger(__name__)

_MODEL_ID_KEY: str = "modelId"

class _BedrockRuntimeExtension(_AwsSdkExtension):
    """
    This class is an extension for <a
    href="https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html">
    Amazon Bedrock Runtime</a>.
    """

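    # Note (explanatory, not part of the original commit): the botocore
    # instrumentor drives these hooks, calling extract_attributes() before
    # the API call to seed the span and on_success() with the raw result
    # after the call returns.
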
    def extract_attributes(self, attributes: _AttributeMapT):
        attributes[GEN_AI_SYSTEM] = GenAiSystemValues.AWS_BEDROCK.value
        attributes[GEN_AI_OPERATION_NAME] = GenAiOperationNameValues.CHAT.value

        model_id = self._call_context.params.get(_MODEL_ID_KEY)
        if model_id:
            attributes[GEN_AI_REQUEST_MODEL] = model_id

            # Get the request body if it exists
            body = self._call_context.params.get("body")
            if body:
                try:
                    request_body = json.loads(body)

                    if "amazon.titan" in model_id:
                        self._extract_titan_attributes(
                            attributes, request_body
                        )
                    elif "amazon.nova" in model_id:
                        self._extract_nova_attributes(attributes, request_body)
                    elif "anthropic.claude" in model_id:
                        self._extract_claude_attributes(
                            attributes, request_body
                        )
                    elif "meta.llama" in model_id:
                        self._extract_llama_attributes(
                            attributes, request_body
                        )
                    elif "cohere.command" in model_id:
                        self._extract_cohere_attributes(
                            attributes, request_body
                        )
                    elif "ai21.jamba" in model_id:
                        self._extract_ai21_attributes(attributes, request_body)
                    elif "mistral" in model_id:
                        self._extract_mistral_attributes(
                            attributes, request_body
                        )
                except json.JSONDecodeError:
                    _logger.debug("Error: Unable to parse the body as JSON")

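    # Illustration (not part of the original commit): a Titan request body
    #   {"inputText": "Hi", "textGenerationConfig":
    #       {"temperature": 0.7, "topP": 0.9, "maxTokenCount": 512}}
    # maps to gen_ai.request.temperature=0.7, gen_ai.request.top_p=0.9 and
    # gen_ai.request.max_tokens=512 via _extract_titan_attributes below.
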
    def _extract_titan_attributes(self, attributes, request_body):
        config = request_body.get("textGenerationConfig", {})
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature")
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TOP_P, config.get("topP")
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount")
        )

    def _extract_nova_attributes(self, attributes, request_body):
        config = request_body.get("inferenceConfig", {})
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature")
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TOP_P, config.get("top_p")
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens")
        )

    def _extract_claude_attributes(self, attributes, request_body):
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_MAX_TOKENS,
            request_body.get("max_tokens"),
        )
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_TEMPERATURE,
            request_body.get("temperature"),
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")
        )

    def _extract_cohere_attributes(self, attributes, request_body):
        prompt = request_body.get("message")
        if prompt:
            # Input tokens: approximate from the prompt length (~6 chars/token)
            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_MAX_TOKENS,
            request_body.get("max_tokens"),
        )
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_TEMPERATURE,
            request_body.get("temperature"),
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p")
        )

    def _extract_ai21_attributes(self, attributes, request_body):
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_MAX_TOKENS,
            request_body.get("max_tokens"),
        )
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_TEMPERATURE,
            request_body.get("temperature"),
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")
        )

    def _extract_llama_attributes(self, attributes, request_body):
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_MAX_TOKENS,
            request_body.get("max_gen_len"),
        )
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_TEMPERATURE,
            request_body.get("temperature"),
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")
        )

    def _extract_mistral_attributes(self, attributes, request_body):
        prompt = request_body.get("prompt")
        if prompt:
            # Input tokens: approximate from the prompt length (~6 chars/token)
            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_MAX_TOKENS,
            request_body.get("max_tokens"),
        )
        self._set_if_not_none(
            attributes,
            GEN_AI_REQUEST_TEMPERATURE,
            request_body.get("temperature"),
        )
        self._set_if_not_none(
            attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")
        )

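    # Illustration (not part of the original commit): for a Cohere body
    #   {"message": "Tell me a story", "max_tokens": 100, "p": 0.9}
    # _extract_cohere_attributes above records
    # gen_ai.usage.input_tokens = ceil(15 / 6) = 3, using the rough
    # six-characters-per-token heuristic.
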
    @staticmethod
    def _set_if_not_none(attributes, key, value):
        if value is not None:
            attributes[key] = value

    # pylint: disable=too-many-branches
    def on_success(self, span: Span, result: dict[str, Any]):
        model_id = self._call_context.params.get(_MODEL_ID_KEY)

        if not model_id:
            return

        if "body" in result and isinstance(result["body"], StreamingBody):
            original_body = None
            try:
                original_body = result["body"]
                body_content = original_body.read()

                # Use one stream for telemetry
                stream = io.BytesIO(body_content)
                telemetry_content = stream.read()
                response_body = json.loads(telemetry_content.decode("utf-8"))
                if "amazon.titan" in model_id:
                    self._handle_amazon_titan_response(span, response_body)
                elif "amazon.nova" in model_id:
                    self._handle_amazon_nova_response(span, response_body)
                elif "anthropic.claude" in model_id:
                    self._handle_anthropic_claude_response(span, response_body)
                elif "meta.llama" in model_id:
                    self._handle_meta_llama_response(span, response_body)
                elif "cohere.command" in model_id:
                    self._handle_cohere_command_response(span, response_body)
                elif "ai21.jamba" in model_id:
                    self._handle_ai21_jamba_response(span, response_body)
                elif "mistral" in model_id:
                    self._handle_mistral_mistral_response(span, response_body)
                # Replenish stream for downstream application use
                new_stream = io.BytesIO(body_content)
                result["body"] = StreamingBody(new_stream, len(body_content))

            except json.JSONDecodeError:
                _logger.debug(
                    "Error: Unable to parse the response body as JSON"
                )
            except Exception as e:  # pylint: disable=broad-exception-caught, invalid-name
                _logger.debug("Error processing response: %s", e)
            finally:
                if original_body is not None:
                    original_body.close()

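    # Illustration (not part of the original commit): a Titan response body
    #   {"inputTextTokenCount": 5,
    #    "results": [{"tokenCount": 42, "completionReason": "FINISH"}]}
    # yields gen_ai.usage.input_tokens=5, gen_ai.usage.output_tokens=42 and
    # gen_ai.response.finish_reasons=["FINISH"] via the handler below.
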
    # pylint: disable=no-self-use
    def _handle_amazon_titan_response(
        self, span: Span, response_body: dict[str, Any]
    ):
        if "inputTextTokenCount" in response_body:
            span.set_attribute(
                GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"]
            )
        if "results" in response_body and response_body["results"]:
            result = response_body["results"][0]
            if "tokenCount" in result:
                span.set_attribute(
                    GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"]
                )
            if "completionReason" in result:
                span.set_attribute(
                    GEN_AI_RESPONSE_FINISH_REASONS,
                    [result["completionReason"]],
                )

    # pylint: disable=no-self-use
    def _handle_amazon_nova_response(
        self, span: Span, response_body: dict[str, Any]
    ):
        if "usage" in response_body:
            usage = response_body["usage"]
            if "inputTokens" in usage:
                span.set_attribute(
                    GEN_AI_USAGE_INPUT_TOKENS, usage["inputTokens"]
                )
            if "outputTokens" in usage:
                span.set_attribute(
                    GEN_AI_USAGE_OUTPUT_TOKENS, usage["outputTokens"]
                )
        if "stopReason" in response_body:
            span.set_attribute(
                GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]]
            )

    # pylint: disable=no-self-use
    def _handle_anthropic_claude_response(
        self, span: Span, response_body: dict[str, Any]
    ):
        if "usage" in response_body:
            usage = response_body["usage"]
            if "input_tokens" in usage:
                span.set_attribute(
                    GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"]
                )
            if "output_tokens" in usage:
                span.set_attribute(
                    GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"]
                )
        if "stop_reason" in response_body:
            span.set_attribute(
                GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]
            )

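    # Illustration (not part of the original commit): a Claude response like
    #   {"usage": {"input_tokens": 12, "output_tokens": 34},
    #    "stop_reason": "end_turn"}
    # yields gen_ai.usage.input_tokens=12, gen_ai.usage.output_tokens=34 and
    # gen_ai.response.finish_reasons=["end_turn"] via the handler above.
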
    # pylint: disable=no-self-use
    def _handle_cohere_command_response(
        self, span: Span, response_body: dict[str, Any]
    ):
        # Output tokens: Approximate from the response text
        if "text" in response_body:
            span.set_attribute(
                GEN_AI_USAGE_OUTPUT_TOKENS,
                math.ceil(len(response_body["text"]) / 6),
            )
        if "finish_reason" in response_body:
            span.set_attribute(
                GEN_AI_RESPONSE_FINISH_REASONS,
                [response_body["finish_reason"]],
            )

    # pylint: disable=no-self-use
    def _handle_ai21_jamba_response(
        self, span: Span, response_body: dict[str, Any]
    ):
        if "usage" in response_body:
            usage = response_body["usage"]
            if "prompt_tokens" in usage:
                span.set_attribute(
                    GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"]
                )
            if "completion_tokens" in usage:
                span.set_attribute(
                    GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"]
                )
        if "choices" in response_body:
            choices = response_body["choices"][0]
            if "finish_reason" in choices:
                span.set_attribute(
                    GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]]
                )

    # pylint: disable=no-self-use
    def _handle_meta_llama_response(
        self, span: Span, response_body: dict[str, Any]
    ):
        if "prompt_token_count" in response_body:
            span.set_attribute(
                GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"]
            )
        if "generation_token_count" in response_body:
            span.set_attribute(
                GEN_AI_USAGE_OUTPUT_TOKENS,
                response_body["generation_token_count"],
            )
        if "stop_reason" in response_body:
            span.set_attribute(
                GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]
            )

    # pylint: disable=no-self-use
    def _handle_mistral_mistral_response(
        self, span: Span, response_body: dict[str, Any]
    ):
        if "outputs" in response_body:
            outputs = response_body["outputs"][0]
            if "text" in outputs:
                span.set_attribute(
                    GEN_AI_USAGE_OUTPUT_TOKENS,
                    math.ceil(len(outputs["text"]) / 6),
                )
            if "stop_reason" in outputs:
                span.set_attribute(
                    GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]]
                )
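
For context, a hedged end-to-end sketch (not part of this commit) of how the extension is exercised; the model ID, region, and request body below are illustrative:

import json

import boto3
from opentelemetry.instrumentation.botocore import BotocoreInstrumentor

# Patches botocore clients; the bedrock-runtime extension is looked up
# via _KNOWN_EXTENSIONS when the client is created.
BotocoreInstrumentor().instrument()

client = boto3.client("bedrock-runtime", region_name="us-east-1")
response = client.invoke_model(
    modelId="amazon.titan-text-express-v1",
    body=json.dumps(
        {
            "inputText": "Hello!",
            "textGenerationConfig": {"temperature": 0.5, "maxTokenCount": 64},
        }
    ),
)

# on_success() read the StreamingBody for telemetry and replenished it,
# so the application can still consume the response as usual.
print(json.loads(response["body"].read()))

The resulting span carries gen_ai.system, gen_ai.request.model, gen_ai.request.temperature and gen_ai.request.max_tokens from the request, plus gen_ai.usage.input_tokens, gen_ai.usage.output_tokens and gen_ai.response.finish_reasons from the response body.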
