 )
 from opentelemetry.trace.span import Span
 
+
 _AGENT_ID: str = "agentId"
 _KNOWLEDGE_BASE_ID: str = "knowledgeBaseId"
 _DATA_SOURCE_ID: str = "dataSourceId"
@@ -245,205 +246,3 @@ def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _Bo
                 AWS_BEDROCK_GUARDRAIL_ARN,
                 guardrail_arn,
             )
-
-
-class _BedrockRuntimeExtension(_AwsSdkExtension):
-    """
-    This class is an extension for <a
-    href="https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html">
-    Amazon Bedrock Runtime</a>.
-    """
-
-    def extract_attributes(self, attributes: _AttributeMapT):
-        attributes[GEN_AI_SYSTEM] = _AWS_BEDROCK_SYSTEM
-
-        model_id = self._call_context.params.get(_MODEL_ID)
-        if model_id:
-            attributes[GEN_AI_REQUEST_MODEL] = model_id
-
-            # Get the request body if it exists
-            body = self._call_context.params.get("body")
-            if body:
-                try:
-                    request_body = json.loads(body)
-
-                    if "amazon.titan" in model_id:
-                        self._extract_titan_attributes(attributes, request_body)
-                    if "amazon.nova" in model_id:
-                        self._extract_nova_attributes(attributes, request_body)
-                    elif "anthropic.claude" in model_id:
-                        self._extract_claude_attributes(attributes, request_body)
-                    elif "meta.llama" in model_id:
-                        self._extract_llama_attributes(attributes, request_body)
-                    elif "cohere.command" in model_id:
-                        self._extract_cohere_attributes(attributes, request_body)
-                    elif "ai21.jamba" in model_id:
-                        self._extract_ai21_attributes(attributes, request_body)
-                    elif "mistral" in model_id:
-                        self._extract_mistral_attributes(attributes, request_body)
-
-                except json.JSONDecodeError:
-                    _logger.debug("Error: Unable to parse the body as JSON")
-
-    def _extract_titan_attributes(self, attributes, request_body):
-        config = request_body.get("textGenerationConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount"))
-
-    def _extract_nova_attributes(self, attributes, request_body):
-        config = request_body.get("inferenceConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("top_p"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens"))
-
-    def _extract_claude_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_cohere_attributes(self, attributes, request_body):
-        prompt = request_body.get("message")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p"))
-
-    def _extract_ai21_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_llama_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_mistral_attributes(self, attributes, request_body):
-        prompt = request_body.get("prompt")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    @staticmethod
-    def _set_if_not_none(attributes, key, value):
-        if value is not None:
-            attributes[key] = value
-
-    # pylint: disable=too-many-branches
-    def on_success(self, span: Span, result: Dict[str, Any], instrumentor_context: _BotocoreInstrumentorContext):
-        model_id = self._call_context.params.get(_MODEL_ID)
-
-        if not model_id:
-            return
-
-        if "body" in result and isinstance(result["body"], StreamingBody):
-            original_body = None
-            try:
-                original_body = result["body"]
-                body_content = original_body.read()
-
-                # Use one stream for telemetry
-                stream = io.BytesIO(body_content)
-                telemetry_content = stream.read()
-                response_body = json.loads(telemetry_content.decode("utf-8"))
-                if "amazon.titan" in model_id:
-                    self._handle_amazon_titan_response(span, response_body)
-                if "amazon.nova" in model_id:
-                    self._handle_amazon_nova_response(span, response_body)
-                elif "anthropic.claude" in model_id:
-                    self._handle_anthropic_claude_response(span, response_body)
-                elif "meta.llama" in model_id:
-                    self._handle_meta_llama_response(span, response_body)
-                elif "cohere.command" in model_id:
-                    self._handle_cohere_command_response(span, response_body)
-                elif "ai21.jamba" in model_id:
-                    self._handle_ai21_jamba_response(span, response_body)
-                elif "mistral" in model_id:
-                    self._handle_mistral_mistral_response(span, response_body)
-                # Replenish stream for downstream application use
-                new_stream = io.BytesIO(body_content)
-                result["body"] = StreamingBody(new_stream, len(body_content))
-
-            except json.JSONDecodeError:
-                _logger.debug("Error: Unable to parse the response body as JSON")
-            except Exception as e:  # pylint: disable=broad-exception-caught, invalid-name
-                _logger.debug("Error processing response: %s", e)
-            finally:
-                if original_body is not None:
-                    original_body.close()
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
-        if "inputTextTokenCount" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"])
-        if "results" in response_body and response_body["results"]:
-            result = response_body["results"][0]
-            if "tokenCount" in result:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"])
-            if "completionReason" in result:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_nova_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "inputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["inputTokens"])
-            if "outputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["outputTokens"])
-        if "stopReason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "input_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"])
-            if "output_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
-        # Output tokens: Approximate from the response text
-        if "text" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6))
-        if "finish_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "prompt_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"])
-            if "completion_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"])
-        if "choices" in response_body:
-            choices = response_body["choices"][0]
-            if "finish_reason" in choices:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
-        if "prompt_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"])
-        if "generation_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]):
-        if "outputs" in response_body:
-            outputs = response_body["outputs"][0]
-            if "text" in outputs:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6))
-            if "stop_reason" in outputs:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
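The removed `on_success` path above hinges on one non-obvious detail: botocore's `StreamingBody` is a one-shot stream, so once the instrumentation reads the response body for telemetry it must hand the application a rebuilt body of identical content and length, or the caller's own `read()` would return nothing. Below is a minimal, self-contained sketch of that read-then-replenish pattern; the `read_and_replenish` helper and the in-memory payload are hypothetical illustrations, not the extension's actual API.

# Sketch of the read-then-replenish pattern, assuming only botocore is installed.
import io
import json

from botocore.response import StreamingBody


def read_and_replenish(result: dict) -> dict:
    """Parse result["body"] for telemetry, then restore it for the caller."""
    original_body = result["body"]
    body_content = original_body.read()  # consumes the one-shot stream
    original_body.close()

    response_body = json.loads(body_content.decode("utf-8"))
    # ... a real extension would record token counts / finish reasons here ...

    # Hand the caller a fresh stream with identical content and length.
    result["body"] = StreamingBody(io.BytesIO(body_content), len(body_content))
    return response_body


# Hypothetical usage, with an in-memory payload standing in for a Bedrock response:
payload = json.dumps({"results": [{"tokenCount": 42}]}).encode("utf-8")
result = {"body": StreamingBody(io.BytesIO(payload), len(payload))}
parsed = read_and_replenish(result)
assert parsed["results"][0]["tokenCount"] == 42
assert result["body"].read() == payload  # a downstream read still succeeds

Note also that for Cohere and Mistral the removed code did not read token usage from the response at all; those bodies carry no usage block, so it estimated token counts as `math.ceil(len(text) / 6)`, a rough characters-per-token heuristic rather than a true count.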