Commit e8c96ae
feat: set up gen ai inference attributes for foundational text models (#111)
*Description of changes:* Adds auto-instrumentation support for GenAI inference parameters. The following foundational text models are supported:

- AI21 Jamba
- Amazon Titan
- Anthropic Claude
- Cohere Command
- Meta Llama
- Mistral AI

The full list can be found [here](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html). Note that we do not support Stability AI models at this time, since they are focused on text-to-image generation.

New inference parameter attributes are added according to the OpenTelemetry Semantic Conventions for [GenAI attributes](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md#genai-attributes):

- `gen_ai.request.max_tokens`
- `gen_ai.request.temperature`
- `gen_ai.request.top_p`
- `gen_ai.response.finish_reasons`
- `gen_ai.usage.input_tokens`
- `gen_ai.usage.output_tokens`

*Test Plan:* Set up a sample app to make Bedrock Runtime `InvokeModel` API calls to the supported foundational models and verified the auto-instrumentation attributes (a sketch of such a call follows below).

![ai21-jamba](https://github.com/user-attachments/assets/83ef736c-5a49-41ce-951d-cc1d04ed92a8)
![amazon-titan](https://github.com/user-attachments/assets/bcd7d6b0-207f-43e6-aae8-13d99532be53)
![anthropic-claude](https://github.com/user-attachments/assets/747409e5-5cb6-489a-81c5-1d8299cee5c5)
![cohere-command](https://github.com/user-attachments/assets/b2f726d4-e053-4762-9d96-f187c549fe90)
![meta-llama](https://github.com/user-attachments/assets/5b982aa1-6ef4-4c65-a58a-d2c24cb57777)
![mistral-ai](https://github.com/user-attachments/assets/9a01df6f-27b6-43b3-94cf-6731686e495e)

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
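For context, here is a minimal sketch of the kind of `InvokeModel` call the test plan exercises, assuming the `@aws-sdk/client-bedrock-runtime` v3 client (the model ID, region, and prompt are illustrative, not part of this commit). Inline comments note which request and response fields the instrumentation maps to the new attributes:

```ts
import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';

const client = new BedrockRuntimeClient({ region: 'us-east-1' });

async function invokeClaude(): Promise<void> {
  const command = new InvokeModelCommand({
    modelId: 'anthropic.claude-3-haiku-20240307-v1:0', // illustrative model ID
    contentType: 'application/json',
    accept: 'application/json',
    body: JSON.stringify({
      anthropic_version: 'bedrock-2023-05-31',
      messages: [{ role: 'user', content: 'Say hello.' }],
      max_tokens: 256, // -> gen_ai.request.max_tokens
      temperature: 0.5, // -> gen_ai.request.temperature
      top_p: 0.9, // -> gen_ai.request.top_p
    }),
  });
  const response = await client.send(command);
  // For Claude, the response body's usage.input_tokens, usage.output_tokens,
  // and stop_reason become gen_ai.usage.input_tokens,
  // gen_ai.usage.output_tokens, and gen_ai.response.finish_reasons
  // on the Bedrock Runtime span.
  console.log(new TextDecoder().decode(response.body));
}
```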
1 parent 425264c commit e8c96ae

File tree

3 files changed (+505 / -9 lines)


aws-distro-opentelemetry-node-autoinstrumentation/src/aws-span-processing-util.ts

Lines changed: 6 additions & 0 deletions

```diff
@@ -47,6 +47,12 @@ export class AwsSpanProcessingUtil {
   // TODO: Use Semantic Conventions once upgraded
   static GEN_AI_REQUEST_MODEL: string = 'gen_ai.request.model';
   static GEN_AI_SYSTEM: string = 'gen_ai.system';
+  static GEN_AI_REQUEST_MAX_TOKENS: string = 'gen_ai.request.max_tokens';
+  static GEN_AI_REQUEST_TEMPERATURE: string = 'gen_ai.request.temperature';
+  static GEN_AI_REQUEST_TOP_P: string = 'gen_ai.request.top_p';
+  static GEN_AI_RESPONSE_FINISH_REASONS: string = 'gen_ai.response.finish_reasons';
+  static GEN_AI_USAGE_INPUT_TOKENS: string = 'gen_ai.usage.input_tokens';
+  static GEN_AI_USAGE_OUTPUT_TOKENS: string = 'gen_ai.usage.output_tokens';

   static getDialectKeywords(): string[] {
     return SQL_DIALECT_KEYWORDS_JSON.keywords;
```
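The `TODO: Use Semantic Conventions once upgraded` comment indicates these string literals are interim. As a hedged sketch, the eventual swap might look like the following, assuming a `@opentelemetry/semantic-conventions` release that ships the incubating GenAI constants (the exact export names are an assumption and should be verified against the installed version):

```ts
// Assumption: these ATTR_* exports exist in the incubating entry point of a
// sufficiently recent @opentelemetry/semantic-conventions release.
import {
  ATTR_GEN_AI_REQUEST_MAX_TOKENS,
  ATTR_GEN_AI_USAGE_INPUT_TOKENS,
} from '@opentelemetry/semantic-conventions/incubating';

// The hardcoded literals above could then be aliased instead, e.g.:
// static GEN_AI_REQUEST_MAX_TOKENS: string = ATTR_GEN_AI_REQUEST_MAX_TOKENS;
```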

aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts

Lines changed: 162 additions & 0 deletions

```diff
@@ -211,11 +211,173 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL] = modelId;
     }

+    if (request.commandInput?.body) {
+      const requestBody = JSON.parse(request.commandInput.body);
+      if (modelId.includes('amazon.titan')) {
+        if (requestBody.textGenerationConfig?.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.textGenerationConfig.temperature;
+        }
+        if (requestBody.textGenerationConfig?.topP !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.textGenerationConfig.topP;
+        }
+        if (requestBody.textGenerationConfig?.maxTokenCount !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.textGenerationConfig.maxTokenCount;
+        }
+      } else if (modelId.includes('anthropic.claude')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('meta.llama')) {
+        if (requestBody.max_gen_len !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_gen_len;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('cohere.command')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
+        }
+      } else if (modelId.includes('ai21.jamba')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('mistral.mistral')) {
+        if (requestBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body
+          // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6);
+        }
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      }
+    }
+
     return {
       isIncoming,
       spanAttributes,
       spanKind,
       spanName,
     };
   }
+
+  responseHook(response: NormalizedResponse, span: Span, tracer: Tracer, config: AwsSdkInstrumentationConfig): void {
+    const currentModelId = response.request.commandInput?.modelId;
+    if (response.data?.body) {
+      const decodedResponseBody = new TextDecoder().decode(response.data.body);
+      const responseBody = JSON.parse(decodedResponseBody);
+      if (currentModelId.includes('amazon.titan')) {
+        if (responseBody.inputTextTokenCount !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.inputTextTokenCount);
+        }
+        if (responseBody.results?.[0]?.tokenCount !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.results[0].tokenCount);
+        }
+        if (responseBody.results?.[0]?.completionReason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.results[0].completionReason,
+          ]);
+        }
+      } else if (currentModelId.includes('anthropic.claude')) {
+        if (responseBody.usage?.input_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.input_tokens);
+        }
+        if (responseBody.usage?.output_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.output_tokens);
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
+        }
+      } else if (currentModelId.includes('meta.llama')) {
+        if (responseBody.prompt_token_count !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.prompt_token_count);
+        }
+        if (responseBody.generation_token_count !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.generation_token_count);
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
+        }
+      } else if (currentModelId.includes('cohere.command')) {
+        if (responseBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body
+          // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6));
+        }
+        if (responseBody.generations?.[0]?.text !== undefined) {
+          span.setAttribute(
+            AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
+            // NOTE: We approximate the token count since this value is not directly available in the body
+            // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+            // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+            Math.ceil(responseBody.generations[0].text.length / 6)
+          );
+        }
+        if (responseBody.generations?.[0]?.finish_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.generations[0].finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('ai21.jamba')) {
+        if (responseBody.usage?.prompt_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.prompt_tokens);
+        }
+        if (responseBody.usage?.completion_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.completion_tokens);
+        }
+        if (responseBody.choices?.[0]?.finish_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.choices[0].finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('mistral.mistral')) {
+        if (responseBody.outputs?.[0]?.text !== undefined) {
+          span.setAttribute(
+            AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
+            // NOTE: We approximate the token count since this value is not directly available in the body
+            // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+            // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+            Math.ceil(responseBody.outputs[0].text.length / 6)
+          );
+        }
+        if (responseBody.outputs?.[0]?.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.outputs[0].stop_reason,
+          ]);
+        }
+      }
+    }
+  }
 }
```
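The chars-to-tokens heuristic is repeated in four places in the diff above. As a worked example of the arithmetic, here is a tiny illustrative helper (the `approximateTokenCount` name is hypothetical and not part of this commit):

```ts
// Illustrative only: Bedrock's documented pricing heuristic of roughly
// 6 characters per token, applied when a model's request or response body
// does not report token counts directly.
function approximateTokenCount(text: string): number {
  return Math.ceil(text.length / 6);
}

// A 19-character string yields ceil(19 / 6) = 4 approximate tokens:
console.log(approximateTokenCount('Hello from Bedrock!')); // 4
```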
