
Commit 25fa5e9

yiyuan-he and mxiamxia authored
feat: add support for new cohere command r models (#118)
*Description of changes:* Adding support for Cohere Command R models. The previous Cohere Command models are not yet fully deprecated ([EOL April 2025](https://docs.aws.amazon.com/bedrock/latest/userguide/model-lifecycle.html)), so we keep supporting them for now.

Beginning 11/05/24, calls to the old Cohere Command models throw a deprecation exception. I wasn't able to find an official announcement of this change, but I noticed it while testing in the Java SDK during development. Interestingly, calls to the old models still return a response, so the full Gen AI attributes are still generated for the time being.

![Screenshot 2024-11-05 at 5 01 11 PM](https://github.com/user-attachments/assets/52c2d30e-5c75-431d-8b14-85fde9938cab)

*Test Plan:* Verified that the Command R model attributes are generated with sample-app auto-instrumentation.

![Screenshot 2024-11-05 at 4 52 36 PM](https://github.com/user-attachments/assets/81f076cc-7945-421d-8c9f-1e754d7acced)

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

---------

Co-authored-by: Min Xia <[email protected]>
1 parent 6fe7d4d · commit 25fa5e9
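For context, here is a minimal sketch of the call path this commit instruments. It is not part of the diff: the client setup, region, prompt, and parameter values are illustrative, and it assumes the AWS SDK for JavaScript v3 package `@aws-sdk/client-bedrock-runtime`.

```typescript
import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';

async function main(): Promise<void> {
  const client = new BedrockRuntimeClient({ region: 'us-east-1' }); // illustrative region

  // Command R takes `message` where the legacy Command models took `prompt`.
  const body = JSON.stringify({
    message: 'Describe the purpose of OpenTelemetry in one line.',
    max_tokens: 512,
    temperature: 0.5,
    p: 0.65,
  });

  const response = await client.send(new InvokeModelCommand({ modelId: 'cohere.command-r-v1:0', body }));
  console.log(new TextDecoder().decode(response.body));
}

main().catch(console.error);
```

With the auto-instrumentation active, the patch below maps `max_tokens`, `temperature`, and `p` from this request body onto the `gen_ai.request.*` span attributes, and approximates the `gen_ai.usage.*` token counts from the lengths of the request `message` and the response `text`.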

2 files changed: +82 -5 lines changed


aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts

Lines changed: 28 additions & 5 deletions
```diff
@@ -245,6 +245,22 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (requestBody.top_p !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
       }
+    } else if (modelId.includes('cohere.command-r')) {
+      if (requestBody.max_tokens !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+      }
+      if (requestBody.temperature !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+      }
+      if (requestBody.p !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
+      }
+      if (requestBody.message !== undefined) {
+        // NOTE: We approximate the token count since this value is not directly available in the body
+        // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+        // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.message.length / 6);
+      }
     } else if (modelId.includes('cohere.command')) {
       if (requestBody.max_tokens !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
@@ -255,6 +271,9 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (requestBody.p !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
       }
+      if (requestBody.prompt !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6);
+      }
     } else if (modelId.includes('ai21.jamba')) {
       if (requestBody.max_tokens !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
@@ -265,7 +284,7 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (requestBody.top_p !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
       }
-    } else if (modelId.includes('mistral.mistral')) {
+    } else if (modelId.includes('mistral')) {
       if (requestBody.prompt !== undefined) {
         // NOTE: We approximate the token count since this value is not directly available in the body
         // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
@@ -329,13 +348,17 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (responseBody.stop_reason !== undefined) {
         span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
       }
-    } else if (currentModelId.includes('cohere.command')) {
-      if (responseBody.prompt !== undefined) {
+    } else if (currentModelId.includes('cohere.command-r')) {
+      if (responseBody.text !== undefined) {
         // NOTE: We approximate the token count since this value is not directly available in the body
         // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
         // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
-        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6));
+        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, Math.ceil(responseBody.text.length / 6));
       }
+      if (responseBody.finish_reason !== undefined) {
+        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.finish_reason]);
+      }
+    } else if (currentModelId.includes('cohere.command')) {
       if (responseBody.generations?.[0]?.text !== undefined) {
         span.setAttribute(
           AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
@@ -362,7 +385,7 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
           responseBody.choices[0].finish_reason,
         ]);
       }
-    } else if (currentModelId.includes('mistral.mistral')) {
+    } else if (currentModelId.includes('mistral')) {
       if (responseBody.outputs?.[0]?.text !== undefined) {
         span.setAttribute(
           AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
```
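The NOTE comments in this diff rely on Bedrock's documented pricing heuristic of roughly six characters per token. Below is a small standalone sketch of that approximation, together with the two Cohere request shapes the patch distinguishes; the field values are made up, but the field names match what the patched code reads.

```typescript
// The chars/6 heuristic used in the patch, in isolation.
const approximateTokens = (text: string): number => Math.ceil(text.length / 6);

// Legacy Cohere Command sends `prompt`; Command R sends `message`.
const legacyCommandRequest = { prompt: 'Hello world', max_tokens: 100, temperature: 0.5, p: 0.9 };
const commandRRequest = { message: 'Hello world', max_tokens: 100, temperature: 0.5, p: 0.9 };

// 'Hello world' is 11 characters, so both approximate to Math.ceil(11 / 6) = 2 tokens.
console.log(approximateTokens(legacyCommandRequest.prompt)); // 2
console.log(approximateTokens(commandRRequest.message)); // 2
```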

aws-distro-opentelemetry-node-autoinstrumentation/test/patches/aws/services/bedrock.test.ts

Lines changed: 54 additions & 0 deletions
```diff
@@ -517,6 +517,60 @@ describe('BedrockRuntime', () => {
       expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
     });
 
+    it('Add Cohere Command R model attributes to span', async () => {
+      const modelId: string = 'cohere.command-r-v1:0';
+      const prompt: string = "Describe the purpose of a 'hello world' program in one line";
+      const nativeRequest: any = {
+        message: prompt,
+        max_tokens: 512,
+        temperature: 0.5,
+        p: 0.65,
+      };
+      const mockRequestBody: string = JSON.stringify(nativeRequest);
+      const mockResponseBody: any = {
+        finish_reason: 'COMPLETE',
+        text: 'test-generation-text',
+        prompt: prompt,
+        request: {
+          commandInput: {
+            modelId: modelId,
+          },
+        },
+      };
+
+      nock(`https://bedrock-runtime.${region}.amazonaws.com`)
+        .post(`/model/${encodeURIComponent(modelId)}/invoke`)
+        .reply(200, mockResponseBody);
+
+      await bedrock
+        .invokeModel({
+          modelId: modelId,
+          body: mockRequestBody,
+        })
+        .catch((err: any) => {
+          console.log('error', err);
+        });
+
+      const testSpans: ReadableSpan[] = getTestSpans();
+      const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => {
+        return s.name === 'BedrockRuntime.InvokeModel';
+      });
+      expect(invokeModelSpans.length).toBe(1);
+      const invokeModelSpan = invokeModelSpans[0];
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(512);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0.5);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(0.65);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(10);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(4);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['COMPLETE']);
+      expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
+    });
+
     it('Add Meta Llama model attributes to span', async () => {
       const modelId: string = 'meta.llama2-13b-chat-v1';
       const prompt: string = 'Describe the purpose of an interpreter program in one line.';
```
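A note on the test's hard-coded expectations: the 10 input and 4 output tokens follow directly from the chars/6 heuristic applied to the 59-character prompt and the 20-character mock generation text. A hypothetical helper (not part of the commit) could derive them instead of hard-coding:

```typescript
// Hypothetical helper mirroring the instrumentation's approximation.
const approxTokens = (s: string): number => Math.ceil(s.length / 6);

approxTokens("Describe the purpose of a 'hello world' program in one line"); // 59 chars -> 10
approxTokens('test-generation-text'); // 20 chars -> 4
```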
