Skip to content

Commit 41663ab

Browse files
committed
feat: add support for new cohere command r models
1 parent e8c96ae commit 41663ab

File tree

2 files changed

+78
-3
lines changed

2 files changed

+78
-3
lines changed

aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,19 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
245245
if (requestBody.top_p !== undefined) {
246246
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
247247
}
248+
} else if (modelId.includes('cohere.command-r')) {
249+
if (requestBody.max_tokens !== undefined) {
250+
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
251+
}
252+
if (requestBody.temperature !== undefined) {
253+
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
254+
}
255+
if (requestBody.p !== undefined) {
256+
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
257+
}
258+
if (requestBody.message !== undefined) {
259+
spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.message.length / 6);
260+
}
248261
} else if (modelId.includes('cohere.command')) {
249262
if (requestBody.max_tokens !== undefined) {
250263
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
@@ -255,6 +268,9 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
255268
if (requestBody.p !== undefined) {
256269
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
257270
}
271+
if (requestBody.prompt !== undefined) {
272+
spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6);
273+
}
258274
} else if (modelId.includes('ai21.jamba')) {
259275
if (requestBody.max_tokens !== undefined) {
260276
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
@@ -329,13 +345,18 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
329345
if (responseBody.stop_reason !== undefined) {
330346
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
331347
}
332-
} else if (currentModelId.includes('cohere.command')) {
333-
if (responseBody.prompt !== undefined) {
348+
} else if (currentModelId.includes('cohere.command-r')) {
349+
console.log('Response Body:', responseBody);
350+
if (responseBody.text !== undefined) {
334351
// NOTE: We approximate the token count since this value is not directly available in the body
335352
// According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
336353
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
337-
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6));
354+
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, Math.ceil(responseBody.text.length / 6));
338355
}
356+
if (responseBody.finish_reason !== undefined) {
357+
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.finish_reason]);
358+
}
359+
} else if (currentModelId.includes('cohere.command')) {
339360
if (responseBody.generations?.[0]?.text !== undefined) {
340361
span.setAttribute(
341362
AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,

aws-distro-opentelemetry-node-autoinstrumentation/test/patches/aws/services/bedrock.test.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,60 @@ describe('BedrockRuntime', () => {
517517
expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
518518
});
519519

520+
it('Add Cohere Command R model attributes to span', async () => {
521+
const modelId: string = 'cohere.command-r-v1:0';
522+
const prompt: string = "Describe the purpose of a 'hello world' program in one line";
523+
const nativeRequest: any = {
524+
message: prompt,
525+
max_tokens: 512,
526+
temperature: 0.5,
527+
p: 0.65,
528+
};
529+
const mockRequestBody: string = JSON.stringify(nativeRequest);
530+
const mockResponseBody: any = {
531+
finish_reason: 'COMPLETE',
532+
text: 'test-generation-text',
533+
prompt: prompt,
534+
request: {
535+
commandInput: {
536+
modelId: modelId,
537+
},
538+
},
539+
};
540+
541+
nock(`https://bedrock-runtime.${region}.amazonaws.com`)
542+
.post(`/model/${encodeURIComponent(modelId)}/invoke`)
543+
.reply(200, mockResponseBody);
544+
545+
await bedrock
546+
.invokeModel({
547+
modelId: modelId,
548+
body: mockRequestBody,
549+
})
550+
.catch((err: any) => {
551+
console.log('error', err);
552+
});
553+
554+
const testSpans: ReadableSpan[] = getTestSpans();
555+
const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => {
556+
return s.name === 'BedrockRuntime.InvokeModel';
557+
});
558+
expect(invokeModelSpans.length).toBe(1);
559+
const invokeModelSpan = invokeModelSpans[0];
560+
expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
561+
expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
562+
expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
563+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
564+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
565+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(512);
566+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0.5);
567+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(0.65);
568+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(10);
569+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(4);
570+
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['COMPLETE']);
571+
expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
572+
});
573+
520574
it('Add Meta Llama model attributes to span', async () => {
521575
const modelId: string = 'meta.llama2-13b-chat-v1';
522576
const prompt: string = 'Describe the purpose of an interpreter program in one line.';

0 commit comments

Comments
 (0)