
Commit 842f9c9

feat: add unit tests for gen ai inference attributes

1 parent 6cd9bcc

File tree: 2 files changed (+340, -10 lines)


aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts

Lines changed: 3 additions & 1 deletion
@@ -356,7 +356,9 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
         );
       }
       if (responseBody.outputs?.[0]?.stop_reason) {
-        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, responseBody.outputs[0].stop_reason);
+        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+          responseBody.outputs[0].stop_reason,
+        ]);
       }
     }
   }
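
Note on the bedrock.ts change: the OpenTelemetry GenAI semantic conventions define gen_ai.response.finish_reasons as an array-valued attribute, so the Mistral branch now wraps the single stop_reason in an array, matching what the other model branches emit and what the new tests assert (e.g. toEqual(['stop'])). A minimal sketch of the pattern, assuming the span and AwsSpanProcessingUtil already in scope in this file (the helper name is hypothetical, not part of this commit):

// Hypothetical helper: normalize a provider-specific finish reason into
// the string[] shape that gen_ai.response.finish_reasons expects.
function asFinishReasons(reason: string | string[] | undefined): string[] | undefined {
  if (reason === undefined) return undefined;
  return Array.isArray(reason) ? reason : [reason];
}

// Usage at the Mistral call site:
const reasons = asFinishReasons(responseBody.outputs?.[0]?.stop_reason);
if (reasons !== undefined) {
  span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, reasons);
}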

aws-distro-opentelemetry-node-autoinstrumentation/test/patches/aws/services/bedrock.test.ts

Lines changed: 337 additions & 9 deletions
@@ -276,19 +276,102 @@ describe('BedrockRuntime', () => {
   });
 
   describe('InvokeModel', () => {
-    it('adds modelId to span', async () => {
-      const dummyModelId: string = 'ABCDEFGH';
-      const dummyBody: string = 'HGFEDCBA';
+    it('Add AI21 Jamba model attributes to span', async () => {
+      const modelId: string = 'ai21.jamba-1-5-large-v1:0';
+      const prompt: string = 'Describe the purpose of a compiler in one line.';
+      const nativeRequest: any = {
+        messages: [
+          {
+            role: 'user',
+            content: prompt,
+          },
+        ],
+        top_p: 0.8,
+        temperature: 0.6,
+        max_tokens: 512,
+      };
+      const mockRequestBody: string = JSON.stringify(nativeRequest);
+      const mockResponseBody: any = {
+        stop_reason: 'end_turn',
+        usage: {
+          prompt_tokens: 21,
+          completion_tokens: 24,
+        },
+        choices: [
+          {
+            finish_reason: 'stop',
+          },
+        ],
+        request: {
+          commandInput: {
+            modelId: modelId,
+          },
+        },
+      };
+
+      nock(`https://bedrock-runtime.${region}.amazonaws.com`)
+        .post(`/model/${encodeURIComponent(modelId)}/invoke`)
+        .reply(200, mockResponseBody);
 
-      nock(`https://bedrock-runtime.${region}.amazonaws.com`).post(`/model/${dummyModelId}/invoke`).reply(200, {
-        modelId: dummyModelId,
-        body: dummyBody,
+      await bedrock
+        .invokeModel({
+          modelId: modelId,
+          body: mockRequestBody,
+        })
+        .catch((err: any) => {
+          console.log('error', err);
+        });
+
+      const testSpans: ReadableSpan[] = getTestSpans();
+      const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => {
+        return s.name === 'BedrockRuntime.InvokeModel';
       });
+      expect(invokeModelSpans.length).toBe(1);
+      const invokeModelSpan = invokeModelSpans[0];
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(512);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0.6);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(0.8);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(21);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(24);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['stop']);
+      expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
+    });
+    it('Add Amazon Titan model attributes to span', async () => {
+      const modelId: string = 'amazon.titan-text-express-v1';
+      const prompt: string = 'Complete this text. It was the best of times it was the worst...';
+      const nativeRequest: any = {
+        inputText: prompt,
+        textGenerationConfig: {
+          maxTokenCount: 4096,
+          stopSequences: [],
+          temperature: 0,
+          topP: 1,
+        },
+      };
+      const mockRequestBody: string = JSON.stringify(nativeRequest);
+      const mockResponseBody: any = {
+        inputTextTokenCount: 15,
+        results: [
+          {
+            tokenCount: 13,
+            completionReason: 'CONTENT_FILTERED',
+          },
+        ],
+      };
+
+      nock(`https://bedrock-runtime.${region}.amazonaws.com`)
+        .post(`/model/${modelId}/invoke`)
+        .reply(200, mockResponseBody);
 
       await bedrock
         .invokeModel({
-          modelId: dummyModelId,
-          body: dummyBody,
+          modelId: modelId,
+          body: mockRequestBody,
         })
         .catch((err: any) => {});
 
@@ -301,7 +384,252 @@ describe('BedrockRuntime', () => {
       expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
       expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
       expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
-      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(dummyModelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(4096);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(1);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(15);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(13);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual([
+        'CONTENT_FILTERED',
+      ]);
+      expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
+    });
+
+    it('Add Anthropic Claude model attributes to span', async () => {
+      const modelId: string = 'anthropic.claude-3-5-sonnet-20240620-v1:0';
+      const prompt: string = 'Complete this text. It was the best of times it was the worst...';
+      const nativeRequest: any = {
+        anthropic_version: 'bedrock-2023-05-31',
+        max_tokens: 1000,
+        temperature: 1.0,
+        top_p: 1,
+        messages: [
+          {
+            role: 'user',
+            content: [{ type: 'text', text: prompt }],
+          },
+        ],
+      };
+      const mockRequestBody: string = JSON.stringify(nativeRequest);
+      const mockResponseBody: any = {
+        stop_reason: 'end_turn',
+        usage: {
+          input_tokens: 15,
+          output_tokens: 13,
+        },
+        request: {
+          commandInput: {
+            modelId: modelId,
+          },
+        },
+      };
+
+      nock(`https://bedrock-runtime.${region}.amazonaws.com`)
+        .post(`/model/${encodeURIComponent(modelId)}/invoke`)
+        .reply(200, mockResponseBody);
+
+      await bedrock
+        .invokeModel({
+          modelId: modelId,
+          body: mockRequestBody,
+        })
+        .catch((err: any) => {
+          console.log('error', err);
+        });
+
+      const testSpans: ReadableSpan[] = getTestSpans();
+      const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => {
+        return s.name === 'BedrockRuntime.InvokeModel';
+      });
+      expect(invokeModelSpans.length).toBe(1);
+      const invokeModelSpan = invokeModelSpans[0];
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(1000);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(1.0);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(1);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(15);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(13);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['end_turn']);
+      expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
+    });
+
+    it('Add Cohere Command model attributes to span', async () => {
+      const modelId: string = 'cohere.command-light-text-v14';
+      const prompt: string = "Describe the purpose of a 'hello world' program in one line";
+      const nativeRequest: any = {
+        prompt: prompt,
+        max_tokens: 512,
+        temperature: 0.5,
+        p: 0.65,
+      };
+      const mockRequestBody: string = JSON.stringify(nativeRequest);
+      const mockResponseBody: any = {
+        generations: [
+          {
+            finish_reason: 'COMPLETE',
+            text: 'test-generation-text',
+          },
+        ],
+        prompt: prompt,
+        request: {
+          commandInput: {
+            modelId: modelId,
+          },
+        },
+      };
+
+      nock(`https://bedrock-runtime.${region}.amazonaws.com`)
+        .post(`/model/${encodeURIComponent(modelId)}/invoke`)
+        .reply(200, mockResponseBody);
+
+      await bedrock
+        .invokeModel({
+          modelId: modelId,
+          body: mockRequestBody,
+        })
+        .catch((err: any) => {
+          console.log('error', err);
+        });
+
+      const testSpans: ReadableSpan[] = getTestSpans();
+      const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => {
+        return s.name === 'BedrockRuntime.InvokeModel';
+      });
+      expect(invokeModelSpans.length).toBe(1);
+      const invokeModelSpan = invokeModelSpans[0];
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(512);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0.5);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(0.65);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(10);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(4);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['COMPLETE']);
+      expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
+    });
+
+    it('Add Meta Llama model attributes to span', async () => {
+      const modelId: string = 'meta.llama2-13b-chat-v1';
+      const prompt: string = 'Describe the purpose of an interpreter program in one line.';
+      const nativeRequest: any = {
+        prompt,
+        max_gen_len: 512,
+        temperature: 0.5,
+        top_p: 0.9,
+      };
+      const mockRequestBody: string = JSON.stringify(nativeRequest);
+      const mockResponseBody: any = {
+        prompt_token_count: 31,
+        generation_token_count: 49,
+        stop_reason: 'stop',
+        request: {
+          commandInput: {
+            modelId: modelId,
+          },
+        },
+      };
+
+      nock(`https://bedrock-runtime.${region}.amazonaws.com`)
+        .post(`/model/${encodeURIComponent(modelId)}/invoke`)
+        .reply(200, mockResponseBody);
+
+      await bedrock
+        .invokeModel({
+          modelId: modelId,
+          body: mockRequestBody,
+        })
+        .catch((err: any) => {
+          console.log('error', err);
+        });
+
+      const testSpans: ReadableSpan[] = getTestSpans();
+      const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => {
+        return s.name === 'BedrockRuntime.InvokeModel';
+      });
+      expect(invokeModelSpans.length).toBe(1);
+      const invokeModelSpan = invokeModelSpans[0];
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(512);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0.5);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(0.9);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(31);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(49);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['stop']);
+      expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
+    });
+
+    it('Add Mistral AI model attributes to span', async () => {
+      const modelId: string = 'mistral.mistral-7b-instruct-v0:2';
+      const prompt: string = `
+        <s>[INST]
+        In Bash, how do I list all text files in the current directory
+        (excluding subdirectories) that have been modified in the last month?
+        [/INST]
+        `;
+      const nativeRequest: any = {
+        prompt: prompt,
+        max_tokens: 4096,
+        temperature: 0.75,
+        top_p: 1.0,
+      };
+      const mockRequestBody: string = JSON.stringify(nativeRequest);
+      const mockResponseBody: any = {
+        outputs: [
+          {
+            text: 'test-output-text',
+            stop_reason: 'stop',
+          },
+        ],
+        request: {
+          commandInput: {
+            modelId: modelId,
+          },
+        },
+      };
+
+      nock(`https://bedrock-runtime.${region}.amazonaws.com`)
+        .post(`/model/${encodeURIComponent(modelId)}/invoke`)
+        .reply(200, mockResponseBody);
+
+      await bedrock
+        .invokeModel({
+          modelId: modelId,
+          body: mockRequestBody,
+        })
+        .catch((err: any) => {
+          console.log('error', err);
+        });
+
+      const testSpans: ReadableSpan[] = getTestSpans();
+      const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => {
+        return s.name === 'BedrockRuntime.InvokeModel';
+      });
+      expect(invokeModelSpans.length).toBe(1);
+      const invokeModelSpan = invokeModelSpans[0];
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined();
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock');
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(4096);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0.75);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(1.0);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(31);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(3);
+      expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['stop']);
       expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT);
     });
   });
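
Note on the Cohere and Mistral expectations: those mock responses carry no token-usage fields, yet the tests assert GEN_AI_USAGE_INPUT_TOKENS and GEN_AI_USAGE_OUTPUT_TOKENS. The asserted values are consistent with the instrumentation estimating roughly six characters per token from the prompt and generated text; this is an inference about the patch's heuristic, not code from this commit. A sketch under that assumption:

// Assumed heuristic (hypothetical, not from this commit): ~6 characters
// per token, rounded up, applied when the model response has no usage data.
const estimateTokenCount = (text: string): number => Math.ceil(text.length / 6);

// Consistent with the asserted values:
//   Cohere prompt (59 chars)          -> Math.ceil(59 / 6) = 10 input tokens
//   'test-generation-text' (20 chars) -> Math.ceil(20 / 6) = 4 output tokens
//   'test-output-text' (16 chars)     -> Math.ceil(16 / 6) = 3 output tokens
// The Mistral test's 31 input tokens likewise matches the same formula
// applied to its multi-line template-string prompt.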
