Commit e8c96ae
feat: set up gen ai inference attributes for foundational text models (#111)
*Description of changes:* Adds auto-instrumentation support for GenAI inference parameters. The following foundational text models are supported:

- AI21 Jamba
- Amazon Titan
- Anthropic Claude
- Cohere Command
- Meta Llama
- Mistral AI

The full list can be found [here](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html). Note that we do not support Stability AI models at this time, since they are focused on text-to-image generation.

New inference parameter attributes are added according to the OpenTelemetry Semantic Conventions for [GenAI attributes](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md#genai-attributes):

- `gen_ai.request.max_tokens`
- `gen_ai.request.temperature`
- `gen_ai.request.top_p`
- `gen_ai.response.finish_reasons`
- `gen_ai.usage.input_tokens`
- `gen_ai.usage.output_tokens`

*Test Plan:* Set up a sample app to make Bedrock Runtime `InvokeModel` API calls to the supported foundational models and verified the auto-instrumentation attributes (a sketch of such a call follows below).

![ai21-jamba](https://github.com/user-attachments/assets/83ef736c-5a49-41ce-951d-cc1d04ed92a8)
![amazon-titan](https://github.com/user-attachments/assets/bcd7d6b0-207f-43e6-aae8-13d99532be53)
![anthropic-claude](https://github.com/user-attachments/assets/747409e5-5cb6-489a-81c5-1d8299cee5c5)
![cohere-command](https://github.com/user-attachments/assets/b2f726d4-e053-4762-9d96-f187c549fe90)
![meta-llama](https://github.com/user-attachments/assets/5b982aa1-6ef4-4c65-a58a-d2c24cb57777)
![mistral-ai](https://github.com/user-attachments/assets/9a01df6f-27b6-43b3-94cf-6731686e495e)

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
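For context, here is a minimal sketch of the kind of `InvokeModel` call the test plan exercises, assuming the `@aws-sdk/client-bedrock-runtime` v3 client (the model ID, region, and prompt are illustrative, not part of this commit). Inline comments note which request and response fields the instrumentation maps to the new attributes:

```ts
import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';

const client = new BedrockRuntimeClient({ region: 'us-east-1' });

async function invokeClaude(): Promise<void> {
  const command = new InvokeModelCommand({
    modelId: 'anthropic.claude-3-haiku-20240307-v1:0', // illustrative model ID
    contentType: 'application/json',
    accept: 'application/json',
    body: JSON.stringify({
      anthropic_version: 'bedrock-2023-05-31',
      messages: [{ role: 'user', content: 'Say hello.' }],
      max_tokens: 256, // -> gen_ai.request.max_tokens
      temperature: 0.5, // -> gen_ai.request.temperature
      top_p: 0.9, // -> gen_ai.request.top_p
    }),
  });
  const response = await client.send(command);
  // For Claude, the response body's usage.input_tokens, usage.output_tokens,
  // and stop_reason become gen_ai.usage.input_tokens,
  // gen_ai.usage.output_tokens, and gen_ai.response.finish_reasons
  // on the Bedrock Runtime span.
  console.log(new TextDecoder().decode(response.body));
}
```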
1 parent 425264c commit e8c96ae

File tree

3 files changed (+505 / -9 lines)


aws-distro-opentelemetry-node-autoinstrumentation/src/aws-span-processing-util.ts

Lines changed: 6 additions & 0 deletions

```diff
@@ -47,6 +47,12 @@ export class AwsSpanProcessingUtil {
   // TODO: Use Semantic Conventions once upgraded
   static GEN_AI_REQUEST_MODEL: string = 'gen_ai.request.model';
   static GEN_AI_SYSTEM: string = 'gen_ai.system';
+  static GEN_AI_REQUEST_MAX_TOKENS: string = 'gen_ai.request.max_tokens';
+  static GEN_AI_REQUEST_TEMPERATURE: string = 'gen_ai.request.temperature';
+  static GEN_AI_REQUEST_TOP_P: string = 'gen_ai.request.top_p';
+  static GEN_AI_RESPONSE_FINISH_REASONS: string = 'gen_ai.response.finish_reasons';
+  static GEN_AI_USAGE_INPUT_TOKENS: string = 'gen_ai.usage.input_tokens';
+  static GEN_AI_USAGE_OUTPUT_TOKENS: string = 'gen_ai.usage.output_tokens';

   static getDialectKeywords(): string[] {
     return SQL_DIALECT_KEYWORDS_JSON.keywords;
```
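The `TODO: Use Semantic Conventions once upgraded` comment indicates these string literals are interim. As a hedged sketch, the eventual swap might look like the following, assuming a `@opentelemetry/semantic-conventions` release that ships the incubating GenAI constants (the exact export names are an assumption and should be verified against the installed version):

```ts
// Assumption: these ATTR_* exports exist in the incubating entry point of a
// sufficiently recent @opentelemetry/semantic-conventions release.
import {
  ATTR_GEN_AI_REQUEST_MAX_TOKENS,
  ATTR_GEN_AI_USAGE_INPUT_TOKENS,
} from '@opentelemetry/semantic-conventions/incubating';

// The hardcoded literals above could then be aliased instead, e.g.:
// static GEN_AI_REQUEST_MAX_TOKENS: string = ATTR_GEN_AI_REQUEST_MAX_TOKENS;
```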

aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts

Lines changed: 162 additions & 0 deletions

```diff
@@ -211,11 +211,173 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL] = modelId;
     }

+    if (request.commandInput?.body) {
+      const requestBody = JSON.parse(request.commandInput.body);
+      if (modelId.includes('amazon.titan')) {
+        if (requestBody.textGenerationConfig?.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.textGenerationConfig.temperature;
+        }
+        if (requestBody.textGenerationConfig?.topP !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.textGenerationConfig.topP;
+        }
+        if (requestBody.textGenerationConfig?.maxTokenCount !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.textGenerationConfig.maxTokenCount;
+        }
+      } else if (modelId.includes('anthropic.claude')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('meta.llama')) {
+        if (requestBody.max_gen_len !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_gen_len;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('cohere.command')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
+        }
+      } else if (modelId.includes('ai21.jamba')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('mistral.mistral')) {
+        if (requestBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body
+          // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6);
+        }
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      }
+    }
+
     return {
       isIncoming,
       spanAttributes,
       spanKind,
       spanName,
     };
   }
+
+  responseHook(response: NormalizedResponse, span: Span, tracer: Tracer, config: AwsSdkInstrumentationConfig): void {
+    const currentModelId = response.request.commandInput?.modelId;
+    if (response.data?.body) {
+      const decodedResponseBody = new TextDecoder().decode(response.data.body);
+      const responseBody = JSON.parse(decodedResponseBody);
+      if (currentModelId.includes('amazon.titan')) {
+        if (responseBody.inputTextTokenCount !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.inputTextTokenCount);
+        }
+        if (responseBody.results?.[0]?.tokenCount !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.results[0].tokenCount);
+        }
+        if (responseBody.results?.[0]?.completionReason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.results[0].completionReason,
+          ]);
+        }
+      } else if (currentModelId.includes('anthropic.claude')) {
+        if (responseBody.usage?.input_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.input_tokens);
+        }
+        if (responseBody.usage?.output_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.output_tokens);
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
+        }
+      } else if (currentModelId.includes('meta.llama')) {
+        if (responseBody.prompt_token_count !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.prompt_token_count);
+        }
+        if (responseBody.generation_token_count !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.generation_token_count);
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
+        }
+      } else if (currentModelId.includes('cohere.command')) {
+        if (responseBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body
+          // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6));
+        }
+        if (responseBody.generations?.[0]?.text !== undefined) {
+          span.setAttribute(
+            AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
+            // NOTE: We approximate the token count since this value is not directly available in the body
+            // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+            // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+            Math.ceil(responseBody.generations[0].text.length / 6)
+          );
+        }
+        if (responseBody.generations?.[0]?.finish_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.generations[0].finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('ai21.jamba')) {
+        if (responseBody.usage?.prompt_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.prompt_tokens);
+        }
+        if (responseBody.usage?.completion_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.completion_tokens);
+        }
+        if (responseBody.choices?.[0]?.finish_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.choices[0].finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('mistral.mistral')) {
+        if (responseBody.outputs?.[0]?.text !== undefined) {
+          span.setAttribute(
+            AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
+            // NOTE: We approximate the token count since this value is not directly available in the body
+            // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+            // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+            Math.ceil(responseBody.outputs[0].text.length / 6)
+          );
+        }
+        if (responseBody.outputs?.[0]?.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.outputs[0].stop_reason,
+          ]);
+        }
+      }
+    }
+  }
 }
```
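The chars-to-tokens heuristic is repeated in four places in the diff above. As a worked example of the arithmetic, here is a tiny illustrative helper (the `approximateTokenCount` name is hypothetical and not part of this commit):

```ts
// Illustrative only: Bedrock's documented pricing heuristic of roughly
// 6 characters per token, applied when a model's request or response body
// does not report token counts directly.
function approximateTokenCount(text: string): number {
  return Math.ceil(text.length / 6);
}

// A 19-character string yields ceil(19 / 6) = 4 approximate tokens:
console.log(approximateTokenCount('Hello from Bedrock!')); // 4
```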
