Skip to content
83 changes: 83 additions & 0 deletions packages/core/src/utils/ai/messageTruncation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
export const DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT = 20000;

/**
 * Estimates the UTF-8 encoded size of a string by walking its UTF-16 code units.
 *
 * Each code unit is classified by value and the matching byte width is added
 * to a running total, so no encoded copy of the string is created.
 *
 * NOTE(review): any code unit in the surrogate range (0xD800-0xDFFF) is counted
 * as 4 bytes and the following code unit is skipped without checking that it
 * exists or that it is a low surrogate. For well-formed strings this matches
 * UTF-8; for lone surrogates the count may differ from a real encoder - confirm
 * whether malformed input matters to callers.
 *
 * @param str - The string to measure.
 * @returns The accumulated byte count.
 */
export function getByteSize(str: string): number {
let bytes = 0;
for (let i = 0; i < str.length; i++) {
const code = str.charCodeAt(i);
if (code < 0x80) {
// Below 0x80: one byte.
bytes += 1;
} else if (code < 0x800) {
// Below 0x800: two bytes.
bytes += 2;
} else if (code < 0xd800 || code >= 0xe000) {
// Remaining code units outside the surrogate range: three bytes.
bytes += 3;
} else {
// Surrogate range: consume the next code unit as the second half of the
// pair and count the pair as four bytes.
i++;
bytes += 4;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: UTF-16 Surrogate Pair Handling Flaw

The getByteSize function's surrogate pair logic increments i without validating i+1 is within bounds or that the next character is a low surrogate. This can cause out-of-bounds access or an incorrect byte count for malformed UTF-16 sequences or strings ending with a high surrogate.

Fix in Cursor Fix in Web

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's ok to keep the previous getByteSize where you use TextEncoder directly; the binary search is meant to be for:

  1. truncating string messages
  2. truncating array of messages

e.g:

/**
 * Trims a list of messages so that its JSON serialization fits within `maxBytes`.
 *
 * Messages are dropped from the front of the array until the remaining suffix
 * fits. If even the last message alone is too large and it is an object with a
 * string `content` property, that content is shortened; otherwise an empty
 * array is returned.
 *
 * Every size comparison is made against the serialized *array* (brackets and
 * JSON escaping included), so whatever is returned never serializes to more
 * than `maxBytes`.
 *
 * @param messages - The messages to trim. Non-array or empty input is returned as is.
 * @param maxBytes - Maximum allowed byte size of `JSON.stringify(result)`.
 * @returns The original array when it already fits, otherwise a new, shorter array.
 */
export function truncateMessagesByBytes(messages: unknown[], maxBytes: number): unknown[] {
  if (!Array.isArray(messages) || messages.length === 0) {
    return messages;
  }

  const serializedSize = (value: unknown): number => getByteSize(JSON.stringify(value));

  if (serializedSize(messages) <= maxBytes) {
    return messages;
  }

  // Binary search for the smallest start index whose suffix fits. If no suffix
  // fits, fall back to the last message on its own and try to shorten it below.
  let low = 0;
  let high = messages.length - 1;
  let startIndex = messages.length - 1;

  while (low <= high) {
    const mid = Math.floor((low + high) / 2);

    if (serializedSize(messages.slice(mid)) <= maxBytes) {
      startIndex = mid;
      high = mid - 1; // Try to keep more messages
    } else {
      low = mid + 1; // Need to drop more messages
    }
  }

  const remainingMessages = messages.slice(startIndex);

  // Several messages only remain when the search found a suffix that fits.
  // A single message is re-measured as a one-element array so the check is
  // consistent with the full-array check at the top.
  if (remainingMessages.length > 1 || serializedSize(remainingMessages) <= maxBytes) {
    return remainingMessages;
  }

  const [singleMessage] = remainingMessages;

  if (
    typeof singleMessage !== 'object' ||
    singleMessage === null ||
    !('content' in singleMessage) ||
    typeof (singleMessage as { content: unknown }).content !== 'string'
  ) {
    return [];
  }

  const baseMessage: object = singleMessage;
  const originalContent = (singleMessage as { content: string }).content;
  const withContent = (content: string): unknown[] => [{ ...baseMessage, content }];

  // Budget left for the content once the rest of the message and the array
  // brackets are accounted for.
  const availableContentBytes = maxBytes - serializedSize(withContent(''));

  if (availableContentBytes <= 0) {
    return [];
  }

  let truncatedContent = truncateStringByBytes(originalContent, availableContentBytes);

  // JSON escaping (quotes, backslashes, control characters) can make the
  // serialized content larger than its raw size. Shrink once more by the
  // measured overflow so the final payload respects the limit.
  const overflow = serializedSize(withContent(truncatedContent)) - maxBytes;
  if (overflow > 0) {
    truncatedContent = truncateStringByBytes(truncatedContent, getByteSize(truncatedContent) - overflow);
  }

  return withContent(truncatedContent);
}

where truncateStringByBytes also does a quick binary search.

}
return bytes;
}

/**
 * Returns the longest prefix of `str` whose byte size does not exceed `maxBytes`.
 *
 * The string is scanned once from the start, so the cost is linear in the
 * length of the kept prefix. Re-measuring and re-slicing one code unit at a
 * time is quadratic on large inputs.
 *
 * The per-code-unit byte widths mirror `getByteSize` in this file: a code unit
 * in the surrogate range is counted together with the following code unit as
 * four bytes, which also means a surrogate pair is never cut in half.
 *
 * @param str - The string to shorten.
 * @param maxBytes - Maximum allowed byte size of the result.
 * @returns `str` itself when it fits, otherwise the longest fitting prefix
 *   (possibly the empty string).
 */
function truncateStringByBytes(str: string, maxBytes: number): string {
  let usedBytes = 0;
  let index = 0;

  while (index < str.length) {
    const code = str.charCodeAt(index);
    let width: number;
    let codeUnits = 1;

    if (code < 0x80) {
      width = 1;
    } else if (code < 0x800) {
      width = 2;
    } else if (code < 0xd800 || code >= 0xe000) {
      width = 3;
    } else {
      // Surrogate range: counted as a two-code-unit, four-byte character.
      width = 4;
      codeUnits = 2;
    }

    // Stop before the first character that would push the total over the limit.
    if (usedBytes + width > maxBytes) {
      return str.slice(0, index);
    }

    usedBytes += width;
    index += codeUnits;
  }

  return str;
}

/**
 * Trims a list of messages so that its serialized form fits a byte budget.
 *
 * Non-array or empty input, and input whose JSON serialization already fits,
 * is returned unchanged. Otherwise messages are dropped from the front of the
 * array, always keeping at least the last one. If exactly one message remains
 * and it is still too large, its string `content` property (when present) is
 * shortened; if it has no string `content`, or nothing is left of the budget
 * for content, an empty array is returned.
 *
 * @param messages - The messages to trim.
 * @param maxBytes - The byte budget the sizes are compared against.
 * @returns The original array, a shorter slice of it, a one-element array with
 *   shortened content, or an empty array.
 */
export function truncateMessagesByBytes(messages: unknown[], maxBytes: number): unknown[] {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mind adding tests for this? You can use the files under dev-packages/node-integration-tests/suites/tracing

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add JSDocs for this file?

// Nothing to trim: hand non-array or empty input straight back.
if (!Array.isArray(messages) || messages.length === 0) {
return messages;
}

// Size of the whole serialized array, including brackets and commas.
let currentSize = getByteSize(JSON.stringify(messages));

if (currentSize <= maxBytes) {
return messages;
}

let startIndex = 0;

// Drop messages from the front until the running size fits, but never drop
// the last message.
// NOTE(review): only the message's own size is subtracted, not the comma that
// separated it from its neighbour, so currentSize appears to overestimate the
// remaining size by one byte per dropped message - confirm whether this can
// drop one message more than necessary.
while (startIndex < messages.length - 1 && currentSize > maxBytes) {
const messageSize = getByteSize(JSON.stringify(messages[startIndex]));
currentSize -= messageSize;
startIndex++;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Incorrect Byte Size Tracking During Truncation

The truncateMessagesByBytes function incorrectly tracks the total byte size. currentSize is initialized with the byte size of the full JSON.stringify(messages) array, but then individual message sizes are subtracted without accounting for the changing JSON array overhead (brackets, commas) as messages are removed. This leads to currentSize becoming inaccurate, causing incorrect truncation decisions.

Fix in Cursor Fix in Web


const remainingMessages = messages.slice(startIndex);

// Only one message left: it may still be too large on its own.
if (remainingMessages.length === 1) {
const singleMessage = remainingMessages[0];
// NOTE(review): this measures the message alone, without the surrounding
// array brackets that the initial full-array measurement included - confirm
// whether the two bytes matter at the limit.
const singleMessageSize = getByteSize(JSON.stringify(singleMessage));

if (singleMessageSize > maxBytes) {
// Content can only be shortened when the message is an object with a string `content`.
if (typeof singleMessage === 'object' && singleMessage !== null && 'content' in singleMessage && typeof (singleMessage as { content: unknown }).content === 'string') {
const originalContent = (singleMessage as { content: string }).content;
// Measure the message with empty content to learn how many bytes the other fields need.
const messageWithoutContent = { ...singleMessage, content: '' };
const otherMessagePartsSize = getByteSize(JSON.stringify(messageWithoutContent));
const availableContentBytes = maxBytes - otherMessagePartsSize;

// The other fields alone already use up the budget.
if (availableContentBytes <= 0) {
return [];
}

// NOTE(review): the budget is applied to the raw content; characters that
// JSON.stringify escapes (quotes, backslashes, control characters) take more
// bytes once serialized - confirm whether the result can exceed maxBytes.
const truncatedContent = truncateStringByBytes(originalContent, availableContentBytes);
return [{ ...singleMessage, content: truncatedContent }];
} else {
// No string content to shorten: drop the message entirely.
return [];
}
}
}

return remainingMessages;
}


/**
 * Trims gen-AI messages to the default byte limit.
 *
 * Delegates to `truncateMessagesByBytes` with `DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT`.
 *
 * @param messages - The messages to trim.
 * @returns Whatever `truncateMessagesByBytes` returns for the default limit.
 */
export function truncateGenAiMessages(messages: unknown[]): unknown[] {
  const byteLimit = DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT;
  return truncateMessagesByBytes(messages, byteLimit);
}
21 changes: 15 additions & 6 deletions packages/core/src/utils/anthropic-ai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
GEN_AI_RESPONSE_TOOL_CALLS_ATTRIBUTE,
GEN_AI_SYSTEM_ATTRIBUTE,
} from '../ai/gen-ai-attributes';
import { truncateGenAiMessages } from '../ai/messageTruncation';
import { buildMethodPath, getFinalOperationName, getSpanOperation, setTokenUsageAttributes } from '../ai/utils';
import { handleCallbackErrors } from '../handleCallbackErrors';
import { instrumentAsyncIterableStream, instrumentMessageStream } from './streaming';
Expand Down Expand Up @@ -71,16 +72,24 @@ function extractRequestAttributes(args: unknown[], methodPath: string): Record<s
return attributes;
}

/**
* Add private request attributes to spans.
* This is only recorded if recordInputs is true.
*/
function addPrivateRequestAttributes(span: Span, params: Record<string, unknown>): void {
if ('messages' in params) {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.messages) });
const messages = params.messages;
if (Array.isArray(messages)) {
const truncatedMessages = truncateGenAiMessages(messages);
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(truncatedMessages) });
} else {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(messages) });
}
}
if ('input' in params) {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.input) });
const input = params.input;
if (Array.isArray(input)) {
const truncatedInput = truncateGenAiMessages(input);
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(truncatedInput) });
} else {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(input) });
}
}
if ('prompt' in params) {
span.setAttributes({ [GEN_AI_PROMPT_ATTRIBUTE]: JSON.stringify(params.prompt) });
Expand Down
33 changes: 22 additions & 11 deletions packages/core/src/utils/google-genai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {
GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE,
GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE,
} from '../ai/gen-ai-attributes';
import { truncateGenAiMessages } from '../ai/messageTruncation';
import { buildMethodPath, getFinalOperationName, getSpanOperation } from '../ai/utils';
import { handleCallbackErrors } from '../handleCallbackErrors';
import { CHAT_PATH, CHATS_CREATE_METHOD, GOOGLE_GENAI_SYSTEM_NAME } from './constants';
Expand Down Expand Up @@ -128,25 +129,35 @@ function extractRequestAttributes(
return attributes;
}

/**
* Add private request attributes to spans.
* This is only recorded if recordInputs is true.
* Handles different parameter formats for different Google GenAI methods.
*/
function addPrivateRequestAttributes(span: Span, params: Record<string, unknown>): void {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also revert back this JSDoc comment?

// For models.generateContent: ContentListUnion: Content | Content[] | PartUnion | PartUnion[]
if ('contents' in params) {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.contents) });
const contents = params.contents;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you revert the comment removal to help others understand the request structure? this could also be a string

if (Array.isArray(contents)) {
const truncatedContents = truncateGenAiMessages(contents);
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(truncatedContents) });
} else {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(contents) });
}
}

// For chat.sendMessage: message can be string or Part[]
if ('message' in params) {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.message) });
const message = params.message;
if (Array.isArray(message)) {
const truncatedMessage = truncateGenAiMessages(message);
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(truncatedMessage) });
} else {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(message) });
}
}

// For chats.create: history contains the conversation history
if ('history' in params) {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.history) });
const history = params.history;
if (Array.isArray(history)) {
const truncatedHistory = truncateGenAiMessages(history);
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(truncatedHistory) });
} else {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(history) });
}
}
}

Expand Down
18 changes: 15 additions & 3 deletions packages/core/src/utils/openai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
GEN_AI_RESPONSE_TOOL_CALLS_ATTRIBUTE,
GEN_AI_SYSTEM_ATTRIBUTE,
} from '../ai/gen-ai-attributes';
import { truncateGenAiMessages } from '../ai/messageTruncation';
import { OPENAI_INTEGRATION_NAME } from './constants';
import { instrumentStream } from './streaming';
import type {
Expand Down Expand Up @@ -188,13 +189,24 @@ function addResponseAttributes(span: Span, result: unknown, recordOutputs?: bool
}
}

// Extract and record AI request inputs, if present. This is intentionally separate from response attributes.
function addRequestAttributes(span: Span, params: Record<string, unknown>): void {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also revert back this JSDoc comment?

if ('messages' in params) {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.messages) });
const messages = params.messages;
if (Array.isArray(messages)) {
const truncatedMessages = truncateGenAiMessages(messages);
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(truncatedMessages) });
} else {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(messages) });
}
}
if ('input' in params) {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.input) });
const input = params.input;
if (Array.isArray(input)) {
const truncatedInput = truncateGenAiMessages(input);
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(truncatedInput) });
} else {
span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(input) });
}
}
}

Expand Down
9 changes: 8 additions & 1 deletion packages/core/src/utils/vercel-ai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { SEMANTIC_ATTRIBUTE_SENTRY_OP, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from '
import type { Event } from '../../types-hoist/event';
import type { Span, SpanAttributes, SpanAttributeValue, SpanJSON, SpanOrigin } from '../../types-hoist/span';
import { spanToJSON } from '../spanUtils';
import { truncateGenAiMessages } from '../ai/messageTruncation';
import { toolCallSpanMap } from './constants';
import type { TokenSummary } from './types';
import { accumulateTokensForParent, applyAccumulatedTokens } from './utils';
Expand Down Expand Up @@ -190,7 +191,13 @@ function processGenerateSpan(span: Span, name: string, attributes: SpanAttribute
}

if (attributes[AI_PROMPT_ATTRIBUTE]) {
span.setAttribute('gen_ai.prompt', attributes[AI_PROMPT_ATTRIBUTE]);
const prompt = attributes[AI_PROMPT_ATTRIBUTE];
if (Array.isArray(prompt)) {
const truncatedPrompt = truncateGenAiMessages(prompt);
span.setAttribute('gen_ai.prompt', JSON.stringify(truncatedPrompt));
} else {
span.setAttribute('gen_ai.prompt', prompt);
}
}
if (attributes[AI_MODEL_ID_ATTRIBUTE] && !attributes[GEN_AI_RESPONSE_MODEL_ATTRIBUTE]) {
span.setAttribute(GEN_AI_RESPONSE_MODEL_ATTRIBUTE, attributes[AI_MODEL_ID_ATTRIBUTE]);
Expand Down