diff --git a/packages/core/src/integrations/mcp-server/mediaFiltering.ts b/packages/core/src/integrations/mcp-server/mediaFiltering.ts
new file mode 100644
index 000000000000..14768b879d7a
--- /dev/null
+++ b/packages/core/src/integrations/mcp-server/mediaFiltering.ts
@@ -0,0 +1,174 @@
+import { getClient } from '../../currentScopes';
+import { isValidContentItem } from './validation';
+
+/** MIME types (lower-case) whose content is treated as media and dropped. */
+const MEDIA_MIME_TYPES = new Set([
+  'image/jpeg',
+  'image/jpg',
+  'image/png',
+  'image/gif',
+  'image/webp',
+  'image/svg+xml',
+  'image/bmp',
+  'image/tiff',
+  'video/mp4',
+  'video/avi',
+  'video/mov',
+  'video/wmv',
+  'video/flv',
+  'video/webm',
+  'video/mkv',
+  'audio/mp3',
+  'audio/wav',
+  'audio/ogg',
+  'audio/mpeg',
+  'audio/aac',
+  'audio/flac',
+  'application/pdf',
+  'application/zip',
+  'application/x-zip-compressed',
+]);
+
+/**
+ * Checks whether a content item carries media (binary) data.
+ *
+ * An item counts as media when its `type` is `'image'`, its `mimeType` is listed in
+ * `MEDIA_MIME_TYPES`, or its `data` string is longer than 1000 characters and its first
+ * 50 characters contain an image data-URL prefix or a base64 JPEG/PNG signature.
+ *
+ * @param item - Candidate item; anything rejected by `isValidContentItem` is not media.
+ * @returns `true` if the item is considered media.
+ */
+function isMediaContent(item: unknown): boolean {
+  if (!isValidContentItem(item)) {
+    return false;
+  }
+
+  if (typeof item.type === 'string' && item.type === 'image') {
+    return true;
+  }
+
+  if (typeof item.mimeType === 'string' && MEDIA_MIME_TYPES.has(item.mimeType.toLowerCase())) {
+    return true;
+  }
+
+  if (typeof item.data === 'string' && item.data.length > 1000) {
+    // The prefix is lower-cased, so every marker compared against it must be lower-case as well.
+    // '/9j/' and 'ivborw0kggo' are the lower-cased base64 forms of the JPEG and PNG signatures.
+    const dataStart = item.data.substring(0, 50).toLowerCase();
+    if (dataStart.includes('data:image/') || dataStart.includes('/9j/') || dataStart.includes('ivborw0kggo')) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Reports dropped media to the current client as dropped `attachment` data.
+ * Does nothing when `getClient()` returns no client.
+ *
+ * @param reason - Drop reason passed to `recordDroppedEvent`.
+ * @param count - Number of dropped items to record; defaults to 1.
+ */
+function recordDroppedMedia(reason: string, count: number = 1): void {
+  const client = getClient();
+  if (client) {
+    // `reason` is typed as a plain string here, so it is cast for `recordDroppedEvent`.
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    client.recordDroppedEvent(reason as any, 'attachment', count);
+  }
+}
+
+/**
+ * Removes media from a single content item.
+ *
+ * - Values rejected by `isValidContentItem` are returned unchanged.
+ * - An item that is itself media is dropped.
+ * - If `content` is an array, media entries are removed from it; the item is dropped when the
+ *   resulting array is empty.
+ * - If `content` is a single media item, the whole item is dropped.
+ *
+ * Each drop is reported through `recordDroppedMedia`.
+ *
+ * @param item - Content item to filter.
+ * @returns The item (or a shallow copy with filtered `content`), or `null` if it was dropped.
+ */
+export function filterMediaFromContentItem(item: unknown): unknown | null {
+  if (!isValidContentItem(item)) {
+    return item;
+  }
+
+  if (isMediaContent(item)) {
+    recordDroppedMedia('media_content_dropped');
+    return null;
+  }
+
+  if (Array.isArray(item.content)) {
+    const filteredContent = item.content
+      .map(contentItem => {
+        if (isMediaContent(contentItem)) {
+          recordDroppedMedia('media_content_dropped');
+          return null;
+        }
+        return contentItem;
+      })
+      .filter(contentItem => contentItem !== null);
+
+    if (filteredContent.length === 0) {
+      return null;
+    }
+
+    return {
+      ...item,
+      content: filteredContent,
+    };
+  }
+
+  if (isValidContentItem(item.content) && isMediaContent(item.content)) {
+    recordDroppedMedia('media_content_dropped');
+    return null;
+  }
+
+  return item;
+}
+
+/**
+ * Applies `filterMediaFromContentItem` to every entry and removes the dropped ones.
+ *
+ * @param content - Content items to filter.
+ * @returns A new array without the dropped (`null`) entries.
+ */
+export function filterMediaFromContentArray(content: unknown[]): unknown[] {
+  return content
+    .map(item => filterMediaFromContentItem(item))
+    .filter(item => item !== null);
+}
+
+/**
+ * Filters media out of array-valued attributes that contain content items.
+ *
+ * Only arrays with at least one entry accepted by `isValidContentItem` are touched. Such an
+ * attribute is replaced by its filtered array, or deleted when nothing remains.
+ *
+ * @param attributes - Attributes to filter; the input object is not mutated.
+ * @returns A shallow copy of `attributes` with media removed.
+ */
+export function filterMediaFromAttributes(attributes: Record<string, unknown>): Record<string, unknown> {
+  const filteredAttributes = { ...attributes };
+
+  for (const [key, value] of Object.entries(filteredAttributes)) {
+    if (Array.isArray(value)) {
+      if (value.length > 0 && value.some(item => isValidContentItem(item))) {
+        const filtered = filterMediaFromContentArray(value);
+        if (filtered.length === 0) {
+          delete filteredAttributes[key];
+        } else {
+          filteredAttributes[key] = filtered;
+        }
+      }
+    }
+  }
+
+  return filteredAttributes;
+}
diff --git a/packages/core/src/integrations/mcp-server/resultExtraction.ts b/packages/core/src/integrations/mcp-server/resultExtraction.ts
index 34dc2be9d09c..edf4f9722ff9 100644
--- a/packages/core/src/integrations/mcp-server/resultExtraction.ts
+++ b/packages/core/src/integrations/mcp-server/resultExtraction.ts
@@ -12,11 +12,6 @@ import {
 } from './attributes';
 import { isValidContentItem } from './validation';
 
-/**
- * Build attributes for tool result content items
- * @param content - Array of content items from tool result
- * @returns Attributes extracted from each content item including type, text, mime type, URI, and resource info
- */
 function buildAllContentItemAttributes(content: unknown[]): Record {
   const attributes: Record = {
     [MCP_TOOL_RESULT_CONTENT_COUNT_ATTRIBUTE]: content.length,
@@ -115,7 +110,7 @@ export function
extractPromptResultAttributes(result: unknown): Record;
+
+  if (typeof obj.type === 'string' && (obj.type === 'image' || obj.type === 'image_url')) {
+    return true;
+  }
+
+  if (typeof obj.mime_type === 'string' && MEDIA_MIME_TYPES.has(obj.mime_type.toLowerCase())) {
+    return true;
+  }
+
+  if (typeof obj.mimeType === 'string' && MEDIA_MIME_TYPES.has(obj.mimeType.toLowerCase())) {
+    return true;
+  }
+
+  if (typeof obj.data === 'string' && obj.data.length > 1000) {
+    const dataStart = obj.data.substring(0, 50).toLowerCase();
+    if (dataStart.includes('data:image/') || dataStart.includes('/9j/') || dataStart.includes('ivborw0kggo')) {
+      return true;
+    }
+  }
+
+  if (typeof obj.source === 'object' && obj.source !== null) {
+    const source = obj.source as Record<string, unknown>;
+    if (typeof source.type === 'string' && source.type === 'base64' && typeof source.data === 'string') {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Reports dropped media to the current client as dropped `attachment` data with the reason
+ * `'media_content_dropped'`. Does nothing when `getClient()` returns no client.
+ *
+ * @param count - Number of dropped items to record; defaults to 1.
+ */
+function recordDroppedMedia(count: number = 1): void {
+  const client = getClient();
+  if (client) {
+    // The reason literal is cast to `any` for `recordDroppedEvent`.
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    client.recordDroppedEvent('media_content_dropped' as any, 'attachment', count);
+  }
+}
+
+/**
+ * Removes media content from a list of messages.
+ *
+ * Non-array input is returned unchanged, as are array entries that are not objects. For object
+ * entries, media items are removed from an array `content`; a `content` that is itself media, or
+ * an array `content` that ends up empty, is replaced by an empty string. The input messages are
+ * not mutated: changed messages are shallow copies.
+ *
+ * The total number of dropped items is reported once through `recordDroppedMedia`.
+ *
+ * @param messages - Messages to filter.
+ * @returns The filtered messages, or `messages` itself when it is not an array.
+ */
+export function filterMediaFromMessages(messages: unknown): unknown {
+  if (!Array.isArray(messages)) {
+    return messages;
+  }
+
+  let droppedCount = 0;
+
+  const filtered = messages.map(message => {
+    if (typeof message !== 'object' || message === null) {
+      return message;
+    }
+
+    const msg = message as Record<string, unknown>;
+
+    if (Array.isArray(msg.content)) {
+      const filteredContent = msg.content.filter(item => {
+        if (isMediaContent(item)) {
+          droppedCount++;
+          return false;
+        }
+        return true;
+      });
+
+      if (filteredContent.length === 0) {
+        return { ...msg, content: '' };
+      }
+
+      return { ...msg, content: filteredContent };
+    }
+
+    if (isMediaContent(msg.content)) {
+      droppedCount++;
+      return { ...msg, content: '' };
+    }
+
+    return message;
+  });
+
+  if (droppedCount > 0) {
+    recordDroppedMedia(droppedCount);
+  }
+
+  return filtered;
+}
+
diff --git
a/packages/core/src/utils/anthropic-ai/index.ts b/packages/core/src/utils/anthropic-ai/index.ts
index 8e77dd76b34e..0907205bdba5 100644
--- a/packages/core/src/utils/anthropic-ai/index.ts
+++ b/packages/core/src/utils/anthropic-ai/index.ts
@@ -23,6 +23,7 @@ import {
   GEN_AI_RESPONSE_TOOL_CALLS_ATTRIBUTE,
   GEN_AI_SYSTEM_ATTRIBUTE,
 } from '../ai/gen-ai-attributes';
+import { filterMediaFromMessages } from '../ai/mediaFiltering';
 import { buildMethodPath, getFinalOperationName, getSpanOperation, setTokenUsageAttributes } from '../ai/utils';
 import { handleCallbackErrors } from '../handleCallbackErrors';
 import { instrumentAsyncIterableStream, instrumentMessageStream } from './streaming';
@@ -71,16 +72,14 @@ function extractRequestAttributes(args: unknown[], methodPath: string): Record): void {
   if ('messages' in params) {
-    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.messages) });
+    const filtered = filterMediaFromMessages(params.messages);
+    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(filtered) });
   }
   if ('input' in params) {
-    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.input) });
+    const filtered = filterMediaFromMessages(params.input);
+    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(filtered) });
   }
   if ('prompt' in params) {
     span.setAttributes({ [GEN_AI_PROMPT_ATTRIBUTE]: JSON.stringify(params.prompt) });
diff --git a/packages/core/src/utils/google-genai/index.ts b/packages/core/src/utils/google-genai/index.ts
index 20e6e2a53606..038f688fdf13 100644
--- a/packages/core/src/utils/google-genai/index.ts
+++ b/packages/core/src/utils/google-genai/index.ts
@@ -22,6 +22,7 @@ import {
   GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE,
   GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE,
 } from '../ai/gen-ai-attributes';
+import { filterMediaFromMessages } from '../ai/mediaFiltering';
 import { buildMethodPath, getFinalOperationName, getSpanOperation } from '../ai/utils';
 import { handleCallbackErrors } from '../handleCallbackErrors';
 import { CHAT_PATH, CHATS_CREATE_METHOD, GOOGLE_GENAI_SYSTEM_NAME } from './constants';
@@ -128,25 +129,28 @@ function extractRequestAttributes(
   return attributes;
 }
 
 /**
  * Add private request attributes to spans.
  * This is only recorded if recordInputs is true.
  * Handles different parameter formats for different Google GenAI methods.
  */
 function addPrivateRequestAttributes(span: Span, params: Record): void {
   // For models.generateContent: ContentListUnion: Content | Content[] | PartUnion | PartUnion[]
   if ('contents' in params) {
-    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.contents) });
+    const filtered = filterMediaFromMessages(params.contents);
+    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(filtered) });
   }
 
   // For chat.sendMessage: message can be string or Part[]
   if ('message' in params) {
-    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.message) });
+    const filtered = filterMediaFromMessages(params.message);
+    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(filtered) });
   }
 
   // For chats.create: history contains the conversation history
   if ('history' in params) {
-    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.history) });
+    const filtered = filterMediaFromMessages(params.history);
+    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(filtered) });
   }
 }
 
diff --git a/packages/core/src/utils/openai/index.ts b/packages/core/src/utils/openai/index.ts
index 4ecfad625062..291ea2018a11 100644
--- a/packages/core/src/utils/openai/index.ts
+++ b/packages/core/src/utils/openai/index.ts
@@ -19,6 +19,7 @@ import {
   GEN_AI_RESPONSE_TOOL_CALLS_ATTRIBUTE,
   GEN_AI_SYSTEM_ATTRIBUTE,
 } from '../ai/gen-ai-attributes';
+import { filterMediaFromMessages } from '../ai/mediaFiltering';
 import { OPENAI_INTEGRATION_NAME } from './constants';
 import { instrumentStream } from './streaming';
 import type {
@@ -188,13 +189,15 @@ function addResponseAttributes(span: Span, result: unknown, recordOutputs?: bool
   }
 }
 
 // Extract and record AI request inputs, if present. This is intentionally separate from response attributes.
 function addRequestAttributes(span: Span, params: Record): void {
   if ('messages' in params) {
-    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.messages) });
+    const filtered = filterMediaFromMessages(params.messages);
+    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(filtered) });
   }
   if ('input' in params) {
-    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(params.input) });
+    const filtered = filterMediaFromMessages(params.input);
+    span.setAttributes({ [GEN_AI_REQUEST_MESSAGES_ATTRIBUTE]: JSON.stringify(filtered) });
   }
 }
 
diff --git a/packages/core/src/utils/vercel-ai/index.ts b/packages/core/src/utils/vercel-ai/index.ts
index 9b1cc2bc8aae..74faac33edc5 100644
--- a/packages/core/src/utils/vercel-ai/index.ts
+++ b/packages/core/src/utils/vercel-ai/index.ts
@@ -2,6 +2,7 @@ import type { Client } from '../../client';
 import { SEMANTIC_ATTRIBUTE_SENTRY_OP, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from '../../semanticAttributes';
 import type { Event } from '../../types-hoist/event';
 import type { Span, SpanAttributes, SpanAttributeValue, SpanJSON, SpanOrigin } from '../../types-hoist/span';
+import { filterMediaFromMessages } from '../ai/mediaFiltering';
 import { spanToJSON } from '../spanUtils';
 import { toolCallSpanMap } from './constants';
 import type { TokenSummary } from './types';
@@ -116,7 +117,17 @@ function processEndedVercelAiSpan(span: SpanJSON): void {
       attributes[GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE] + attributes[GEN_AI_USAGE_INPUT_TOKENS_ATTRIBUTE];
   }
 
+  if (attributes[AI_PROMPT_MESSAGES_ATTRIBUTE]) {
+    try {
+      const messages = JSON.parse(String(attributes[AI_PROMPT_MESSAGES_ATTRIBUTE]));
+      const filtered = filterMediaFromMessages(messages);
+      attributes[AI_PROMPT_MESSAGES_ATTRIBUTE] = JSON.stringify(filtered);
+    } catch {
+      // Best effort: if the attribute cannot be parsed or filtered, keep its original value.
+    }
+  }
+
   // Rename AI SDK attributes to standardized gen_ai attributes
   renameAttributeKey(attributes, AI_PROMPT_MESSAGES_ATTRIBUTE, 'gen_ai.request.messages');
   renameAttributeKey(attributes, AI_RESPONSE_TEXT_ATTRIBUTE, 'gen_ai.response.text');
   renameAttributeKey(attributes, AI_RESPONSE_TOOL_CALLS_ATTRIBUTE, 'gen_ai.response.tool_calls');