11import { captureException } from '../../exports' ;
2+ import { SPAN_STATUS_ERROR } from '../../tracing' ;
23import type { Span } from '../../types-hoist/span' ;
3- import { GEN_AI_RESPONSE_FINISH_REASONS_ATTRIBUTE , GEN_AI_RESPONSE_TEXT_ATTRIBUTE } from '../gen-ai-attributes' ;
4+ import {
5+ GEN_AI_RESPONSE_FINISH_REASONS_ATTRIBUTE ,
6+ GEN_AI_RESPONSE_TEXT_ATTRIBUTE ,
7+ OPENAI_RESPONSE_STREAM_ATTRIBUTE ,
8+ } from '../gen-ai-attributes' ;
49import { RESPONSE_EVENT_TYPES } from './constants' ;
510import type { OpenAIResponseObject } from './types' ;
611import { type ChatCompletionChunk , type ResponseStreamingEvent } from './types' ;
@@ -11,24 +16,48 @@ import {
1116 setTokenUsageAttributes ,
1217} from './utils' ;
1318
/**
 * State object used to accumulate information from a stream of OpenAI events/chunks.
 */
interface StreamingState {
  /** Types of events encountered in the stream. */
  eventTypes: string[];
  /** Collected response text fragments (for output recording). */
  responseTexts: string[];
  /** Reasons for finishing the response, as reported by the API. */
  finishReasons: string[];
  /** The response ID. */
  responseId: string;
  /** The model name. */
  responseModel: string;
  /** The timestamp of the response. */
  responseTimestamp: number;
  /** Number of prompt/input tokens used; undefined until a usage-bearing event is seen. */
  promptTokens: number | undefined;
  /** Number of completion/output tokens used; undefined until a usage-bearing event is seen. */
  completionTokens: number | undefined;
  /** Total number of tokens used (prompt + completion); undefined until a usage-bearing event is seen. */
  totalTokens: number | undefined;
}
2542
43+ /**
44+ * Processes a single OpenAI ChatCompletionChunk event, updating the streaming state.
45+ *
46+ * @param chunk - The ChatCompletionChunk event to process.
47+ * @param state - The current streaming state to update.
48+ * @param recordOutputs - Whether to record output text fragments.
49+ */
2650function processChatCompletionChunk ( chunk : ChatCompletionChunk , state : StreamingState , recordOutputs : boolean ) : void {
2751 state . responseId = chunk . id ?? state . responseId ;
2852 state . responseModel = chunk . model ?? state . responseModel ;
2953 state . responseTimestamp = chunk . created ?? state . responseTimestamp ;
3054
3155 if ( chunk . usage ) {
56+ // For stream responses, the input tokens remain constant across all events in the stream.
57+ // Output tokens, however, are only finalized in the last event.
58+ // Since we can't guarantee that the last event will include usage data or even be a typed event,
59+ // we update the output token values on every event that includes them.
60+ // This ensures that output token usage is always set, even if the final event lacks it.
3261 state . promptTokens = chunk . usage . prompt_tokens ;
3362 state . completionTokens = chunk . usage . completion_tokens ;
3463 state . totalTokens = chunk . usage . total_tokens ;
@@ -44,17 +73,31 @@ function processChatCompletionChunk(chunk: ChatCompletionChunk, state: Streaming
4473 }
4574}
4675
76+ /**
77+ * Processes a single OpenAI Responses API streaming event, updating the streaming state and span.
78+ *
79+ * @param streamEvent - The event to process (may be an error or unknown object).
80+ * @param state - The current streaming state to update.
81+ * @param recordOutputs - Whether to record output text fragments.
82+ * @param span - The span to update with error status if needed.
83+ */
4784function processResponsesApiEvent (
4885 streamEvent : ResponseStreamingEvent | unknown | Error ,
4986 state : StreamingState ,
5087 recordOutputs : boolean ,
88+ span : Span ,
5189) : void {
5290 if ( ! ( streamEvent && typeof streamEvent === 'object' ) ) {
5391 state . eventTypes . push ( 'unknown:non-object' ) ;
5492 return ;
5593 }
5694 if ( streamEvent instanceof Error ) {
57- captureException ( streamEvent ) ;
95+ span . setStatus ( { code : SPAN_STATUS_ERROR , message : 'internal_error' } ) ;
96+ captureException ( streamEvent , {
97+ mechanism : {
98+ handled : false ,
99+ } ,
100+ } ) ;
58101 return ;
59102 }
60103
@@ -71,32 +114,42 @@ function processResponsesApiEvent(
71114 return ;
72115 }
73116
74- const { response } = event as { response : OpenAIResponseObject } ;
75- state . responseId = response . id ?? state . responseId ;
76- state . responseModel = response . model ?? state . responseModel ;
77- state . responseTimestamp = response . created_at ?? state . responseTimestamp ;
78-
79- if ( response . usage ) {
80- state . promptTokens = response . usage . input_tokens ;
81- state . completionTokens = response . usage . output_tokens ;
82- state . totalTokens = response . usage . total_tokens ;
83- }
117+ if ( 'response' in event ) {
118+ const { response } = event as { response : OpenAIResponseObject } ;
119+ state . responseId = response . id ?? state . responseId ;
120+ state . responseModel = response . model ?? state . responseModel ;
121+ state . responseTimestamp = response . created_at ?? state . responseTimestamp ;
122+
123+ if ( response . usage ) {
124+ // For stream responses, the input tokens remain constant across all events in the stream.
125+ // Output tokens, however, are only finalized in the last event.
126+ // Since we can't guarantee that the last event will include usage data or even be a typed event,
127+ // we update the output token values on every event that includes them.
128+ // This ensures that output token usage is always set, even if the final event lacks it.
129+ state . promptTokens = response . usage . input_tokens ;
130+ state . completionTokens = response . usage . output_tokens ;
131+ state . totalTokens = response . usage . total_tokens ;
132+ }
84133
85- if ( response . status ) {
86- state . finishReasons . push ( response . status ) ;
87- }
134+ if ( response . status ) {
135+ state . finishReasons . push ( response . status ) ;
136+ }
88137
89- if ( recordOutputs && response . output_text ) {
90- state . responseTexts . push ( response . output_text ) ;
138+ if ( recordOutputs && response . output_text ) {
139+ state . responseTexts . push ( response . output_text ) ;
140+ }
91141 }
92142}
143+
93144/**
94- * Instrument a stream of OpenAI events
95- * @param stream - The stream of events to instrument
96- * @param span - The span to add attributes to
97- * @param recordOutputs - Whether to record outputs
98- * @param finishSpan - Optional function to finish the span manually
99- * @returns A generator that yields the events
145+ * Instruments a stream of OpenAI events, updating the provided span with relevant attributes and
146+ * optionally recording output text. This function yields each event from the input stream as it is processed.
147+ *
148+ * @template T - The type of events in the stream.
149+ * @param stream - The async iterable stream of events to instrument.
150+ * @param span - The span to add attributes to and to finish at the end of the stream.
151+ * @param recordOutputs - Whether to record output text fragments in the span.
152+ * @returns An async generator yielding each event from the input stream.
100153 */
101154export async function * instrumentStream < T > (
102155 stream : AsyncIterable < T > ,
@@ -107,21 +160,31 @@ export async function* instrumentStream<T>(
107160 eventTypes : [ ] ,
108161 responseTexts : [ ] ,
109162 finishReasons : [ ] ,
163+ responseId : '' ,
164+ responseModel : '' ,
165+ responseTimestamp : 0 ,
166+ promptTokens : undefined ,
167+ completionTokens : undefined ,
168+ totalTokens : undefined ,
110169 } ;
111170
112171 try {
113172 for await ( const event of stream ) {
114173 if ( isChatCompletionChunk ( event ) ) {
115174 processChatCompletionChunk ( event as ChatCompletionChunk , state , recordOutputs ) ;
116175 } else if ( isResponsesApiStreamEvent ( event ) ) {
117- processResponsesApiEvent ( event as ResponseStreamingEvent , state , recordOutputs ) ;
176+ processResponsesApiEvent ( event as ResponseStreamingEvent , state , recordOutputs , span ) ;
118177 }
119178 yield event ;
120179 }
121180 } finally {
122181 setCommonResponseAttributes ( span , state . responseId , state . responseModel , state . responseTimestamp ) ;
123182 setTokenUsageAttributes ( span , state . promptTokens , state . completionTokens , state . totalTokens ) ;
124183
184+ span . setAttributes ( {
185+ [ OPENAI_RESPONSE_STREAM_ATTRIBUTE ] : true ,
186+ } ) ;
187+
125188 if ( state . finishReasons . length ) {
126189 span . setAttributes ( {
127190 [ GEN_AI_RESPONSE_FINISH_REASONS_ATTRIBUTE ] : JSON . stringify ( state . finishReasons ) ,
0 commit comments