From b1afe2eab0daaf3fe53149a151e7d878c76a16ab Mon Sep 17 00:00:00 2001 From: richter Date: Fri, 27 Feb 2026 14:42:23 -0300 Subject: [PATCH 01/21] feat(agents): Initial implementation. --- package-lock.json | 12 + package.json | 12 + src/handlers/openai-agents/custom-span.ts | 57 +++ src/handlers/openai-agents/data-extraction.ts | 254 ++++++++++ src/handlers/openai-agents/embedded-tools.ts | 172 +++++++ src/handlers/openai-agents/index.ts | 455 ++++++++++++++++++ src/handlers/openai-agents/node.ts | 29 ++ src/handlers/openai-agents/span-mapping.ts | 104 ++++ src/index.ts | 9 + .../openai-agents/embedded-tool-calls.test.ts | 236 +++++++++ .../openai-agents/extract-llm-data.test.ts | 148 ++++++ .../openai-agents/extract-tool-data.test.ts | 73 +++ .../extract-workflow-data.test.ts | 87 ++++ .../openai-agents/map-span-name.test.ts | 82 ++++ .../openai-agents/map-span-type.test.ts | 60 +++ .../openai-agents/tracing-processor.test.ts | 314 ++++++++++++ 16 files changed, 2104 insertions(+) create mode 100644 src/handlers/openai-agents/custom-span.ts create mode 100644 src/handlers/openai-agents/data-extraction.ts create mode 100644 src/handlers/openai-agents/embedded-tools.ts create mode 100644 src/handlers/openai-agents/index.ts create mode 100644 src/handlers/openai-agents/node.ts create mode 100644 src/handlers/openai-agents/span-mapping.ts create mode 100644 tests/handlers/openai-agents/embedded-tool-calls.test.ts create mode 100644 tests/handlers/openai-agents/extract-llm-data.test.ts create mode 100644 tests/handlers/openai-agents/extract-tool-data.test.ts create mode 100644 tests/handlers/openai-agents/extract-workflow-data.test.ts create mode 100644 tests/handlers/openai-agents/map-span-name.test.ts create mode 100644 tests/handlers/openai-agents/map-span-type.test.ts create mode 100644 tests/handlers/openai-agents/tracing-processor.test.ts diff --git a/package-lock.json b/package-lock.json index 88b4a34d..c9479e47 100644 --- a/package-lock.json +++ 
b/package-lock.json
@@ -48,6 +48,18 @@
       "optionalDependencies": {
         "@langchain/openai": "^0.3.11",
         "tiktoken": "^1.0.13"
+      },
+      "peerDependencies": {
+        "@openai/agents": ">=0.4.0",
+        "openai": ">=4.0.0"
+      },
+      "peerDependenciesMeta": {
+        "@openai/agents": {
+          "optional": true
+        },
+        "openai": {
+          "optional": true
+        }
       }
     },
     "node_modules/@ampproject/remapping": {
diff --git a/package.json b/package.json
index 50b4f61a..eb2281c5 100644
--- a/package.json
+++ b/package.json
@@ -48,6 +48,18 @@
     "@langchain/openai": "^0.3.11",
     "tiktoken": "^1.0.13"
   },
+  "peerDependencies": {
+    "@openai/agents": ">=0.4.0",
+    "openai": ">=4.0.0"
+  },
+  "peerDependenciesMeta": {
+    "@openai/agents": {
+      "optional": true
+    },
+    "openai": {
+      "optional": true
+    }
+  },
   "devDependencies": {
     "@hey-api/openapi-ts": "^0.88.0",
     "@types/jest": "^29.5.14",
diff --git a/src/handlers/openai-agents/custom-span.ts b/src/handlers/openai-agents/custom-span.ts
new file mode 100644
index 00000000..661fe9a0
--- /dev/null
+++ b/src/handlers/openai-agents/custom-span.ts
@@ -0,0 +1,57 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+
+/**
+ * A lightweight subtype of CustomSpanData that carries a reference to a
+ * pre-configured GalileoSpan so it can be injected into the agent tracing flow.
+ *
+ * The __galileoCustom flag is used by mapSpanType() to distinguish this from
+ * ordinary CustomSpanData objects.
+ */
+export interface GalileoCustomSpanData {
+  /** Always 'custom' to satisfy the SDK's SpanData union discriminant. */
+  type: 'custom';
+  /** (Optional) Display name for the span. */
+  name?: string;
+  /**
+   * Arbitrary data payload. Must contain a 'galileoSpan' key holding the
+   * GalileoSpan reference; any other keys are free-form.
+   */
+  data: Record<string, unknown> & { galileoSpan: unknown };
+  /** Sentinel flag used internally by mapSpanType() to identify this type. */
+  __galileoCustom: true;
+}
+
+/**
+ * Creates a GalileoCustomSpanData object that wraps an existing Galileo span.
+ * @param galileoSpan - The Galileo span object to embed.
+ * @param name - (Optional) Display name for the span.
+ * @param extraData - (Optional) Additional data to include in the span data payload.
+ * @returns A GalileoCustomSpanData object.
+ */
+export function createGalileoCustomSpanData(
+  galileoSpan: unknown,
+  name?: string,
+  extraData?: Record<string, unknown>
+): GalileoCustomSpanData {
+  return {
+    type: 'custom',
+    name,
+    data: {
+      // extraData is spread first so that a stray 'galileoSpan' key inside it
+      // can never shadow the real span reference.
+      ...extraData,
+      galileoSpan
+    },
+    __galileoCustom: true
+  };
+}
+
+/**
+ * Type guard that checks whether a span data object is a GalileoCustomSpanData.
+ * Only the __galileoCustom sentinel is inspected; the rest of the shape is not validated.
+ * @param spanData - The span data to check.
+ * @returns True if the span data is a GalileoCustomSpanData.
+ */
+export function isGalileoCustomSpanData(
+  spanData: unknown
+): spanData is GalileoCustomSpanData {
+  return (
+    typeof spanData === 'object' &&
+    spanData !== null &&
+    (spanData as { __galileoCustom?: unknown }).__galileoCustom === true
+  );
+}
diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts
new file mode 100644
index 00000000..3fbffdb3
--- /dev/null
+++ b/src/handlers/openai-agents/data-extraction.ts
@@ -0,0 +1,254 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+
+/**
+ * Normalised token count structure returned by parseUsage.
+ */
+export interface ParsedUsage {
+  /** Prompt/input token count; 0 when not reported. */
+  inputTokens: number;
+  /** Completion/output token count; 0 when not reported. */
+  outputTokens: number;
+  /** Total token count, or null when the usage payload did not include one. */
+  totalTokens: number | null;
+  /** Reasoning token count; 0 when not reported. */
+  reasoningTokens: number;
+  /** Cached input token count; 0 when not reported. */
+  cachedTokens: number;
+}
+
+/**
+ * Normalises token counts from various OpenAI usage shapes.
+ * @param usageData - The raw usage data from a span.
+ * @returns Normalised token counts.
+ */
+export function parseUsage(
+  usageData: Record<string, unknown> | null | undefined
+): ParsedUsage {
+  if (!usageData) {
+    return {
+      inputTokens: 0,
+      outputTokens: 0,
+      totalTokens: null,
+      reasoningTokens: 0,
+      cachedTokens: 0
+    };
+  }
+
+  // Support both input_tokens/output_tokens (Responses/Agents SDK)
+  // and prompt_tokens/completion_tokens (Chat Completions legacy)
+  const inputTokens =
+    firstNumber(usageData.input_tokens, usageData.prompt_tokens) ?? 0;
+  const outputTokens =
+    firstNumber(usageData.output_tokens, usageData.completion_tokens) ?? 0;
+  const totalTokens = firstNumber(usageData.total_tokens) ?? null;
+
+  // Detail counters are looked up in this order (first numeric value wins):
+  //   1. the flat `details` record,
+  //   2. top-level keys,
+  //   3. the nested `*_tokens_details` objects of the raw Responses API and
+  //      Chat Completions usage payloads.
+  const details = asRecord(usageData.details);
+  const reasoningTokens =
+    firstNumber(
+      details.reasoning_tokens,
+      usageData.reasoning_tokens,
+      asRecord(usageData.output_tokens_details).reasoning_tokens,
+      asRecord(usageData.completion_tokens_details).reasoning_tokens
+    ) ?? 0;
+  const cachedTokens =
+    firstNumber(
+      details.cached_tokens,
+      usageData.cached_tokens,
+      asRecord(usageData.input_tokens_details).cached_tokens,
+      asRecord(usageData.prompt_tokens_details).cached_tokens
+    ) ?? 0;
+
+  return {
+    inputTokens,
+    outputTokens,
+    totalTokens,
+    reasoningTokens,
+    cachedTokens
+  };
+}
+
+/**
+ * Returns the first candidate that is a finite number.
+ * @param candidates - Values to inspect, in priority order.
+ * @returns The first finite number, or undefined when there is none.
+ */
+function firstNumber(...candidates: unknown[]): number | undefined {
+  for (const candidate of candidates) {
+    if (typeof candidate === 'number' && Number.isFinite(candidate)) {
+      return candidate;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Views a value as a string-keyed record.
+ * @param value - Any value.
+ * @returns The value itself when it is a non-null object, otherwise an empty record.
+ */
+function asRecord(value: unknown): Record<string, unknown> {
+  return typeof value === 'object' && value !== null
+    ? (value as Record<string, unknown>)
+    : {};
+}
+
+/**
+ * JSON-serialises a value without ever throwing.
+ * @param value - The value to serialise.
+ * @returns The JSON text, or String(value) when JSON.stringify throws
+ *   (circular structures, BigInt) or yields undefined (functions, symbols).
+ */
+function safeStringify(value: unknown): string {
+  try {
+    const serialised = JSON.stringify(value);
+    return serialised === undefined ? String(value) : serialised;
+  } catch {
+    return String(value);
+  }
+}
+
+/**
+ * Renders a value as text: strings pass through unchanged, everything else is serialised.
+ * @param value - The value to render.
+ * @returns The textual form of the value.
+ */
+function toText(value: unknown): string {
+  return typeof value === 'string' ? value : safeStringify(value);
+}
+
+/**
+ * Extracts LLM-relevant fields from a GenerationSpanData or ResponseSpanData.
+ * @param spanData - The span data object (must have type 'generation' or 'response').
+ * @returns A flat record of LLM span parameters; an empty record for any other type.
+ *   For 'response' spans the raw response is also returned under `_responseObject`.
+ */
+export function extractLlmData(
+  spanData: Record<string, unknown>
+): Record<string, unknown> {
+  if (spanData.type === 'generation') {
+    const usage = parseUsage(
+      (spanData.usage as Record<string, unknown> | undefined) ?? null
+    );
+    const modelConfig = asRecord(spanData.model_config);
+
+    return {
+      input: spanData.input !== undefined ? safeStringify(spanData.input) : '',
+      output:
+        spanData.output !== undefined ? safeStringify(spanData.output) : '',
+      model: (spanData.model as string | undefined) ?? 'unknown',
+      temperature: firstNumber(modelConfig.temperature),
+      modelParameters: modelConfig,
+      numInputTokens: usage.inputTokens,
+      numOutputTokens: usage.outputTokens,
+      totalTokens: usage.totalTokens ?? undefined,
+      numReasoningTokens: usage.reasoningTokens,
+      numCachedInputTokens: usage.cachedTokens,
+      metadata: {
+        gen_ai_system: 'openai',
+        model_config: safeStringify(modelConfig)
+      }
+    };
+  }
+
+  if (spanData.type === 'response') {
+    // ResponseSpanData uses underscore-prefixed fields in TypeScript SDK
+    const input = spanData._input ?? spanData.input;
+    const response = (spanData._response ?? spanData.response) as
+      | Record<string, unknown>
+      | undefined;
+
+    const model =
+      (response?.model as string | undefined) ??
+      (spanData.model as string | undefined) ??
+      'unknown';
+    const usage = parseUsage(
+      (response?.usage as Record<string, unknown> | undefined) ?? null
+    );
+    const tools = response?.tools;
+
+    return {
+      input: input !== undefined ? safeStringify(input) : '',
+      output:
+        response?.output !== undefined ? safeStringify(response.output) : '',
+      model,
+      temperature: firstNumber(response?.temperature),
+      // A null tool list is treated as absent rather than serialised to 'null'.
+      tools:
+        tools !== undefined && tools !== null
+          ? safeStringify(tools)
+          : undefined,
+      numInputTokens: usage.inputTokens,
+      numOutputTokens: usage.outputTokens,
+      totalTokens: usage.totalTokens ?? undefined,
+      numReasoningTokens: usage.reasoningTokens,
+      numCachedInputTokens: usage.cachedTokens,
+      metadata: {
+        gen_ai_system: 'openai'
+      },
+      _responseObject: response
+    };
+  }
+
+  return {};
+}
+
+/**
+ * Extracts tool-relevant fields from a FunctionSpanData or GuardrailSpanData.
+ * @param spanData - The span data object (must have type 'function' or 'guardrail').
+ * @returns A flat record of tool span parameters (input, output, metadata).
+ */
+export function extractToolData(
+  spanData: Record<string, unknown>
+): Record<string, unknown> {
+  if (spanData.type === 'function') {
+    return {
+      input: spanData.input !== undefined ? toText(spanData.input) : '',
+      output: spanData.output !== undefined ? toText(spanData.output) : undefined,
+      metadata:
+        spanData.mcp_data !== undefined
+          ? { mcp_data: safeStringify(spanData.mcp_data) }
+          : {}
+    };
+  }
+
+  if (spanData.type === 'guardrail') {
+    const triggered = Boolean(spanData.triggered);
+    return {
+      input: '',
+      output: triggered ? 'Guardrail triggered' : 'Guardrail passed',
+      metadata: {
+        triggered: String(triggered),
+        guardrail_name: String((spanData.name as string | undefined) ?? '')
+      }
+    };
+  }
+
+  // Transcription / Speech / speech_group / mcp_tools — map to tool but no deep extraction
+  return {
+    input: '',
+    output: undefined,
+    metadata: {}
+  };
+}
+
+/**
+ * Extracts workflow-relevant fields from an AgentSpanData, HandoffSpanData, or CustomSpanData.
+ * @param spanData - The span data object (must have type 'agent', 'handoff', or 'custom').
+ * @returns A flat record of workflow span parameters (input, output, metadata).
+ */
+export function extractWorkflowData(
+  spanData: Record<string, unknown>
+): Record<string, unknown> {
+  if (spanData.type === 'agent') {
+    const metadata: Record<string, string> = {};
+    if (spanData.tools !== undefined) {
+      metadata.tools = safeStringify(spanData.tools);
+    }
+    if (spanData.handoffs !== undefined) {
+      metadata.handoffs = safeStringify(spanData.handoffs);
+    }
+    if (spanData.output_type !== undefined) {
+      metadata.output_type = safeStringify(spanData.output_type);
+    }
+    return { input: '', output: undefined, metadata };
+  }
+
+  if (spanData.type === 'handoff') {
+    // snake_case wins; camelCase is accepted as a fallback so this agrees with
+    // the keys mapSpanName() reads when it builds the handoff display name.
+    const from = String(spanData.from_agent ?? spanData.fromAgent ?? '');
+    const to = String(spanData.to_agent ?? spanData.toAgent ?? '');
+    return {
+      input: from,
+      output: to,
+      metadata: {
+        from_agent: from,
+        to_agent: to
+      }
+    };
+  }
+
+  if (spanData.type === 'custom') {
+    const data = asRecord(spanData.data);
+    const input = data.input !== undefined ? toText(data.input) : '';
+    const output = data.output !== undefined ? toText(data.output) : undefined;
+
+    // Everything except input/output goes to metadata. Values are rendered with
+    // the non-throwing serialiser because the payload may hold arbitrary object
+    // references (for example the 'galileoSpan' entry of a Galileo custom span).
+    const metadata: Record<string, string> = {};
+    for (const [key, value] of Object.entries(data)) {
+      if (key === 'input' || key === 'output' || value === undefined) continue;
+      metadata[key] = toText(value);
+    }
+
+    return { input, output, metadata };
+  }
+
+  return { input: '', output: undefined, metadata: {} };
+}
diff --git a/src/handlers/openai-agents/embedded-tools.ts b/src/handlers/openai-agents/embedded-tools.ts
new file mode 100644
index 00000000..8de30e60
--- /dev/null
+++ b/src/handlers/openai-agents/embedded-tools.ts
@@ -0,0 +1,172 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+
+/**
+ * A single embedded tool call record extracted from a ResponseSpanData output array.
+ */
+export interface EmbeddedToolCall {
+  /** The raw output item type, e.g. 'web_search_call'. */
+  type: string;
+  /** Display name of the tool, as produced by getToolNameFromType(). */
+  function: { name: string };
+  /** Identifier of the call, or null when the item carries none. */
+  tool_call_id: string | null;
+  /** Same value as `type`. */
+  tool_call_type: string;
+  /** Extracted input text, or null when the item carries none. */
+  tool_call_input: string | null;
+  /** Extracted output text, or null when the item carries none. */
+  tool_call_output: string | null;
+  /** The item's status field, or null when absent. */
+  tool_call_status: string | null;
+}
+
+/** Output item types that are treated as embedded (hosted) tool calls. */
+const EMBEDDED_TOOL_TYPES = new Set([
+  'code_interpreter_call',
+  'file_search_call',
+  'web_search_call',
+  'computer_call',
+  'custom_tool_call'
+]);
+
+/**
+ * Maps an OpenAI embedded tool call type to a display name.
+ * @param type - The tool call type string.
+ * @returns A human-readable tool name.
+ */
+export function getToolNameFromType(type: string): string {
+  switch (type) {
+    case 'code_interpreter_call':
+      return 'code_interpreter';
+    case 'file_search_call':
+      return 'file_search';
+    case 'web_search_call':
+      return 'web_search';
+    case 'computer_call':
+      return 'computer';
+    case 'custom_tool_call':
+      return 'custom_tool';
+    default:
+      // Unknown types are shown as-is rather than hidden behind a generic label.
+      return type;
+  }
+}
+
+/**
+ * Extracts the input field from an embedded tool call item.
+ * A field that is present but null is treated the same as a missing field, so
+ * the result is null rather than the literal text 'null'.
+ * @param item - The raw output item from the response.
+ * @param type - The tool call type string.
+ * @returns The extracted input as a string, or null if none.
+ */
+export function extractToolInput(
+  item: Record<string, unknown>,
+  type: string
+): string | null {
+  switch (type) {
+    case 'code_interpreter_call': {
+      const code = item.code;
+      return code !== undefined && code !== null ? String(code) : null;
+    }
+    case 'file_search_call': {
+      const queries = item.queries;
+      if (queries === undefined || queries === null) return null;
+      return Array.isArray(queries) ? JSON.stringify(queries) : String(queries);
+    }
+    case 'web_search_call': {
+      // Only the action's `query` is reported as input.
+      const action = item.action as Record<string, unknown> | null | undefined;
+      const query = action?.query;
+      return query !== undefined && query !== null ? String(query) : null;
+    }
+    case 'computer_call': {
+      const action = item.action;
+      return action !== undefined && action !== null
+        ? JSON.stringify(action)
+        : null;
+    }
+    case 'custom_tool_call': {
+      const input = item.input;
+      if (input === undefined || input === null) return null;
+      return typeof input === 'string' ? input : JSON.stringify(input);
+    }
+    default:
+      return null;
+  }
+}
+
+/**
+ * Extracts the output field from an embedded tool call item.
+ * @param item - The raw output item from the response.
+ * @param type - The tool call type string.
+ * @returns The extracted output as a string, or null if none.
+ */
+export function extractToolOutput(
+  item: Record<string, unknown>,
+  type: string
+): string | null {
+  switch (type) {
+    case 'code_interpreter_call': {
+      // Concatenate all output logs and urls, one per line. For each entry the
+      // logs take precedence over the url; entries with neither are skipped.
+      const outputs = item.outputs;
+      if (!Array.isArray(outputs) || outputs.length === 0) return null;
+      const parts: string[] = [];
+      for (const entry of outputs) {
+        if (typeof entry !== 'object' || entry === null) continue;
+        const { logs, url } = entry as Record<string, unknown>;
+        if (logs !== undefined && logs !== null) {
+          parts.push(String(logs));
+        } else if (url !== undefined && url !== null) {
+          parts.push(String(url));
+        }
+      }
+      return parts.length > 0 ? parts.join('\n') : null;
+    }
+    case 'file_search_call': {
+      const results = item.results;
+      // A present-but-null `results` is treated as "no output", not as 'null'.
+      if (results === undefined || results === null) return null;
+      return Array.isArray(results) ? JSON.stringify(results) : String(results);
+    }
+    case 'web_search_call': {
+      // The whole action object is reported as the output.
+      const action = item.action;
+      return action !== undefined && action !== null
+        ? JSON.stringify(action)
+        : null;
+    }
+    case 'computer_call':
+      return null;
+    case 'custom_tool_call': {
+      const output = item.output;
+      if (output === undefined || output === null) return null;
+      return typeof output === 'string' ? output : JSON.stringify(output);
+    }
+    default:
+      return null;
+  }
+}
+
+/**
+ * Walks the _response.output array and returns all embedded tool call records.
+ * Items that are not objects, or whose type is not an embedded tool type, are skipped.
+ * @param response - The response object from a ResponseSpanData span.
+ * @returns An array of EmbeddedToolCall records, in output order.
+ */
+export function extractEmbeddedToolCalls(
+  response: Record<string, unknown> | null | undefined
+): EmbeddedToolCall[] {
+  if (!response) return [];
+
+  const output = response.output;
+  if (!Array.isArray(output)) return [];
+
+  const results: EmbeddedToolCall[] = [];
+
+  for (const item of output) {
+    if (typeof item !== 'object' || item === null) continue;
+    const typedItem = item as Record<string, unknown>;
+    const itemType = typedItem.type as string | undefined;
+    if (!itemType || !EMBEDDED_TOOL_TYPES.has(itemType)) continue;
+
+    // `call_id` is consulted last so that items which already resolved through
+    // `id` or `tool_call_id` keep exactly the id they had before.
+    const toolCallId =
+      (typedItem.id as string | undefined) ??
+      (typedItem.tool_call_id as string | undefined) ??
+      (typedItem.call_id as string | undefined) ??
+      null;
+    const status = (typedItem.status as string | undefined) ?? null;
+
+    results.push({
+      type: itemType,
+      function: { name: getToolNameFromType(itemType) },
+      tool_call_id: toolCallId,
+      tool_call_type: itemType,
+      tool_call_input: extractToolInput(typedItem, itemType),
+      tool_call_output: extractToolOutput(typedItem, itemType),
+      tool_call_status: status
+    });
+  }
+
+  return results;
+}
diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts
new file mode 100644
index 00000000..e892409c
--- /dev/null
+++ b/src/handlers/openai-agents/index.ts
@@ -0,0 +1,455 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import { GalileoLogger } from '../../utils/galileo-logger';
+import { GalileoSingleton } from '../../singleton';
+import { calculateDurationNs } from '../../utils/utils';
+import type { JsonObject } from '../../types/base.types';
+import { type Node, createNode } from './node';
+import { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping';
+import {
+  extractLlmData,
+  extractToolData,
+  extractWorkflowData
+} from './data-extraction';
+import {
+  extractEmbeddedToolCalls,
+  type EmbeddedToolCall
+} from './embedded-tools';
+import {
+  createGalileoCustomSpanData,
+  type GalileoCustomSpanData
+} from './custom-span';
+
+// NOTE: the OpenAI Agents SDK is an optional peer dependency and is deliberately
+// NOT probed at module load. This module is imported by the package entry point,
+// so an eager probe would print a warning for every consumer that does not use
+// the agents integration. The SDK is loaded lazily by
+// registerGalileoTraceProcessor() instead.
+
+/**
+ * Minimal interface for an OpenAI Agents SDK Trace object.
+ */
+export interface AgentTrace {
+  traceId: string;
+  name?: string;
+  metadata?: Record<string, unknown>;
+  startedAt?: string | null;
+  endedAt?: string | null;
+}
+
+/**
+ * Minimal interface for an OpenAI Agents SDK Span object.
+ */
+export interface AgentSpan<
+  T extends Record<string, unknown> = Record<string, unknown>
+> {
+  spanId: string;
+  traceId: string;
+  parentId?: string | null;
+  startedAt?: string | null;
+  endedAt?: string | null;
+  error?: {
+    message: string;
+    data?: Record<string, unknown>;
+  } | null;
+  spanData: T & { type: string };
+}
+
+/**
+ * Minimal TracingProcessor interface from @openai/agents-core.
+ */
+export interface TracingProcessor {
+  onTraceStart(trace: AgentTrace): Promise<void>;
+  onTraceEnd(trace: AgentTrace): Promise<void>;
+  onSpanStart(span: AgentSpan): Promise<void>;
+  onSpanEnd(span: AgentSpan): Promise<void>;
+  shutdown(timeout?: number): Promise<void>;
+  forceFlush(): Promise<void>;
+}
+
+/**
+ * Bookkeeping kept for one trace while it is in flight.
+ */
+interface TraceState {
+  /** Input of the first LLM span that ended with a non-empty input, else null. */
+  firstInput: unknown;
+  /** Output of the most recently ended span that had one, else null. */
+  lastOutput: unknown;
+  /** Ids of every span registered for this trace; used to release its nodes. */
+  spanIds: string[];
+}
+
+/**
+ * GalileoTracingProcessor implements the OpenAI Agents SDK TracingProcessor interface
+ * to capture agent runs and emit them to GalileoLogger.
+ *
+ * Spans are buffered in memory as a tree while the trace runs and are written
+ * to the logger in one pass when the trace ends. State is kept per trace id,
+ * so ending one trace does not discard the buffered spans of another.
+ */
+export class GalileoTracingProcessor implements TracingProcessor {
+  private _nodes = new Map<string, Node>();
+  private _traces = new Map<string, TraceState>();
+
+  /**
+   * Creates a new GalileoTracingProcessor.
+   * @param _galileoLogger - (Optional) The GalileoLogger instance to use. Defaults to singleton logger.
+   * @param _flushOnTraceEnd - (Optional) Whether to flush the logger after each trace ends. Defaults to true.
+   */
+  constructor(
+    private readonly _galileoLogger: GalileoLogger = GalileoSingleton.getInstance().getClient(),
+    private readonly _flushOnTraceEnd: boolean = true
+  ) {}
+
+  /**
+   * Called when a trace starts. Creates a root agent node.
+   * @param trace - The trace that started.
+   */
+  async onTraceStart(trace: AgentTrace): Promise<void> {
+    const spanParams: Record<string, unknown> = {
+      name: trace.name || 'Agent Run',
+      startedAt: trace.startedAt || new Date().toISOString()
+    };
+
+    if (trace.metadata) {
+      // Convert metadata values to strings for Galileo
+      const meta: Record<string, string> = {};
+      for (const [k, v] of Object.entries(trace.metadata)) {
+        meta[k] = typeof v === 'string' ? v : JSON.stringify(v);
+      }
+      spanParams.metadata = meta;
+    }
+
+    const node = createNode({
+      nodeType: 'agent',
+      spanParams,
+      runId: trace.traceId,
+      parentRunId: null
+    });
+
+    this._nodes.set(trace.traceId, node);
+    this._stateFor(trace.traceId);
+  }
+
+  /**
+   * Called when a trace ends. Commits the span tree and optionally flushes the logger.
+   * The trace's buffered nodes are released even when committing or flushing throws.
+   * @param trace - The trace that ended.
+   */
+  async onTraceEnd(trace: AgentTrace): Promise<void> {
+    const rootNode = this._nodes.get(trace.traceId);
+    if (rootNode) {
+      const endedAt = trace.endedAt || new Date().toISOString();
+      rootNode.spanParams.durationNs = this._durationNs(
+        rootNode.spanParams.startedAt as string | undefined,
+        endedAt
+      );
+      rootNode.spanParams.endedAt = endedAt;
+    }
+
+    try {
+      this._commitTrace(trace);
+      this._galileoLogger.conclude({ concludeAll: true });
+
+      if (this._flushOnTraceEnd) {
+        await this._galileoLogger.flush();
+      }
+    } finally {
+      this._forgetTrace(trace.traceId);
+    }
+  }
+
+  /**
+   * Called when a span starts. Maps span type, creates a Node, and links it to its parent.
+   * @param span - The span that started.
+   */
+  async onSpanStart(span: AgentSpan): Promise<void> {
+    const spanData = span.spanData;
+    const spanType = mapSpanType(spanData);
+    const spanName = mapSpanName(spanData, spanType);
+
+    // Determine effective node type — galileo_custom delegates to inner span
+    const nodeType: Node['nodeType'] =
+      spanType === GALILEO_CUSTOM_TYPE ? 'workflow' : spanType;
+
+    // A span without a parent hangs directly off the trace's root node.
+    const parentId = span.parentId ?? span.traceId;
+
+    const node = createNode({
+      nodeType,
+      spanParams: {
+        name: spanName,
+        startedAt: span.startedAt || new Date().toISOString(),
+        ...this._extractSpanParams(nodeType, spanData)
+      },
+      runId: span.spanId,
+      parentRunId: parentId
+    });
+
+    this._nodes.set(span.spanId, node);
+    this._stateFor(span.traceId).spanIds.push(span.spanId);
+
+    // Link to parent node
+    const parentNode = this._nodes.get(parentId);
+    if (parentNode) {
+      parentNode.children.push(span.spanId);
+    }
+  }
+
+  /**
+   * Called when a span ends. Finalises duration, merges data, and handles errors.
+   *
+   * Span data is re-extracted for every span type, not only LLM spans, so that
+   * fields which were not yet set on the span data when the span started are
+   * still picked up.
+   * @param span - The span that ended.
+   */
+  async onSpanEnd(span: AgentSpan): Promise<void> {
+    const node = this._nodes.get(span.spanId);
+    if (!node) return;
+
+    node.spanParams.durationNs = this._durationNs(
+      node.spanParams.startedAt as string | undefined,
+      span.endedAt || new Date().toISOString()
+    );
+
+    const spanData = span.spanData;
+    // _responseObject is an internal hand-off from extractLlmData and is never
+    // merged into the node's params here.
+    const { _responseObject: responseObj, ...finalData } =
+      this._extractSpanParams(node.nodeType, spanData);
+
+    if (spanData.type === 'response' && responseObj) {
+      const embeddedTools = extractEmbeddedToolCalls(
+        responseObj as Record<string, unknown>
+      );
+      if (embeddedTools.length > 0) {
+        node.spanParams.embeddedToolCalls = embeddedTools;
+      }
+    }
+
+    // Merge the refreshed data. Undefined values never overwrite what was
+    // captured at span start, and metadata is merged key by key.
+    for (const [key, value] of Object.entries(finalData)) {
+      if (value === undefined) continue;
+      if (key === 'metadata') {
+        node.spanParams.metadata = {
+          ...((node.spanParams.metadata as Record<string, string> | undefined) ??
+            {}),
+          ...(value as Record<string, string>)
+        };
+      } else {
+        node.spanParams[key] = value;
+      }
+    }
+
+    // Handle errors
+    if (span.error) {
+      const errorMessage = span.error.message || 'Unknown error';
+      const existingMeta =
+        (node.spanParams.metadata as Record<string, string> | undefined) ?? {};
+      node.spanParams.statusCode = 500;
+      node.spanParams.metadata = {
+        ...existingMeta,
+        error_message: errorMessage,
+        error_type: 'SpanError',
+        error_details: span.error.data
+          ? JSON.stringify(span.error.data)
+          : errorMessage
+      };
+    }
+
+    const state = this._stateFor(span.traceId);
+
+    // Track last output for trace-level output
+    if (node.spanParams.output !== undefined) {
+      state.lastOutput = node.spanParams.output;
+    }
+
+    // Track first LLM input for trace-level input
+    const input = node.spanParams.input;
+    if (
+      state.firstInput === null &&
+      node.nodeType === 'llm' &&
+      typeof input === 'string' &&
+      input !== ''
+    ) {
+      state.firstInput = input;
+    }
+  }
+
+  /**
+   * Shuts down the processor, flushing any pending data.
+   * @param timeout - (Optional) Shutdown timeout in milliseconds. Currently ignored.
+   */
+  async shutdown(timeout?: number): Promise<void> {
+    void timeout;
+    await this._galileoLogger.flush();
+  }
+
+  /**
+   * Forces a flush of any pending data.
+   */
+  async forceFlush(): Promise<void> {
+    await this._galileoLogger.flush();
+  }
+
+  /**
+   * Returns the bookkeeping record for a trace, creating it on first use.
+   * @param traceId - The id of the trace.
+   * @returns The mutable state record for that trace.
+   */
+  private _stateFor(traceId: string): TraceState {
+    let state = this._traces.get(traceId);
+    if (!state) {
+      state = { firstInput: null, lastOutput: null, spanIds: [] };
+      this._traces.set(traceId, state);
+    }
+    return state;
+  }
+
+  /**
+   * Releases the root node, span nodes and bookkeeping of one trace.
+   * @param traceId - The id of the trace to forget.
+   */
+  private _forgetTrace(traceId: string): void {
+    const state = this._traces.get(traceId);
+    if (state) {
+      for (const spanId of state.spanIds) {
+        this._nodes.delete(spanId);
+      }
+    }
+    this._nodes.delete(traceId);
+    this._traces.delete(traceId);
+  }
+
+  /**
+   * Runs the extractor that matches a node type.
+   * @param nodeType - The effective node type of the span.
+   * @param spanData - The span data to extract from.
+   * @returns The extracted span parameters.
+   */
+  private _extractSpanParams(
+    nodeType: Node['nodeType'],
+    spanData: Record<string, unknown>
+  ): Record<string, unknown> {
+    if (nodeType === 'llm') return extractLlmData(spanData);
+    if (nodeType === 'tool') return extractToolData(spanData);
+    return extractWorkflowData(spanData);
+  }
+
+  /**
+   * Computes a duration in nanoseconds between two ISO timestamps.
+   * @param startedAt - The start timestamp, if known.
+   * @param endedAt - The end timestamp.
+   * @returns The duration from calculateDurationNs, or 0 when the start is unknown.
+   */
+  private _durationNs(startedAt: string | undefined, endedAt: string): number {
+    return startedAt
+      ? calculateDurationNs(new Date(startedAt), new Date(endedAt))
+      : 0;
+  }
+
+  /**
+   * Finds the root node for the trace and recursively logs the span tree.
+   * @param trace - The trace to commit.
+   */
+  private _commitTrace(trace: AgentTrace): void {
+    const rootNode = this._nodes.get(trace.traceId);
+    if (!rootNode) return;
+    this._logNodeTree(rootNode, true);
+  }
+
+  /**
+   * Recursively emits nodes to GalileoLogger in correct parent→child order.
+   * @param node - The node to log.
+   * @param firstNode - Whether this is the root trace node.
+   */
+  private _logNodeTree(node: Node, firstNode = false): void {
+    const params = node.spanParams;
+    const name = (params.name as string | undefined) ?? 'Agent Run';
+    const durationNs = (params.durationNs as number | undefined) ?? 0;
+    const metadata =
+      (params.metadata as Record<string, string> | undefined) ?? {};
+    const statusCode = (params.statusCode as number | undefined) ?? 200;
+    const input = params.input !== undefined ? String(params.input) : '';
+    const output =
+      params.output !== undefined ? String(params.output) : undefined;
+    const startedAt =
+      params.startedAt !== undefined
+        ? new Date(params.startedAt as string)
+        : undefined;
+
+    if (firstNode) {
+      // Root node → startTrace. The root node's runId is the trace id.
+      const state = this._traces.get(node.runId);
+      const firstInput = state?.firstInput ?? null;
+      const lastOutput = state?.lastOutput ?? null;
+      const traceInput = firstInput !== null ? String(firstInput) : input;
+      const traceOutput = lastOutput !== null ? String(lastOutput) : output;
+      this._galileoLogger.startTrace({
+        // Fall back to the trace name when no input was captured.
+        input: traceInput || name,
+        output: traceOutput,
+        name,
+        createdAt: startedAt,
+        durationNs,
+        metadata
+      });
+    } else if (node.nodeType === 'llm') {
+      const tools = (params.tools as string | undefined)
+        ? (JSON.parse(params.tools as string) as Record<string, unknown>[])
+        : undefined;
+
+      // Build embedded tool calls metadata
+      const embeddedToolCalls = params.embeddedToolCalls as
+        | EmbeddedToolCall[]
+        | undefined;
+      const llmMeta: Record<string, string> = { ...metadata };
+      if (embeddedToolCalls && embeddedToolCalls.length > 0) {
+        llmMeta.embedded_tool_calls = JSON.stringify(embeddedToolCalls);
+      }
+
+      this._galileoLogger.addLlmSpan({
+        input,
+        output: output ?? '',
+        name,
+        model: (params.model as string | undefined) ?? 'unknown',
+        durationNs,
+        numInputTokens: (params.numInputTokens as number | undefined) ?? undefined,
+        numOutputTokens:
+          (params.numOutputTokens as number | undefined) ?? undefined,
+        totalTokens: (params.totalTokens as number | undefined) ?? undefined,
+        numReasoningTokens:
+          (params.numReasoningTokens as number | undefined) ?? undefined,
+        numCachedInputTokens:
+          (params.numCachedInputTokens as number | undefined) ?? undefined,
+        temperature: (params.temperature as number | undefined) ?? undefined,
+        statusCode,
+        metadata: llmMeta,
+        tools: tools as JsonObject[] | undefined,
+        createdAt: startedAt
+      });
+    } else if (node.nodeType === 'tool') {
+      this._galileoLogger.addToolSpan({
+        input,
+        output,
+        name,
+        durationNs,
+        statusCode,
+        metadata,
+        createdAt: startedAt
+      });
+    } else {
+      // workflow or agent child nodes
+      this._galileoLogger.addWorkflowSpan({
+        input,
+        output,
+        name,
+        durationNs,
+        metadata,
+        createdAt: startedAt
+      });
+    }
+
+    // Recursively log children
+    for (const childId of node.children) {
+      const childNode = this._nodes.get(childId);
+      if (childNode) {
+        this._logNodeTree(childNode, false);
+      }
+    }
+
+    // Conclude workflow/agent spans after their children
+    if (
+      !firstNode &&
+      (node.nodeType === 'workflow' || node.nodeType === 'agent')
+    ) {
+      this._galileoLogger.conclude({ output, durationNs });
+    }
+  }
+
+  /**
+   * Creates a custom span backed by GalileoCustomSpanData.
+   * @param galileoSpan - The Galileo span object to embed.
+   * @param name - (Optional) Display name for the custom span.
+   * @param extraData - (Optional) Extra data to include in the span payload.
+   * @returns A GalileoCustomSpanData object that can be passed to the OpenAI Agents SDK.
+   */
+  static addGalileoCustomSpan(
+    galileoSpan: unknown,
+    name?: string,
+    extraData?: Record<string, unknown>
+  ): GalileoCustomSpanData {
+    return createGalileoCustomSpanData(galileoSpan, name, extraData);
+  }
+}
+
+/**
+ * Registers a new GalileoTracingProcessor with the OpenAI Agents SDK.
+ * Requires @openai/agents-core to be installed.
+ * @param options - (Optional) Registration options.
+ * @param options.galileoLogger - (Optional) The GalileoLogger instance to use.
+ * @param options.flushOnTraceEnd - (Optional) Whether to flush after each trace ends.
+ * @returns The created GalileoTracingProcessor instance.
+ * @throws Error when @openai/agents-core cannot be loaded or does not export addTraceProcessor.
+ */
+export async function registerGalileoTraceProcessor(options?: {
+  galileoLogger?: GalileoLogger;
+  flushOnTraceEnd?: boolean;
+}): Promise<GalileoTracingProcessor> {
+  const processor = new GalileoTracingProcessor(
+    options?.galileoLogger,
+    options?.flushOnTraceEnd
+  );
+
+  let agentsCore: { addTraceProcessor?: (processor: TracingProcessor) => void };
+  try {
+    // The specifier is cast to string so the optional dependency is not
+    // resolved at compile time.
+    agentsCore = (await import('@openai/agents-core' as string)) as {
+      addTraceProcessor?: (processor: TracingProcessor) => void;
+    };
+  } catch (error: unknown) {
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(
+      `registerGalileoTraceProcessor could not load '@openai/agents-core'. ` +
+        `Install the optional peer dependency '@openai/agents'. (${reason})`
+    );
+  }
+
+  if (typeof agentsCore.addTraceProcessor !== 'function') {
+    throw new Error(
+      "'@openai/agents-core' does not export addTraceProcessor; " +
+        'the installed version is not supported.'
+    );
+  }
+  agentsCore.addTraceProcessor(processor);
+
+  return processor;
+}
+
+export { createGalileoCustomSpanData as GalileoCustomSpan } from './custom-span';
+export type { GalileoCustomSpanData } from './custom-span';
+export type { Node, NodeType } from './node';
+export { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping';
+export {
+  extractLlmData,
+  extractToolData,
+  extractWorkflowData,
+  parseUsage
+} from './data-extraction';
+export {
+  extractEmbeddedToolCalls,
+  getToolNameFromType,
+  extractToolInput,
+  extractToolOutput
+} from './embedded-tools';
diff --git a/src/handlers/openai-agents/node.ts b/src/handlers/openai-agents/node.ts
new file mode 100644
index 00000000..92c5c79e
--- /dev/null
+++ b/src/handlers/openai-agents/node.ts
@@ -0,0 +1,29 @@
+/**
+ * Internal node data structure used to build an in-memory span tree
+ * during an OpenAI Agents
run before committing to GalileoLogger.
+ */
+
+/**
+ * Span type for an openai-agents node.
+ */
+export type NodeType = 'llm' | 'tool' | 'workflow' | 'agent';
+
+/**
+ * Represents a node in the span tree built during an OpenAI Agents run.
+ */
+export interface Node {
+  /** Which kind of Galileo span this node is emitted as. */
+  nodeType: NodeType;
+  /** Free-form parameters collected for the span (name, input, output, ...). */
+  spanParams: Record<string, unknown>;
+  /** Identifier of this node. */
+  runId: string;
+  /** Identifier of the parent node, or null when there is none. */
+  parentRunId: string | null;
+  /** Identifiers of the child nodes. */
+  children: string[];
+}
+
+/**
+ * Creates a new Node with an empty children array.
+ * @param opts - The node configuration without the children field.
+ * @returns A new Node with an empty children array.
+ */
+export function createNode(opts: Omit<Node, 'children'>): Node {
+  return { ...opts, children: [] };
+}
diff --git a/src/handlers/openai-agents/span-mapping.ts b/src/handlers/openai-agents/span-mapping.ts
new file mode 100644
index 00000000..af797fcf
--- /dev/null
+++ b/src/handlers/openai-agents/span-mapping.ts
@@ -0,0 +1,104 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import type { NodeType } from './node';
+
+/**
+ * The sentinel type string used to identify GalileoCustomSpan instances.
+ */
+export const GALILEO_CUSTOM_TYPE = 'galileo_custom';
+
+/**
+ * Maps an OpenAI Agents SDK SpanData type string to a Galileo node type.
+ * The __galileoCustom sentinel is checked before the type discriminant, and
+ * unrecognised types fall back to 'workflow'.
+ * @param spanData - The span data object with a type discriminant.
+ * @returns The corresponding Galileo node type or 'galileo_custom'.
+ */
+export function mapSpanType(spanData: {
+  type: string;
+  [key: string]: unknown;
+}): NodeType | typeof GALILEO_CUSTOM_TYPE {
+  // Check for GalileoCustomSpan sentinel
+  if (spanData.__galileoCustom === true) {
+    return GALILEO_CUSTOM_TYPE;
+  }
+
+  switch (spanData.type) {
+    case 'generation':
+    case 'response':
+      return 'llm';
+
+    case 'function':
+    case 'guardrail':
+    case 'transcription':
+    case 'speech':
+    case 'speech_group':
+    case 'mcp_tools':
+      return 'tool';
+
+    case 'agent':
+    case 'handoff':
+    case 'custom':
+      return 'workflow';
+
+    default:
+      return 'workflow';
+  }
+}
+
+/**
+ * Derives a display name for a span.
+ * @param spanData - The span data object.
+ * @param spanType - The resolved node type.
+ * @returns A human-readable display name for the span.
+ */
+export function mapSpanName(
+  spanData: { type: string; name?: string; [key: string]: unknown },
+  spanType: NodeType | typeof GALILEO_CUSTOM_TYPE
+): string {
+  // An explicit, non-empty name always wins.
+  if (spanData.name) {
+    return String(spanData.name);
+  }
+
+  // Handle galileo_custom sentinel before the switch
+  if (spanType === GALILEO_CUSTOM_TYPE) {
+    return 'Galileo Custom';
+  }
+
+  // From here on the span has no usable name, so every type gets its fixed label.
+  switch (spanData.type) {
+    case 'generation':
+      return 'Generation';
+    case 'response':
+      return 'Response';
+    case 'function':
+      return 'Function';
+    case 'guardrail':
+      return 'Guardrail';
+    case 'agent':
+      return 'Agent';
+    case 'handoff': {
+      // Both snake_case and camelCase agent keys are accepted.
+      const from = spanData.from_agent || spanData.fromAgent || '';
+      const to = spanData.to_agent || spanData.toAgent || '';
+      if (from || to) {
+        return `Handoff: ${String(from)} → ${String(to)}`;
+      }
+      return 'Handoff';
+    }
+    case 'custom':
+      return 'Custom';
+    case 'transcription':
+      return 'Transcription';
+    case 'speech':
+      return 'Speech';
+    case 'speech_group':
+      return 'Speech Group';
+    case 'mcp_tools':
+      return 'MCP Tools';
+    default:
+      return 'Span';
+  }
+}
diff --git a/src/index.ts b/src/index.ts
index 8709bf09..1b8bd2b2 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -141,6 +141,11 @@ import {
 import { log } from './wrappers';
 import { wrapOpenAI, wrapAzureOpenAI } from './handlers/openai';
 import { GalileoCallback } from './handlers/langchain';
+import {
+  GalileoTracingProcessor,
+  GalileoCustomSpan,
+  registerGalileoTraceProcessor
+} from './handlers/openai-agents';
 import { getSessions, getSpans, getTraces, RecordType } from './utils/search';
 export {
   // Legacy clients
@@ -160,6 +165,10 @@
export { // OpenAI wrapOpenAI, wrapAzureOpenAI, + // OpenAI Agents + GalileoTracingProcessor, + GalileoCustomSpan, + registerGalileoTraceProcessor, // Datasets Dataset, Datasets, diff --git a/tests/handlers/openai-agents/embedded-tool-calls.test.ts b/tests/handlers/openai-agents/embedded-tool-calls.test.ts new file mode 100644 index 00000000..f0168582 --- /dev/null +++ b/tests/handlers/openai-agents/embedded-tool-calls.test.ts @@ -0,0 +1,236 @@ +import { + extractEmbeddedToolCalls, + getToolNameFromType, + extractToolInput, + extractToolOutput +} from '../../../src/handlers/openai-agents/embedded-tools'; + +describe('getToolNameFromType', () => { + test('test maps code_interpreter_call to code_interpreter', () => { + expect(getToolNameFromType('code_interpreter_call')).toBe( + 'code_interpreter' + ); + }); + + test('test maps file_search_call to file_search', () => { + expect(getToolNameFromType('file_search_call')).toBe('file_search'); + }); + + test('test maps web_search_call to web_search', () => { + expect(getToolNameFromType('web_search_call')).toBe('web_search'); + }); + + test('test maps computer_call to computer', () => { + expect(getToolNameFromType('computer_call')).toBe('computer'); + }); + + test('test maps custom_tool_call to custom_tool', () => { + expect(getToolNameFromType('custom_tool_call')).toBe('custom_tool'); + }); + + test('test returns original string for unknown type', () => { + expect(getToolNameFromType('unknown_type')).toBe('unknown_type'); + }); +}); + +describe('extractToolInput', () => { + test('test code_interpreter_call extracts code field', () => { + const result = extractToolInput( + { code: 'print("hello")' }, + 'code_interpreter_call' + ); + expect(result).toBe('print("hello")'); + }); + + test('test code_interpreter_call returns null when no code', () => { + expect(extractToolInput({}, 'code_interpreter_call')).toBeNull(); + }); + + test('test file_search_call extracts queries', () => { + const result = extractToolInput( + { 
queries: ['find docs', 'search code'] }, + 'file_search_call' + ); + expect(result).toBe(JSON.stringify(['find docs', 'search code'])); + }); + + test('test web_search_call extracts action.query', () => { + const result = extractToolInput( + { action: { query: 'latest news' } }, + 'web_search_call' + ); + expect(result).toBe('latest news'); + }); + + test('test web_search_call returns null when no action', () => { + expect(extractToolInput({}, 'web_search_call')).toBeNull(); + }); + + test('test computer_call extracts action object', () => { + const action = { type: 'click', coordinate: [100, 200] }; + const result = extractToolInput({ action }, 'computer_call'); + expect(result).toBe(JSON.stringify(action)); + }); + + test('test custom_tool_call extracts input string', () => { + const result = extractToolInput({ input: 'my input' }, 'custom_tool_call'); + expect(result).toBe('my input'); + }); + + test('test custom_tool_call serialises object input', () => { + const result = extractToolInput( + { input: { key: 'val' } }, + 'custom_tool_call' + ); + expect(result).toBe(JSON.stringify({ key: 'val' })); + }); +}); + +describe('extractToolOutput', () => { + test('test code_interpreter_call concatenates log outputs', () => { + const result = extractToolOutput( + { outputs: [{ logs: 'line1' }, { logs: 'line2' }] }, + 'code_interpreter_call' + ); + expect(result).toBe('line1\nline2'); + }); + + test('test code_interpreter_call extracts url output', () => { + const result = extractToolOutput( + { outputs: [{ url: 'https://example.com/file.png' }] }, + 'code_interpreter_call' + ); + expect(result).toBe('https://example.com/file.png'); + }); + + test('test code_interpreter_call returns null for empty outputs', () => { + expect( + extractToolOutput({ outputs: [] }, 'code_interpreter_call') + ).toBeNull(); + }); + + test('test file_search_call extracts results', () => { + const results = [{ id: '1', content: 'doc' }]; + const result = extractToolOutput({ results }, 
'file_search_call'); + expect(result).toBe(JSON.stringify(results)); + }); + + test('test file_search_call returns null when no results', () => { + expect(extractToolOutput({}, 'file_search_call')).toBeNull(); + }); + + test('test web_search_call returns action as json', () => { + const action = { query: 'news', status: 'done' }; + const result = extractToolOutput({ action }, 'web_search_call'); + expect(result).toBe(JSON.stringify(action)); + }); + + test('test computer_call returns null', () => { + expect( + extractToolOutput({ result: 'screenshot' }, 'computer_call') + ).toBeNull(); + }); + + test('test custom_tool_call extracts output string', () => { + const result = extractToolOutput({ output: 'done' }, 'custom_tool_call'); + expect(result).toBe('done'); + }); +}); + +describe('extractEmbeddedToolCalls', () => { + test('test returns empty array for null response', () => { + expect(extractEmbeddedToolCalls(null)).toEqual([]); + }); + + test('test returns empty array for response without output', () => { + expect(extractEmbeddedToolCalls({})).toEqual([]); + }); + + test('test skips non-embedded-tool output items', () => { + const response = { + output: [{ type: 'message', content: 'hello' }] + }; + expect(extractEmbeddedToolCalls(response)).toEqual([]); + }); + + test('test extracts code_interpreter_call', () => { + const response = { + output: [ + { + type: 'code_interpreter_call', + id: 'ci_001', + code: 'x = 1', + outputs: [{ logs: 'output log' }], + status: 'completed' + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + expect(result[0].type).toBe('code_interpreter_call'); + expect(result[0].function.name).toBe('code_interpreter'); + expect(result[0].tool_call_id).toBe('ci_001'); + expect(result[0].tool_call_input).toBe('x = 1'); + expect(result[0].tool_call_output).toBe('output log'); + expect(result[0].tool_call_status).toBe('completed'); + }); + + test('test extracts file_search_call', () => { + const 
response = { + output: [ + { + type: 'file_search_call', + id: 'fs_001', + queries: ['find docs'], + results: [{ id: 'doc1', content: 'text' }] + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + expect(result[0].function.name).toBe('file_search'); + expect(result[0].tool_call_input).toBe(JSON.stringify(['find docs'])); + }); + + test('test extracts web_search_call', () => { + const response = { + output: [ + { + type: 'web_search_call', + id: 'ws_001', + action: { query: 'latest AI news' } + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + expect(result[0].function.name).toBe('web_search'); + expect(result[0].tool_call_input).toBe('latest AI news'); + }); + + test('test extracts multiple embedded tool calls', () => { + const response = { + output: [ + { type: 'code_interpreter_call', code: 'x=1', outputs: [] }, + { type: 'message', content: 'hi' }, + { type: 'web_search_call', action: { query: 'test' } } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(2); + expect(result[0].type).toBe('code_interpreter_call'); + expect(result[1].type).toBe('web_search_call'); + }); + + test('test handles null output items gracefully', () => { + const response = { + output: [ + null, + undefined, + { type: 'web_search_call', action: { query: 'q' } } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + }); +}); diff --git a/tests/handlers/openai-agents/extract-llm-data.test.ts b/tests/handlers/openai-agents/extract-llm-data.test.ts new file mode 100644 index 00000000..69bf67f1 --- /dev/null +++ b/tests/handlers/openai-agents/extract-llm-data.test.ts @@ -0,0 +1,148 @@ +import { + extractLlmData, + parseUsage +} from '../../../src/handlers/openai-agents/data-extraction'; + +describe('parseUsage', () => { + test('test parse usage null returns zeros', () => { + const result = parseUsage(null); + 
expect(result).toEqual({ + inputTokens: 0, + outputTokens: 0, + totalTokens: null, + reasoningTokens: 0, + cachedTokens: 0 + }); + }); + + test('test parse usage undefined returns zeros', () => { + const result = parseUsage(undefined); + expect(result).toEqual({ + inputTokens: 0, + outputTokens: 0, + totalTokens: null, + reasoningTokens: 0, + cachedTokens: 0 + }); + }); + + test('test parse usage with input_tokens and output_tokens', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 20, + total_tokens: 30 + }); + expect(result.inputTokens).toBe(10); + expect(result.outputTokens).toBe(20); + expect(result.totalTokens).toBe(30); + }); + + test('test parse usage with legacy prompt_tokens and completion_tokens', () => { + const result = parseUsage({ prompt_tokens: 5, completion_tokens: 15 }); + expect(result.inputTokens).toBe(5); + expect(result.outputTokens).toBe(15); + }); + + test('test parse usage extracts reasoning_tokens from details', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + details: { reasoning_tokens: 3, cached_tokens: 2 } + }); + expect(result.reasoningTokens).toBe(3); + expect(result.cachedTokens).toBe(2); + }); + + test('test parse usage extracts reasoning_tokens at top level', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + reasoning_tokens: 4 + }); + expect(result.reasoningTokens).toBe(4); + }); +}); + +describe('extractLlmData generation', () => { + test('test extract generation span data', () => { + const spanData = { + type: 'generation', + input: [{ role: 'user', content: 'Hello' }], + output: [{ role: 'assistant', content: 'Hi' }], + model: 'gpt-4o', + model_config: { temperature: 0.7, max_tokens: 100 }, + usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-4o'); + expect(result.temperature).toBe(0.7); + expect(result.numInputTokens).toBe(10); + 
expect(result.numOutputTokens).toBe(5); + expect(result.totalTokens).toBe(15); + expect(result.input).toBe(JSON.stringify(spanData.input)); + expect(result.output).toBe(JSON.stringify(spanData.output)); + }); + + test('test extract generation span with null usage', () => { + const spanData = { type: 'generation', model: 'gpt-4o' }; + const result = extractLlmData(spanData); + expect(result.numInputTokens).toBe(0); + expect(result.numOutputTokens).toBe(0); + expect(result.totalTokens).toBeUndefined(); + }); + + test('test extract generation metadata includes gen_ai_system openai', () => { + const spanData = { type: 'generation' }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.gen_ai_system).toBe('openai'); + }); +}); + +describe('extractLlmData response', () => { + test('test extract response span data with _input and _response', () => { + const spanData = { + type: 'response', + _input: [{ role: 'user', content: 'Hello' }], + _response: { + model: 'gpt-4o', + usage: { input_tokens: 8, output_tokens: 4 }, + temperature: 0.5, + output: [{ type: 'message', content: 'Hi' }] + } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-4o'); + expect(result.temperature).toBe(0.5); + expect(result.numInputTokens).toBe(8); + expect(result.numOutputTokens).toBe(4); + }); + + test('test extract response span data with fallback input/response keys', () => { + const spanData = { + type: 'response', + input: 'some input', + response: { + model: 'gpt-3.5-turbo', + usage: { input_tokens: 2, output_tokens: 1 } + } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-3.5-turbo'); + expect(result.numInputTokens).toBe(2); + }); + + test('test extract response span with null response returns unknown model', () => { + const spanData = { type: 'response' }; + const result = extractLlmData(spanData); + expect(result.model).toBe('unknown'); + expect(result.numInputTokens).toBe(0); + 
}); +}); + +describe('extractLlmData unknown type', () => { + test('test extract returns empty record for unknown type', () => { + const result = extractLlmData({ type: 'unknown' }); + expect(Object.keys(result).length).toBe(0); + }); +}); diff --git a/tests/handlers/openai-agents/extract-tool-data.test.ts b/tests/handlers/openai-agents/extract-tool-data.test.ts new file mode 100644 index 00000000..33065509 --- /dev/null +++ b/tests/handlers/openai-agents/extract-tool-data.test.ts @@ -0,0 +1,73 @@ +import { extractToolData } from '../../../src/handlers/openai-agents/data-extraction'; + +describe('extractToolData', () => { + test('test extract function span data string input/output', () => { + const spanData = { + type: 'function', + input: '{"query":"hello"}', + output: 'result text' + }; + const result = extractToolData(spanData); + expect(result.input).toBe('{"query":"hello"}'); + expect(result.output).toBe('result text'); + }); + + test('test extract function span data object input serialised', () => { + const spanData = { + type: 'function', + input: { query: 'hello' }, + output: { answer: 'world' } + }; + const result = extractToolData(spanData); + expect(result.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test extract function span data missing output', () => { + const spanData = { type: 'function', input: 'test' }; + const result = extractToolData(spanData); + expect(result.output).toBeUndefined(); + }); + + test('test extract function span with mcp_data in metadata', () => { + const spanData = { + type: 'function', + input: 'test', + mcp_data: { server: 'my-server', tool: 'my-tool' } + }; + const result = extractToolData(spanData); + const meta = result.metadata as Record; + expect(meta.mcp_data).toBe( + JSON.stringify({ server: 'my-server', tool: 'my-tool' }) + ); + }); + + test('test extract guardrail span triggered', () => { + const spanData = { type: 'guardrail', 
triggered: true, name: 'PII Filter' }; + const result = extractToolData(spanData); + expect(result.input).toBe(''); + expect(result.output).toBe('Guardrail triggered'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe('true'); + expect(meta.guardrail_name).toBe('PII Filter'); + }); + + test('test extract guardrail span not triggered', () => { + const spanData = { type: 'guardrail', triggered: false, name: 'Safety' }; + const result = extractToolData(spanData); + expect(result.output).toBe('Guardrail passed'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe('false'); + }); + + test('test extract tool data for transcription returns empty', () => { + const result = extractToolData({ type: 'transcription' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); + + test('test extract tool data for mcp_tools returns empty', () => { + const result = extractToolData({ type: 'mcp_tools' }); + expect(result.input).toBe(''); + }); +}); diff --git a/tests/handlers/openai-agents/extract-workflow-data.test.ts b/tests/handlers/openai-agents/extract-workflow-data.test.ts new file mode 100644 index 00000000..3a813862 --- /dev/null +++ b/tests/handlers/openai-agents/extract-workflow-data.test.ts @@ -0,0 +1,87 @@ +import { extractWorkflowData } from '../../../src/handlers/openai-agents/data-extraction'; + +describe('extractWorkflowData', () => { + test('test extract agent span data with tools and handoffs', () => { + const spanData = { + type: 'agent', + name: 'PlannerAgent', + tools: ['search', 'calculator'], + handoffs: ['ReviewAgent'], + output_type: 'string' + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe(''); + const meta = result.metadata as Record; + expect(meta.tools).toBe(JSON.stringify(['search', 'calculator'])); + expect(meta.handoffs).toBe(JSON.stringify(['ReviewAgent'])); + expect(meta.output_type).toBe(JSON.stringify('string')); + }); + + test('test extract agent 
span data without optional fields', () => { + const result = extractWorkflowData({ type: 'agent' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + const meta = result.metadata as Record; + expect(Object.keys(meta).length).toBe(0); + }); + + test('test extract handoff span data', () => { + const spanData = { + type: 'handoff', + from_agent: 'AgentA', + to_agent: 'AgentB' + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe('AgentA'); + expect(result.output).toBe('AgentB'); + const meta = result.metadata as Record; + expect(meta.from_agent).toBe('AgentA'); + expect(meta.to_agent).toBe('AgentB'); + }); + + test('test extract handoff span data with missing agents', () => { + const result = extractWorkflowData({ type: 'handoff' }); + expect(result.input).toBe(''); + expect(result.output).toBe(''); + }); + + test('test extract custom span data with input and output', () => { + const spanData = { + type: 'custom', + data: { + input: 'custom input', + output: 'custom output', + extra_key: 'extra value' + } + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe('custom input'); + expect(result.output).toBe('custom output'); + const meta = result.metadata as Record; + expect(meta.extra_key).toBe('extra value'); + expect(meta.input).toBeUndefined(); + expect(meta.output).toBeUndefined(); + }); + + test('test extract custom span data with object input serialised', () => { + const spanData = { + type: 'custom', + data: { input: { query: 'hello' }, output: { answer: 'world' } } + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test extract custom span data with no data field', () => { + const result = extractWorkflowData({ type: 'custom' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); + + test('test extract 
unknown span type returns empty', () => { + const result = extractWorkflowData({ type: 'future_type' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); +}); diff --git a/tests/handlers/openai-agents/map-span-name.test.ts b/tests/handlers/openai-agents/map-span-name.test.ts new file mode 100644 index 00000000..5d46bd7b --- /dev/null +++ b/tests/handlers/openai-agents/map-span-name.test.ts @@ -0,0 +1,82 @@ +import { + mapSpanName, + GALILEO_CUSTOM_TYPE +} from '../../../src/handlers/openai-agents/span-mapping'; + +describe('mapSpanName', () => { + test('test returns spanData.name when present', () => { + expect(mapSpanName({ type: 'generation', name: 'MySpan' }, 'llm')).toBe( + 'MySpan' + ); + }); + + test('test generation fallback is Generation', () => { + expect(mapSpanName({ type: 'generation' }, 'llm')).toBe('Generation'); + }); + + test('test response fallback is Response', () => { + expect(mapSpanName({ type: 'response' }, 'llm')).toBe('Response'); + }); + + test('test function fallback uses spanData.name or Function', () => { + expect(mapSpanName({ type: 'function', name: 'my_tool' }, 'tool')).toBe( + 'my_tool' + ); + expect(mapSpanName({ type: 'function' }, 'tool')).toBe('Function'); + }); + + test('test guardrail fallback uses spanData.name or Guardrail', () => { + expect( + mapSpanName({ type: 'guardrail', name: 'content_filter' }, 'tool') + ).toBe('content_filter'); + expect(mapSpanName({ type: 'guardrail' }, 'tool')).toBe('Guardrail'); + }); + + test('test agent fallback uses spanData.name or Agent', () => { + expect( + mapSpanName({ type: 'agent', name: 'PlannerAgent' }, 'workflow') + ).toBe('PlannerAgent'); + expect(mapSpanName({ type: 'agent' }, 'workflow')).toBe('Agent'); + }); + + test('test handoff formats from-to arrow', () => { + expect( + mapSpanName( + { type: 'handoff', from_agent: 'AgentA', to_agent: 'AgentB' }, + 'workflow' + ) + ).toBe('Handoff: AgentA → AgentB'); + }); + + test('test handoff fallback when no 
agents', () => { + expect(mapSpanName({ type: 'handoff' }, 'workflow')).toBe('Handoff'); + }); + + test('test custom fallback is Custom', () => { + expect(mapSpanName({ type: 'custom' }, 'workflow')).toBe('Custom'); + }); + + test('test galileo_custom sentinel fallback is Galileo Custom', () => { + expect(mapSpanName({ type: 'custom' }, GALILEO_CUSTOM_TYPE)).toBe( + 'Galileo Custom' + ); + }); + + test('test transcription fallback is Transcription', () => { + expect(mapSpanName({ type: 'transcription' }, 'tool')).toBe( + 'Transcription' + ); + }); + + test('test speech fallback is Speech', () => { + expect(mapSpanName({ type: 'speech' }, 'tool')).toBe('Speech'); + }); + + test('test speech_group fallback is Speech Group', () => { + expect(mapSpanName({ type: 'speech_group' }, 'tool')).toBe('Speech Group'); + }); + + test('test mcp_tools fallback is MCP Tools', () => { + expect(mapSpanName({ type: 'mcp_tools' }, 'tool')).toBe('MCP Tools'); + }); +}); diff --git a/tests/handlers/openai-agents/map-span-type.test.ts b/tests/handlers/openai-agents/map-span-type.test.ts new file mode 100644 index 00000000..43e108c9 --- /dev/null +++ b/tests/handlers/openai-agents/map-span-type.test.ts @@ -0,0 +1,60 @@ +import { + mapSpanType, + GALILEO_CUSTOM_TYPE +} from '../../../src/handlers/openai-agents/span-mapping'; + +describe('mapSpanType', () => { + test('test maps generation to llm', () => { + expect(mapSpanType({ type: 'generation' })).toBe('llm'); + }); + + test('test maps response to llm', () => { + expect(mapSpanType({ type: 'response' })).toBe('llm'); + }); + + test('test maps function to tool', () => { + expect(mapSpanType({ type: 'function' })).toBe('tool'); + }); + + test('test maps guardrail to tool', () => { + expect(mapSpanType({ type: 'guardrail' })).toBe('tool'); + }); + + test('test maps transcription to tool', () => { + expect(mapSpanType({ type: 'transcription' })).toBe('tool'); + }); + + test('test maps speech to tool', () => { + expect(mapSpanType({ type: 
'speech' })).toBe('tool'); + }); + + test('test maps speech_group to tool', () => { + expect(mapSpanType({ type: 'speech_group' })).toBe('tool'); + }); + + test('test maps mcp_tools to tool', () => { + expect(mapSpanType({ type: 'mcp_tools' })).toBe('tool'); + }); + + test('test maps agent to workflow', () => { + expect(mapSpanType({ type: 'agent' })).toBe('workflow'); + }); + + test('test maps handoff to workflow', () => { + expect(mapSpanType({ type: 'handoff' })).toBe('workflow'); + }); + + test('test maps custom to workflow', () => { + expect(mapSpanType({ type: 'custom' })).toBe('workflow'); + }); + + test('test maps galileo_custom sentinel to galileo_custom', () => { + expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe( + GALILEO_CUSTOM_TYPE + ); + }); + + test('test maps unknown type to workflow fallback', () => { + expect(mapSpanType({ type: 'unknown_future_type' })).toBe('workflow'); + }); +}); diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts new file mode 100644 index 00000000..cef06489 --- /dev/null +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -0,0 +1,314 @@ +import { GalileoTracingProcessor } from '../../../src/handlers/openai-agents'; +import type { + AgentTrace, + AgentSpan +} from '../../../src/handlers/openai-agents'; + +// Helper to build a mock AgentTrace +function makeTrace(overrides: Partial = {}): AgentTrace { + return { + traceId: 'trace-001', + name: 'Test Agent Run', + metadata: {}, + startedAt: new Date('2024-01-01T00:00:00Z').toISOString(), + endedAt: new Date('2024-01-01T00:00:10Z').toISOString(), + ...overrides + }; +} + +// Helper to build a mock AgentSpan +function makeSpan( + overrides: Partial & { spanData: AgentSpan['spanData'] } +): AgentSpan { + return { + spanId: 'span-001', + traceId: 'trace-001', + parentId: 'trace-001', + startedAt: new Date('2024-01-01T00:00:01Z').toISOString(), + endedAt: new 
Date('2024-01-01T00:00:05Z').toISOString(), + error: null, + ...overrides + }; +} + +// Create a mock GalileoLogger for testing +function createMockLogger() { + return { + startTrace: jest.fn().mockReturnValue({}), + addLlmSpan: jest.fn().mockReturnValue({}), + addToolSpan: jest.fn().mockReturnValue({}), + addWorkflowSpan: jest.fn().mockReturnValue({}), + addAgentSpan: jest.fn().mockReturnValue({}), + conclude: jest.fn().mockReturnValue(undefined), + flush: jest.fn().mockResolvedValue(undefined) + }; +} + +describe('GalileoTracingProcessor lifecycle', () => { + test('test onTraceStart creates root node', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + // No external observable yet — verify no calls to logger + expect(mockLogger.startTrace).not.toHaveBeenCalled(); + }); + + test('test full trace lifecycle calls startTrace', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.name).toBe('Test Agent Run'); + }); + + test('test full trace with llm span calls addLlmSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-gen-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4o', + input: [{ role: 'user', content: 'hello' }], + output: [{ role: 'assistant', content: 'hi' }], + usage: { input_tokens: 5, output_tokens: 3 } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); 
+ await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(llmCall.model).toBe('gpt-4o'); + expect(llmCall.numInputTokens).toBe(5); + expect(llmCall.numOutputTokens).toBe(3); + }); + + test('test full trace with tool span calls addToolSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-func-001', + parentId: 'trace-001', + spanData: { + type: 'function', + name: 'search_tool', + input: '{"query":"hello"}', + output: 'results' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.name).toBe('search_tool'); + }); + + test('test full trace with workflow span calls addWorkflowSpan and conclude', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-agent-001', + parentId: 'trace-001', + spanData: { + type: 'agent', + name: 'PlannerAgent' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const workflowCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(workflowCall.name).toBe('PlannerAgent'); + // conclude is called for workflow spans + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test error span sets status 500 in metadata', async () => { + const mockLogger = createMockLogger(); + const 
processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-err-001', + parentId: 'trace-001', + error: { message: 'Something went wrong', data: { code: 'ERR_001' } }, + spanData: { type: 'function', name: 'failing_tool', input: 'x' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.statusCode).toBe(500); + expect(toolCall.metadata.error_message).toBe('Something went wrong'); + expect(toolCall.metadata.error_type).toBe('SpanError'); + }); + + test('test flushOnTraceEnd true calls flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, true); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.flush).toHaveBeenCalledTimes(1); + }); + + test('test flushOnTraceEnd false does not call flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.flush).not.toHaveBeenCalled(); + }); + + test('test shutdown calls flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + await processor.shutdown(); + + expect(mockLogger.flush).toHaveBeenCalledTimes(1); + }); + + test('test forceFlush calls flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + await processor.forceFlush(); + + expect(mockLogger.flush).toHaveBeenCalledTimes(1); 
+ }); + + test('test nested workflow span is logged as child', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'span-agent-outer', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'OuterAgent' } + }); + + const llmSpan = makeSpan({ + spanId: 'span-llm-inner', + parentId: 'span-agent-outer', + spanData: { + type: 'generation', + model: 'gpt-4o', + usage: { input_tokens: 2, output_tokens: 1 } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + // conclude called for workflow span + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test response span extracts embedded tool calls', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-resp-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _input: 'test input', + _response: { + model: 'gpt-4o', + usage: { input_tokens: 10, output_tokens: 5 }, + output: [ + { + type: 'web_search_call', + id: 'ws_001', + action: { query: 'latest news' } + } + ] + } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(llmCall.metadata.embedded_tool_calls).toBeDefined(); + const embedded = JSON.parse(llmCall.metadata.embedded_tool_calls); + 
expect(embedded.length).toBe(1); + expect(embedded[0].type).toBe('web_search_call'); + }); + + test('test metadata values are stringified', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace({ + metadata: { run_id: 'abc123', count: 5 as unknown as string } + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); + const startCall = mockLogger.startTrace.mock.calls[0][0]; + // metadata values should all be strings + if (startCall.metadata) { + for (const v of Object.values(startCall.metadata)) { + expect(typeof v).toBe('string'); + } + } + }); + + test('test addGalileoCustomSpan creates a GalileoCustomSpanData', () => { + const mockSpan = { id: 'span-xyz' }; + const result = GalileoTracingProcessor.addGalileoCustomSpan( + mockSpan, + 'MyCustom' + ); + expect(result.type).toBe('custom'); + expect(result.__galileoCustom).toBe(true); + expect(result.data.galileoSpan).toBe(mockSpan); + expect(result.name).toBe('MyCustom'); + }); +}); From 906a007ca8ea37f4d0daa55619c2391f9ae364d7 Mon Sep 17 00:00:00 2001 From: richter Date: Fri, 27 Feb 2026 15:37:56 -0300 Subject: [PATCH 02/21] feat(agents): Tests for initial implementation. 
--- .../openai-agents/custom-span.test.ts | 188 +++++++ .../openai-agents/data-extraction.test.ts | 308 +++++++++++ ...ol-calls.test.ts => embedded-tool.test.ts} | 0 .../openai-agents/extract-llm-data.test.ts | 148 ----- .../openai-agents/extract-tool-data.test.ts | 73 --- .../extract-workflow-data.test.ts | 87 --- .../openai-agents/integration.test.ts | 414 ++++++++++++++ .../openai-agents/map-span-type.test.ts | 60 -- tests/handlers/openai-agents/node.test.ts | 133 +++++ ...span-name.test.ts => span-mapping.test.ts} | 57 ++ .../openai-agents/tracing-processor.test.ts | 521 ++++++++++++++++++ 11 files changed, 1621 insertions(+), 368 deletions(-) create mode 100644 tests/handlers/openai-agents/custom-span.test.ts create mode 100644 tests/handlers/openai-agents/data-extraction.test.ts rename tests/handlers/openai-agents/{embedded-tool-calls.test.ts => embedded-tool.test.ts} (100%) delete mode 100644 tests/handlers/openai-agents/extract-llm-data.test.ts delete mode 100644 tests/handlers/openai-agents/extract-tool-data.test.ts delete mode 100644 tests/handlers/openai-agents/extract-workflow-data.test.ts create mode 100644 tests/handlers/openai-agents/integration.test.ts delete mode 100644 tests/handlers/openai-agents/map-span-type.test.ts create mode 100644 tests/handlers/openai-agents/node.test.ts rename tests/handlers/openai-agents/{map-span-name.test.ts => span-mapping.test.ts} (61%) diff --git a/tests/handlers/openai-agents/custom-span.test.ts b/tests/handlers/openai-agents/custom-span.test.ts new file mode 100644 index 00000000..f6a425cc --- /dev/null +++ b/tests/handlers/openai-agents/custom-span.test.ts @@ -0,0 +1,188 @@ +import { + createGalileoCustomSpanData, + isGalileoCustomSpanData, + type GalileoCustomSpanData +} from '../../../src/handlers/openai-agents/custom-span'; + +describe('createGalileoCustomSpanData()', () => { + test('test creates span with galileoSpan only', () => { + const galileoSpan = { type: 'custom', data: 'test' }; + const result = 
createGalileoCustomSpanData(galileoSpan); + + expect(result.type).toBe('custom'); + expect(result.__galileoCustom).toBe(true); + expect(result.data.galileoSpan).toBe(galileoSpan); + expect(result.name).toBeUndefined(); + }); + + test('test creates span with name parameter', () => { + const galileoSpan = { test: 'data' }; + const result = createGalileoCustomSpanData(galileoSpan, 'My Custom Span'); + + expect(result.name).toBe('My Custom Span'); + expect(result.data.galileoSpan).toBe(galileoSpan); + }); + + test('test creates span with extraData', () => { + const galileoSpan = { test: 'data' }; + const extraData = { key1: 'value1', key2: 42 }; + const result = createGalileoCustomSpanData( + galileoSpan, + undefined, + extraData + ); + + expect(result.data.key1).toBe('value1'); + expect(result.data.key2).toBe(42); + expect(result.data.galileoSpan).toBe(galileoSpan); + }); + + test('test creates span with all parameters', () => { + const galileoSpan = { type: 'custom', nested: { data: true } }; + const extraData = { metadata: 'info', count: 5 }; + const result = createGalileoCustomSpanData( + galileoSpan, + 'Full Span', + extraData + ); + + expect(result.type).toBe('custom'); + expect(result.name).toBe('Full Span'); + expect(result.__galileoCustom).toBe(true); + expect(result.data.galileoSpan).toBe(galileoSpan); + expect(result.data.metadata).toBe('info'); + expect(result.data.count).toBe(5); + }); + + test('test sets type field to custom', () => { + const result = createGalileoCustomSpanData({}); + expect(result.type).toBe('custom'); + }); + + test('test sets __galileoCustom sentinel to true', () => { + const result = createGalileoCustomSpanData({}); + expect(result.__galileoCustom).toBe(true); + }); + + test('test extraData merges correctly with galileoSpan', () => { + const galileoSpan = { id: 'span-1' }; + const extraData = { tag1: 'tag', tag2: 'meta' }; + const result = createGalileoCustomSpanData( + galileoSpan, + undefined, + extraData + ); + + 
expect(result.data).toEqual({ + tag1: 'tag', + tag2: 'meta', + galileoSpan: { id: 'span-1' } + }); + }); + + test('test handles empty extraData', () => { + const galileoSpan = { test: 'data' }; + const result = createGalileoCustomSpanData(galileoSpan, undefined, {}); + + expect(result.data.galileoSpan).toBe(galileoSpan); + expect(Object.keys(result.data)).toEqual(['galileoSpan']); + }); + + test('test handles null galileoSpan', () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const result = createGalileoCustomSpanData(null as any); + expect(result.data.galileoSpan).toBe(null); + }); + + test('test handles undefined name parameter', () => { + const result = createGalileoCustomSpanData({}, undefined, { meta: 'data' }); + expect(result.name).toBeUndefined(); + }); +}); + +describe('isGalileoCustomSpanData() type guard', () => { + test('test returns true for valid GalileoCustomSpanData', () => { + const spanData: GalileoCustomSpanData = { + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: true + }; + + expect(isGalileoCustomSpanData(spanData)).toBe(true); + }); + + test('test returns false for null', () => { + expect(isGalileoCustomSpanData(null)).toBe(false); + }); + + test('test returns false for undefined', () => { + expect(isGalileoCustomSpanData(undefined)).toBe(false); + }); + + test('test returns false for plain object without __galileoCustom', () => { + const plainObj = { + type: 'custom', + data: { galileoSpan: {} } + }; + + expect(isGalileoCustomSpanData(plainObj)).toBe(false); + }); + + test('test returns false for object with __galileoCustom false', () => { + const spanData = { + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: false + }; + + expect(isGalileoCustomSpanData(spanData)).toBe(false); + }); + + test('test returns false for non-objects', () => { + expect(isGalileoCustomSpanData('string')).toBe(false); + expect(isGalileoCustomSpanData(123)).toBe(false); + 
expect(isGalileoCustomSpanData(true)).toBe(false); + expect(isGalileoCustomSpanData([])).toBe(false); + }); + + test('test requires __galileoCustom to be true', () => { + expect( + isGalileoCustomSpanData({ + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: true + }) + ).toBe(true); + + expect( + isGalileoCustomSpanData({ + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: 1 // truthy but not true + }) + ).toBe(false); + }); + + test('test type guard narrows type correctly', () => { + const unknownData: unknown = createGalileoCustomSpanData({}); + + if (isGalileoCustomSpanData(unknownData)) { + // TypeScript should allow these properties + const spanData: GalileoCustomSpanData = unknownData; + expect(spanData.type).toBe('custom'); + expect(spanData.__galileoCustom).toBe(true); + } + }); + + test('test requires all required fields', () => { + const partialWithoutData = { + type: 'custom', + __galileoCustom: true + // missing data field + }; + + // Type guard should handle this gracefully (either true if it doesn't check data, or false if it does) + const result = isGalileoCustomSpanData(partialWithoutData); + expect(typeof result).toBe('boolean'); + }); +}); diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts new file mode 100644 index 00000000..20abcbcf --- /dev/null +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -0,0 +1,308 @@ +import { + extractLlmData, + extractToolData, + extractWorkflowData, + parseUsage +} from '../../../src/handlers/openai-agents/data-extraction'; + +describe('parseUsage', () => { + test('test parse usage null returns zeros', () => { + const result = parseUsage(null); + expect(result).toEqual({ + inputTokens: 0, + outputTokens: 0, + totalTokens: null, + reasoningTokens: 0, + cachedTokens: 0 + }); + }); + + test('test parse usage undefined returns zeros', () => { + const result = parseUsage(undefined); + expect(result).toEqual({ + 
inputTokens: 0, + outputTokens: 0, + totalTokens: null, + reasoningTokens: 0, + cachedTokens: 0 + }); + }); + + test('test parse usage with input_tokens and output_tokens', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 20, + total_tokens: 30 + }); + expect(result.inputTokens).toBe(10); + expect(result.outputTokens).toBe(20); + expect(result.totalTokens).toBe(30); + }); + + test('test parse usage with legacy prompt_tokens and completion_tokens', () => { + const result = parseUsage({ prompt_tokens: 5, completion_tokens: 15 }); + expect(result.inputTokens).toBe(5); + expect(result.outputTokens).toBe(15); + }); + + test('test parse usage extracts reasoning_tokens from details', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + details: { reasoning_tokens: 3, cached_tokens: 2 } + }); + expect(result.reasoningTokens).toBe(3); + expect(result.cachedTokens).toBe(2); + }); + + test('test parse usage extracts reasoning_tokens at top level', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + reasoning_tokens: 4 + }); + expect(result.reasoningTokens).toBe(4); + }); +}); + +describe('extractLlmData generation', () => { + test('test extract generation span data', () => { + const spanData = { + type: 'generation', + input: [{ role: 'user', content: 'Hello' }], + output: [{ role: 'assistant', content: 'Hi' }], + model: 'gpt-4o', + model_config: { temperature: 0.7, max_tokens: 100 }, + usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-4o'); + expect(result.temperature).toBe(0.7); + expect(result.numInputTokens).toBe(10); + expect(result.numOutputTokens).toBe(5); + expect(result.totalTokens).toBe(15); + expect(result.input).toBe(JSON.stringify(spanData.input)); + expect(result.output).toBe(JSON.stringify(spanData.output)); + }); + + test('test extract generation span with null usage', () => { + const 
spanData = { type: 'generation', model: 'gpt-4o' }; + const result = extractLlmData(spanData); + expect(result.numInputTokens).toBe(0); + expect(result.numOutputTokens).toBe(0); + expect(result.totalTokens).toBeUndefined(); + }); + + test('test extract generation metadata includes gen_ai_system openai', () => { + const spanData = { type: 'generation' }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.gen_ai_system).toBe('openai'); + }); +}); + +describe('extractLlmData response', () => { + test('test extract response span data with _input and _response', () => { + const spanData = { + type: 'response', + _input: [{ role: 'user', content: 'Hello' }], + _response: { + model: 'gpt-4o', + usage: { input_tokens: 8, output_tokens: 4 }, + temperature: 0.5, + output: [{ type: 'message', content: 'Hi' }] + } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-4o'); + expect(result.temperature).toBe(0.5); + expect(result.numInputTokens).toBe(8); + expect(result.numOutputTokens).toBe(4); + }); + + test('test extract response span data with fallback input/response keys', () => { + const spanData = { + type: 'response', + input: 'some input', + response: { + model: 'gpt-3.5-turbo', + usage: { input_tokens: 2, output_tokens: 1 } + } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-3.5-turbo'); + expect(result.numInputTokens).toBe(2); + }); + + test('test extract response span with null response returns unknown model', () => { + const spanData = { type: 'response' }; + const result = extractLlmData(spanData); + expect(result.model).toBe('unknown'); + expect(result.numInputTokens).toBe(0); + }); +}); + +describe('extractLlmData unknown type', () => { + test('test extract returns empty record for unknown type', () => { + const result = extractLlmData({ type: 'unknown' }); + expect(Object.keys(result).length).toBe(0); + }); +}); + +describe('extractToolData', () => { + 
test('test extract function span data string input/output', () => { + const spanData = { + type: 'function', + input: '{"query":"hello"}', + output: 'result text' + }; + const result = extractToolData(spanData); + expect(result.input).toBe('{"query":"hello"}'); + expect(result.output).toBe('result text'); + }); + + test('test extract function span data object input serialised', () => { + const spanData = { + type: 'function', + input: { query: 'hello' }, + output: { answer: 'world' } + }; + const result = extractToolData(spanData); + expect(result.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test extract function span data missing output', () => { + const spanData = { type: 'function', input: 'test' }; + const result = extractToolData(spanData); + expect(result.output).toBeUndefined(); + }); + + test('test extract function span with mcp_data in metadata', () => { + const spanData = { + type: 'function', + input: 'test', + mcp_data: { server: 'my-server', tool: 'my-tool' } + }; + const result = extractToolData(spanData); + const meta = result.metadata as Record; + expect(meta.mcp_data).toBe( + JSON.stringify({ server: 'my-server', tool: 'my-tool' }) + ); + }); + + test('test extract guardrail span triggered', () => { + const spanData = { type: 'guardrail', triggered: true, name: 'PII Filter' }; + const result = extractToolData(spanData); + expect(result.input).toBe(''); + expect(result.output).toBe('Guardrail triggered'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe('true'); + expect(meta.guardrail_name).toBe('PII Filter'); + }); + + test('test extract guardrail span not triggered', () => { + const spanData = { type: 'guardrail', triggered: false, name: 'Safety' }; + const result = extractToolData(spanData); + expect(result.output).toBe('Guardrail passed'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe('false'); + }); + + 
test('test extract tool data for transcription returns empty', () => { + const result = extractToolData({ type: 'transcription' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); + + test('test extract tool data for mcp_tools returns empty', () => { + const result = extractToolData({ type: 'mcp_tools' }); + expect(result.input).toBe(''); + }); +}); + +describe('extractWorkflowData', () => { + test('test extract agent span data with tools and handoffs', () => { + const spanData = { + type: 'agent', + name: 'PlannerAgent', + tools: ['search', 'calculator'], + handoffs: ['ReviewAgent'], + output_type: 'string' + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe(''); + const meta = result.metadata as Record; + expect(meta.tools).toBe(JSON.stringify(['search', 'calculator'])); + expect(meta.handoffs).toBe(JSON.stringify(['ReviewAgent'])); + expect(meta.output_type).toBe(JSON.stringify('string')); + }); + + test('test extract agent span data without optional fields', () => { + const result = extractWorkflowData({ type: 'agent' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + const meta = result.metadata as Record; + expect(Object.keys(meta).length).toBe(0); + }); + + test('test extract handoff span data', () => { + const spanData = { + type: 'handoff', + from_agent: 'AgentA', + to_agent: 'AgentB' + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe('AgentA'); + expect(result.output).toBe('AgentB'); + const meta = result.metadata as Record; + expect(meta.from_agent).toBe('AgentA'); + expect(meta.to_agent).toBe('AgentB'); + }); + + test('test extract handoff span data with missing agents', () => { + const result = extractWorkflowData({ type: 'handoff' }); + expect(result.input).toBe(''); + expect(result.output).toBe(''); + }); + + test('test extract custom span data with input and output', () => { + const spanData = { + type: 'custom', + data: { + input: 
'custom input', + output: 'custom output', + extra_key: 'extra value' + } + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe('custom input'); + expect(result.output).toBe('custom output'); + const meta = result.metadata as Record; + expect(meta.extra_key).toBe('extra value'); + expect(meta.input).toBeUndefined(); + expect(meta.output).toBeUndefined(); + }); + + test('test extract custom span data with object input serialised', () => { + const spanData = { + type: 'custom', + data: { input: { query: 'hello' }, output: { answer: 'world' } } + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test extract custom span data with no data field', () => { + const result = extractWorkflowData({ type: 'custom' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); + + test('test extract unknown span type returns empty', () => { + const result = extractWorkflowData({ type: 'future_type' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); +}); diff --git a/tests/handlers/openai-agents/embedded-tool-calls.test.ts b/tests/handlers/openai-agents/embedded-tool.test.ts similarity index 100% rename from tests/handlers/openai-agents/embedded-tool-calls.test.ts rename to tests/handlers/openai-agents/embedded-tool.test.ts diff --git a/tests/handlers/openai-agents/extract-llm-data.test.ts b/tests/handlers/openai-agents/extract-llm-data.test.ts deleted file mode 100644 index 69bf67f1..00000000 --- a/tests/handlers/openai-agents/extract-llm-data.test.ts +++ /dev/null @@ -1,148 +0,0 @@ -import { - extractLlmData, - parseUsage -} from '../../../src/handlers/openai-agents/data-extraction'; - -describe('parseUsage', () => { - test('test parse usage null returns zeros', () => { - const result = parseUsage(null); - expect(result).toEqual({ - inputTokens: 0, - 
outputTokens: 0, - totalTokens: null, - reasoningTokens: 0, - cachedTokens: 0 - }); - }); - - test('test parse usage undefined returns zeros', () => { - const result = parseUsage(undefined); - expect(result).toEqual({ - inputTokens: 0, - outputTokens: 0, - totalTokens: null, - reasoningTokens: 0, - cachedTokens: 0 - }); - }); - - test('test parse usage with input_tokens and output_tokens', () => { - const result = parseUsage({ - input_tokens: 10, - output_tokens: 20, - total_tokens: 30 - }); - expect(result.inputTokens).toBe(10); - expect(result.outputTokens).toBe(20); - expect(result.totalTokens).toBe(30); - }); - - test('test parse usage with legacy prompt_tokens and completion_tokens', () => { - const result = parseUsage({ prompt_tokens: 5, completion_tokens: 15 }); - expect(result.inputTokens).toBe(5); - expect(result.outputTokens).toBe(15); - }); - - test('test parse usage extracts reasoning_tokens from details', () => { - const result = parseUsage({ - input_tokens: 10, - output_tokens: 5, - details: { reasoning_tokens: 3, cached_tokens: 2 } - }); - expect(result.reasoningTokens).toBe(3); - expect(result.cachedTokens).toBe(2); - }); - - test('test parse usage extracts reasoning_tokens at top level', () => { - const result = parseUsage({ - input_tokens: 10, - output_tokens: 5, - reasoning_tokens: 4 - }); - expect(result.reasoningTokens).toBe(4); - }); -}); - -describe('extractLlmData generation', () => { - test('test extract generation span data', () => { - const spanData = { - type: 'generation', - input: [{ role: 'user', content: 'Hello' }], - output: [{ role: 'assistant', content: 'Hi' }], - model: 'gpt-4o', - model_config: { temperature: 0.7, max_tokens: 100 }, - usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 } - }; - const result = extractLlmData(spanData); - expect(result.model).toBe('gpt-4o'); - expect(result.temperature).toBe(0.7); - expect(result.numInputTokens).toBe(10); - expect(result.numOutputTokens).toBe(5); - 
expect(result.totalTokens).toBe(15); - expect(result.input).toBe(JSON.stringify(spanData.input)); - expect(result.output).toBe(JSON.stringify(spanData.output)); - }); - - test('test extract generation span with null usage', () => { - const spanData = { type: 'generation', model: 'gpt-4o' }; - const result = extractLlmData(spanData); - expect(result.numInputTokens).toBe(0); - expect(result.numOutputTokens).toBe(0); - expect(result.totalTokens).toBeUndefined(); - }); - - test('test extract generation metadata includes gen_ai_system openai', () => { - const spanData = { type: 'generation' }; - const result = extractLlmData(spanData); - const meta = result.metadata as Record; - expect(meta.gen_ai_system).toBe('openai'); - }); -}); - -describe('extractLlmData response', () => { - test('test extract response span data with _input and _response', () => { - const spanData = { - type: 'response', - _input: [{ role: 'user', content: 'Hello' }], - _response: { - model: 'gpt-4o', - usage: { input_tokens: 8, output_tokens: 4 }, - temperature: 0.5, - output: [{ type: 'message', content: 'Hi' }] - } - }; - const result = extractLlmData(spanData); - expect(result.model).toBe('gpt-4o'); - expect(result.temperature).toBe(0.5); - expect(result.numInputTokens).toBe(8); - expect(result.numOutputTokens).toBe(4); - }); - - test('test extract response span data with fallback input/response keys', () => { - const spanData = { - type: 'response', - input: 'some input', - response: { - model: 'gpt-3.5-turbo', - usage: { input_tokens: 2, output_tokens: 1 } - } - }; - const result = extractLlmData(spanData); - expect(result.model).toBe('gpt-3.5-turbo'); - expect(result.numInputTokens).toBe(2); - }); - - test('test extract response span with null response returns unknown model', () => { - const spanData = { type: 'response' }; - const result = extractLlmData(spanData); - expect(result.model).toBe('unknown'); - expect(result.numInputTokens).toBe(0); - }); -}); - -describe('extractLlmData unknown 
type', () => { - test('test extract returns empty record for unknown type', () => { - const result = extractLlmData({ type: 'unknown' }); - expect(Object.keys(result).length).toBe(0); - }); -}); diff --git a/tests/handlers/openai-agents/extract-tool-data.test.ts b/tests/handlers/openai-agents/extract-tool-data.test.ts deleted file mode 100644 index 33065509..00000000 --- a/tests/handlers/openai-agents/extract-tool-data.test.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { extractToolData } from '../../../src/handlers/openai-agents/data-extraction'; - -describe('extractToolData', () => { - test('test extract function span data string input/output', () => { - const spanData = { - type: 'function', - input: '{"query":"hello"}', - output: 'result text' - }; - const result = extractToolData(spanData); - expect(result.input).toBe('{"query":"hello"}'); - expect(result.output).toBe('result text'); - }); - - test('test extract function span data object input serialised', () => { - const spanData = { - type: 'function', - input: { query: 'hello' }, - output: { answer: 'world' } - }; - const result = extractToolData(spanData); - expect(result.input).toBe(JSON.stringify({ query: 'hello' })); - expect(result.output).toBe(JSON.stringify({ answer: 'world' })); - }); - - test('test extract function span data missing output', () => { - const spanData = { type: 'function', input: 'test' }; - const result = extractToolData(spanData); - expect(result.output).toBeUndefined(); - }); - - test('test extract function span with mcp_data in metadata', () => { - const spanData = { - type: 'function', - input: 'test', - mcp_data: { server: 'my-server', tool: 'my-tool' } - }; - const result = extractToolData(spanData); - const meta = result.metadata as Record; - expect(meta.mcp_data).toBe( - JSON.stringify({ server: 'my-server', tool: 'my-tool' }) - ); - }); - - test('test extract guardrail span triggered', () => { - const spanData = { type: 'guardrail', triggered: true, name: 'PII Filter' }; - 
const result = extractToolData(spanData); - expect(result.input).toBe(''); - expect(result.output).toBe('Guardrail triggered'); - const meta = result.metadata as Record; - expect(meta.triggered).toBe('true'); - expect(meta.guardrail_name).toBe('PII Filter'); - }); - - test('test extract guardrail span not triggered', () => { - const spanData = { type: 'guardrail', triggered: false, name: 'Safety' }; - const result = extractToolData(spanData); - expect(result.output).toBe('Guardrail passed'); - const meta = result.metadata as Record; - expect(meta.triggered).toBe('false'); - }); - - test('test extract tool data for transcription returns empty', () => { - const result = extractToolData({ type: 'transcription' }); - expect(result.input).toBe(''); - expect(result.output).toBeUndefined(); - }); - - test('test extract tool data for mcp_tools returns empty', () => { - const result = extractToolData({ type: 'mcp_tools' }); - expect(result.input).toBe(''); - }); -}); diff --git a/tests/handlers/openai-agents/extract-workflow-data.test.ts b/tests/handlers/openai-agents/extract-workflow-data.test.ts deleted file mode 100644 index 3a813862..00000000 --- a/tests/handlers/openai-agents/extract-workflow-data.test.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { extractWorkflowData } from '../../../src/handlers/openai-agents/data-extraction'; - -describe('extractWorkflowData', () => { - test('test extract agent span data with tools and handoffs', () => { - const spanData = { - type: 'agent', - name: 'PlannerAgent', - tools: ['search', 'calculator'], - handoffs: ['ReviewAgent'], - output_type: 'string' - }; - const result = extractWorkflowData(spanData); - expect(result.input).toBe(''); - const meta = result.metadata as Record; - expect(meta.tools).toBe(JSON.stringify(['search', 'calculator'])); - expect(meta.handoffs).toBe(JSON.stringify(['ReviewAgent'])); - expect(meta.output_type).toBe(JSON.stringify('string')); - }); - - test('test extract agent span data without optional fields', 
() => { - const result = extractWorkflowData({ type: 'agent' }); - expect(result.input).toBe(''); - expect(result.output).toBeUndefined(); - const meta = result.metadata as Record; - expect(Object.keys(meta).length).toBe(0); - }); - - test('test extract handoff span data', () => { - const spanData = { - type: 'handoff', - from_agent: 'AgentA', - to_agent: 'AgentB' - }; - const result = extractWorkflowData(spanData); - expect(result.input).toBe('AgentA'); - expect(result.output).toBe('AgentB'); - const meta = result.metadata as Record; - expect(meta.from_agent).toBe('AgentA'); - expect(meta.to_agent).toBe('AgentB'); - }); - - test('test extract handoff span data with missing agents', () => { - const result = extractWorkflowData({ type: 'handoff' }); - expect(result.input).toBe(''); - expect(result.output).toBe(''); - }); - - test('test extract custom span data with input and output', () => { - const spanData = { - type: 'custom', - data: { - input: 'custom input', - output: 'custom output', - extra_key: 'extra value' - } - }; - const result = extractWorkflowData(spanData); - expect(result.input).toBe('custom input'); - expect(result.output).toBe('custom output'); - const meta = result.metadata as Record; - expect(meta.extra_key).toBe('extra value'); - expect(meta.input).toBeUndefined(); - expect(meta.output).toBeUndefined(); - }); - - test('test extract custom span data with object input serialised', () => { - const spanData = { - type: 'custom', - data: { input: { query: 'hello' }, output: { answer: 'world' } } - }; - const result = extractWorkflowData(spanData); - expect(result.input).toBe(JSON.stringify({ query: 'hello' })); - expect(result.output).toBe(JSON.stringify({ answer: 'world' })); - }); - - test('test extract custom span data with no data field', () => { - const result = extractWorkflowData({ type: 'custom' }); - expect(result.input).toBe(''); - expect(result.output).toBeUndefined(); - }); - - test('test extract unknown span type returns empty', () => { 
- const result = extractWorkflowData({ type: 'future_type' }); - expect(result.input).toBe(''); - expect(result.output).toBeUndefined(); - }); -}); diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts new file mode 100644 index 00000000..af41de6f --- /dev/null +++ b/tests/handlers/openai-agents/integration.test.ts @@ -0,0 +1,414 @@ +import { GalileoTracingProcessor } from '../../../src/handlers/openai-agents'; +import type { + AgentTrace, + AgentSpan +} from '../../../src/handlers/openai-agents'; + +function createMockLogger() { + return { + startTrace: jest.fn().mockReturnValue({}), + addLlmSpan: jest.fn().mockReturnValue({}), + addToolSpan: jest.fn().mockReturnValue({}), + addWorkflowSpan: jest.fn().mockReturnValue({}), + addAgentSpan: jest.fn().mockReturnValue({}), + conclude: jest.fn().mockReturnValue(undefined), + flush: jest.fn().mockResolvedValue(undefined) + }; +} + +function makeTrace(overrides: Partial = {}): AgentTrace { + return { + traceId: 'trace-001', + name: 'Multi-Agent Flow', + metadata: {}, + startedAt: new Date('2024-01-01T00:00:00Z').toISOString(), + endedAt: new Date('2024-01-01T00:00:05Z').toISOString(), + ...overrides + }; +} + +function makeSpan( + overrides: Partial & { spanData: AgentSpan['spanData'] } +): AgentSpan { + return { + spanId: 'span-001', + traceId: 'trace-001', + parentId: 'trace-001', + startedAt: new Date('2024-01-01T00:00:01Z').toISOString(), + endedAt: new Date('2024-01-01T00:00:02Z').toISOString(), + error: null, + ...overrides + }; +} + +describe('Multi-agent integration flows', () => { + test('test multiple agents with handoff', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // PlannerAgent + const planner = makeSpan({ + spanId: 'agent-planner', + parentId: 'trace-001', + spanData: { type: 'agent', name: 
'PlannerAgent' } + }); + + // Handoff to ExecutorAgent + const handoff = makeSpan({ + spanId: 'handoff-001', + parentId: 'agent-planner', + spanData: { + type: 'handoff', + from_agent: 'PlannerAgent', + to_agent: 'ExecutorAgent' + } + }); + + // ExecutorAgent + const executor = makeSpan({ + spanId: 'agent-executor', + parentId: 'handoff-001', + spanData: { type: 'agent', name: 'ExecutorAgent' } + }); + + await processor.onSpanStart(planner); + await processor.onSpanStart(handoff); + await processor.onSpanStart(executor); + await processor.onSpanEnd(executor); + await processor.onSpanEnd(handoff); + await processor.onSpanEnd(planner); + await processor.onTraceEnd(trace); + + // Verify all spans logged + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); // 2 agents + 1 handoff + }); + + test('test agent->tool->llm->tool flow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const tool1 = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'search' } + }); + + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + const tool2 = makeSpan({ + spanId: 'tool-002', + parentId: 'agent-001', + spanData: { type: 'function', name: 'calculate' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(tool1); + await processor.onSpanEnd(tool1); + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanStart(tool2); + await processor.onSpanEnd(tool2); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent + 
expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(2); // 2 tools + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // 1 llm + }); + + test('test guardrail triggered in flow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const guardrail = makeSpan({ + spanId: 'guardrail-001', + parentId: 'agent-001', + spanData: { type: 'guardrail', name: 'PII Filter', triggered: true } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(guardrail); + await processor.onSpanEnd(guardrail); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.output).toBe('Guardrail triggered'); + }); + + test('test embedded tool calls from OpenAI response', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const response = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { + type: 'response', + model: 'gpt-4o', + _input: [{ role: 'user', content: 'search for python' }], + _response: { + model: 'gpt-4o', + output: [ + { + type: 'web_search_call', + action: { query: 'python programming' }, + id: 'search-1' + }, + { + type: 'code_interpreter_call', + code: 'print("result")', + outputs: [{ logs: 'result' }], + id: 'code-1' + } + ] + } + } + }); + + await processor.onSpanStart(response); + await processor.onSpanEnd(response); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + // Verify that either embeddedToolCalls exists 
or metadata includes them + if (llmCall.embeddedToolCalls) { + expect(llmCall.embeddedToolCalls.length).toBe(2); + expect(llmCall.embeddedToolCalls[0].type).toBe('web_search_call'); + expect(llmCall.embeddedToolCalls[1].type).toBe('code_interpreter_call'); + } else { + // May be in metadata as embedded_tool_calls + const meta = llmCall.metadata as Record; + expect(meta.embedded_tool_calls).toBeDefined(); + } + }); + + test('test custom spans mixed with regular spans', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'agent-001', + spanData: { + type: 'custom', + __galileoCustom: true, + data: { input: 'test', output: 'result' } + } + }); + + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(2); // agent + custom + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + // conclude is called for all non-root workflow/agent spans + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test error in middle of flow handled', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + 
}); + + const tool1 = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'search' } + }); + + const errorTool = makeSpan({ + spanId: 'tool-002', + parentId: 'agent-001', + error: { message: 'Connection timeout' }, + spanData: { type: 'function', name: 'fetch' } + }); + + const tool3 = makeSpan({ + spanId: 'tool-003', + parentId: 'agent-001', + spanData: { type: 'function', name: 'parse' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(tool1); + await processor.onSpanEnd(tool1); + await processor.onSpanStart(errorTool); + await processor.onSpanEnd(errorTool); // Ends with error + await processor.onSpanStart(tool3); + await processor.onSpanEnd(tool3); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + // Verify error tool has error status + const errorToolCall = mockLogger.addToolSpan.mock.calls[1][0]; + expect(errorToolCall.statusCode).toBe(500); + const errorMeta = errorToolCall.metadata as Record; + expect(errorMeta.error_message).toBe('Connection timeout'); + + // Verify all tools logged + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(3); + }); + + test('test complex nested structure with multiple agents', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Root agent + const rootAgent = makeSpan({ + spanId: 'root-agent', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'RootAgent' } + }); + + // First branch: planning + const planningAgent = makeSpan({ + spanId: 'planning-agent', + parentId: 'root-agent', + spanData: { type: 'agent', name: 'PlanningAgent' } + }); + + const planLLM = makeSpan({ + spanId: 'plan-llm', + parentId: 'planning-agent', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + // Second branch: execution + const executionAgent = makeSpan({ + spanId: 
'execution-agent', + parentId: 'root-agent', + spanData: { type: 'agent', name: 'ExecutionAgent' } + }); + + const executionTool = makeSpan({ + spanId: 'exec-tool', + parentId: 'execution-agent', + spanData: { type: 'function', name: 'execute' } + }); + + await processor.onSpanStart(rootAgent); + await processor.onSpanStart(planningAgent); + await processor.onSpanStart(planLLM); + await processor.onSpanEnd(planLLM); + await processor.onSpanEnd(planningAgent); + await processor.onSpanStart(executionAgent); + await processor.onSpanStart(executionTool); + await processor.onSpanEnd(executionTool); + await processor.onSpanEnd(executionAgent); + await processor.onSpanEnd(rootAgent); + await processor.onTraceEnd(trace); + + // Verify all spans logged + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); // 3 agents + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + // conclude is called for all non-root workflow/agent spans + expect(mockLogger.conclude).toHaveBeenCalled(); + }); +}); + +describe('Output tracking integration', () => { + test('test last output preserved across multiple spans', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const llm1 = makeSpan({ + spanId: 'llm-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'First output' + } + }); + + const llm2 = makeSpan({ + spanId: 'llm-002', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'Final output' + } + }); + + await processor.onSpanStart(llm1); + await processor.onSpanEnd(llm1); + await processor.onSpanStart(llm2); + await processor.onSpanEnd(llm2); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + // Output is 
stringified, so check for either the string or JSON-stringified version + expect( + startTraceCall.output === 'Final output' || + startTraceCall.output === '"Final output"' + ).toBe(true); + }); +}); diff --git a/tests/handlers/openai-agents/map-span-type.test.ts b/tests/handlers/openai-agents/map-span-type.test.ts deleted file mode 100644 index 43e108c9..00000000 --- a/tests/handlers/openai-agents/map-span-type.test.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { - mapSpanType, - GALILEO_CUSTOM_TYPE -} from '../../../src/handlers/openai-agents/span-mapping'; - -describe('mapSpanType', () => { - test('test maps generation to llm', () => { - expect(mapSpanType({ type: 'generation' })).toBe('llm'); - }); - - test('test maps response to llm', () => { - expect(mapSpanType({ type: 'response' })).toBe('llm'); - }); - - test('test maps function to tool', () => { - expect(mapSpanType({ type: 'function' })).toBe('tool'); - }); - - test('test maps guardrail to tool', () => { - expect(mapSpanType({ type: 'guardrail' })).toBe('tool'); - }); - - test('test maps transcription to tool', () => { - expect(mapSpanType({ type: 'transcription' })).toBe('tool'); - }); - - test('test maps speech to tool', () => { - expect(mapSpanType({ type: 'speech' })).toBe('tool'); - }); - - test('test maps speech_group to tool', () => { - expect(mapSpanType({ type: 'speech_group' })).toBe('tool'); - }); - - test('test maps mcp_tools to tool', () => { - expect(mapSpanType({ type: 'mcp_tools' })).toBe('tool'); - }); - - test('test maps agent to workflow', () => { - expect(mapSpanType({ type: 'agent' })).toBe('workflow'); - }); - - test('test maps handoff to workflow', () => { - expect(mapSpanType({ type: 'handoff' })).toBe('workflow'); - }); - - test('test maps custom to workflow', () => { - expect(mapSpanType({ type: 'custom' })).toBe('workflow'); - }); - - test('test maps galileo_custom sentinel to galileo_custom', () => { - expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe( - 
GALILEO_CUSTOM_TYPE - ); - }); - - test('test maps unknown type to workflow fallback', () => { - expect(mapSpanType({ type: 'unknown_future_type' })).toBe('workflow'); - }); -}); diff --git a/tests/handlers/openai-agents/node.test.ts b/tests/handlers/openai-agents/node.test.ts new file mode 100644 index 00000000..4a1aad87 --- /dev/null +++ b/tests/handlers/openai-agents/node.test.ts @@ -0,0 +1,133 @@ +import { createNode } from '../../../src/handlers/openai-agents/node'; + +describe('createNode()', () => { + test('test creates node with correct nodeType llm', () => { + const node = createNode({ + nodeType: 'llm', + spanParams: { name: 'GPT Call' }, + runId: 'span-001', + parentRunId: 'trace-001' + }); + + expect(node.nodeType).toBe('llm'); + expect(node.spanParams).toEqual({ name: 'GPT Call' }); + expect(node.runId).toBe('span-001'); + expect(node.parentRunId).toBe('trace-001'); + }); + + test('test creates node with correct nodeType tool', () => { + const node = createNode({ + nodeType: 'tool', + spanParams: { name: 'Search Tool' }, + runId: 'span-002', + parentRunId: 'span-001' + }); + + expect(node.nodeType).toBe('tool'); + }); + + test('test creates node with correct nodeType workflow', () => { + const node = createNode({ + nodeType: 'workflow', + spanParams: {}, + runId: 'span-003', + parentRunId: null + }); + + expect(node.nodeType).toBe('workflow'); + }); + + test('test creates node with correct nodeType agent', () => { + const node = createNode({ + nodeType: 'agent', + spanParams: { name: 'Planning Agent' }, + runId: 'span-004', + parentRunId: 'trace-001' + }); + + expect(node.nodeType).toBe('agent'); + }); + + test('test initializes children as empty array', () => { + const node = createNode({ + nodeType: 'llm', + spanParams: {}, + runId: 'span-001', + parentRunId: null + }); + + expect(Array.isArray(node.children)).toBe(true); + expect(node.children.length).toBe(0); + }); + + test('test preserves all spanParams fields', () => { + const spanParams = { + 
name: 'Test Span', + input: 'test input', + output: 'test output', + model: 'gpt-4', + metadata: { key: 'value' } + }; + + const node = createNode({ + nodeType: 'llm', + spanParams, + runId: 'span-001', + parentRunId: 'trace-001' + }); + + expect(node.spanParams).toEqual(spanParams); + }); + + test('test children array is mutable', () => { + const node = createNode({ + nodeType: 'agent', + spanParams: {}, + runId: 'span-001', + parentRunId: null + }); + + node.children.push('child-001'); + node.children.push('child-002'); + + expect(node.children).toEqual(['child-001', 'child-002']); + }); + + test('test node has required Node interface properties', () => { + const node = createNode({ + nodeType: 'llm', + spanParams: { name: 'Test' }, + runId: 'span-001', + parentRunId: 'parent-001' + }); + + // Verify all required properties exist + expect('nodeType' in node).toBe(true); + expect('spanParams' in node).toBe(true); + expect('runId' in node).toBe(true); + expect('parentRunId' in node).toBe(true); + expect('children' in node).toBe(true); + }); + + test('test empty spanParams preserved correctly', () => { + const node = createNode({ + nodeType: 'tool', + spanParams: {}, + runId: 'span-001', + parentRunId: 'trace-001' + }); + + expect(Object.keys(node.spanParams).length).toBe(0); + }); + + test('test parentRunId can be null', () => { + const node = createNode({ + nodeType: 'agent', + spanParams: {}, + runId: 'trace-001', + parentRunId: null + }); + + expect(node.parentRunId).toBeNull(); + }); +}); diff --git a/tests/handlers/openai-agents/map-span-name.test.ts b/tests/handlers/openai-agents/span-mapping.test.ts similarity index 61% rename from tests/handlers/openai-agents/map-span-name.test.ts rename to tests/handlers/openai-agents/span-mapping.test.ts index 5d46bd7b..acec22c7 100644 --- a/tests/handlers/openai-agents/map-span-name.test.ts +++ b/tests/handlers/openai-agents/span-mapping.test.ts @@ -1,8 +1,65 @@ import { + mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } 
from '../../../src/handlers/openai-agents/span-mapping'; +describe('mapSpanType', () => { + test('test maps generation to llm', () => { + expect(mapSpanType({ type: 'generation' })).toBe('llm'); + }); + + test('test maps response to llm', () => { + expect(mapSpanType({ type: 'response' })).toBe('llm'); + }); + + test('test maps function to tool', () => { + expect(mapSpanType({ type: 'function' })).toBe('tool'); + }); + + test('test maps guardrail to tool', () => { + expect(mapSpanType({ type: 'guardrail' })).toBe('tool'); + }); + + test('test maps transcription to tool', () => { + expect(mapSpanType({ type: 'transcription' })).toBe('tool'); + }); + + test('test maps speech to tool', () => { + expect(mapSpanType({ type: 'speech' })).toBe('tool'); + }); + + test('test maps speech_group to tool', () => { + expect(mapSpanType({ type: 'speech_group' })).toBe('tool'); + }); + + test('test maps mcp_tools to tool', () => { + expect(mapSpanType({ type: 'mcp_tools' })).toBe('tool'); + }); + + test('test maps agent to workflow', () => { + expect(mapSpanType({ type: 'agent' })).toBe('workflow'); + }); + + test('test maps handoff to workflow', () => { + expect(mapSpanType({ type: 'handoff' })).toBe('workflow'); + }); + + test('test maps custom to workflow', () => { + expect(mapSpanType({ type: 'custom' })).toBe('workflow'); + }); + + test('test maps galileo_custom sentinel to galileo_custom', () => { + expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe( + GALILEO_CUSTOM_TYPE + ); + }); + + test('test maps unknown type to workflow fallback', () => { + expect(mapSpanType({ type: 'unknown_future_type' })).toBe('workflow'); + }); +}); + describe('mapSpanName', () => { test('test returns spanData.name when present', () => { expect(mapSpanName({ type: 'generation', name: 'MySpan' }, 'llm')).toBe( diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index cef06489..f58d585d 100644 --- 
a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -312,3 +312,524 @@ describe('GalileoTracingProcessor lifecycle', () => { expect(result.name).toBe('MyCustom'); }); }); + +describe('Span tree construction edge cases', () => { + test('test multiple children linked to single parent', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create parent span + const parentSpan = makeSpan({ + spanId: 'parent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'Parent Agent' } + }); + await processor.onSpanStart(parentSpan); + + // Create multiple child spans + const child1 = makeSpan({ + spanId: 'child-001', + parentId: 'parent-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'result 1' + } + }); + const child2 = makeSpan({ + spanId: 'child-002', + parentId: 'parent-001', + spanData: { + type: 'function', + name: 'search', + input: 'query', + output: 'result 2' + } + }); + + await processor.onSpanStart(child1); + await processor.onSpanStart(child2); + await processor.onSpanEnd(child1); + await processor.onSpanEnd(child2); + await processor.onSpanEnd(parentSpan); + await processor.onTraceEnd(trace); + + // Verify both children were logged + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + // conclude is called for all non-root workflow/agent spans + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test deeply nested spans (3 levels)', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Level 1: Agent + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 
'trace-001', + spanData: { type: 'agent' } + }); + + // Level 2: LLM under agent + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + // Level 3: Tool under LLM + const tool = makeSpan({ + spanId: 'tool-001', + parentId: 'llm-001', + spanData: { type: 'function', name: 'calc' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(llm); + await processor.onSpanStart(tool); + await processor.onSpanEnd(tool); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + // All should be logged + expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + }); + + test('test span with no parentId defaults to trace', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Span with parentId undefined (should default to traceId) + const span = makeSpan({ + spanId: 'span-001', + parentId: undefined, + spanData: { type: 'function', name: 'tool' } + }); + + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + }); + + test('test span parent link defaults to trace when parent not found', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create a parent agent first + const parentAgent = makeSpan({ + spanId: 'parent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + await processor.onSpanStart(parentAgent); + 
+ // Create a span with explicit parentId pointing to parent + const span = makeSpan({ + spanId: 'child-001', + parentId: 'parent-001', + spanData: { + type: 'function', + name: 'tool', + input: 'test', + output: 'result' + } + }); + + // Should not throw + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onSpanEnd(parentAgent); + await processor.onTraceEnd(trace); + + // Span is logged correctly + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + }); +}); + +describe('Response span data merging', () => { + test('test response span merges embedded tools at end', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _input: [{ role: 'user' }], + _response: { + model: 'gpt-4o', + output: [ + { + type: 'code_interpreter_call', + code: 'print("hello")', + outputs: [{ logs: 'hello' }], + id: 'call-1', + status: 'completed' + } + ] + } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + // addLlmSpan should be called for response type + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + // Verify that either embeddedToolCalls exists or metadata includes them + if (llmCall.embeddedToolCalls) { + expect(Array.isArray(llmCall.embeddedToolCalls)).toBe(true); + expect(llmCall.embeddedToolCalls[0].type).toBe('code_interpreter_call'); + } else { + // May be in metadata as embedded_tool_calls + const meta = llmCall.metadata as Record; + expect(meta.embedded_tool_calls).toBeDefined(); + } + }); + + test('test _responseObject removed from final params', async () => { + const mockLogger = createMockLogger(); + const processor = new 
GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _response: { output: [] } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + // _responseObject should not be in the final logged data + expect(llmCall._responseObject).toBeUndefined(); + }); + + test('test generation span updates usage on end', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'gen-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: [], + usage: { input_tokens: 10, output_tokens: 5 } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(llmCall.numInputTokens).toBe(10); + expect(llmCall.numOutputTokens).toBe(5); + }); + + test('test response span with no _responseObject handles gracefully', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { type: 'response' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + // Should not throw + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + }); +}); + +describe('Error handling and recovery', () => { + test('test span error with message only', async () => { + 
const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Test error' }, + spanData: { type: 'function', name: 'tool' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.statusCode).toBe(500); + const meta = toolCall.metadata as Record; + expect(meta.error_message).toBe('Test error'); + expect(meta.error_type).toBe('SpanError'); + }); + + test('test span error with message and data', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const errorData = { code: 'TOOL_ERROR', details: 'Connection failed' }; + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Tool failed', data: errorData }, + spanData: { + type: 'function', + name: 'failing_tool', + input: '', + output: undefined + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_details).toBe(JSON.stringify(errorData)); + }); + + test('test onSpanEnd without onSpanStart handled gracefully', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'orphan-span', + parentId: 'trace-001', + spanData: { type: 'tool' } + }); + + await processor.onTraceStart(trace); + // Skip 
onSpanStart + // Should not throw + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).not.toHaveBeenCalled(); + }); + + test('test error metadata merged with existing metadata', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + spanData: { + type: 'agent', + data: { user_id: '123' } // Will go to metadata + }, + error: { message: 'Error occurred' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const workflowCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + const meta = workflowCall.metadata as Record; + expect(meta.error_message).toBe('Error occurred'); + }); + + test('test error on non-existent span ignored gracefully', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'never-started-span', + parentId: 'trace-001', + error: { message: 'This should be ignored' }, + spanData: { type: 'tool' } + }); + + await processor.onTraceStart(trace); + // Skip onSpanStart - span doesn't exist in processor + // Should not throw + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).not.toHaveBeenCalled(); + }); +}); + +describe('Date and duration handling', () => { + test('test valid startedAt and endedAt calculate durationNs', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + const startTime = new Date('2024-01-01T00:00:00Z'); + const endTime = new Date('2024-01-01T00:00:05Z'); + + const trace = makeTrace({ + startedAt: 
startTime.toISOString(), + endedAt: endTime.toISOString() + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + // 5 seconds = 5,000,000,000 nanoseconds + expect(startTraceCall.durationNs).toBeGreaterThan(0); + expect(startTraceCall.durationNs).toBeCloseTo(5_000_000_000, -4); + }); + + test('test missing startedAt sets durationNs to 0', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace({ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + startedAt: undefined as any, // missing + endedAt: new Date().toISOString() + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.durationNs).toBe(0); + }); + + test('test missing endedAt uses current time', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const startTime = new Date('2024-01-01T00:00:00Z'); + + const trace = makeTrace({ + startedAt: startTime.toISOString(), + // eslint-disable-next-line @typescript-eslint/no-explicit-any + endedAt: undefined as any // missing + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + // Should calculate using current time, so durationNs >= 0 + expect(startTraceCall.durationNs).toBeGreaterThanOrEqual(0); + }); +}); + +describe('Metadata handling and serialization', () => { + test('test non-string metadata values stringified at trace start', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + const trace = makeTrace({ + metadata: { + user_id: '123', + request_count: 5, + flags: true, + 
config: { nested: 'value' } + } + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + const meta = startTraceCall.metadata as Record; + expect(meta.request_count).toBe('5'); + expect(meta.flags).toBe('true'); + expect(JSON.parse(meta.config)).toEqual({ nested: 'value' }); + }); + + test('test unicode characters preserved in metadata', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + const trace = makeTrace({ + metadata: { message: 'Hello 世界 🌍' } + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + const meta = startTraceCall.metadata as Record; + expect(meta.message).toBe('Hello 世界 🌍'); + }); + + test('test error overwrites specific metadata keys', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + spanData: { + type: 'function', + name: 'tool', + input: '', + output: undefined + }, + error: { message: 'Tool error', data: { code: 'ECONNREFUSED' } } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_message).toBe('Tool error'); + expect(meta.error_type).toBe('SpanError'); + }); +}); From 9016c3ab5680e4f366c5a9ff73af971a728f4ffe Mon Sep 17 00:00:00 2001 From: richter Date: Fri, 27 Feb 2026 15:52:29 -0300 Subject: [PATCH 03/21] feat(agents): Rebased to integrate new sdkLogger. 
--- src/handlers/openai-agents/index.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index e892409c..5c5b3630 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -18,11 +18,12 @@ import { createGalileoCustomSpanData, type GalileoCustomSpanData } from './custom-span'; +import { getSdkLogger } from 'galileo-generated'; +const sdkLogger = getSdkLogger(); // Warn if @openai/agents package is not available (optional peer dependency) import('@openai/agents-core' as string).catch(() => { - // eslint-disable-next-line no-console - console.warn( + sdkLogger.warn( '@openai/agents package is not installed. GalileoTracingProcessor will not function.' ); }); From 3b8995e4f488a60964634e870208dd2a3b765fec Mon Sep 17 00:00:00 2001 From: richter Date: Wed, 4 Mar 2026 14:01:23 -0300 Subject: [PATCH 04/21] fix(agents): Updated galileo-generated version, fixed parseUsage read of token information. 
--- package-lock.json | 9 --------- src/handlers/openai-agents/data-extraction.ts | 15 +++++++++++---- .../openai-agents/data-extraction.test.ts | 13 ++++++++++++- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/package-lock.json b/package-lock.json index c9479e47..d361912f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9273,15 +9273,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/undici": { - "version": "7.23.0", - "resolved": "https://registry.npmjs.org/undici/-/undici-7.23.0.tgz", - "integrity": "sha512-HVMxHKZKi+eL2mrUZDzDkKW3XvCjynhbtpSq20xQp4ePDFeSFuAfnvM0GIwZIv8fiKHjXFQ5WjxhCt15KRNj+g==", - "optional": true, - "engines": { - "node": ">=20.18.1" - } - }, "node_modules/undici-types": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index 3fbffdb3..f856187a 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -41,14 +41,21 @@ export function parseUsage( 0; const totalTokens = (usageData.total_tokens as number | undefined) ?? null; - // details is a flat Record in the Agents SDK - const details = (usageData.details as Record) ?? {}; + // Reasoning tokens live in output_tokens_details (Responses API) or details (legacy Agents SDK shape) + const outputDetails = + (usageData.output_tokens_details as Record | undefined) ?? + (usageData.details as Record | undefined) ?? + {}; + // Cached tokens live in input_tokens_details (Responses API) or the same details object + const inputDetails = + (usageData.input_tokens_details as Record | undefined) ?? + outputDetails; const reasoningTokens = - (details.reasoning_tokens as number | undefined) ?? + (outputDetails.reasoning_tokens as number | undefined) ?? (usageData.reasoning_tokens as number | undefined) ?? 
0; const cachedTokens = - (details.cached_tokens as number | undefined) ?? + (inputDetails.cached_tokens as number | undefined) ?? (usageData.cached_tokens as number | undefined) ?? 0; diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index 20abcbcf..a232b775 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -45,7 +45,18 @@ describe('parseUsage', () => { expect(result.outputTokens).toBe(15); }); - test('test parse usage extracts reasoning_tokens from details', () => { + test('test parse usage extracts reasoning_tokens from output_tokens_details', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + output_tokens_details: { reasoning_tokens: 3 }, + input_tokens_details: { cached_tokens: 2 } + }); + expect(result.reasoningTokens).toBe(3); + expect(result.cachedTokens).toBe(2); + }); + + test('test parse usage extracts reasoning_tokens from details (legacy shape)', () => { const result = parseUsage({ input_tokens: 10, output_tokens: 5, From b69d4ae187e8904373b89e8132815862cc247055 Mon Sep 17 00:00:00 2001 From: richter Date: Wed, 4 Mar 2026 15:07:35 -0300 Subject: [PATCH 05/21] fix(agents): Added support to log orphaned spans on onSpanStart. --- src/handlers/openai-agents/index.ts | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index 5c5b3630..7a442045 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -175,21 +175,27 @@ export class GalileoTracingProcessor implements TracingProcessor { }; } + // Determine parent ID (prefer explicit parentId, fallback to traceId) + const parentId = span.parentId ?? 
span.traceId; + + // Validate that parent node exists before creating and linking this node + const parentNode = this._nodes.get(parentId); + if (!parentNode) { + sdkLogger.warn( + `Parent node ${parentId} not found for span ${span.spanId} in trace ${span.traceId}` + ); + return; + } + const node = createNode({ nodeType: nodeType as Node['nodeType'], spanParams: initialParams, runId: span.spanId, - parentRunId: span.parentId ?? span.traceId + parentRunId: parentId }); this._nodes.set(span.spanId, node); - - // Link to parent node - const parentId = span.parentId ?? span.traceId; - const parentNode = this._nodes.get(parentId); - if (parentNode) { - parentNode.children.push(span.spanId); - } + parentNode.children.push(span.spanId); } /** From a82115a8f2aef66dc03af373a8f880cccfc78292 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 5 Mar 2026 11:03:18 -0300 Subject: [PATCH 06/21] fix(agents): Improved support for agent spans. --- src/handlers/openai-agents/data-extraction.ts | 3 + src/handlers/openai-agents/index.ts | 40 +- src/handlers/openai-agents/span-mapping.ts | 2 + src/types/logging/logger.types.ts | 1 + src/utils/galileo-logger.ts | 4 +- .../openai-agents/integration.test.ts | 14 +- .../openai-agents/span-mapping.test.ts | 67 ++- .../openai-agents/tracing-processor.test.ts | 385 +++++++++++++++++- 8 files changed, 491 insertions(+), 25 deletions(-) diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index f856187a..af29dc85 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -203,9 +203,12 @@ export function extractWorkflowData( const tools = spanData.tools; const handoffs = spanData.handoffs; const outputType = spanData.output_type; + const agentType = + typeof spanData.agentType === 'string' ? spanData.agentType : undefined; return { input: '', output: undefined, + ...(agentType !== undefined ? 
{ agentType } : {}), metadata: { ...(tools !== undefined ? { tools: JSON.stringify(tools) } : {}), ...(handoffs !== undefined diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index 7a442045..e9d960d4 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -3,6 +3,7 @@ import { GalileoLogger } from '../../utils/galileo-logger'; import { GalileoSingleton } from '../../singleton'; import { calculateDurationNs } from '../../utils/utils'; import type { JsonObject } from '../../types/base.types'; +import { AgentType } from '../../types/new-api.types'; import { type Node, createNode } from './node'; import { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping'; import { @@ -69,6 +70,32 @@ export interface TracingProcessor { forceFlush(): Promise; } +/** + * Maps an OpenAI agent type string to a Galileo AgentType enum value. + * Returns undefined when no agentType is present so addAgentSpan() can use its default. + */ +function extractAgentType( + spanParams: Record +): AgentType | undefined { + const raw = spanParams.agentType; + if (typeof raw !== 'string' || !raw) { + return undefined; + } + + const typeMap: Record = { + classifier: AgentType.CLASSIFIER, + planner: AgentType.PLANNER, + react: AgentType.REACT, + reflection: AgentType.REFLECTION, + router: AgentType.ROUTER, + supervisor: AgentType.SUPERVISOR, + judge: AgentType.JUDGE, + default: AgentType.DEFAULT + }; + + return typeMap[raw.toLowerCase()] ?? AgentType.DEFAULT; +} + /** * GalileoTracingProcessor implements the OpenAI Agents SDK TracingProcessor interface * to capture agent runs and emit them to GalileoLogger. 
@@ -373,8 +400,19 @@ export class GalileoTracingProcessor implements TracingProcessor { metadata, createdAt: startedAt }); + } else if (node.nodeType === 'agent') { + this._galileoLogger.addAgentSpan({ + input, + output, + name, + durationNs, + metadata, + createdAt: startedAt, + agentType: extractAgentType(params), + statusCode + }); } else { - // workflow or agent child nodes + // workflow and other parent nodes this._galileoLogger.addWorkflowSpan({ input, output, diff --git a/src/handlers/openai-agents/span-mapping.ts b/src/handlers/openai-agents/span-mapping.ts index af797fcf..9bbbb937 100644 --- a/src/handlers/openai-agents/span-mapping.ts +++ b/src/handlers/openai-agents/span-mapping.ts @@ -34,6 +34,8 @@ export function mapSpanType(spanData: { return 'tool'; case 'agent': + return 'agent'; + case 'handoff': case 'custom': return 'workflow'; diff --git a/src/types/logging/logger.types.ts b/src/types/logging/logger.types.ts index df3ea0cb..8dd3efeb 100644 --- a/src/types/logging/logger.types.ts +++ b/src/types/logging/logger.types.ts @@ -416,6 +416,7 @@ export interface IGalileoLoggerSpan { tags?: string[]; agentType?: AgentType; stepNumber?: number; + statusCode?: number; }): AgentSpan; } diff --git a/src/utils/galileo-logger.ts b/src/utils/galileo-logger.ts index 5029d5f6..7faebd97 100644 --- a/src/utils/galileo-logger.ts +++ b/src/utils/galileo-logger.ts @@ -1184,6 +1184,7 @@ class GalileoLogger implements IGalileoLogger { tags?: string[]; agentType?: AgentType; stepNumber?: number; + statusCode?: number; }): AgentSpan { const span = new AgentSpan({ input: options.input, @@ -1196,7 +1197,8 @@ class GalileoLogger implements IGalileoLogger { tags: options.tags, metrics: new Metrics({ durationNs: options.durationNs }), agentType: options.agentType, - stepNumber: options.stepNumber + stepNumber: options.stepNumber, + statusCode: options.statusCode }); this.addChildSpanToParent(span); diff --git a/tests/handlers/openai-agents/integration.test.ts 
b/tests/handlers/openai-agents/integration.test.ts index af41de6f..b5611c9e 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -82,8 +82,9 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(planner); await processor.onTraceEnd(trace); - // Verify all spans logged - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); // 2 agents + 1 handoff + // Verify all spans logged: 2 agents use addAgentSpan, 1 handoff uses addWorkflowSpan + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(2); + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); }); test('test agent->tool->llm->tool flow', async () => { @@ -127,7 +128,7 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(agent); await processor.onTraceEnd(trace); - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); // agent expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(2); // 2 tools expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // 1 llm }); @@ -249,7 +250,8 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(agent); await processor.onTraceEnd(trace); - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(2); // agent + custom + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); // agent + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // custom (galileo_custom → workflow) expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // conclude is called for all non-root workflow/agent spans expect(mockLogger.conclude).toHaveBeenCalled(); @@ -359,8 +361,8 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(rootAgent); await processor.onTraceEnd(trace); - // Verify all spans logged - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); // 3 agents + // Verify all spans logged: 3 agents use addAgentSpan + 
expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(3); expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); // conclude is called for all non-root workflow/agent spans diff --git a/tests/handlers/openai-agents/span-mapping.test.ts b/tests/handlers/openai-agents/span-mapping.test.ts index acec22c7..a7fe11f2 100644 --- a/tests/handlers/openai-agents/span-mapping.test.ts +++ b/tests/handlers/openai-agents/span-mapping.test.ts @@ -3,6 +3,7 @@ import { mapSpanName, GALILEO_CUSTOM_TYPE } from '../../../src/handlers/openai-agents/span-mapping'; +import type { NodeType } from '../../../src/handlers/openai-agents/node'; describe('mapSpanType', () => { test('test maps generation to llm', () => { @@ -37,8 +38,8 @@ describe('mapSpanType', () => { expect(mapSpanType({ type: 'mcp_tools' })).toBe('tool'); }); - test('test maps agent to workflow', () => { - expect(mapSpanType({ type: 'agent' })).toBe('workflow'); + test('test maps agent to agent', () => { + expect(mapSpanType({ type: 'agent' })).toBe('agent'); }); test('test maps handoff to workflow', () => { @@ -90,10 +91,10 @@ describe('mapSpanName', () => { }); test('test agent fallback uses spanData.name or Agent', () => { - expect( - mapSpanName({ type: 'agent', name: 'PlannerAgent' }, 'workflow') - ).toBe('PlannerAgent'); - expect(mapSpanName({ type: 'agent' }, 'workflow')).toBe('Agent'); + expect(mapSpanName({ type: 'agent', name: 'PlannerAgent' }, 'agent')).toBe( + 'PlannerAgent' + ); + expect(mapSpanName({ type: 'agent' }, 'agent')).toBe('Agent'); }); test('test handoff formats from-to arrow', () => { @@ -137,3 +138,57 @@ describe('mapSpanName', () => { expect(mapSpanName({ type: 'mcp_tools' }, 'tool')).toBe('MCP Tools'); }); }); + +describe('agent span type distinction', () => { + test('test agent maps to agent not workflow', () => { + const result = mapSpanType({ type: 'agent' }); + expect(result).toBe('agent'); + expect(result).not.toBe('workflow'); + }); + 
+ test('test handoff still maps to workflow', () => { + expect(mapSpanType({ type: 'handoff' })).toBe('workflow'); + }); + + test('test custom still maps to workflow', () => { + expect(mapSpanType({ type: 'custom' })).toBe('workflow'); + }); + + test('test galileo_custom sentinel is unaffected', () => { + expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe( + GALILEO_CUSTOM_TYPE + ); + }); + + test('test mapSpanType returns NodeType or GALILEO_CUSTOM_TYPE for all known types', () => { + const knownTypes: Array<{ + type: string; + expected: NodeType | typeof GALILEO_CUSTOM_TYPE; + }> = [ + { type: 'generation', expected: 'llm' }, + { type: 'response', expected: 'llm' }, + { type: 'function', expected: 'tool' }, + { type: 'guardrail', expected: 'tool' }, + { type: 'transcription', expected: 'tool' }, + { type: 'speech', expected: 'tool' }, + { type: 'speech_group', expected: 'tool' }, + { type: 'mcp_tools', expected: 'tool' }, + { type: 'agent', expected: 'agent' }, + { type: 'handoff', expected: 'workflow' }, + { type: 'custom', expected: 'workflow' } + ]; + for (const { type, expected } of knownTypes) { + expect(mapSpanType({ type })).toBe(expected); + } + }); + + test('test mapSpanName returns Agent for agent type without name', () => { + expect(mapSpanName({ type: 'agent' }, 'agent')).toBe('Agent'); + }); + + test('test mapSpanName returns spanData.name for agent type with name', () => { + expect(mapSpanName({ type: 'agent', name: 'RouterAgent' }, 'agent')).toBe( + 'RouterAgent' + ); + }); +}); diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index f58d585d..8e9b695c 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -3,6 +3,7 @@ import type { AgentTrace, AgentSpan } from '../../../src/handlers/openai-agents'; +import { AgentType } from '../../../src/types/new-api.types'; // Helper to build a 
mock AgentTrace function makeTrace(overrides: Partial = {}): AgentTrace { @@ -121,7 +122,7 @@ describe('GalileoTracingProcessor lifecycle', () => { expect(toolCall.name).toBe('search_tool'); }); - test('test full trace with workflow span calls addWorkflowSpan and conclude', async () => { + test('test full trace with agent span calls addAgentSpan and conclude', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); @@ -139,10 +140,10 @@ describe('GalileoTracingProcessor lifecycle', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); - const workflowCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; - expect(workflowCall.name).toBe('PlannerAgent'); - // conclude is called for workflow spans + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.name).toBe('PlannerAgent'); + // conclude is called for agent spans expect(mockLogger.conclude).toHaveBeenCalled(); }); @@ -209,7 +210,7 @@ describe('GalileoTracingProcessor lifecycle', () => { expect(mockLogger.flush).toHaveBeenCalledTimes(1); }); - test('test nested workflow span is logged as child', async () => { + test('test nested agent span is logged as child', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); @@ -237,9 +238,9 @@ describe('GalileoTracingProcessor lifecycle', () => { await processor.onSpanEnd(agentSpan); await processor.onTraceEnd(trace); - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); - // conclude called for workflow span + // conclude called for agent span 
expect(mockLogger.conclude).toHaveBeenCalled(); }); @@ -403,7 +404,7 @@ describe('Span tree construction edge cases', () => { // All should be logged expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); // agent expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); }); @@ -679,8 +680,8 @@ describe('Error handling and recovery', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - const workflowCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; - const meta = workflowCall.metadata as Record; + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + const meta = agentCall.metadata as Record; expect(meta.error_message).toBe('Error occurred'); }); @@ -833,3 +834,365 @@ describe('Metadata handling and serialization', () => { expect(meta.error_type).toBe('SpanError'); }); }); + +describe('Agent span emission', () => { + test('test agent span calls addAgentSpan not addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-span-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'TestAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addWorkflowSpan).not.toHaveBeenCalled(); + }); + + test('test agent span passes name and output correctly', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-span-001', + parentId: 'trace-001', + 
spanData: { type: 'agent', name: 'RouterAgent', output: 'routed' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.name).toBe('RouterAgent'); + }); + + test('test agent span with no agentType passes undefined agentType', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-span-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'Agent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.agentType).toBeUndefined(); + }); + + test('test agent span conclude is called after children', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const toolSpan = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'my_tool' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(toolSpan); + await processor.onSpanEnd(toolSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test agent span error passes statusCode 500 as direct field', async () => { + const mockLogger = createMockLogger(); + const 
processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-err-001', + parentId: 'trace-001', + error: { message: 'Agent failed' }, + spanData: { type: 'agent', name: 'FailingAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + // statusCode is passed as a direct field, not folded into metadata + expect(agentCall.statusCode).toBe(500); + const meta = agentCall.metadata as Record; + expect(meta.error_message).toBe('Agent failed'); + expect(meta.status_code).toBeUndefined(); + }); + + test('test agent span without error passes statusCode 200', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-ok-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'HappyAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.statusCode).toBe(200); + const meta = agentCall.metadata as Record; + expect(meta.status_code).toBeUndefined(); + }); +}); + +describe('Agent type extraction', () => { + test('test agent span with planner agentType is passed to addAgentSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-planner-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'PlannerAgent', agentType: 'planner' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await 
processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.agentType).toBe(AgentType.PLANNER); + }); + + test('test agent span with router agentType is passed to addAgentSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-router-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'RouterAgent', agentType: 'router' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.agentType).toBe(AgentType.ROUTER); + }); + + test('test agent span with uppercase agentType is normalized', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-sup-001', + parentId: 'trace-001', + spanData: { + type: 'agent', + name: 'SupervisorAgent', + agentType: 'SUPERVISOR' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.agentType).toBe(AgentType.SUPERVISOR); + }); + + test('test agent span with unknown agentType defaults to default', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-unknown-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'WeirdAgent', agentType: 'unknown_type' } + }); + + await processor.onTraceStart(trace); + await 
processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.agentType).toBe(AgentType.DEFAULT); + }); + + test('test agent span with missing agentType returns undefined', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-notype-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.agentType).toBeUndefined(); + }); + + test('test all known agentType values map correctly', async () => { + const typeMap: Array<{ input: string; expected: string }> = [ + { input: 'classifier', expected: AgentType.CLASSIFIER }, + { input: 'planner', expected: AgentType.PLANNER }, + { input: 'react', expected: AgentType.REACT }, + { input: 'reflection', expected: AgentType.REFLECTION }, + { input: 'router', expected: AgentType.ROUTER }, + { input: 'supervisor', expected: AgentType.SUPERVISOR }, + { input: 'judge', expected: AgentType.JUDGE }, + { input: 'default', expected: AgentType.DEFAULT } + ]; + + for (const { input, expected } of typeMap) { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: `agent-${input}-001`, + parentId: 'trace-001', + spanData: { type: 'agent', agentType: input } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + 
expect(agentCall.agentType).toBe(expected); + } + }); +}); + +describe('Span hierarchy correctness', () => { + test('test trace with agent child maintains correct parent-child order', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'RootAgent' } + }); + + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4o' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // startTrace is called first, then addAgentSpan, then addLlmSpan, then conclude + const callOrder = mockLogger.startTrace.mock.invocationCallOrder[0]; + const agentOrder = mockLogger.addAgentSpan.mock.invocationCallOrder[0]; + const llmOrder = mockLogger.addLlmSpan.mock.invocationCallOrder[0]; + const concludeOrder = mockLogger.conclude.mock.invocationCallOrder[0]; + + expect(callOrder).toBeLessThan(agentOrder); + expect(agentOrder).toBeLessThan(llmOrder); + expect(llmOrder).toBeLessThan(concludeOrder); + }); + + test('test workflow span type still uses addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const handoffSpan = makeSpan({ + spanId: 'handoff-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'A', to_agent: 'B' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(handoffSpan); + await processor.onSpanEnd(handoffSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + 
expect(mockLogger.addAgentSpan).not.toHaveBeenCalled(); + }); + + test('test agent and workflow spans both call conclude', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const handoffSpan = makeSpan({ + spanId: 'handoff-001', + parentId: 'agent-001', + spanData: { type: 'handoff' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(handoffSpan); + await processor.onSpanEnd(handoffSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // conclude is called 3 times: once for handoff (workflow), once for agent, once for concludeAll in onTraceEnd + expect(mockLogger.conclude).toHaveBeenCalledTimes(3); + }); +}); From 98eb59ce253f0178e7265bf97e8661a8894e5cc1 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 5 Mar 2026 11:58:51 -0300 Subject: [PATCH 07/21] fix(agents): firstInput refactored to feed first input data to span. --- src/handlers/openai-agents/index.ts | 34 ++++ .../openai-agents/tracing-processor.test.ts | 155 ++++++++++++++++++ 2 files changed, 189 insertions(+) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index e9d960d4..1eee0172 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -99,6 +99,11 @@ function extractAgentType( /** * GalileoTracingProcessor implements the OpenAI Agents SDK TracingProcessor interface * to capture agent runs and emit them to GalileoLogger. 
+ * + * Trace Input Handling: + * - Trace-level input is populated from the first LLM or Tool span with non-empty input + * - This ensures user queries are preserved in trace metadata + * - Falls back to trace name if no meaningful input is captured */ export class GalileoTracingProcessor implements TracingProcessor { private _nodes = new Map(); @@ -115,6 +120,25 @@ export class GalileoTracingProcessor implements TracingProcessor { private readonly _flushOnTraceEnd: boolean = true ) {} + /** + * Checks if a value is a meaningful, non-empty input string. + * Filters out null, undefined, empty strings, and JSON 'null'. + */ + private isMeaningfulInput(value: unknown): boolean { + if (value === null || value === undefined) { + return false; + } + const str = String(value).trim(); + if (str.length === 0) { + return false; + } + // Filter out JSON-serialized null (from earlier spans) + if (str === 'null' || str === '""') { + return false; + } + return true; + } + /** * Called when a trace starts. Creates a root agent node. * @param trace - The trace that started. 
@@ -284,6 +308,16 @@ export class GalileoTracingProcessor implements TracingProcessor { if (node.spanParams.output !== undefined) { this._lastOutput = node.spanParams.output; } + + // Track first input for trace-level input (capture from first meaningful span) + // Only capture from LLM or Tool spans (not workflow/agent), and only if we haven't captured yet + if ( + this._firstInput === null && + (node.nodeType === 'llm' || node.nodeType === 'tool') && + this.isMeaningfulInput(node.spanParams.input) + ) { + this._firstInput = node.spanParams.input; + } } /** diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index 8e9b695c..32f5ecce 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -1196,3 +1196,158 @@ describe('Span hierarchy correctness', () => { expect(mockLogger.conclude).toHaveBeenCalledTimes(3); }); }); + +describe('_firstInput population (trace-level input handling)', () => { + test('captures first input from LLM span', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // LLM span with input + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: 'What is the weather in NYC?', + output: 'It is sunny...' 
+ } + }); + + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onTraceEnd(trace); + + // Verify startTrace was called with the LLM input + // Note: input is JSON-stringified by extractLlmData + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.input).toBe('"What is the weather in NYC?"'); + }); + + test('captures first input from tool span if LLM input unavailable', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Tool span (with input, no LLM) + const tool = makeSpan({ + spanId: 'tool-001', + parentId: 'trace-001', + spanData: { + type: 'function', + name: 'search', + input: 'NYC weather forecast', + output: 'Sunny, 72F' + } + }); + + await processor.onSpanStart(tool); + await processor.onSpanEnd(tool); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.input).toBe('NYC weather forecast'); + }); + + test('skips empty or null inputs, uses first meaningful one', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // First LLM with empty input + const llm1 = makeSpan({ + spanId: 'llm-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: '', + output: 'response' + } + }); + + // Second LLM with actual input + const llm2 = makeSpan({ + spanId: 'llm-002', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: 'Real question', + output: 'Real answer' + } + }); + + await processor.onSpanStart(llm1); + await processor.onSpanEnd(llm1); + await processor.onSpanStart(llm2); + await processor.onSpanEnd(llm2); + await processor.onTraceEnd(trace); + + 
// Should use input from llm2, not llm1 + // Note: input is JSON-stringified by extractLlmData + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.input).toBe('"Real question"'); + }); + + test('falls back to trace name if no meaningful input captured', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace({ name: 'Agent Workflow' }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); // No spans at all + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + // Should fall back to trace name + expect(startTraceCall.input).toBe('Agent Workflow'); + }); + + test('only captures input from first meaningful span, ignores later ones', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const llm1 = makeSpan({ + spanId: 'llm-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: 'First query', + output: 'First answer' + } + }); + + const llm2 = makeSpan({ + spanId: 'llm-002', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: 'Second query', + output: 'Second answer' + } + }); + + await processor.onSpanStart(llm1); + await processor.onSpanEnd(llm1); + await processor.onSpanStart(llm2); + await processor.onSpanEnd(llm2); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + // Should use first input, not second + // Note: input is JSON-stringified by extractLlmData + expect(startTraceCall.input).toBe('"First query"'); + }); +}); From 2aa8c8b79884506757fd4982ee451761c149680d Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 5 Mar 2026 13:05:08 -0300 Subject: [PATCH 08/21] fix(agents): Moved import test to inside 
constructor, to avoid prompting every SDK user for OpenAI Agents dependency. --- src/handlers/openai-agents/index.ts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index 1eee0172..a00c4763 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -22,13 +22,6 @@ import { import { getSdkLogger } from 'galileo-generated'; const sdkLogger = getSdkLogger(); -// Warn if @openai/agents package is not available (optional peer dependency) -import('@openai/agents-core' as string).catch(() => { - sdkLogger.warn( - '@openai/agents package is not installed. GalileoTracingProcessor will not function.' - ); -}); - /** * Minimal interface for an OpenAI Agents SDK Trace object. */ @@ -109,6 +102,7 @@ export class GalileoTracingProcessor implements TracingProcessor { private _nodes = new Map(); private _lastOutput: unknown = null; private _firstInput: unknown = null; + private static _depCheckDone = false; /** * Creates a new GalileoTracingProcessor. @@ -118,7 +112,17 @@ export class GalileoTracingProcessor implements TracingProcessor { constructor( private readonly _galileoLogger: GalileoLogger = GalileoSingleton.getInstance().getClient(), private readonly _flushOnTraceEnd: boolean = true - ) {} + ) { + // Lazily check for @openai/agents-core package only when processor is instantiated + if (!GalileoTracingProcessor._depCheckDone) { + GalileoTracingProcessor._depCheckDone = true; + import('@openai/agents-core' as string).catch(() => { + sdkLogger.warn( + '@openai/agents package is not installed. GalileoTracingProcessor will not function.' + ); + }); + } + } /** * Checks if a value is a meaningful, non-empty input string. From ed8290a6b4a781b532b14af91ada07983a07d455 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 5 Mar 2026 17:19:36 -0300 Subject: [PATCH 09/21] fix(agents): Added processing of custom galileoSpan.
--- src/handlers/openai-agents/custom-span.ts | 21 ++- src/handlers/openai-agents/data-extraction.ts | 60 +++++++ src/handlers/openai-agents/index.ts | 44 +++-- .../openai-agents/custom-span.test.ts | 10 +- .../openai-agents/data-extraction.test.ts | 161 ++++++++++++++++++ .../openai-agents/integration.test.ts | 158 ++++++++++++++++- .../openai-agents/tracing-processor.test.ts | 2 +- 7 files changed, 428 insertions(+), 28 deletions(-) diff --git a/src/handlers/openai-agents/custom-span.ts b/src/handlers/openai-agents/custom-span.ts index 661fe9a0..41a035bf 100644 --- a/src/handlers/openai-agents/custom-span.ts +++ b/src/handlers/openai-agents/custom-span.ts @@ -1,5 +1,22 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ +/** + * Duck-typed interface describing the expected shape of a Galileo span object + * that can be injected into the OpenAI Agents tracing flow. + * + * Mirrors the fields extracted by galileo-python's GalileoCustomSpan handler: + * input, output, metadata (user_metadata), tags, status_code, and type. + */ +export interface GalileoSpanLike { + type?: string; + input?: unknown; + output?: unknown; + name?: string; + metadata?: Record; + tags?: string[]; + statusCode?: number; +} + /** * A lightweight subtype of CustomSpanData that carries a reference to a * pre-configured GalileoSpan so it can be injected into the agent tracing flow. @@ -13,7 +30,7 @@ export interface GalileoCustomSpanData { /** (Optional) Display name for the span. */ name?: string; /** Arbitrary data payload, must contain a 'galileoSpan' key with the GalileoSpan reference. */ - data: Record & { galileoSpan: unknown }; + data: Record & { galileoSpan: GalileoSpanLike }; /** Sentinel flag used internally by mapSpanType() to identify this type. */ __galileoCustom: true; } @@ -26,7 +43,7 @@ export interface GalileoCustomSpanData { * @returns A GalileoCustomSpanData object. 
*/ export function createGalileoCustomSpanData( - galileoSpan: unknown, + galileoSpan: GalileoSpanLike, name?: string, extraData?: Record ): GalileoCustomSpanData { diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index af29dc85..5f2d711d 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -1,4 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ +import type { GalileoSpanLike } from './custom-span'; +import type { NodeType } from './node'; /** * Normalised token count structure returned by parseUsage. @@ -262,3 +264,61 @@ export function extractWorkflowData( return { input: '', output: undefined, metadata: {} }; } + +const VALID_GALILEO_NODE_TYPES: readonly string[] = [ + 'tool', + 'workflow', + 'agent' +]; + +/** + * Extracts span parameters from a GalileoCustomSpanData, delegating to the + * inner galileoSpan for input, output, metadata, tags, statusCode, and type. + * + * @param spanData - The span data object (must have __galileoCustom: true). + * @returns The effective node type and extracted parameters. + */ +export function extractGalileoCustomData(spanData: Record): { + nodeType: NodeType; + params: Record; +} { + const data = (spanData.data as Record | undefined) ?? {}; + const galileoSpan = data.galileoSpan as GalileoSpanLike | undefined; + + if (!galileoSpan || typeof galileoSpan !== 'object') { + return { nodeType: 'workflow', params: extractWorkflowData(spanData) }; + } + + const input = + galileoSpan.input !== undefined + ? typeof galileoSpan.input === 'string' + ? galileoSpan.input + : JSON.stringify(galileoSpan.input) + : ''; + const output = + galileoSpan.output !== undefined + ? typeof galileoSpan.output === 'string' + ? galileoSpan.output + : JSON.stringify(galileoSpan.output) + : undefined; + const metadata = galileoSpan.metadata ?? 
{}; + const tags = galileoSpan.tags; + const statusCode = galileoSpan.statusCode; + + const nodeType: NodeType = + typeof galileoSpan.type === 'string' && + VALID_GALILEO_NODE_TYPES.includes(galileoSpan.type) + ? (galileoSpan.type as NodeType) + : 'workflow'; + + return { + nodeType, + params: { + input, + output, + metadata, + ...(tags !== undefined ? { tags } : {}), + ...(statusCode !== undefined ? { statusCode } : {}) + } + }; +} diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index a00c4763..d3d868ec 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -9,7 +9,8 @@ import { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping'; import { extractLlmData, extractToolData, - extractWorkflowData + extractWorkflowData, + extractGalileoCustomData } from './data-extraction'; import { extractEmbeddedToolCalls, @@ -17,7 +18,8 @@ import { } from './embedded-tools'; import { createGalileoCustomSpanData, - type GalileoCustomSpanData + type GalileoCustomSpanData, + type GalileoSpanLike } from './custom-span'; import { getSdkLogger } from 'galileo-generated'; const sdkLogger = getSdkLogger(); @@ -210,24 +212,31 @@ export class GalileoTracingProcessor implements TracingProcessor { const spanType = mapSpanType(spanData); const spanName = mapSpanName(spanData, spanType); - // Determine effective node type — galileo_custom delegates to inner span - const nodeType = spanType === GALILEO_CUSTOM_TYPE ? 'workflow' : spanType; - - // Extract initial data based on span type let initialParams: Record = { name: spanName, startedAt: span.startedAt || new Date().toISOString() }; - if (nodeType === 'llm') { + // Determine effective node type and extract data. + // galileo_custom delegates to the inner galileoSpan for type + fields. 
+ let nodeType: Node['nodeType']; + + if (spanType === GALILEO_CUSTOM_TYPE) { + const custom = extractGalileoCustomData(spanData); + nodeType = custom.nodeType; + initialParams = { ...initialParams, ...custom.params }; + } else if (spanType === 'llm') { + nodeType = 'llm'; initialParams = { ...initialParams, ...extractLlmData(spanData) }; - } else if (nodeType === 'tool') { + } else if (spanType === 'tool') { + nodeType = 'tool'; initialParams = { ...initialParams, ...extractToolData(spanData) }; + } else if (spanType === 'agent') { + nodeType = 'agent'; + initialParams = { ...initialParams, ...extractWorkflowData(spanData) }; } else { - initialParams = { - ...initialParams, - ...extractWorkflowData(spanData) - }; + nodeType = 'workflow'; + initialParams = { ...initialParams, ...extractWorkflowData(spanData) }; } // Determine parent ID (prefer explicit parentId, fallback to traceId) @@ -243,7 +252,7 @@ export class GalileoTracingProcessor implements TracingProcessor { } const node = createNode({ - nodeType: nodeType as Node['nodeType'], + nodeType, spanParams: initialParams, runId: span.spanId, parentRunId: parentId @@ -361,6 +370,7 @@ export class GalileoTracingProcessor implements TracingProcessor { const durationNs = (params.durationNs as number | undefined) ?? 0; const metadata = (params.metadata as Record | undefined) ?? {}; + const tags = (params.tags as string[] | undefined) ?? undefined; const statusCode = (params.statusCode as number | undefined) ?? 200; const input = params.input !== undefined ? 
String(params.input) : ''; const output = @@ -436,6 +446,7 @@ export class GalileoTracingProcessor implements TracingProcessor { durationNs, statusCode, metadata, + tags, createdAt: startedAt }); } else if (node.nodeType === 'agent') { @@ -445,6 +456,7 @@ export class GalileoTracingProcessor implements TracingProcessor { name, durationNs, metadata, + tags, createdAt: startedAt, agentType: extractAgentType(params), statusCode @@ -457,6 +469,7 @@ export class GalileoTracingProcessor implements TracingProcessor { name, durationNs, metadata, + tags, createdAt: startedAt }); } @@ -486,7 +499,7 @@ export class GalileoTracingProcessor implements TracingProcessor { * @returns A GalileoCustomSpanData object that can be passed to the OpenAI Agents SDK. */ static addGalileoCustomSpan( - galileoSpan: unknown, + galileoSpan: GalileoSpanLike, name?: string, extraData?: Record ): GalileoCustomSpanData { @@ -521,13 +534,14 @@ export async function registerGalileoTraceProcessor(options?: { } export { createGalileoCustomSpanData as GalileoCustomSpan } from './custom-span'; -export type { GalileoCustomSpanData } from './custom-span'; +export type { GalileoCustomSpanData, GalileoSpanLike } from './custom-span'; export type { Node, NodeType } from './node'; export { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping'; export { extractLlmData, extractToolData, extractWorkflowData, + extractGalileoCustomData, parseUsage } from './data-extraction'; export { diff --git a/tests/handlers/openai-agents/custom-span.test.ts b/tests/handlers/openai-agents/custom-span.test.ts index f6a425cc..5ef81153 100644 --- a/tests/handlers/openai-agents/custom-span.test.ts +++ b/tests/handlers/openai-agents/custom-span.test.ts @@ -16,7 +16,7 @@ describe('createGalileoCustomSpanData()', () => { }); test('test creates span with name parameter', () => { - const galileoSpan = { test: 'data' }; + const galileoSpan = { type: 'tool', input: 'data' }; const result = 
createGalileoCustomSpanData(galileoSpan, 'My Custom Span'); expect(result.name).toBe('My Custom Span'); @@ -24,7 +24,7 @@ describe('createGalileoCustomSpanData()', () => { }); test('test creates span with extraData', () => { - const galileoSpan = { test: 'data' }; + const galileoSpan = { type: 'tool', input: 'data' }; const extraData = { key1: 'value1', key2: 42 }; const result = createGalileoCustomSpanData( galileoSpan, @@ -65,7 +65,7 @@ describe('createGalileoCustomSpanData()', () => { }); test('test extraData merges correctly with galileoSpan', () => { - const galileoSpan = { id: 'span-1' }; + const galileoSpan = { name: 'span-1' }; const extraData = { tag1: 'tag', tag2: 'meta' }; const result = createGalileoCustomSpanData( galileoSpan, @@ -76,12 +76,12 @@ describe('createGalileoCustomSpanData()', () => { expect(result.data).toEqual({ tag1: 'tag', tag2: 'meta', - galileoSpan: { id: 'span-1' } + galileoSpan: { name: 'span-1' } }); }); test('test handles empty extraData', () => { - const galileoSpan = { test: 'data' }; + const galileoSpan = { type: 'tool', input: 'data' }; const result = createGalileoCustomSpanData(galileoSpan, undefined, {}); expect(result.data.galileoSpan).toBe(galileoSpan); diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index a232b775..5940686a 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -2,6 +2,7 @@ import { extractLlmData, extractToolData, extractWorkflowData, + extractGalileoCustomData, parseUsage } from '../../../src/handlers/openai-agents/data-extraction'; @@ -317,3 +318,163 @@ describe('extractWorkflowData', () => { expect(result.output).toBeUndefined(); }); }); + +describe('extractGalileoCustomData', () => { + test('test extracts tool type from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { + type: 'tool', + input: 
'tool input', + output: 'tool output', + metadata: { key: 'val' }, + tags: ['tag1'], + statusCode: 201 + } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('tool'); + expect(result.params.input).toBe('tool input'); + expect(result.params.output).toBe('tool output'); + expect(result.params.metadata).toEqual({ key: 'val' }); + expect(result.params.tags).toEqual(['tag1']); + expect(result.params.statusCode).toBe(201); + }); + + test('test extracts workflow type from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { + type: 'workflow', + input: 'wf in', + output: 'wf out' + } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + expect(result.params.input).toBe('wf in'); + expect(result.params.output).toBe('wf out'); + }); + + test('test extracts agent type from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { + type: 'agent', + input: 'agent in' + } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('agent'); + expect(result.params.input).toBe('agent in'); + }); + + test('test falls back to workflow for unrecognized galileoSpan type', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { type: 'future_type', input: 'x' } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + expect(result.params.input).toBe('x'); + }); + + test('test falls back to workflow for llm type (not delegated)', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { type: 'llm', input: 'prompt' } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + }); + + test('test falls back to extractWorkflowData when no galileoSpan', () => { + const 
spanData = { + type: 'custom', + __galileoCustom: true, + data: { input: 'plain input', output: 'plain output' } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + expect(result.params.input).toBe('plain input'); + expect(result.params.output).toBe('plain output'); + }); + + test('test falls back to extractWorkflowData when galileoSpan is not an object', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { galileoSpan: 'not-an-object' } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + }); + + test('test serializes object input/output from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { + type: 'tool', + input: { query: 'hello' }, + output: { answer: 'world' } + } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.params.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.params.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test omits tags and statusCode when not provided', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { type: 'tool', input: 'in' } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.params).not.toHaveProperty('tags'); + expect(result.params).not.toHaveProperty('statusCode'); + }); + + test('test handles missing data field gracefully', () => { + const spanData = { + type: 'custom', + __galileoCustom: true + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + }); + + test('test defaults to empty input when galileoSpan has no input', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { type: 'tool' } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.params.input).toBe(''); + 
expect(result.params.output).toBeUndefined(); + expect(result.params.metadata).toEqual({}); + }); +}); diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index b5611c9e..993a5ba5 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -213,7 +213,7 @@ describe('Multi-agent integration flows', () => { } }); - test('test custom spans mixed with regular spans', async () => { + test('test galileo_custom span delegates to inner galileoSpan as tool', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); @@ -232,7 +232,16 @@ describe('Multi-agent integration flows', () => { spanData: { type: 'custom', __galileoCustom: true, - data: { input: 'test', output: 'result' } + data: { + galileoSpan: { + type: 'tool', + input: 'custom tool input', + output: 'custom tool output', + metadata: { source: 'test' }, + tags: ['custom-tag'], + statusCode: 200 + } + } } }); @@ -250,13 +259,152 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(agent); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); // agent - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // custom (galileo_custom → workflow) + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); - // conclude is called for all non-root workflow/agent spans + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.input).toBe('custom tool input'); + expect(toolCall.output).toBe('custom tool output'); + expect(toolCall.metadata).toEqual({ source: 'test' }); + expect(toolCall.tags).toEqual(['custom-tag']); + }); + + test('test galileo_custom span with workflow type', async () => { + const 
mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'agent-001', + spanData: { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { + type: 'workflow', + input: 'wf input', + output: 'wf output' + } + } + } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(wfCall.input).toBe('wf input'); + expect(wfCall.output).toBe('wf output'); expect(mockLogger.conclude).toHaveBeenCalled(); }); + test('test galileo_custom span with agent type', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'trace-001', + spanData: { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { + type: 'agent', + input: 'agent input', + output: 'agent output', + metadata: { role: 'planner' } + } + } + } + }); + + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentCall.input).toBe('agent input'); + expect(agentCall.output).toBe('agent output'); + expect(agentCall.metadata).toEqual({ role: 'planner' }); + }); + + test('test galileo_custom span without 
galileoSpan falls back to workflow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'trace-001', + spanData: { + type: 'custom', + __galileoCustom: true, + data: { input: 'fallback input', output: 'fallback output' } + } + }); + + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(wfCall.input).toBe('fallback input'); + expect(wfCall.output).toBe('fallback output'); + }); + + test('test galileo_custom span with unrecognized type falls back to workflow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'trace-001', + spanData: { + type: 'custom', + __galileoCustom: true, + data: { + galileoSpan: { + type: 'unknown_future_type', + input: 'some input' + } + } + } + }); + + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(wfCall.input).toBe('some input'); + }); + test('test error in middle of flow handled', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index 32f5ecce..d5372aa6 100644 --- 
a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -302,7 +302,7 @@ describe('GalileoTracingProcessor lifecycle', () => { }); test('test addGalileoCustomSpan creates a GalileoCustomSpanData', () => { - const mockSpan = { id: 'span-xyz' }; + const mockSpan = { type: 'tool', name: 'span-xyz' }; const result = GalileoTracingProcessor.addGalileoCustomSpan( mockSpan, 'MyCustom' From a0f4289a4ae4d84304984705ecaf4bd68d68a8a0 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 5 Mar 2026 17:55:27 -0300 Subject: [PATCH 10/21] fix(agents): Added statusCode propagation for workflow spans. --- src/handlers/openai-agents/index.ts | 5 +- src/utils/galileo-logger.ts | 3 + .../openai-agents/integration.test.ts | 145 ++++++++++++++++++ 3 files changed, 151 insertions(+), 2 deletions(-) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index d3d868ec..cb6af9ba 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -470,7 +470,8 @@ export class GalileoTracingProcessor implements TracingProcessor { durationNs, metadata, tags, - createdAt: startedAt + createdAt: startedAt, + statusCode }); } @@ -487,7 +488,7 @@ export class GalileoTracingProcessor implements TracingProcessor { !firstNode && (node.nodeType === 'workflow' || node.nodeType === 'agent') ) { - this._galileoLogger.conclude({ output, durationNs }); + this._galileoLogger.conclude({ output, durationNs, statusCode }); } } diff --git a/src/utils/galileo-logger.ts b/src/utils/galileo-logger.ts index 7faebd97..ad7a3b85 100644 --- a/src/utils/galileo-logger.ts +++ b/src/utils/galileo-logger.ts @@ -1115,6 +1115,7 @@ class GalileoLogger implements IGalileoLogger { * @param options.createdAt - (Optional) The timestamp when the span was created. * @param options.metadata - (Optional) Additional metadata as key-value pairs. * @param options.tags - (Optional) Array of tags to categorize the span. 
+ * @param options.statusCode - (Optional) HTTP status code or execution status (e.g., 200 for success, 500 for error). * @param options.stepNumber - (Optional) The step number in a multi-step process. * @returns The created workflow span. */ @@ -1128,6 +1129,7 @@ class GalileoLogger implements IGalileoLogger { createdAt?: Date; metadata?: Record; tags?: string[]; + statusCode?: number; stepNumber?: number; }): WorkflowSpan { const span = new WorkflowSpan({ @@ -1139,6 +1141,7 @@ class GalileoLogger implements IGalileoLogger { createdAt: options.createdAt || GalileoApiClient.getTimestampRecord(), metadata: options.metadata, tags: options.tags, + statusCode: options.statusCode, metrics: new Metrics({ durationNs: options.durationNs }), stepNumber: options.stepNumber }); diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index 993a5ba5..bae074c9 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -562,3 +562,148 @@ describe('Output tracking integration', () => { ).toBe(true); }); }); + +describe('Workflow span statusCode propagation', () => { + test('test workflow span statusCode passed to addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create a workflow span (handoff type maps to workflow nodeType) + const workflow = makeSpan({ + spanId: 'workflow-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' } + }); + + // Create a successful child LLM span + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'workflow-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'successful response' + }, + error: null + }); + + await processor.onSpanStart(workflow); + await 
processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(workflow); + await processor.onTraceEnd(trace); + + // Verify addWorkflowSpan was called (note: statusCode may be 200 by default) + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const workflowSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + // Verify statusCode parameter is being passed through (defaults to 200 for success) + expect(workflowSpanCall.statusCode).toBe(200); + }); + + test('test workflow span with direct error has statusCode 500', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create a workflow span that itself has an error + const workflowWithError = makeSpan({ + spanId: 'workflow-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' }, + error: { + message: 'Workflow execution failed', + data: { reason: 'timeout' } + } + }); + + await processor.onSpanStart(workflowWithError); + await processor.onSpanEnd(workflowWithError); + await processor.onTraceEnd(trace); + + // Verify addWorkflowSpan was called with statusCode 500 + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const workflowSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(workflowSpanCall.statusCode).toBe(500); + }); + + test('test agent span statusCode passed to addAgentSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create an agent span + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'TestAgent' } + }); + + // Create a child LLM span + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + 
spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'test output' + }, + error: null + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + // Verify addAgentSpan was called with statusCode parameter + expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + const agentSpanCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(agentSpanCall.statusCode).toBe(200); + }); + + test('test conclude called with statusCode for workflow spans', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create nested workflow spans to test conclude calls + const outerWorkflow = makeSpan({ + spanId: 'workflow-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' } + }); + + const innerWorkflow = makeSpan({ + spanId: 'workflow-002', + parentId: 'workflow-001', + spanData: { type: 'custom', name: 'InnerWorkflow' } + }); + + await processor.onSpanStart(outerWorkflow); + await processor.onSpanStart(innerWorkflow); + await processor.onSpanEnd(innerWorkflow); + await processor.onSpanEnd(outerWorkflow); + await processor.onTraceEnd(trace); + + // Verify conclude was called for the workflow spans + expect(mockLogger.conclude).toHaveBeenCalled(); + // Find calls that pass statusCode + const concludeCalls = mockLogger.conclude.mock.calls; + const callsWithStatusCode = concludeCalls.filter( + (call) => call[0]?.statusCode !== undefined + ); + expect(callsWithStatusCode.length).toBeGreaterThan(0); + }); +}); From cf052caedd239dbb8c809d142c666e2e9850dfbe Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 12 Mar 2026 15:48:01 -0300 Subject: [PATCH 11/21] feature(examples): Added example for openai-agents. 
--- examples/openai/agents.js | 75 +++++++++++++++++++++++++++++++++++++++ examples/package.json | 5 ++- 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 examples/openai/agents.js diff --git a/examples/openai/agents.js b/examples/openai/agents.js new file mode 100644 index 00000000..6e1e4174 --- /dev/null +++ b/examples/openai/agents.js @@ -0,0 +1,75 @@ +import dotenv from 'dotenv'; +import { z } from 'zod'; +import { Agent, run, tool } from '@openai/agents'; +import { + init, + flush, + registerGalileoTraceProcessor +} from '../../dist/index.js'; + +dotenv.config(); + +await init({ + projectName: 'openai-agents-example' +}); + +await registerGalileoTraceProcessor(); + +const triageAgent = new Agent({ + name: 'Triage Agent', + instructions: + 'You determine which agent should handle the user request. ' + + 'If the question is about weather, hand off to the Weather Agent. ' + + 'Otherwise, answer the question yourself.', + handoffs: [] // populated below after declaring weatherAgent +}); + +const weatherAgent = new Agent({ + name: 'Weather Agent', + instructions: + 'You provide weather information. ' + + 'Given a city name, respond with a short, friendly weather summary. ' + + 'Make up plausible weather data for demonstration purposes.', + tools: [ + tool({ + name: 'get_weather', + description: 'Get the current weather for a city', + parameters: z.object({ + city: z.string().describe('The city to get weather for') + }), + execute: async (params) => { + const { city } = params; + const temps = { london: 14, tokyo: 22, 'new york': 18, paris: 16 }; + const temp = + temps[city.toLowerCase()] ?? Math.floor(Math.random() * 30); + return JSON.stringify({ + city, + temperature_c: temp, + condition: temp > 20 ? 
'Sunny' : 'Partly cloudy' + }); + } + }) + ] +}); + +triageAgent.handoffs.push(weatherAgent); + +async function main() { + console.log('=== OpenAI Agents SDK + Galileo Tracing ===\n'); + + console.log('--- Simple single-agent run ---'); + const simpleResult = await run(triageAgent, 'What is 2 + 2?'); + console.log('Response:', simpleResult.finalOutput, '\n'); + + console.log('--- Handoff + tool call run ---'); + const weatherResult = await run(triageAgent, "What's the weather in Tokyo?"); + console.log('Response:', weatherResult.finalOutput, '\n'); + + await flush(); + console.log('Done — traces flushed to Galileo.'); +} + +main().catch((err) => { + console.error('Unhandled error:', err); + process.exit(1); +}); diff --git a/examples/package.json b/examples/package.json index bd6ef0ce..5eabced4 100644 --- a/examples/package.json +++ b/examples/package.json @@ -12,8 +12,11 @@ "@langchain/community": "^0.3.18", "@langchain/core": "^0.3.13", "@langchain/openai": "^0.3.11", + "@openai/agents": "^0.7.0", "@rungalileo/galileo": "file:..", "dotenv": "^16.4.5", - "typecript": "^0.0.1-security" + "openai": "^6.26.0", + "typecript": "^0.0.1-security", + "zod": "^4.0.0" } } From 1597d880e882cb867886717d2a53f015cf35cf70 Mon Sep 17 00:00:00 2001 From: richter Date: Tue, 17 Mar 2026 20:58:53 -0300 Subject: [PATCH 12/21] fix(workflow): Refactored agent span processing to mirror current Python process (should be reviewed, agent span processed as workflow). 
--- src/handlers/openai-agents/data-extraction.ts | 4 +- src/handlers/openai-agents/index.ts | 40 ++- .../openai-agents/data-extraction.test.ts | 6 +- .../openai-agents/integration.test.ts | 32 +- .../openai-agents/tracing-processor.test.ts | 293 +++++++++--------- 5 files changed, 199 insertions(+), 176 deletions(-) diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index 5f2d711d..25113e12 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -227,8 +227,8 @@ export function extractWorkflowData( const from = String((spanData.from_agent as string | undefined) ?? ''); const to = String((spanData.to_agent as string | undefined) ?? ''); return { - input: from, - output: to, + input: from ? JSON.stringify({ from_agent: from }) : '', + output: to ? JSON.stringify({ to_agent: to }) : undefined, metadata: { from_agent: from, to_agent: to diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index cb6af9ba..9dfcb178 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -68,7 +68,11 @@ export interface TracingProcessor { /** * Maps an OpenAI agent type string to a Galileo AgentType enum value. * Returns undefined when no agentType is present so addAgentSpan() can use its default. + * + * Currently not being used because of parity with galileo-python (which used workflow instead) + * Ts and Py have to be updated simultaneously. 
*/ +// eslint-disable-next-line @typescript-eslint/no-unused-vars function extractAgentType( spanParams: Record ): AgentType | undefined { @@ -299,6 +303,17 @@ export class GalileoTracingProcessor implements TracingProcessor { // Refresh LLM data at end (usage may be populated now) const finalData = extractLlmData(spanData); node.spanParams = { ...node.spanParams, ...finalData }; + } else if (spanData.type === 'handoff') { + // to_agent is set on the span AFTER span.start() fires (inside withHandoffSpan's fn), + // so we must re-extract at span end to capture the populated to_agent value. + // Also re-compute the name so it reflects the final to_agent. + const refreshed = extractWorkflowData(spanData); + const refreshedName = mapSpanName(spanData, 'workflow'); + node.spanParams = { + ...node.spanParams, + ...refreshed, + name: refreshedName + }; } // Handle errors @@ -450,21 +465,20 @@ export class GalileoTracingProcessor implements TracingProcessor { createdAt: startedAt }); } else if (node.nodeType === 'agent') { - this._galileoLogger.addAgentSpan({ - input, + this._galileoLogger.addWorkflowSpan({ + input: input || 'Workflow Step', output, name, durationNs, metadata, tags, createdAt: startedAt, - agentType: extractAgentType(params), statusCode }); } else { // workflow and other parent nodes this._galileoLogger.addWorkflowSpan({ - input, + input: input || 'Workflow Step', output, name, durationNs, @@ -483,12 +497,26 @@ export class GalileoTracingProcessor implements TracingProcessor { } } - // Conclude workflow/agent spans after their children + // Conclude workflow/agent spans after their children. + // When the span itself has no output (always the case for agent spans, since + // AgentSpanData carries no output field), fall back to the last child's output. 
if ( !firstNode && (node.nodeType === 'workflow' || node.nodeType === 'agent') ) { - this._galileoLogger.conclude({ output, durationNs, statusCode }); + let concludeOutput = output; + if (concludeOutput === undefined && node.children.length > 0) { + const lastChildId = node.children[node.children.length - 1]; + const lastChild = this._nodes.get(lastChildId); + if (lastChild?.spanParams.output !== undefined) { + concludeOutput = String(lastChild.spanParams.output); + } + } + this._galileoLogger.conclude({ + output: concludeOutput, + durationNs, + statusCode + }); } } diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index 5940686a..1d956a73 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -265,8 +265,8 @@ describe('extractWorkflowData', () => { to_agent: 'AgentB' }; const result = extractWorkflowData(spanData); - expect(result.input).toBe('AgentA'); - expect(result.output).toBe('AgentB'); + expect(result.input).toBe('{"from_agent":"AgentA"}'); + expect(result.output).toBe('{"to_agent":"AgentB"}'); const meta = result.metadata as Record; expect(meta.from_agent).toBe('AgentA'); expect(meta.to_agent).toBe('AgentB'); @@ -275,7 +275,7 @@ describe('extractWorkflowData', () => { test('test extract handoff span data with missing agents', () => { const result = extractWorkflowData({ type: 'handoff' }); expect(result.input).toBe(''); - expect(result.output).toBe(''); + expect(result.output).toBeUndefined(); }); test('test extract custom span data with input and output', () => { diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index bae074c9..3871d66a 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -82,9 +82,9 @@ describe('Multi-agent integration flows', () => { await 
processor.onSpanEnd(planner); await processor.onTraceEnd(trace); - // Verify all spans logged: 2 agents use addAgentSpan, 1 handoff uses addWorkflowSpan - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(2); - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + // Verify all spans logged: 2 agents + 1 handoff all use addWorkflowSpan + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); + expect(mockLogger.addAgentSpan).not.toHaveBeenCalled(); }); test('test agent->tool->llm->tool flow', async () => { @@ -128,7 +128,7 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(agent); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); // agent + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan) expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(2); // 2 tools expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // 1 llm }); @@ -259,7 +259,7 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(agent); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan) expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); @@ -305,8 +305,10 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(agent); await processor.onTraceEnd(trace); - expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); - const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + // addWorkflowSpan called twice: once for the agent container, once for the custom workflow span + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(2); + // The custom workflow span is the first child logged (index 1 after agent at index 0) + const wfCall = mockLogger.addWorkflowSpan.mock.calls[1][0]; expect(wfCall.input).toBe('wf input'); 
expect(wfCall.output).toBe('wf output'); expect(mockLogger.conclude).toHaveBeenCalled(); @@ -340,8 +342,8 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(customSpan); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; expect(agentCall.input).toBe('agent input'); expect(agentCall.output).toBe('agent output'); expect(agentCall.metadata).toEqual({ role: 'planner' }); @@ -509,8 +511,8 @@ describe('Multi-agent integration flows', () => { await processor.onSpanEnd(rootAgent); await processor.onTraceEnd(trace); - // Verify all spans logged: 3 agents use addAgentSpan - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(3); + // Verify all spans logged: 3 agents use addWorkflowSpan + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); // conclude is called for all non-root workflow/agent spans @@ -632,7 +634,7 @@ describe('Workflow span statusCode propagation', () => { expect(workflowSpanCall.statusCode).toBe(500); }); - test('test agent span statusCode passed to addAgentSpan', async () => { + test('test agent span statusCode passed to addWorkflowSpan', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); @@ -665,9 +667,9 @@ describe('Workflow span statusCode propagation', () => { await processor.onSpanEnd(agent); await processor.onTraceEnd(trace); - // Verify addAgentSpan was called with statusCode parameter - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); - const agentSpanCall = mockLogger.addAgentSpan.mock.calls[0][0]; + // Verify addWorkflowSpan was called with statusCode 
parameter + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const agentSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; expect(agentSpanCall.statusCode).toBe(200); }); diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index d5372aa6..6bf70699 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -3,7 +3,6 @@ import type { AgentTrace, AgentSpan } from '../../../src/handlers/openai-agents'; -import { AgentType } from '../../../src/types/new-api.types'; // Helper to build a mock AgentTrace function makeTrace(overrides: Partial = {}): AgentTrace { @@ -122,7 +121,7 @@ describe('GalileoTracingProcessor lifecycle', () => { expect(toolCall.name).toBe('search_tool'); }); - test('test full trace with agent span calls addAgentSpan and conclude', async () => { + test('test full trace with agent span calls addWorkflowSpan and conclude', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); @@ -140,8 +139,8 @@ describe('GalileoTracingProcessor lifecycle', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; expect(agentCall.name).toBe('PlannerAgent'); // conclude is called for agent spans expect(mockLogger.conclude).toHaveBeenCalled(); @@ -238,7 +237,7 @@ describe('GalileoTracingProcessor lifecycle', () => { await processor.onSpanEnd(agentSpan); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); 
expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // conclude called for agent span expect(mockLogger.conclude).toHaveBeenCalled(); @@ -404,7 +403,7 @@ describe('Span tree construction edge cases', () => { // All should be logged expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); // agent + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan) expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); }); @@ -680,7 +679,7 @@ describe('Error handling and recovery', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; const meta = agentCall.metadata as Record; expect(meta.error_message).toBe('Error occurred'); }); @@ -836,7 +835,7 @@ describe('Metadata handling and serialization', () => { }); describe('Agent span emission', () => { - test('test agent span calls addAgentSpan not addWorkflowSpan', async () => { + test('test agent span uses addWorkflowSpan not addAgentSpan', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); @@ -852,11 +851,11 @@ describe('Agent span emission', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); - expect(mockLogger.addWorkflowSpan).not.toHaveBeenCalled(); + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addAgentSpan).not.toHaveBeenCalled(); }); - test('test agent span passes name and output correctly', async () => { + test('test agent span passes name correctly', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = 
makeTrace(); @@ -872,30 +871,10 @@ describe('Agent span emission', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; expect(agentCall.name).toBe('RouterAgent'); }); - test('test agent span with no agentType passes undefined agentType', async () => { - const mockLogger = createMockLogger(); - const processor = new GalileoTracingProcessor(mockLogger as never, false); - const trace = makeTrace(); - - const span = makeSpan({ - spanId: 'agent-span-001', - parentId: 'trace-001', - spanData: { type: 'agent', name: 'Agent' } - }); - - await processor.onTraceStart(trace); - await processor.onSpanStart(span); - await processor.onSpanEnd(span); - await processor.onTraceEnd(trace); - - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.agentType).toBeUndefined(); - }); - test('test agent span conclude is called after children', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); @@ -920,133 +899,119 @@ describe('Agent span emission', () => { await processor.onSpanEnd(agentSpan); await processor.onTraceEnd(trace); - expect(mockLogger.addAgentSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); expect(mockLogger.conclude).toHaveBeenCalled(); }); - test('test agent span error passes statusCode 500 as direct field', async () => { + test('test agent span conclude receives last child output as fallback', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); - const span = makeSpan({ - spanId: 'agent-err-001', + const agentSpan = makeSpan({ + spanId: 'agent-001', parentId: 'trace-001', - error: { message: 'Agent failed' }, - spanData: 
{ type: 'agent', name: 'FailingAgent' } + spanData: { type: 'agent', name: 'MyAgent' } }); - await processor.onTraceStart(trace); - await processor.onSpanStart(span); - await processor.onSpanEnd(span); - await processor.onTraceEnd(trace); - - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - // statusCode is passed as a direct field, not folded into metadata - expect(agentCall.statusCode).toBe(500); - const meta = agentCall.metadata as Record; - expect(meta.error_message).toBe('Agent failed'); - expect(meta.status_code).toBeUndefined(); - }); - - test('test agent span without error passes statusCode 200', async () => { - const mockLogger = createMockLogger(); - const processor = new GalileoTracingProcessor(mockLogger as never, false); - const trace = makeTrace(); - - const span = makeSpan({ - spanId: 'agent-ok-001', - parentId: 'trace-001', - spanData: { type: 'agent', name: 'HappyAgent' } + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { + type: 'generation', + model: 'gpt-4o', + output: 'Final answer from LLM' + } }); await processor.onTraceStart(trace); - await processor.onSpanStart(span); - await processor.onSpanEnd(span); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); await processor.onTraceEnd(trace); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.statusCode).toBe(200); - const meta = agentCall.metadata as Record; - expect(meta.status_code).toBeUndefined(); + // addWorkflowSpan is called before children — output is undefined at that point + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentCall.output).toBeUndefined(); + + // conclude for the agent span (first conclude call) should carry the LLM child's output + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBe('"Final answer from LLM"'); }); 
-}); -describe('Agent type extraction', () => { - test('test agent span with planner agentType is passed to addAgentSpan', async () => { + test('test agent span conclude receives last of multiple children outputs', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); - const span = makeSpan({ - spanId: 'agent-planner-001', + const agentSpan = makeSpan({ + spanId: 'agent-001', parentId: 'trace-001', - spanData: { type: 'agent', name: 'PlannerAgent', agentType: 'planner' } + spanData: { type: 'agent', name: 'MyAgent' } }); - await processor.onTraceStart(trace); - await processor.onSpanStart(span); - await processor.onSpanEnd(span); - await processor.onTraceEnd(trace); - - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.agentType).toBe(AgentType.PLANNER); - }); - - test('test agent span with router agentType is passed to addAgentSpan', async () => { - const mockLogger = createMockLogger(); - const processor = new GalileoTracingProcessor(mockLogger as never, false); - const trace = makeTrace(); + const toolSpan = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'my_tool', output: 'Tool result' } + }); - const span = makeSpan({ - spanId: 'agent-router-001', - parentId: 'trace-001', - spanData: { type: 'agent', name: 'RouterAgent', agentType: 'router' } + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { + type: 'generation', + model: 'gpt-4o', + output: 'LLM final response' + } }); await processor.onTraceStart(trace); - await processor.onSpanStart(span); - await processor.onSpanEnd(span); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(toolSpan); + await processor.onSpanEnd(toolSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); await 
processor.onTraceEnd(trace); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.agentType).toBe(AgentType.ROUTER); + // The conclude for the agent span should use the last child (LLM), not the tool + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBe('"LLM final response"'); }); - test('test agent span with uppercase agentType is normalized', async () => { + test('test agent span conclude uses undefined when no children have output', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); - const span = makeSpan({ - spanId: 'agent-sup-001', + const agentSpan = makeSpan({ + spanId: 'agent-001', parentId: 'trace-001', - spanData: { - type: 'agent', - name: 'SupervisorAgent', - agentType: 'SUPERVISOR' - } + spanData: { type: 'agent', name: 'EmptyAgent' } }); await processor.onTraceStart(trace); - await processor.onSpanStart(span); - await processor.onSpanEnd(span); + await processor.onSpanStart(agentSpan); + await processor.onSpanEnd(agentSpan); await processor.onTraceEnd(trace); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.agentType).toBe(AgentType.SUPERVISOR); + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBeUndefined(); }); - test('test agent span with unknown agentType defaults to default', async () => { + test('test agent span error passes statusCode 500 as direct field', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); const span = makeSpan({ - spanId: 'agent-unknown-001', + spanId: 'agent-err-001', parentId: 'trace-001', - spanData: { type: 'agent', name: 'WeirdAgent', agentType: 'unknown_type' } + error: { message: 'Agent failed' }, + spanData: { type: 'agent', name: 'FailingAgent' } }); await 
processor.onTraceStart(trace); @@ -1054,19 +1019,23 @@ describe('Agent type extraction', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.agentType).toBe(AgentType.DEFAULT); + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + // statusCode is passed as a direct field, not folded into metadata + expect(agentCall.statusCode).toBe(500); + const meta = agentCall.metadata as Record; + expect(meta.error_message).toBe('Agent failed'); + expect(meta.status_code).toBeUndefined(); }); - test('test agent span with missing agentType returns undefined', async () => { + test('test agent span without error passes statusCode 200', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); const span = makeSpan({ - spanId: 'agent-notype-001', + spanId: 'agent-ok-001', parentId: 'trace-001', - spanData: { type: 'agent' } + spanData: { type: 'agent', name: 'HappyAgent' } }); await processor.onTraceStart(trace); @@ -1074,41 +1043,10 @@ describe('Agent type extraction', () => { await processor.onSpanEnd(span); await processor.onTraceEnd(trace); - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.agentType).toBeUndefined(); - }); - - test('test all known agentType values map correctly', async () => { - const typeMap: Array<{ input: string; expected: string }> = [ - { input: 'classifier', expected: AgentType.CLASSIFIER }, - { input: 'planner', expected: AgentType.PLANNER }, - { input: 'react', expected: AgentType.REACT }, - { input: 'reflection', expected: AgentType.REFLECTION }, - { input: 'router', expected: AgentType.ROUTER }, - { input: 'supervisor', expected: AgentType.SUPERVISOR }, - { input: 'judge', expected: AgentType.JUDGE }, - { input: 'default', expected: AgentType.DEFAULT } - ]; - - for (const { input, expected } of typeMap) { - 
const mockLogger = createMockLogger(); - const processor = new GalileoTracingProcessor(mockLogger as never, false); - const trace = makeTrace(); - - const span = makeSpan({ - spanId: `agent-${input}-001`, - parentId: 'trace-001', - spanData: { type: 'agent', agentType: input } - }); - - await processor.onTraceStart(trace); - await processor.onSpanStart(span); - await processor.onSpanEnd(span); - await processor.onTraceEnd(trace); - - const agentCall = mockLogger.addAgentSpan.mock.calls[0][0]; - expect(agentCall.agentType).toBe(expected); - } + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentCall.statusCode).toBe(200); + const meta = agentCall.metadata as Record; + expect(meta.status_code).toBeUndefined(); }); }); @@ -1137,9 +1075,9 @@ describe('Span hierarchy correctness', () => { await processor.onSpanEnd(agentSpan); await processor.onTraceEnd(trace); - // startTrace is called first, then addAgentSpan, then addLlmSpan, then conclude + // startTrace is called first, then addWorkflowSpan (agent), then addLlmSpan, then conclude const callOrder = mockLogger.startTrace.mock.invocationCallOrder[0]; - const agentOrder = mockLogger.addAgentSpan.mock.invocationCallOrder[0]; + const agentOrder = mockLogger.addWorkflowSpan.mock.invocationCallOrder[0]; const llmOrder = mockLogger.addLlmSpan.mock.invocationCallOrder[0]; const concludeOrder = mockLogger.conclude.mock.invocationCallOrder[0]; @@ -1195,6 +1133,61 @@ describe('Span hierarchy correctness', () => { // conclude is called 3 times: once for handoff (workflow), once for agent, once for concludeAll in onTraceEnd expect(mockLogger.conclude).toHaveBeenCalledTimes(3); }); + + test('test handoff span refreshes to_agent at onSpanEnd (late binding)', async () => { + // In the OpenAI Agents SDK, to_agent is set on handoffSpan.spanData AFTER span.start() fires + // (inside withHandoffSpan's fn callback). So onSpanStart sees to_agent = undefined. 
+ // onSpanEnd must re-extract to capture the final populated to_agent value. + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'TriageAgent' } + }); + + // Simulate SDK behaviour: to_agent is absent at start, present at end + const handoffSpanData: AgentSpan['spanData'] = { + type: 'handoff', + from_agent: 'TriageAgent' + // to_agent not yet set + }; + const handoffSpan = makeSpan({ + spanId: 'handoff-001', + parentId: 'agent-001', + spanData: handoffSpanData + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(handoffSpan); + + // Simulate SDK setting to_agent after start + handoffSpanData.to_agent = 'WeatherAgent'; + + await processor.onSpanEnd(handoffSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // The handoff workflow span should receive the JSON dict output (not empty string) + const wfCall = mockLogger.addWorkflowSpan.mock.calls.find( + (c: [Record]) => + c[0].name === 'Handoff: TriageAgent → WeatherAgent' + ); + expect(wfCall).toBeDefined(); + expect(wfCall?.[0].output).toBe('{"to_agent":"WeatherAgent"}'); + + // The agent conclude should also get the JSON dict via last-child fallback + const concludeCalls = mockLogger.conclude.mock.calls as [ + Record + ][]; + const agentConclude = concludeCalls.find( + (c) => c[0].output === '{"to_agent":"WeatherAgent"}' + ); + expect(agentConclude).toBeDefined(); + }); }); describe('_firstInput population (trace-level input handling)', () => { From feb0a119139428fc59cd654438b995791f0aed9b Mon Sep 17 00:00:00 2001 From: richter Date: Wed, 18 Mar 2026 18:44:09 -0300 Subject: [PATCH 13/21] feature(serialization): Updated serialization method (to toStringRecord), removed uses of 'any' type, reorganized 
structure of code in data-extraction.ts, updated serialization of outputs to reflect standard in galileo-python, improved methods to add spans.. --- src/handlers/langchain.ts | 12 +- src/handlers/openai-agents/data-extraction.ts | 91 +++++++--- src/handlers/openai-agents/index.ts | 18 +- src/utils/serialization.ts | 6 +- src/wrappers.ts | 4 +- tests/entities/serialization.test.ts | 42 ++--- .../openai-agents/data-extraction.test.ts | 167 ++++++++++++++++-- .../openai-agents/integration.test.ts | 2 +- 8 files changed, 264 insertions(+), 78 deletions(-) diff --git a/src/handlers/langchain.ts b/src/handlers/langchain.ts index 3dd22e91..e8ba1e9f 100644 --- a/src/handlers/langchain.ts +++ b/src/handlers/langchain.ts @@ -12,7 +12,7 @@ import { AgentFinish } from '@langchain/core/agents'; import { Document, DocumentInterface } from '@langchain/core/documents'; import { GalileoSingleton } from '../singleton'; import { GalileoLogger } from '../utils/galileo-logger'; -import { toStringValue, convertToStringDict } from '../utils/serialization'; +import { toStringValue, toStringRecord } from '../utils/serialization'; import { getSdkLogger } from 'galileo-generated'; import { Serialized } from '@langchain/core/load/serializable.js'; @@ -151,8 +151,8 @@ export class GalileoCallback let metadata: Record | undefined = undefined; if (node.spanParams.metadata) { try { - metadata = convertToStringDict( - node.spanParams.metadata as Record + metadata = toStringRecord( + node.spanParams.metadata as Record ); } catch (e) { sdkLogger.warn('Unable to convert metadata to a string dictionary', e); @@ -485,7 +485,7 @@ export class GalileoCallback | undefined; // Serialize messages safely - let serializedMessages; + let serializedMessages: unknown; try { const flattenedMessages = messages.flat().map((msg) => ({ content: msg.content, @@ -516,7 +516,7 @@ export class GalileoCallback public async handleLLMEnd(output: LLMResult, runId: string): Promise { const tokenUsage = 
output.llmOutput?.tokenUsage || {}; - let serializedOutput; + let serializedOutput: unknown; try { const flattenedOutput = output.generations.flat().map((g) => ({ text: g.text, @@ -605,7 +605,7 @@ export class GalileoCallback documents: DocumentInterface>[], runId: string ): Promise { - let serializedResponse; + let serializedResponse: unknown; try { serializedResponse = documents.map((doc) => ({ pageContent: doc.pageContent, diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index 25113e12..d22895d6 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -1,7 +1,27 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ import type { GalileoSpanLike } from './custom-span'; import type { NodeType } from './node'; +const MODEL_PARAM_KEYS = [ + 'temperature', + 'max_output_tokens', + 'top_p', + 'tool_choice', + 'parallel_tool_calls', + 'truncation', + 'seed', + 'frequency_penalty', + 'presence_penalty' +]; + +const RESPONSE_EXCLUDE = new Set([ + 'input', + 'output', + 'usage', + 'tools', + 'error', + 'status' +]); + /** * Normalised token count structure returned by parseUsage. */ @@ -79,11 +99,17 @@ export function extractLlmData( spanData: Record ): Record { if (spanData.type === 'generation') { - const usage = parseUsage( - (spanData.usage as Record | undefined) ?? null - ); + const usageRaw = + (spanData.usage as Record | undefined) ?? {}; + const usage = parseUsage(usageRaw); const modelConfig = (spanData.model_config as Record | undefined) ?? {}; + const inputDetails = + (usageRaw.input_tokens_details as Record | undefined) ?? + null; + const outputDetails = + (usageRaw.output_tokens_details as Record | undefined) ?? + null; return { input: spanData.input !== undefined ? 
JSON.stringify(spanData.input) : '', @@ -99,7 +125,9 @@ export function extractLlmData( numCachedInputTokens: usage.cachedTokens, metadata: { gen_ai_system: 'openai', - model_config: JSON.stringify(modelConfig) + model_config: modelConfig, + ...(inputDetails ? { input_tokens_details: inputDetails } : {}), + ...(outputDetails ? { output_tokens_details: outputDetails } : {}) } }; } @@ -122,20 +150,42 @@ export function extractLlmData( (response?.temperature as number | undefined) ?? undefined; const tools = response?.tools; + const modelParameters: Record = response + ? Object.fromEntries( + MODEL_PARAM_KEYS.filter((k) => response[k] !== undefined).map((k) => [ + k, + response[k] + ]) + ) + : {}; + + const responseMetadata: Record = response + ? Object.fromEntries( + Object.entries(response).filter(([k]) => !RESPONSE_EXCLUDE.has(k)) + ) + : {}; + return { input: input !== undefined ? JSON.stringify(input) : '', output: response?.output !== undefined ? JSON.stringify(response.output) : '', model, temperature, - tools: tools !== undefined ? JSON.stringify(tools) : undefined, + tools: tools !== undefined ? tools : undefined, + modelParameters, numInputTokens: usage.inputTokens, numOutputTokens: usage.outputTokens, totalTokens: usage.totalTokens ?? undefined, numReasoningTokens: usage.reasoningTokens, numCachedInputTokens: usage.cachedTokens, metadata: { - gen_ai_system: 'openai' + gen_ai_system: 'openai', + ...(Object.keys(responseMetadata).length > 0 + ? { response_metadata: responseMetadata } + : {}), + ...(response?.instructions !== undefined + ? { instructions: response.instructions } + : {}) }, _responseObject: response }; @@ -177,10 +227,10 @@ export function extractToolData( const triggered = Boolean(spanData.triggered); return { input: '', - output: triggered ? 'Guardrail triggered' : 'Guardrail passed', + output: JSON.stringify({ triggered }), metadata: { - triggered: String(triggered), - guardrail_name: String((spanData.name as string | undefined) ?? 
'') + triggered, + ...(triggered ? { status: 'warning' } : {}) } }; } @@ -212,13 +262,9 @@ export function extractWorkflowData( output: undefined, ...(agentType !== undefined ? { agentType } : {}), metadata: { - ...(tools !== undefined ? { tools: JSON.stringify(tools) } : {}), - ...(handoffs !== undefined - ? { handoffs: JSON.stringify(handoffs) } - : {}), - ...(outputType !== undefined - ? { output_type: JSON.stringify(outputType) } - : {}) + ...(tools !== undefined ? { tools } : {}), + ...(handoffs !== undefined ? { handoffs } : {}), + ...(outputType !== undefined ? { output_type: outputType } : {}) } }; } @@ -251,11 +297,14 @@ export function extractWorkflowData( : JSON.stringify(data.output) : undefined; - // Everything except input/output goes to metadata + // Everything except input/output goes to metadata; values are kept as-is const metaEntries = Object.entries(data) - .filter(([k]) => k !== 'input' && k !== 'output') - .reduce>((acc, [k, v]) => { - acc[k] = typeof v === 'string' ? 
v : JSON.stringify(v); + .filter( + ([k, v]) => + k !== 'input' && k !== 'output' && v !== null && v !== undefined + ) + .reduce>((acc, [k, v]) => { + acc[k] = v; return acc; }, {}); diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index 9dfcb178..74a657a2 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -2,6 +2,7 @@ import { GalileoLogger } from '../../utils/galileo-logger'; import { GalileoSingleton } from '../../singleton'; import { calculateDurationNs } from '../../utils/utils'; +import { toStringRecord } from '../../utils/serialization'; import type { JsonObject } from '../../types/base.types'; import { AgentType } from '../../types/new-api.types'; import { type Node, createNode } from './node'; @@ -160,12 +161,7 @@ export class GalileoTracingProcessor implements TracingProcessor { }; if (trace.metadata) { - // Convert metadata values to strings for Galileo - const meta: Record = {}; - for (const [k, v] of Object.entries(trace.metadata)) { - meta[k] = typeof v === 'string' ? v : JSON.stringify(v); - } - spanParams.metadata = meta; + spanParams.metadata = toStringRecord(trace.metadata); } const node = createNode({ @@ -383,8 +379,9 @@ export class GalileoTracingProcessor implements TracingProcessor { const params = node.spanParams; const name = (params.name as string | undefined) ?? 'Agent Run'; const durationNs = (params.durationNs as number | undefined) ?? 0; - const metadata = - (params.metadata as Record | undefined) ?? {}; + const metadata = toStringRecord( + (params.metadata as Record | undefined) ?? {} + ); const tags = (params.tags as string[] | undefined) ?? undefined; const statusCode = (params.statusCode as number | undefined) ?? 200; const input = params.input !== undefined ? String(params.input) : ''; @@ -423,9 +420,8 @@ export class GalileoTracingProcessor implements TracingProcessor { const temperature = (params.temperature as number | undefined) ?? 
undefined; const model = (params.model as string | undefined) ?? 'unknown'; - const tools = (params.tools as string | undefined) - ? (JSON.parse(params.tools as string) as Record[]) - : undefined; + const tools = + (params.tools as Record[] | undefined) ?? undefined; // Build embedded tool calls metadata const embeddedToolCalls = params.embeddedToolCalls as diff --git a/src/utils/serialization.ts b/src/utils/serialization.ts index b42846a0..a5334dc3 100644 --- a/src/utils/serialization.ts +++ b/src/utils/serialization.ts @@ -70,7 +70,7 @@ export const extractParamsInfo = ( // This is simplistic and may not work for complex expressions defaultValue = defaultValueStr; } - } catch (e) { + } catch (_) { defaultValue = defaultValueStr; // Fallback to string representation } @@ -112,8 +112,8 @@ export const argsToDict = ( * @param metadata - The metadata object with potentially complex values * @returns A new object with all values converted to strings */ -export const convertToStringDict = ( - metadata: Record +export const toStringRecord = ( + metadata: Record ): Record => { const result: Record = {}; diff --git a/src/wrappers.ts b/src/wrappers.ts index 729440bb..c569b643 100644 --- a/src/wrappers.ts +++ b/src/wrappers.ts @@ -7,7 +7,7 @@ import { import { serializeToStr } from './entities/serialization'; import { argsToDict, - convertToStringDict, + toStringRecord, extractParamsInfo, toStringValue } from './utils/serialization'; @@ -215,7 +215,7 @@ export function log( spanParams.metadata && typeof spanParams.metadata === 'object' && spanParams.metadata !== null - ? convertToStringDict(spanParams.metadata as Record) + ? toStringRecord(spanParams.metadata as Record) : undefined; const tags = Array.isArray(spanParams.tags) ? 
spanParams.tags.map((tag) => toStringValue(tag)) diff --git a/tests/entities/serialization.test.ts b/tests/entities/serialization.test.ts index 07003a54..053b74a8 100644 --- a/tests/entities/serialization.test.ts +++ b/tests/entities/serialization.test.ts @@ -2,7 +2,7 @@ import { EventSerializer, serializeToStr } from '../../src/entities/serialization'; -import { convertToStringDict } from '../../src/utils/serialization'; +import { toStringRecord } from '../../src/utils/serialization'; describe('EventSerializer', () => { let serializer: EventSerializer; @@ -591,13 +591,13 @@ describe('EventSerializer', () => { }); }); - describe('convertToStringDict', () => { + describe('toStringRecord', () => { it('should use serializeToStr for object values', () => { const input = { metadata: { nested: { value: 'test' } } }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.metadata).toBe('string'); expect(result.metadata).toContain('test'); @@ -608,9 +608,9 @@ describe('EventSerializer', () => { const obj: Record = { key: 'value' }; obj.circular = obj; - expect(() => convertToStringDict({ meta: obj })).not.toThrow(); + expect(() => toStringRecord({ meta: obj })).not.toThrow(); - const result = convertToStringDict({ meta: obj }); + const result = toStringRecord({ meta: obj }); expect(typeof result.meta).toBe('string'); expect(result.meta).toContain('key'); }); @@ -622,7 +622,7 @@ describe('EventSerializer', () => { bool: true }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(result.str).toBe('hello'); expect(result.num).toBe('42'); @@ -635,7 +635,7 @@ describe('EventSerializer', () => { undefinedValue: undefined }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(result.nullValue).toBe(''); expect(result.undefinedValue).toBe(''); @@ -646,7 +646,7 @@ describe('EventSerializer', () => { items: [1, 2, [3, 4]] }; - const result = 
convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.items).toBe('string'); expect(result.items).toContain('1'); @@ -660,7 +660,7 @@ describe('EventSerializer', () => { empty: {} }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.empty).toBe('string'); expect(result.empty).toBe('{}'); @@ -677,7 +677,7 @@ describe('EventSerializer', () => { } }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.user).toBe('string'); expect(result.user).toContain('John'); @@ -693,13 +693,13 @@ describe('EventSerializer', () => { obj2.ref = obj1; expect(() => - convertToStringDict({ + toStringRecord({ first: obj1, second: obj2 }) ).not.toThrow(); - const result = convertToStringDict({ + const result = toStringRecord({ first: obj1, second: obj2 }); @@ -715,7 +715,7 @@ describe('EventSerializer', () => { timestamp: new Date('2024-01-01T12:00:00Z') }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.timestamp).toBe('string'); expect(result.timestamp).toContain('2024-01-01'); @@ -726,7 +726,7 @@ describe('EventSerializer', () => { error: new Error('Test error') }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.error).toBe('string'); expect(result.error).toContain('Error'); @@ -738,7 +738,7 @@ describe('EventSerializer', () => { tags: new Set(['tag1', 'tag2', 'tag3']) }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.tags).toBe('string'); expect(result.tags).toContain('tag1'); @@ -754,7 +754,7 @@ describe('EventSerializer', () => { ]) }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.config).toBe('string'); expect(result.config).toContain('key1'); @@ -773,12 +773,12 @@ describe('EventSerializer', () => { 
grandchild.root = obj; // Circular reference expect(() => - convertToStringDict({ + toStringRecord({ tree: obj }) ).not.toThrow(); - const result = convertToStringDict({ + const result = toStringRecord({ tree: obj }); @@ -804,7 +804,7 @@ describe('EventSerializer', () => { } }; - const result = convertToStringDict(input); + const result = toStringRecord(input); // All values should be strings Object.values(result).forEach((value) => { @@ -821,7 +821,7 @@ describe('EventSerializer', () => { }); it('should handle empty input object', () => { - const result = convertToStringDict({}); + const result = toStringRecord({}); expect(result).toEqual({}); }); @@ -832,7 +832,7 @@ describe('EventSerializer', () => { key3: 'value3' }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(Object.keys(result)).toEqual(['key1', 'key2', 'key3']); }); diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index 1d956a73..4d2d7bfe 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -108,9 +108,46 @@ describe('extractLlmData generation', () => { test('test extract generation metadata includes gen_ai_system openai', () => { const spanData = { type: 'generation' }; const result = extractLlmData(spanData); - const meta = result.metadata as Record; + const meta = result.metadata as Record; expect(meta.gen_ai_system).toBe('openai'); }); + + test('test extract generation metadata model_config is raw dict', () => { + const spanData = { + type: 'generation', + model_config: { temperature: 0.5, max_tokens: 200 } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.model_config).toEqual({ temperature: 0.5, max_tokens: 200 }); + }); + + test('test extract generation metadata includes token detail objects', () => { + const spanData = { + type: 'generation', + usage: { + 
input_tokens: 10, + output_tokens: 5, + input_tokens_details: { cached_tokens: 3 }, + output_tokens_details: { reasoning_tokens: 2 } + } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.input_tokens_details).toEqual({ cached_tokens: 3 }); + expect(meta.output_tokens_details).toEqual({ reasoning_tokens: 2 }); + }); + + test('test extract generation metadata omits absent token details', () => { + const spanData = { + type: 'generation', + usage: { input_tokens: 10, output_tokens: 5 } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta).not.toHaveProperty('input_tokens_details'); + expect(meta).not.toHaveProperty('output_tokens_details'); + }); }); describe('extractLlmData response', () => { @@ -152,6 +189,93 @@ describe('extractLlmData response', () => { expect(result.model).toBe('unknown'); expect(result.numInputTokens).toBe(0); }); + + test('test extract response tools returned as raw array not JSON string', () => { + const toolsArray = [{ type: 'function', name: 'search' }]; + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + tools: toolsArray, + output: [] + } + }; + const result = extractLlmData(spanData); + expect(result.tools).toEqual(toolsArray); + expect(typeof result.tools).not.toBe('string'); + }); + + test('test extract response model_parameters from whitelist', () => { + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + temperature: 0.7, + max_output_tokens: 512, + top_p: 1, + tool_choice: 'auto', + seed: 42, + irrelevant_field: 'ignored', + output: [] + } + }; + const result = extractLlmData(spanData); + const mp = result.modelParameters as Record; + expect(mp.temperature).toBe(0.7); + expect(mp.max_output_tokens).toBe(512); + expect(mp.top_p).toBe(1); + expect(mp.tool_choice).toBe('auto'); + expect(mp.seed).toBe(42); + expect(mp).not.toHaveProperty('irrelevant_field'); + 
}); + + test('test extract response metadata includes response_metadata', () => { + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + temperature: 0.5, + object: 'response', + output: [] + } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.gen_ai_system).toBe('openai'); + const rm = meta.response_metadata as Record; + expect(rm.model).toBe('gpt-4o'); + expect(rm.temperature).toBe(0.5); + expect(rm).not.toHaveProperty('usage'); + expect(rm).not.toHaveProperty('output'); + }); + + test('test extract response metadata includes instructions when present', () => { + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + instructions: 'You are a helpful assistant.', + output: [] + } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.instructions).toBe('You are a helpful assistant.'); + }); + + test('test extract response metadata omits instructions when absent', () => { + const spanData = { + type: 'response', + _response: { model: 'gpt-4o', usage: {}, output: [] } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta).not.toHaveProperty('instructions'); + }); }); describe('extractLlmData unknown type', () => { @@ -207,18 +331,21 @@ describe('extractToolData', () => { const spanData = { type: 'guardrail', triggered: true, name: 'PII Filter' }; const result = extractToolData(spanData); expect(result.input).toBe(''); - expect(result.output).toBe('Guardrail triggered'); - const meta = result.metadata as Record; - expect(meta.triggered).toBe('true'); - expect(meta.guardrail_name).toBe('PII Filter'); + expect(result.output).toBe('{"triggered":true}'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe(true); + expect(meta.status).toBe('warning'); + expect(meta).not.toHaveProperty('guardrail_name'); }); test('test extract guardrail 
span not triggered', () => { const spanData = { type: 'guardrail', triggered: false, name: 'Safety' }; const result = extractToolData(spanData); - expect(result.output).toBe('Guardrail passed'); - const meta = result.metadata as Record; - expect(meta.triggered).toBe('false'); + expect(result.output).toBe('{"triggered":false}'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe(false); + expect(meta).not.toHaveProperty('status'); + expect(meta).not.toHaveProperty('guardrail_name'); }); test('test extract tool data for transcription returns empty', () => { @@ -244,10 +371,10 @@ describe('extractWorkflowData', () => { }; const result = extractWorkflowData(spanData); expect(result.input).toBe(''); - const meta = result.metadata as Record; - expect(meta.tools).toBe(JSON.stringify(['search', 'calculator'])); - expect(meta.handoffs).toBe(JSON.stringify(['ReviewAgent'])); - expect(meta.output_type).toBe(JSON.stringify('string')); + const meta = result.metadata as Record; + expect(meta.tools).toEqual(['search', 'calculator']); + expect(meta.handoffs).toEqual(['ReviewAgent']); + expect(meta.output_type).toBe('string'); }); test('test extract agent span data without optional fields', () => { @@ -290,12 +417,26 @@ describe('extractWorkflowData', () => { const result = extractWorkflowData(spanData); expect(result.input).toBe('custom input'); expect(result.output).toBe('custom output'); - const meta = result.metadata as Record; + const meta = result.metadata as Record; expect(meta.extra_key).toBe('extra value'); expect(meta.input).toBeUndefined(); expect(meta.output).toBeUndefined(); }); + test('test extract custom span data with object metadata value kept as-is', () => { + const spanData = { + type: 'custom', + data: { + input: 'in', + output: 'out', + config: { retries: 3, timeout: 5000 } + } + }; + const result = extractWorkflowData(spanData); + const meta = result.metadata as Record; + expect(meta.config).toEqual({ retries: 3, timeout: 5000 }); + }); + 
test('test extract custom span data with object input serialised', () => { const spanData = { type: 'custom', diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index 3871d66a..12175d52 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -159,7 +159,7 @@ describe('Multi-agent integration flows', () => { await processor.onTraceEnd(trace); const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; - expect(toolCall.output).toBe('Guardrail triggered'); + expect(toolCall.output).toBe('{"triggered":true}'); }); test('test embedded tool calls from OpenAI response', async () => { From d5ca5e1eca9a2cbf80bf23e5cf2f823e0ae8724e Mon Sep 17 00:00:00 2001 From: richter Date: Wed, 18 Mar 2026 18:58:11 -0300 Subject: [PATCH 14/21] feature(serialization): Centralized serialization of input and output in llmSerializeToString. --- src/handlers/openai-agents/data-extraction.ts | 20 ++++++++++++------ src/handlers/openai-agents/index.ts | 2 +- .../openai-agents/data-extraction.test.ts | 21 +++++++++++++++++++ .../openai-agents/integration.test.ts | 6 +----- .../openai-agents/tracing-processor.test.ts | 13 +++++------- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index d22895d6..9ea351df 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -90,6 +90,16 @@ export function parseUsage( }; } +/** + * Serialize a value to a string for LLM span input/output fields. + * Strings are returned as-is; null/undefined produce ''; everything else is JSON-serialized. 
+ */ +function llmSerializeToString(value: unknown): string { + if (value === undefined || value === null) return ''; + if (typeof value === 'string') return value; + return JSON.stringify(value); +} + /** * Extracts LLM-relevant fields from a GenerationSpanData or ResponseSpanData. * @param spanData - The span data object (must have type 'generation' or 'response'). @@ -112,9 +122,8 @@ export function extractLlmData( null; return { - input: spanData.input !== undefined ? JSON.stringify(spanData.input) : '', - output: - spanData.output !== undefined ? JSON.stringify(spanData.output) : '', + input: llmSerializeToString(spanData.input), + output: llmSerializeToString(spanData.output), model: (spanData.model as string | undefined) ?? 'unknown', temperature: (modelConfig.temperature as number | undefined) ?? undefined, modelParameters: modelConfig, @@ -166,9 +175,8 @@ export function extractLlmData( : {}; return { - input: input !== undefined ? JSON.stringify(input) : '', - output: - response?.output !== undefined ? JSON.stringify(response.output) : '', + input: llmSerializeToString(input), + output: llmSerializeToString(response?.output), model, temperature, tools: tools !== undefined ? 
tools : undefined, diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index 74a657a2..c5de1f16 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -144,7 +144,7 @@ export class GalileoTracingProcessor implements TracingProcessor { return false; } // Filter out JSON-serialized null (from earlier spans) - if (str === 'null' || str === '""') { + if (str === 'null') { return false; } return true; diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index 4d2d7bfe..cd5d7d2e 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -148,6 +148,17 @@ describe('extractLlmData generation', () => { expect(meta).not.toHaveProperty('input_tokens_details'); expect(meta).not.toHaveProperty('output_tokens_details'); }); + + test('test extract generation span with string input and output not double encoded', () => { + const spanData = { + type: 'generation', + input: 'What is the weather?', + output: 'It is sunny.' 
+ }; + const result = extractLlmData(spanData); + expect(result.input).toBe('What is the weather?'); + expect(result.output).toBe('It is sunny.'); + }); }); describe('extractLlmData response', () => { @@ -276,6 +287,16 @@ describe('extractLlmData response', () => { const meta = result.metadata as Record; expect(meta).not.toHaveProperty('instructions'); }); + + test('test extract response span with string input not double encoded', () => { + const spanData = { + type: 'response', + _input: 'Hello', + _response: { model: 'gpt-4o', usage: {}, output: [] } + }; + const result = extractLlmData(spanData); + expect(result.input).toBe('Hello'); + }); }); describe('extractLlmData unknown type', () => { diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index 12175d52..d8ff2c69 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -557,11 +557,7 @@ describe('Output tracking integration', () => { await processor.onTraceEnd(trace); const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; - // Output is stringified, so check for either the string or JSON-stringified version - expect( - startTraceCall.output === 'Final output' || - startTraceCall.output === '"Final output"' - ).toBe(true); + expect(startTraceCall.output).toBe('Final output'); }); }); diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index 6bf70699..c43f6db1 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -938,7 +938,7 @@ describe('Agent span emission', () => { // conclude for the agent span (first conclude call) should carry the LLM child's output const concludeCall = mockLogger.conclude.mock.calls[0][0]; - expect(concludeCall.output).toBe('"Final answer from LLM"'); + expect(concludeCall.output).toBe('Final answer from 
LLM'); }); test('test agent span conclude receives last of multiple children outputs', async () => { @@ -979,7 +979,7 @@ describe('Agent span emission', () => { // The conclude for the agent span should use the last child (LLM), not the tool const concludeCall = mockLogger.conclude.mock.calls[0][0]; - expect(concludeCall.output).toBe('"LLM final response"'); + expect(concludeCall.output).toBe('LLM final response'); }); test('test agent span conclude uses undefined when no children have output', async () => { @@ -1215,9 +1215,8 @@ describe('_firstInput population (trace-level input handling)', () => { await processor.onTraceEnd(trace); // Verify startTrace was called with the LLM input - // Note: input is JSON-stringified by extractLlmData const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; - expect(startTraceCall.input).toBe('"What is the weather in NYC?"'); + expect(startTraceCall.input).toBe('What is the weather in NYC?'); }); test('captures first input from tool span if LLM input unavailable', async () => { @@ -1285,9 +1284,8 @@ describe('_firstInput population (trace-level input handling)', () => { await processor.onTraceEnd(trace); // Should use input from llm2, not llm1 - // Note: input is JSON-stringified by extractLlmData const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; - expect(startTraceCall.input).toBe('"Real question"'); + expect(startTraceCall.input).toBe('Real question'); }); test('falls back to trace name if no meaningful input captured', async () => { @@ -1340,7 +1338,6 @@ describe('_firstInput population (trace-level input handling)', () => { const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; // Should use first input, not second - // Note: input is JSON-stringified by extractLlmData - expect(startTraceCall.input).toBe('"First query"'); + expect(startTraceCall.input).toBe('First query'); }); }); From 10d99df138654c1f2aa12302ddbef01ec57503d5 Mon Sep 17 00:00:00 2001 From: richter Date: Wed, 18 Mar 2026 19:53:38 
-0300 Subject: [PATCH 15/21] fix(tools): Assigning tools to correct field, instead of serializing to metadata. --- src/handlers/openai-agents/embedded-tools.ts | 6 ++-- src/handlers/openai-agents/index.ts | 29 +++++++------------ .../openai-agents/embedded-tool.test.ts | 28 ++++++++++++++++-- .../openai-agents/integration.test.ts | 14 +++------ .../openai-agents/tracing-processor.test.ts | 18 ++++-------- 5 files changed, 47 insertions(+), 48 deletions(-) diff --git a/src/handlers/openai-agents/embedded-tools.ts b/src/handlers/openai-agents/embedded-tools.ts index 8de30e60..3163750f 100644 --- a/src/handlers/openai-agents/embedded-tools.ts +++ b/src/handlers/openai-agents/embedded-tools.ts @@ -153,12 +153,12 @@ export function extractEmbeddedToolCalls( const toolName = getToolNameFromType(itemType); const toolCallId = (typedItem.id as string | undefined) ?? - (typedItem.tool_call_id as string | undefined) ?? + (typedItem.call_id as string | undefined) ?? null; - const status = (typedItem.status as string | undefined) ?? null; + const status = (typedItem.status as string | undefined) ?? 
'completed'; results.push({ - type: itemType, + type: 'function', function: { name: toolName }, tool_call_id: toolCallId, tool_call_type: itemType, diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index c5de1f16..964bf2e3 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -13,10 +13,7 @@ import { extractWorkflowData, extractGalileoCustomData } from './data-extraction'; -import { - extractEmbeddedToolCalls, - type EmbeddedToolCall -} from './embedded-tools'; +import { extractEmbeddedToolCalls } from './embedded-tools'; import { createGalileoCustomSpanData, type GalileoCustomSpanData, @@ -285,16 +282,19 @@ export class GalileoTracingProcessor implements TracingProcessor { const responseObj = finalData._responseObject as | Record | undefined; + // Merge updated data first (output/tools may not have been available at span start) + const { _responseObject: _removed, ...rest } = finalData; + void _removed; + node.spanParams = { ...node.spanParams, ...rest }; + // Append embedded tool calls (model-invoked tools) to tools[] — mirrors Python handler if (responseObj) { const embeddedTools = extractEmbeddedToolCalls(responseObj); if (embeddedTools.length > 0) { - node.spanParams.embeddedToolCalls = embeddedTools; + const existingTools = + (node.spanParams.tools as unknown[] | undefined) ?? []; + node.spanParams.tools = [...existingTools, ...embeddedTools]; } } - // Merge updated data (output may not have been available at span start) - const { _responseObject: _removed, ...rest } = finalData; - void _removed; - node.spanParams = { ...node.spanParams, ...rest }; } else if (spanData.type === 'generation') { // Refresh LLM data at end (usage may be populated now) const finalData = extractLlmData(spanData); @@ -423,15 +423,6 @@ export class GalileoTracingProcessor implements TracingProcessor { const tools = (params.tools as Record[] | undefined) ?? 
undefined; - // Build embedded tool calls metadata - const embeddedToolCalls = params.embeddedToolCalls as - | EmbeddedToolCall[] - | undefined; - const llmMeta: Record = { ...metadata }; - if (embeddedToolCalls && embeddedToolCalls.length > 0) { - llmMeta.embedded_tool_calls = JSON.stringify(embeddedToolCalls); - } - this._galileoLogger.addLlmSpan({ input, output: output ?? '', @@ -445,7 +436,7 @@ export class GalileoTracingProcessor implements TracingProcessor { numCachedInputTokens, temperature, statusCode, - metadata: llmMeta, + metadata, tools: tools as JsonObject[] | undefined, createdAt: startedAt }); diff --git a/tests/handlers/openai-agents/embedded-tool.test.ts b/tests/handlers/openai-agents/embedded-tool.test.ts index f0168582..7e4e23c1 100644 --- a/tests/handlers/openai-agents/embedded-tool.test.ts +++ b/tests/handlers/openai-agents/embedded-tool.test.ts @@ -167,7 +167,7 @@ describe('extractEmbeddedToolCalls', () => { }; const result = extractEmbeddedToolCalls(response); expect(result.length).toBe(1); - expect(result[0].type).toBe('code_interpreter_call'); + expect(result[0].type).toBe('function'); expect(result[0].function.name).toBe('code_interpreter'); expect(result[0].tool_call_id).toBe('ci_001'); expect(result[0].tool_call_input).toBe('x = 1'); @@ -175,6 +175,20 @@ describe('extractEmbeddedToolCalls', () => { expect(result[0].tool_call_status).toBe('completed'); }); + test('test tool_call_id falls back to call_id when id absent', () => { + const response = { + output: [ + { + type: 'web_search_call', + call_id: 'fallback_id_001', + action: { query: 'test' } + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result[0].tool_call_id).toBe('fallback_id_001'); + }); + test('test extracts file_search_call', () => { const response = { output: [ @@ -218,8 +232,16 @@ describe('extractEmbeddedToolCalls', () => { }; const result = extractEmbeddedToolCalls(response); expect(result.length).toBe(2); - 
expect(result[0].type).toBe('code_interpreter_call'); - expect(result[1].type).toBe('web_search_call'); + expect(result[0].type).toBe('function'); + expect(result[1].type).toBe('function'); + }); + + test('test tool_call_status defaults to completed when absent', () => { + const response = { + output: [{ type: 'web_search_call', action: { query: 'q' } }] + }; + const result = extractEmbeddedToolCalls(response); + expect(result[0].tool_call_status).toBe('completed'); }); test('test handles null output items gracefully', () => { diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index d8ff2c69..12a11069 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -201,16 +201,10 @@ describe('Multi-agent integration flows', () => { expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; - // Verify that either embeddedToolCalls exists or metadata includes them - if (llmCall.embeddedToolCalls) { - expect(llmCall.embeddedToolCalls.length).toBe(2); - expect(llmCall.embeddedToolCalls[0].type).toBe('web_search_call'); - expect(llmCall.embeddedToolCalls[1].type).toBe('code_interpreter_call'); - } else { - // May be in metadata as embedded_tool_calls - const meta = llmCall.metadata as Record; - expect(meta.embedded_tool_calls).toBeDefined(); - } + expect(Array.isArray(llmCall.tools)).toBe(true); + expect(llmCall.tools.length).toBe(2); + expect(llmCall.tools[0].type).toBe('function'); + expect(llmCall.tools[1].type).toBe('function'); }); test('test galileo_custom span delegates to inner galileoSpan as tool', async () => { diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index c43f6db1..b4073147 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ 
-274,10 +274,9 @@ describe('GalileoTracingProcessor lifecycle', () => { expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; - expect(llmCall.metadata.embedded_tool_calls).toBeDefined(); - const embedded = JSON.parse(llmCall.metadata.embedded_tool_calls); - expect(embedded.length).toBe(1); - expect(embedded[0].type).toBe('web_search_call'); + expect(Array.isArray(llmCall.tools)).toBe(true); + expect(llmCall.tools.length).toBe(1); + expect(llmCall.tools[0].type).toBe('function'); }); test('test metadata values are stringified', async () => { @@ -502,15 +501,8 @@ describe('Response span data merging', () => { // addLlmSpan should be called for response type expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; - // Verify that either embeddedToolCalls exists or metadata includes them - if (llmCall.embeddedToolCalls) { - expect(Array.isArray(llmCall.embeddedToolCalls)).toBe(true); - expect(llmCall.embeddedToolCalls[0].type).toBe('code_interpreter_call'); - } else { - // May be in metadata as embedded_tool_calls - const meta = llmCall.metadata as Record; - expect(meta.embedded_tool_calls).toBeDefined(); - } + expect(Array.isArray(llmCall.tools)).toBe(true); + expect(llmCall.tools[0].type).toBe('function'); }); test('test _responseObject removed from final params', async () => { From c14a1265fe1f271076a12311f377b4832c30c833 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 19 Mar 2026 18:03:20 -0300 Subject: [PATCH 16/21] feat(custom): Refactored process to manage custom (galileo) spans, for proper processing. 
--- src/handlers/openai-agents/data-extraction.ts | 3 +- src/handlers/openai-agents/index.ts | 70 ++++++-- .../openai-agents/data-extraction.test.ts | 64 +++---- .../openai-agents/integration.test.ts | 46 ++--- .../openai-agents/tracing-processor.test.ts | 169 +++++++++++++++++- 5 files changed, 259 insertions(+), 93 deletions(-) diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index 9ea351df..8359f2e6 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -339,8 +339,7 @@ export function extractGalileoCustomData(spanData: Record): { nodeType: NodeType; params: Record; } { - const data = (spanData.data as Record | undefined) ?? {}; - const galileoSpan = data.galileoSpan as GalileoSpanLike | undefined; + const galileoSpan = spanData._galileoSpan as GalileoSpanLike | undefined; if (!galileoSpan || typeof galileoSpan !== 'object') { return { nodeType: 'workflow', params: extractWorkflowData(spanData) }; diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index 964bf2e3..6aa6e865 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -14,11 +14,7 @@ import { extractGalileoCustomData } from './data-extraction'; import { extractEmbeddedToolCalls } from './embedded-tools'; -import { - createGalileoCustomSpanData, - type GalileoCustomSpanData, - type GalileoSpanLike -} from './custom-span'; +import type { GalileoSpanLike } from './custom-span'; import { getSdkLogger } from 'galileo-generated'; const sdkLogger = getSdkLogger(); @@ -310,6 +306,11 @@ export class GalileoTracingProcessor implements TracingProcessor { ...refreshed, name: refreshedName }; + } else if (spanData.__galileoCustom === true) { + // Re-extract at span end so mutations to galileoSpan made inside the callback + // (e.g. setting output after the work is done) are captured in node.spanParams. 
+ const refreshed = extractGalileoCustomData(spanData); + node.spanParams = { ...node.spanParams, ...refreshed.params }; } // Handle errors @@ -508,18 +509,57 @@ export class GalileoTracingProcessor implements TracingProcessor { } /** - * Creates a custom span backed by GalileoCustomSpanData. - * @param galileoSpan - The Galileo span object to embed. - * @param name - (Optional) Display name for the custom span. - * @param extraData - (Optional) Extra data to include in the span payload. - * @returns A GalileoCustomSpanData object that can be passed to the OpenAI Agents SDK. + * Runs a callback under a custom Galileo span that is registered with the OpenAI Agents SDK + * trace provider and properly nested under the currently active span. + * + * The callback is the scope of the span's lifetime — it starts when the callback starts and + * ends when it returns or throws. Any SDK spans created inside the callback are automatically + * nested as children of this custom span. + * + * @param galileoSpan - Galileo span metadata (type, input, output, metadata, tags, statusCode). + * Mutable — update galileoSpan.output inside the callback to capture results. + * @param callback - The work to run under this span. Return value is passed through. + * @param options.name - Display name in Galileo. Overrides galileoSpan.name. + * @param options.extraData - Additional data to attach to the span payload. + * @returns A promise that resolves to the callback's return value. */ - static addGalileoCustomSpan( + static async addGalileoCustomSpan( galileoSpan: GalileoSpanLike, - name?: string, - extraData?: Record - ): GalileoCustomSpanData { - return createGalileoCustomSpanData(galileoSpan, name, extraData); + callback: () => T | Promise, + options?: { name?: string; extraData?: Record } + ): Promise { + const spanName = options?.name ?? galileoSpan.name ?? 
'Galileo Custom'; + const spanOptions = { + data: { + name: spanName, + _galileoSpan: galileoSpan, + __galileoCustom: true, + ...(options?.extraData ?? {}) + } + }; + + try { + const { withCustomSpan } = (await import( + '@openai/agents-core' as string + )) as { + withCustomSpan: ( + fn: (span: unknown) => Promise, + options: Record + ) => Promise; + }; + return await withCustomSpan( + async (span) => { + void span; + return Promise.resolve(callback()); + }, + spanOptions as Record + ); + } catch { + sdkLogger.warn( + '@openai/agents package is not installed. addGalileoCustomSpan will execute callback without tracing.' + ); + return await Promise.resolve(callback()); + } } } diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index cd5d7d2e..cc81ee6c 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -486,15 +486,13 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'tool', - input: 'tool input', - output: 'tool output', - metadata: { key: 'val' }, - tags: ['tag1'], - statusCode: 201 - } + _galileoSpan: { + type: 'tool', + input: 'tool input', + output: 'tool output', + metadata: { key: 'val' }, + tags: ['tag1'], + statusCode: 201 } }; const result = extractGalileoCustomData(spanData); @@ -510,12 +508,10 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'workflow', - input: 'wf in', - output: 'wf out' - } + _galileoSpan: { + type: 'workflow', + input: 'wf in', + output: 'wf out' } }; const result = extractGalileoCustomData(spanData); @@ -528,11 +524,9 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'agent', - input: 'agent in' - } + _galileoSpan: { + 
type: 'agent', + input: 'agent in' } }; const result = extractGalileoCustomData(spanData); @@ -544,9 +538,7 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { type: 'future_type', input: 'x' } - } + _galileoSpan: { type: 'future_type', input: 'x' } }; const result = extractGalileoCustomData(spanData); expect(result.nodeType).toBe('workflow'); @@ -557,9 +549,7 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { type: 'llm', input: 'prompt' } - } + _galileoSpan: { type: 'llm', input: 'prompt' } }; const result = extractGalileoCustomData(spanData); expect(result.nodeType).toBe('workflow'); @@ -581,7 +571,7 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { galileoSpan: 'not-an-object' } + _galileoSpan: 'not-an-object' }; const result = extractGalileoCustomData(spanData); expect(result.nodeType).toBe('workflow'); @@ -591,12 +581,10 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'tool', - input: { query: 'hello' }, - output: { answer: 'world' } - } + _galileoSpan: { + type: 'tool', + input: { query: 'hello' }, + output: { answer: 'world' } } }; const result = extractGalileoCustomData(spanData); @@ -608,16 +596,14 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { type: 'tool', input: 'in' } - } + _galileoSpan: { type: 'tool', input: 'in' } }; const result = extractGalileoCustomData(spanData); expect(result.params).not.toHaveProperty('tags'); expect(result.params).not.toHaveProperty('statusCode'); }); - test('test handles missing data field gracefully', () => { + test('test handles missing galileoSpan gracefully', () => { const spanData = { type: 'custom', __galileoCustom: 
true @@ -630,9 +616,7 @@ describe('extractGalileoCustomData', () => { const spanData = { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { type: 'tool' } - } + _galileoSpan: { type: 'tool' } }; const result = extractGalileoCustomData(spanData); expect(result.params.input).toBe(''); diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index 12a11069..8e319e9a 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -226,15 +226,13 @@ describe('Multi-agent integration flows', () => { spanData: { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'tool', - input: 'custom tool input', - output: 'custom tool output', - metadata: { source: 'test' }, - tags: ['custom-tag'], - statusCode: 200 - } + _galileoSpan: { + type: 'tool', + input: 'custom tool input', + output: 'custom tool output', + metadata: { source: 'test' }, + tags: ['custom-tag'], + statusCode: 200 } } }); @@ -283,12 +281,10 @@ describe('Multi-agent integration flows', () => { spanData: { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'workflow', - input: 'wf input', - output: 'wf output' - } + _galileoSpan: { + type: 'workflow', + input: 'wf input', + output: 'wf output' } } }); @@ -321,13 +317,11 @@ describe('Multi-agent integration flows', () => { spanData: { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'agent', - input: 'agent input', - output: 'agent output', - metadata: { role: 'planner' } - } + _galileoSpan: { + type: 'agent', + input: 'agent input', + output: 'agent output', + metadata: { role: 'planner' } } } }); @@ -383,11 +377,9 @@ describe('Multi-agent integration flows', () => { spanData: { type: 'custom', __galileoCustom: true, - data: { - galileoSpan: { - type: 'unknown_future_type', - input: 'some input' - } + _galileoSpan: { + type: 'unknown_future_type', + input: 'some input' 
} } }); diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index b4073147..357532ee 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -299,16 +299,34 @@ describe('GalileoTracingProcessor lifecycle', () => { } }); - test('test addGalileoCustomSpan creates a GalileoCustomSpanData', () => { - const mockSpan = { type: 'tool', name: 'span-xyz' }; - const result = GalileoTracingProcessor.addGalileoCustomSpan( - mockSpan, - 'MyCustom' + test('test addGalileoCustomSpan invokes callback and returns its value', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + void processor; + + const galileoSpan = { type: 'tool' as const, input: 'query' }; + const result = await GalileoTracingProcessor.addGalileoCustomSpan( + galileoSpan, + async () => 'callback-result', + { name: 'My Custom Span' } ); - expect(result.type).toBe('custom'); - expect(result.__galileoCustom).toBe(true); - expect(result.data.galileoSpan).toBe(mockSpan); - expect(result.name).toBe('MyCustom'); + + expect(result).toBe('callback-result'); + }); + + test('test addGalileoCustomSpan fallback calls callback when SDK unavailable', async () => { + const callbackFn = jest.fn().mockResolvedValue('fallback-result'); + const galileoSpan = { type: 'tool' as const, input: 'query' }; + + // The SDK is not installed in the test environment; the fallback path runs. 
+ const result = await GalileoTracingProcessor.addGalileoCustomSpan( + galileoSpan, + callbackFn, + { name: 'Fallback Span' } + ); + + expect(callbackFn).toHaveBeenCalledTimes(1); + expect(result).toBe('fallback-result'); }); }); @@ -1333,3 +1351,136 @@ describe('_firstInput population (trace-level input handling)', () => { expect(startTraceCall.input).toBe('First query'); }); }); + +describe('GalileoCustomSpan integration via onSpanStart/onSpanEnd', () => { + // Simulate the spanData shape that withCustomSpan produces: + // the SDK spreads options.data fields onto the top level of spanData. + function makeCustomSpan( + galileoSpan: Record, + overrides: Partial = {} + ): AgentSpan { + return makeSpan({ + spanId: 'custom-001', + parentId: 'trace-001', + spanData: { + type: 'custom', + __galileoCustom: true, + _galileoSpan: galileoSpan, + name: (galileoSpan.name as string | undefined) ?? 'Galileo Custom' + }, + ...overrides + }); + } + + test('test custom tool span calls addToolSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const galileoSpan = { + type: 'tool', + input: 'my input', + output: 'my output' + }; + const span = makeCustomSpan(galileoSpan); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const call = mockLogger.addToolSpan.mock.calls[0][0]; + expect(call.input).toBe('my input'); + expect(call.output).toBe('my output'); + }); + + test('test custom workflow span calls addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const galileoSpan = { type: 'workflow', input: 'wf in', output: 'wf out' }; + const span = makeCustomSpan(galileoSpan); + + 
await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + // root is logged via startTrace, not addWorkflowSpan; custom workflow span = 1 call + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const customCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(customCall.input).toBe('wf in'); + }); + + test('test output mutation inside callback is captured at onSpanEnd', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + // galileoSpan starts with no output — simulates a user who will set it later + const galileoSpan: Record = { + type: 'tool', + input: 'query', + output: undefined + }; + const span = makeCustomSpan(galileoSpan); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + + // Simulate the user mutating galileoSpan.output inside the callback before it returns + galileoSpan.output = 'result after work'; + + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + // Re-extraction at onSpanEnd should have picked up the mutation + const call = mockLogger.addToolSpan.mock.calls[0][0]; + expect(call.output).toBe('result after work'); + }); + + test('test custom span with metadata and tags', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const galileoSpan = { + type: 'tool', + input: 'in', + metadata: { source: 'db' }, + tags: ['tag-a'], + statusCode: 201 + }; + const span = makeCustomSpan(galileoSpan); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const call = mockLogger.addToolSpan.mock.calls[0][0]; + expect(call.metadata).toEqual({ source: 'db' }); + 
expect(call.tags).toEqual(['tag-a']); + expect(call.statusCode).toBe(201); + }); + + test('test custom span with unknown type falls back to addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const galileoSpan = { type: 'future_type', input: 'in' }; + const span = makeCustomSpan(galileoSpan); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + // root is logged via startTrace; unknown type custom span → 1 addWorkflowSpan call + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).not.toHaveBeenCalled(); + }); +}); From 151496d8216c1c85690827c4e7d3b0bdfd1ea4f4 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 19 Mar 2026 19:35:54 -0300 Subject: [PATCH 17/21] feat(error): Refactored support for status code and error. --- src/handlers/openai-agents/data-extraction.ts | 14 +++++- .../openai-agents/data-extraction.test.ts | 47 +++++++++++++++++++ .../openai-agents/tracing-processor.test.ts | 33 +++++++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index 8359f2e6..352173bd 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -174,6 +174,14 @@ export function extractLlmData( ) : {}; + const responseError = response?.error as + | { status_code?: number; message?: string; [k: string]: unknown } + | undefined + | null; + const responseStatusCode = responseError + ? ((responseError.status_code as number | undefined) ?? 500) + : undefined; + return { input: llmSerializeToString(input), output: llmSerializeToString(response?.output), @@ -186,6 +194,9 @@ export function extractLlmData( totalTokens: usage.totalTokens ?? 
undefined, numReasoningTokens: usage.reasoningTokens, numCachedInputTokens: usage.cachedTokens, + ...(responseStatusCode !== undefined + ? { statusCode: responseStatusCode } + : {}), metadata: { gen_ai_system: 'openai', ...(Object.keys(responseMetadata).length > 0 @@ -193,7 +204,8 @@ export function extractLlmData( : {}), ...(response?.instructions !== undefined ? { instructions: response.instructions } - : {}) + : {}), + ...(responseError ? { error_details: responseError } : {}) }, _responseObject: response }; diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index cc81ee6c..195ef347 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -297,6 +297,53 @@ describe('extractLlmData response', () => { const result = extractLlmData(spanData); expect(result.input).toBe('Hello'); }); + + test('test extractLlmData response span with response.error sets statusCode and error_details', () => { + const error = { status_code: 429, message: 'Rate limit' }; + const spanData = { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: {}, + output: [], + error + } + }; + const result = extractLlmData(spanData); + expect(result.statusCode).toBe(429); + const meta = result.metadata as Record; + expect(meta.error_details).toEqual(error); + }); + + test('test extractLlmData response span with response.error missing status_code falls back to 500', () => { + const spanData = { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: {}, + output: [], + error: { message: 'Unknown error' } + } + }; + const result = extractLlmData(spanData); + expect(result.statusCode).toBe(500); + }); + + test('test extractLlmData response span with no response.error has no statusCode', () => { + const spanData = { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: {}, + output: [] 
+ } + }; + const result = extractLlmData(spanData); + expect(result.statusCode).toBeUndefined(); + }); }); describe('extractLlmData unknown type', () => { diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index 357532ee..525a2c3e 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -574,6 +574,39 @@ describe('Response span data merging', () => { expect(llmCall.numOutputTokens).toBe(5); }); + test('test response span with response-level error sets statusCode and error_details in metadata', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const responseError = { status_code: 500, message: 'Server error' }; + const span = makeSpan({ + spanId: 'response-err-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: { input_tokens: 5, output_tokens: 0 }, + output: [], + error: responseError + } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(llmCall.statusCode).toBe(500); + const meta = llmCall.metadata as Record; + expect(meta.error_details).toBe(JSON.stringify(responseError)); + }); + test('test response span with no _responseObject handles gracefully', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); From e55124bb6cf591f5d037885640042415929d8a67 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 19 Mar 2026 21:04:54 -0300 Subject: [PATCH 18/21] feat(error): Refactored support for status code and error (2). 
--- src/handlers/openai-agents/index.ts | 18 +- .../openai-agents/tracing-processor.test.ts | 183 ++++++++++++++++++ 2 files changed, 199 insertions(+), 2 deletions(-) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index 6aa6e865..e597d37c 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -43,6 +43,7 @@ export interface AgentSpan< error?: { message: string; data?: Record; + type?: string; } | null; spanData: T & { type: string }; } @@ -101,6 +102,7 @@ function extractAgentType( export class GalileoTracingProcessor implements TracingProcessor { private _nodes = new Map(); private _lastOutput: unknown = null; + private _lastStatusCode: number | null = null; private _firstInput: unknown = null; private static _depCheckDone = false; @@ -185,7 +187,10 @@ export class GalileoTracingProcessor implements TracingProcessor { } this._commitTrace(trace); - this._galileoLogger.conclude({ concludeAll: true }); + this._galileoLogger.conclude({ + concludeAll: true, + statusCode: this._lastStatusCode ?? undefined + }); if (this._flushOnTraceEnd) { await this._galileoLogger.flush(); @@ -193,6 +198,7 @@ export class GalileoTracingProcessor implements TracingProcessor { this._nodes.clear(); this._lastOutput = null; + this._lastStatusCode = null; this._firstInput = null; } @@ -319,10 +325,11 @@ export class GalileoTracingProcessor implements TracingProcessor { const existingMeta = (node.spanParams.metadata as Record | undefined) ?? {}; node.spanParams.statusCode = 500; + node.spanParams.error = span.error; node.spanParams.metadata = { ...existingMeta, error_message: errorMessage, - error_type: 'SpanError', + error_type: span.error.type ?? 'SpanError', error_details: span.error.data ? 
JSON.stringify(span.error.data) : errorMessage @@ -500,11 +507,18 @@ export class GalileoTracingProcessor implements TracingProcessor { concludeOutput = String(lastChild.spanParams.output); } } + const nodeError = params.error as + | { message: string; data?: Record; type?: string } + | undefined; + if (nodeError) { + concludeOutput = JSON.stringify(nodeError); + } this._galileoLogger.conclude({ output: concludeOutput, durationNs, statusCode }); + this._lastStatusCode = statusCode; } } diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts index 525a2c3e..6e718531 100644 --- a/tests/handlers/openai-agents/tracing-processor.test.ts +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -653,6 +653,50 @@ describe('Error handling and recovery', () => { expect(meta.error_type).toBe('SpanError'); }); + test('test span error with type field uses error.type value', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Agent failed', type: 'AgentError' }, + spanData: { type: 'function', name: 'tool' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_type).toBe('AgentError'); + }); + + test('test span error without type field falls back to SpanError', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Something broke' }, + spanData: { type: 'function', name: 'tool' } 
+ }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_type).toBe('SpanError'); + }); + test('test span error with message and data', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); @@ -727,6 +771,34 @@ describe('Error handling and recovery', () => { expect(meta.error_message).toBe('Error occurred'); }); + test('test workflow span with error uses serialized error as conclude output', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'span-agent', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'MyAgent' }, + error: { message: 'Agent failed', type: 'AgentError', data: { code: 42 } } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBe( + JSON.stringify({ + message: 'Agent failed', + type: 'AgentError', + data: { code: 42 } + }) + ); + expect(concludeCall.statusCode).toBe(500); + }); + test('test error on non-existent span ignored gracefully', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); @@ -1517,3 +1589,114 @@ describe('GalileoCustomSpan integration via onSpanStart/onSpanEnd', () => { expect(mockLogger.addToolSpan).not.toHaveBeenCalled(); }); }); + +describe('Trace-level statusCode propagation (_lastStatusCode)', () => { + test('test concludeAll receives statusCode from errored agent span', async () => { + const 
mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-err-001', + parentId: 'trace-001', + error: { message: 'Agent crashed' }, + spanData: { type: 'agent', name: 'CrashingAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + // The concludeAll call is the last conclude call + const concludeCalls = mockLogger.conclude.mock.calls as [ + Record + ][]; + const concludeAll = concludeCalls.find((c) => c[0].concludeAll === true); + expect(concludeAll).toBeDefined(); + expect(concludeAll![0].statusCode).toBe(500); + }); + + test('test concludeAll receives statusCode 200 when no errors', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-ok-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'HappyAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const concludeCalls = mockLogger.conclude.mock.calls as [ + Record + ][]; + const concludeAll = concludeCalls.find((c) => c[0].concludeAll === true); + expect(concludeAll).toBeDefined(); + expect(concludeAll![0].statusCode).toBe(200); + }); + + test('test concludeAll uses last workflow statusCode when multiple agents', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agent1 = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'FirstAgent' } + }); + const agent2 = makeSpan({ + spanId: 'agent-002', + parentId: 'trace-001', + error: { message: 
'Second agent failed' }, + spanData: { type: 'agent', name: 'SecondAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agent1); + await processor.onSpanEnd(agent1); + await processor.onSpanStart(agent2); + await processor.onSpanEnd(agent2); + await processor.onTraceEnd(trace); + + // concludeAll should carry the last agent's statusCode (500 from agent2) + const concludeCalls = mockLogger.conclude.mock.calls as [ + Record + ][]; + const concludeAll = concludeCalls.find((c) => c[0].concludeAll === true); + expect(concludeAll).toBeDefined(); + expect(concludeAll![0].statusCode).toBe(500); + }); + + test('test concludeAll has no statusCode when trace has only LLM spans', async () => { + // LLM/tool spans do not update _lastStatusCode — only workflow/agent concludes do. + // When there are no workflow/agent spans, concludeAll statusCode should be undefined. + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'trace-001', + spanData: { type: 'generation', model: 'gpt-4o' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onTraceEnd(trace); + + const concludeCalls = mockLogger.conclude.mock.calls as [ + Record + ][]; + const concludeAll = concludeCalls.find((c) => c[0].concludeAll === true); + expect(concludeAll).toBeDefined(); + expect(concludeAll![0].statusCode).toBeUndefined(); + }); +}); From 0555828185f74ea8ce3698ba5cc9dc127a808c27 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 19 Mar 2026 21:20:35 -0300 Subject: [PATCH 19/21] feat(lastOutput): Adjusted rule to recover value for _lastOutput. 
--- src/handlers/openai-agents/index.ts | 16 ++++++-- .../openai-agents/integration.test.ts | 37 ++++++++++++++++++- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index e597d37c..cfa2e2a2 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -336,9 +336,19 @@ export class GalileoTracingProcessor implements TracingProcessor { }; } - // Track last output for trace-level output - if (node.spanParams.output !== undefined) { - this._lastOutput = node.spanParams.output; + if (node.nodeType === 'workflow' || node.nodeType === 'agent') { + let tempOutput: unknown = node.spanParams.output; + if (tempOutput === undefined && node.children.length > 0) { + const lastChildId = node.children[node.children.length - 1]; + const lastChild = this._nodes.get(lastChildId); + if (lastChild?.spanParams.output !== undefined) { + tempOutput = lastChild.spanParams.output; + } + } + if (node.spanParams.error) { + tempOutput = JSON.stringify(node.spanParams.error); + } + this._lastOutput = tempOutput !== undefined ? 
tempOutput : null; } // Track first input for trace-level input (capture from first meaningful span) diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts index 8e319e9a..a410906a 100644 --- a/tests/handlers/openai-agents/integration.test.ts +++ b/tests/handlers/openai-agents/integration.test.ts @@ -507,7 +507,7 @@ describe('Multi-agent integration flows', () => { }); describe('Output tracking integration', () => { - test('test last output preserved across multiple spans', async () => { + test('test last output only set by workflow/agent spans, not llm spans', async () => { const mockLogger = createMockLogger(); const processor = new GalileoTracingProcessor(mockLogger as never, false); const trace = makeTrace(); @@ -542,6 +542,41 @@ describe('Output tracking integration', () => { await processor.onSpanEnd(llm2); await processor.onTraceEnd(trace); + // _lastOutput is only updated by workflow/agent spans (parity with Python). + // Bare LLM spans do not set _lastOutput, so trace output falls back to undefined. 
+ const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.output).toBeUndefined(); + }); + + test('test last output set by workflow span conclude', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'MyAgent' } + }); + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'Final output' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // _lastOutput is set from the workflow/agent conclude output (last child's output). const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; expect(startTraceCall.output).toBe('Final output'); }); From 02d0965abefd27058f28d7bb48831744beee6a71 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 19 Mar 2026 21:44:18 -0300 Subject: [PATCH 20/21] chore(organization): Refactored code adding new functions for reusing and small adjustments. --- src/handlers/openai-agents/index.ts | 260 +++++++++++++--------------- 1 file changed, 118 insertions(+), 142 deletions(-) diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts index cfa2e2a2..818c6c4e 100644 --- a/src/handlers/openai-agents/index.ts +++ b/src/handlers/openai-agents/index.ts @@ -60,6 +60,31 @@ export interface TracingProcessor { forceFlush(): Promise; } +/** + * Returns true when a span input value is non-empty and not a JSON-serialized null. 
+ */ +function isMeaningfulInput(value: unknown): boolean { + if (value === null || value === undefined) return false; + const str = String(value).trim(); + return str.length > 0 && str !== 'null'; +} + +/** + * Merges error fields into span metadata and returns the combined record. + */ +function buildErrorMetadata( + error: { message: string; data?: Record; type?: string }, + existing: Record +): Record { + const errorMessage = error.message || 'Unknown error'; + return { + ...existing, + error_message: errorMessage, + error_type: error.type ?? 'SpanError', + error_details: error.data ? JSON.stringify(error.data) : errorMessage + }; +} + /** * Maps an OpenAI agent type string to a Galileo AgentType enum value. * Returns undefined when no agentType is present so addAgentSpan() can use its default. @@ -126,25 +151,6 @@ export class GalileoTracingProcessor implements TracingProcessor { } } - /** - * Checks if a value is a meaningful, non-empty input string. - * Filters out null, undefined, empty strings, and JSON 'null'. - */ - private isMeaningfulInput(value: unknown): boolean { - if (value === null || value === undefined) { - return false; - } - const str = String(value).trim(); - if (str.length === 0) { - return false; - } - // Filter out JSON-serialized null (from earlier spans) - if (str === 'null') { - return false; - } - return true; - } - /** * Called when a trace starts. Creates a root agent node. * @param trace - The trace that started. 
@@ -277,78 +283,19 @@ export class GalileoTracingProcessor implements TracingProcessor { : 0; node.spanParams.durationNs = durationNs; - // Merge final data for response spans (embedded tool calls + response object) - const spanData = span.spanData; - if (spanData.type === 'response') { - const finalData = extractLlmData(spanData); - const responseObj = finalData._responseObject as - | Record - | undefined; - // Merge updated data first (output/tools may not have been available at span start) - const { _responseObject: _removed, ...rest } = finalData; - void _removed; - node.spanParams = { ...node.spanParams, ...rest }; - // Append embedded tool calls (model-invoked tools) to tools[] — mirrors Python handler - if (responseObj) { - const embeddedTools = extractEmbeddedToolCalls(responseObj); - if (embeddedTools.length > 0) { - const existingTools = - (node.spanParams.tools as unknown[] | undefined) ?? []; - node.spanParams.tools = [...existingTools, ...embeddedTools]; - } - } - } else if (spanData.type === 'generation') { - // Refresh LLM data at end (usage may be populated now) - const finalData = extractLlmData(spanData); - node.spanParams = { ...node.spanParams, ...finalData }; - } else if (spanData.type === 'handoff') { - // to_agent is set on the span AFTER span.start() fires (inside withHandoffSpan's fn), - // so we must re-extract at span end to capture the populated to_agent value. - // Also re-compute the name so it reflects the final to_agent. - const refreshed = extractWorkflowData(spanData); - const refreshedName = mapSpanName(spanData, 'workflow'); - node.spanParams = { - ...node.spanParams, - ...refreshed, - name: refreshedName - }; - } else if (spanData.__galileoCustom === true) { - // Re-extract at span end so mutations to galileoSpan made inside the callback - // (e.g. setting output after the work is done) are captured in node.spanParams. 
- const refreshed = extractGalileoCustomData(spanData); - node.spanParams = { ...node.spanParams, ...refreshed.params }; - } + this._refreshSpanData(node, span.spanData); // Handle errors if (span.error) { - const errorMessage = span.error.message || 'Unknown error'; const existingMeta = (node.spanParams.metadata as Record | undefined) ?? {}; node.spanParams.statusCode = 500; node.spanParams.error = span.error; - node.spanParams.metadata = { - ...existingMeta, - error_message: errorMessage, - error_type: span.error.type ?? 'SpanError', - error_details: span.error.data - ? JSON.stringify(span.error.data) - : errorMessage - }; + node.spanParams.metadata = buildErrorMetadata(span.error, existingMeta); } if (node.nodeType === 'workflow' || node.nodeType === 'agent') { - let tempOutput: unknown = node.spanParams.output; - if (tempOutput === undefined && node.children.length > 0) { - const lastChildId = node.children[node.children.length - 1]; - const lastChild = this._nodes.get(lastChildId); - if (lastChild?.spanParams.output !== undefined) { - tempOutput = lastChild.spanParams.output; - } - } - if (node.spanParams.error) { - tempOutput = JSON.stringify(node.spanParams.error); - } - this._lastOutput = tempOutput !== undefined ? 
tempOutput : null; + this._lastOutput = this._computeWorkflowOutput(node); } // Track first input for trace-level input (capture from first meaningful span) @@ -356,7 +303,7 @@ export class GalileoTracingProcessor implements TracingProcessor { if ( this._firstInput === null && (node.nodeType === 'llm' || node.nodeType === 'tool') && - this.isMeaningfulInput(node.spanParams.input) + isMeaningfulInput(node.spanParams.input) ) { this._firstInput = node.spanParams.input; } @@ -378,6 +325,63 @@ export class GalileoTracingProcessor implements TracingProcessor { await this._galileoLogger.flush(); } + /** + * Re-extracts span data at span-end time to capture fields that are populated + * after span-start (usage counters, response objects, to_agent for handoffs, + * and mutations made inside custom-span callbacks). + */ + private _refreshSpanData(node: Node, spanData: AgentSpan['spanData']): void { + if (spanData.type === 'response') { + const finalData = extractLlmData(spanData); + const responseObj = finalData._responseObject as + | Record + | undefined; + const { _responseObject: _removed, ...rest } = finalData; + void _removed; + node.spanParams = { ...node.spanParams, ...rest }; + if (responseObj) { + const embeddedTools = extractEmbeddedToolCalls(responseObj); + if (embeddedTools.length > 0) { + const existingTools = + (node.spanParams.tools as unknown[] | undefined) ?? []; + node.spanParams.tools = [...existingTools, ...embeddedTools]; + } + } + } else if (spanData.type === 'generation') { + node.spanParams = { ...node.spanParams, ...extractLlmData(spanData) }; + } else if (spanData.type === 'handoff') { + // to_agent is populated inside withHandoffSpan's callback, after onSpanStart fires. 
+ node.spanParams = { + ...node.spanParams, + ...extractWorkflowData(spanData), + name: mapSpanName(spanData, 'workflow') + }; + } else if (spanData.__galileoCustom === true) { + const refreshed = extractGalileoCustomData(spanData); + node.spanParams = { ...node.spanParams, ...refreshed.params }; + } + } + + /** + * Computes the effective output for a workflow or agent node. + * Prefers the node's own output, falls back to the last child's output, + * and overrides with the serialized error when one is present. + */ + private _computeWorkflowOutput(node: Node): unknown { + let result: unknown = node.spanParams.output; + if (result === undefined && node.children.length > 0) { + const lastChildId = node.children[node.children.length - 1]; + const lastChild = this._nodes.get(lastChildId); + if (lastChild?.spanParams.output !== undefined) { + result = lastChild.spanParams.output; + } + } + if (node.spanParams.error) { + result = JSON.stringify(node.spanParams.error); + } + return result !== undefined ? result : null; + } + /** * Finds the root node for the trace and recursively logs the span tree. * @param trace - The trace to commit. @@ -389,11 +393,11 @@ export class GalileoTracingProcessor implements TracingProcessor { } /** - * Recursively emits nodes to GalileoLogger in correct parent→child order. - * @param node - The node to log. - * @param firstNode - Whether this is the root trace node. + * Emits a single node to GalileoLogger (startTrace, addLlmSpan, addToolSpan, or addWorkflowSpan). + * @param node - The node to emit. + * @param firstNode - True when this is the root trace node. */ - private _logNodeTree(node: Node, firstNode = false): void { + private _logNode(node: Node, firstNode: boolean): void { const params = node.spanParams; const name = (params.name as string | undefined) ?? 'Agent Run'; const durationNs = (params.durationNs as number | undefined) ?? 
0; @@ -411,7 +415,6 @@ export class GalileoTracingProcessor implements TracingProcessor { : undefined; if (firstNode) { - // Root node → startTrace const traceInput = this._firstInput !== null ? String(this._firstInput) : input; const traceOutput = @@ -425,37 +428,25 @@ export class GalileoTracingProcessor implements TracingProcessor { metadata }); } else if (node.nodeType === 'llm') { - const numInputTokens = - (params.numInputTokens as number | undefined) ?? undefined; - const numOutputTokens = - (params.numOutputTokens as number | undefined) ?? undefined; - const totalTokens = - (params.totalTokens as number | undefined) ?? undefined; - const numReasoningTokens = - (params.numReasoningTokens as number | undefined) ?? undefined; - const numCachedInputTokens = - (params.numCachedInputTokens as number | undefined) ?? undefined; - const temperature = - (params.temperature as number | undefined) ?? undefined; - const model = (params.model as string | undefined) ?? 'unknown'; - const tools = - (params.tools as Record[] | undefined) ?? undefined; - this._galileoLogger.addLlmSpan({ input, output: output ?? '', name, - model, + model: (params.model as string | undefined) ?? 'unknown', durationNs, - numInputTokens, - numOutputTokens, - totalTokens, - numReasoningTokens, - numCachedInputTokens, - temperature, + numInputTokens: + (params.numInputTokens as number | undefined) ?? undefined, + numOutputTokens: + (params.numOutputTokens as number | undefined) ?? undefined, + totalTokens: (params.totalTokens as number | undefined) ?? undefined, + numReasoningTokens: + (params.numReasoningTokens as number | undefined) ?? undefined, + numCachedInputTokens: + (params.numCachedInputTokens as number | undefined) ?? undefined, + temperature: (params.temperature as number | undefined) ?? undefined, statusCode, metadata, - tools: tools as JsonObject[] | undefined, + tools: (params.tools as JsonObject[] | undefined) ?? 
undefined, createdAt: startedAt }); } else if (node.nodeType === 'tool') { @@ -469,19 +460,8 @@ export class GalileoTracingProcessor implements TracingProcessor { tags, createdAt: startedAt }); - } else if (node.nodeType === 'agent') { - this._galileoLogger.addWorkflowSpan({ - input: input || 'Workflow Step', - output, - name, - durationNs, - metadata, - tags, - createdAt: startedAt, - statusCode - }); } else { - // workflow and other parent nodes + // agent, workflow, and any other parent node types this._galileoLogger.addWorkflowSpan({ input: input || 'Workflow Step', output, @@ -493,8 +473,17 @@ export class GalileoTracingProcessor implements TracingProcessor { statusCode }); } + } + + /** + * Recursively emits nodes to GalileoLogger in parent→child order, + * then concludes workflow/agent spans after all their children are logged. + * @param node - The node to log. + * @param firstNode - True when this is the root trace node. + */ + private _logNodeTree(node: Node, firstNode = false): void { + this._logNode(node, firstNode); - // Recursively log children for (const childId of node.children) { const childNode = this._nodes.get(childId); if (childNode) { @@ -502,29 +491,16 @@ export class GalileoTracingProcessor implements TracingProcessor { } } - // Conclude workflow/agent spans after their children. - // When the span itself has no output (always the case for agent spans, since - // AgentSpanData carries no output field), fall back to the last child's output. 
if ( !firstNode && (node.nodeType === 'workflow' || node.nodeType === 'agent') ) { - let concludeOutput = output; - if (concludeOutput === undefined && node.children.length > 0) { - const lastChildId = node.children[node.children.length - 1]; - const lastChild = this._nodes.get(lastChildId); - if (lastChild?.spanParams.output !== undefined) { - concludeOutput = String(lastChild.spanParams.output); - } - } - const nodeError = params.error as - | { message: string; data?: Record; type?: string } - | undefined; - if (nodeError) { - concludeOutput = JSON.stringify(nodeError); - } + const params = node.spanParams; + const durationNs = (params.durationNs as number | undefined) ?? 0; + const statusCode = (params.statusCode as number | undefined) ?? 200; + const concludeOutput = this._computeWorkflowOutput(node); this._galileoLogger.conclude({ - output: concludeOutput, + output: concludeOutput !== null ? String(concludeOutput) : undefined, durationNs, statusCode }); From f8111bca0334fb1e36658cfe689c09f7cee5ff24 Mon Sep 17 00:00:00 2001 From: richter Date: Thu, 19 Mar 2026 21:58:38 -0300 Subject: [PATCH 21/21] chore(parseUsage): Refactored repeated use of parseUsage. 
--- src/handlers/openai-agents/data-extraction.ts | 70 +------------------ src/handlers/openai/usage.ts | 12 +++- .../openai-agents/data-extraction.test.ts | 6 +- 3 files changed, 16 insertions(+), 72 deletions(-) diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts index 352173bd..218121e0 100644 --- a/src/handlers/openai-agents/data-extraction.ts +++ b/src/handlers/openai-agents/data-extraction.ts @@ -1,5 +1,7 @@ import type { GalileoSpanLike } from './custom-span'; import type { NodeType } from './node'; +import { parseUsage } from '../openai/usage'; +export { parseUsage, type ParsedUsage } from '../openai/usage'; const MODEL_PARAM_KEYS = [ 'temperature', @@ -22,74 +24,6 @@ const RESPONSE_EXCLUDE = new Set([ 'status' ]); -/** - * Normalised token count structure returned by parseUsage. - */ -export interface ParsedUsage { - inputTokens: number; - outputTokens: number; - totalTokens: number | null; - reasoningTokens: number; - cachedTokens: number; -} - -/** - * Normalises token counts from various OpenAI usage shapes. - * @param usageData - The raw usage data from a span. - * @returns Normalised token counts. - */ -export function parseUsage( - usageData: Record | null | undefined -): ParsedUsage { - if (!usageData) { - return { - inputTokens: 0, - outputTokens: 0, - totalTokens: null, - reasoningTokens: 0, - cachedTokens: 0 - }; - } - - // Support both input_tokens/output_tokens (Responses/Agents SDK) - // and prompt_tokens/completion_tokens (Chat Completions legacy) - const inputTokens = - (usageData.input_tokens as number | undefined) ?? - (usageData.prompt_tokens as number | undefined) ?? - 0; - const outputTokens = - (usageData.output_tokens as number | undefined) ?? - (usageData.completion_tokens as number | undefined) ?? - 0; - const totalTokens = (usageData.total_tokens as number | undefined) ?? 
null; - - // Reasoning tokens live in output_tokens_details (Responses API) or details (legacy Agents SDK shape) - const outputDetails = - (usageData.output_tokens_details as Record | undefined) ?? - (usageData.details as Record | undefined) ?? - {}; - // Cached tokens live in input_tokens_details (Responses API) or the same details object - const inputDetails = - (usageData.input_tokens_details as Record | undefined) ?? - outputDetails; - const reasoningTokens = - (outputDetails.reasoning_tokens as number | undefined) ?? - (usageData.reasoning_tokens as number | undefined) ?? - 0; - const cachedTokens = - (inputDetails.cached_tokens as number | undefined) ?? - (usageData.cached_tokens as number | undefined) ?? - 0; - - return { - inputTokens, - outputTokens, - totalTokens, - reasoningTokens, - cachedTokens - }; -} - /** * Serialize a value to a string for LLM span input/output fields. * Strings are returned as-is; null/undefined produce ''; everything else is JSON-serialized. diff --git a/src/handlers/openai/usage.ts b/src/handlers/openai/usage.ts index 4a0f1c01..8e0248e3 100644 --- a/src/handlers/openai/usage.ts +++ b/src/handlers/openai/usage.ts @@ -59,8 +59,14 @@ export function parseUsage(usageData: unknown): ParsedUsage { // Detailed token breakdowns (o1/o3/o4) // Responses API: input_tokens_details / output_tokens_details // Chat Completions: prompt_tokens_details / completion_tokens_details + // Agents SDK legacy: a single `details` object for both input and output + const legacyDetails = + typeof usage.details === 'object' && usage.details !== null + ? (usage.details as Record) + : undefined; + const inputDetails = - usage.input_tokens_details ?? usage.prompt_tokens_details; + usage.input_tokens_details ?? usage.prompt_tokens_details ?? 
legacyDetails; if (inputDetails != null && typeof inputDetails === 'object') { const details = inputDetails as Record; const cached = details.cached_tokens; @@ -70,7 +76,9 @@ export function parseUsage(usageData: unknown): ParsedUsage { } const outputDetails = - usage.output_tokens_details ?? usage.completion_tokens_details; + usage.output_tokens_details ?? + usage.completion_tokens_details ?? + legacyDetails; if (outputDetails != null && typeof outputDetails === 'object') { const details = outputDetails as Record; const reasoning = details.reasoning_tokens; diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts index 195ef347..3de9a542 100644 --- a/tests/handlers/openai-agents/data-extraction.test.ts +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -14,7 +14,8 @@ describe('parseUsage', () => { outputTokens: 0, totalTokens: null, reasoningTokens: 0, - cachedTokens: 0 + cachedTokens: 0, + rejectedPredictionTokens: 0 }); }); @@ -25,7 +26,8 @@ describe('parseUsage', () => { outputTokens: 0, totalTokens: null, reasoningTokens: 0, - cachedTokens: 0 + cachedTokens: 0, + rejectedPredictionTokens: 0 }); });