/**
 * Example: tracing OpenAI Agents SDK runs with Galileo.
 *
 * Registers the Galileo trace processor, then performs two runs against a
 * triage agent: one it answers itself and one it hands off to a weather agent
 * that calls a local `get_weather` tool.
 */
import dotenv from 'dotenv';
import { z } from 'zod';
// `tool` must be imported explicitly: it is used below to declare get_weather.
import { Agent, run, tool } from '@openai/agents';
import {
  init,
  flush,
  registerGalileoTraceProcessor
} from '../../dist/index.js';

dotenv.config();

await init({
  projectName: 'openai-agents-example'
});

await registerGalileoTraceProcessor();

// Declared before the triage agent so it can be listed in `handoffs` directly
// instead of being pushed into the array after construction.
const weatherAgent = new Agent({
  name: 'Weather Agent',
  instructions:
    'You provide weather information. ' +
    'Given a city name, respond with a short, friendly weather summary. ' +
    'Make up plausible weather data for demonstration purposes.',
  tools: [
    tool({
      name: 'get_weather',
      description: 'Get the current weather for a city',
      parameters: z.object({
        city: z.string().describe('The city to get weather for')
      }),
      execute: async (params) => {
        const { city } = params;
        // Fixed demo values for a few cities; anything else gets a random temperature.
        const temps = { london: 14, tokyo: 22, 'new york': 18, paris: 16 };
        const temp =
          temps[city.toLowerCase()] ?? Math.floor(Math.random() * 30);
        return JSON.stringify({
          city,
          temperature_c: temp,
          condition: temp > 20 ? 'Sunny' : 'Partly cloudy'
        });
      }
    })
  ]
});

const triageAgent = new Agent({
  name: 'Triage Agent',
  instructions:
    'You determine which agent should handle the user request. ' +
    'If the question is about weather, hand off to the Weather Agent. ' +
    'Otherwise, answer the question yourself.',
  handoffs: [weatherAgent]
});

/**
 * Runs the two demo conversations and flushes the collected traces.
 */
async function main() {
  console.log('=== OpenAI Agents SDK + Galileo Tracing ===\n');

  try {
    console.log('--- Simple single-agent run ---');
    const simpleResult = await run(triageAgent, 'What is 2 + 2?');
    console.log('Response:', simpleResult.finalOutput, '\n');

    console.log('--- Handoff + tool call run ---');
    const weatherResult = await run(
      triageAgent,
      "What's the weather in Tokyo?"
    );
    console.log('Response:', weatherResult.finalOutput, '\n');
  } finally {
    // Flush even when a run throws so the trace of the failed run is not lost.
    await flush();
  }

  console.log('Done — traces flushed to Galileo.');
}

main().catch((err) => {
  console.error('Unhandled error:', err);
  process.exit(1);
});
"node": ">=20.18.1" - } - }, "node_modules/undici-types": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", diff --git a/package.json b/package.json index 50b4f61a..eb2281c5 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,18 @@ "@langchain/openai": "^0.3.11", "tiktoken": "^1.0.13" }, + "peerDependencies": { + "@openai/agents": ">=0.4.0", + "openai": ">=4.0.0" + }, + "peerDependenciesMeta": { + "@openai/agents": { + "optional": true + }, + "openai": { + "optional": true + } + }, "devDependencies": { "@hey-api/openapi-ts": "^0.88.0", "@types/jest": "^29.5.14", diff --git a/src/handlers/langchain.ts b/src/handlers/langchain.ts index 3dd22e91..e8ba1e9f 100644 --- a/src/handlers/langchain.ts +++ b/src/handlers/langchain.ts @@ -12,7 +12,7 @@ import { AgentFinish } from '@langchain/core/agents'; import { Document, DocumentInterface } from '@langchain/core/documents'; import { GalileoSingleton } from '../singleton'; import { GalileoLogger } from '../utils/galileo-logger'; -import { toStringValue, convertToStringDict } from '../utils/serialization'; +import { toStringValue, toStringRecord } from '../utils/serialization'; import { getSdkLogger } from 'galileo-generated'; import { Serialized } from '@langchain/core/load/serializable.js'; @@ -151,8 +151,8 @@ export class GalileoCallback let metadata: Record | undefined = undefined; if (node.spanParams.metadata) { try { - metadata = convertToStringDict( - node.spanParams.metadata as Record + metadata = toStringRecord( + node.spanParams.metadata as Record ); } catch (e) { sdkLogger.warn('Unable to convert metadata to a string dictionary', e); @@ -485,7 +485,7 @@ export class GalileoCallback | undefined; // Serialize messages safely - let serializedMessages; + let serializedMessages: unknown; try { const flattenedMessages = messages.flat().map((msg) => ({ content: msg.content, @@ -516,7 +516,7 @@ export class GalileoCallback public async handleLLMEnd(output: 
LLMResult, runId: string): Promise { const tokenUsage = output.llmOutput?.tokenUsage || {}; - let serializedOutput; + let serializedOutput: unknown; try { const flattenedOutput = output.generations.flat().map((g) => ({ text: g.text, @@ -605,7 +605,7 @@ export class GalileoCallback documents: DocumentInterface>[], runId: string ): Promise { - let serializedResponse; + let serializedResponse: unknown; try { serializedResponse = documents.map((doc) => ({ pageContent: doc.pageContent, diff --git a/src/handlers/openai-agents/custom-span.ts b/src/handlers/openai-agents/custom-span.ts new file mode 100644 index 00000000..41a035bf --- /dev/null +++ b/src/handlers/openai-agents/custom-span.ts @@ -0,0 +1,74 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ + +/** + * Duck-typed interface describing the expected shape of a Galileo span object + * that can be injected into the OpenAI Agents tracing flow. + * + * Mirrors the fields extracted by galileo-python's GalileoCustomSpan handler: + * input, output, metadata (user_metadata), tags, status_code, and type. + */ +export interface GalileoSpanLike { + type?: string; + input?: unknown; + output?: unknown; + name?: string; + metadata?: Record; + tags?: string[]; + statusCode?: number; +} + +/** + * A lightweight subtype of CustomSpanData that carries a reference to a + * pre-configured GalileoSpan so it can be injected into the agent tracing flow. + * + * The __galileoCustom flag is used by mapSpanType() to distinguish this from + * ordinary CustomSpanData objects. + */ +export interface GalileoCustomSpanData { + /** Always 'custom' to satisfy the SDK's SpanData union discriminant. */ + type: 'custom'; + /** (Optional) Display name for the span. */ + name?: string; + /** Arbitrary data payload, must contain a 'galileoSpan' key with the GalileoSpan reference. */ + data: Record & { galileoSpan: GalileoSpanLike }; + /** Sentinel flag used internally by mapSpanType() to identify this type. 
/**
 * Type guard for GalileoCustomSpanData.
 *
 * A value qualifies when it is a non-null object whose `__galileoCustom`
 * property is exactly `true`; no other field is inspected.
 * @param spanData - The span data to check.
 * @returns True if the span data carries the Galileo sentinel flag.
 */
export function isGalileoCustomSpanData(
  spanData: unknown
): spanData is GalileoCustomSpanData {
  if (typeof spanData !== 'object' || spanData === null) {
    return false;
  }
  const { __galileoCustom: marker } = spanData as {
    __galileoCustom?: unknown;
  };
  return marker === true;
}
/**
 * Serialize a value to a string for LLM span input/output fields.
 *
 * - `null` / `undefined` produce `''`.
 * - Strings are returned as-is.
 * - Everything else is JSON-serialized. When JSON serialization is not
 *   possible (it throws, e.g. for cyclic structures or BigInt, or yields
 *   `undefined`, e.g. for functions and symbols) the value is converted with
 *   `String()` instead, so the declared `string` return type always holds.
 * @param value - The raw input or output value taken from the span data.
 * @returns The string form of the value.
 */
function llmSerializeToString(value: unknown): string {
  if (value === undefined || value === null) return '';
  if (typeof value === 'string') return value;
  try {
    // JSON.stringify is typed as returning string but returns undefined for
    // values that have no JSON representation.
    const json: string | undefined = JSON.stringify(value);
    return json ?? String(value);
  } catch {
    return String(value);
  }
}
+ 'unknown'; + const usage = parseUsage( + (response?.usage as Record | undefined) ?? null + ); + const temperature = + (response?.temperature as number | undefined) ?? undefined; + const tools = response?.tools; + + const modelParameters: Record = response + ? Object.fromEntries( + MODEL_PARAM_KEYS.filter((k) => response[k] !== undefined).map((k) => [ + k, + response[k] + ]) + ) + : {}; + + const responseMetadata: Record = response + ? Object.fromEntries( + Object.entries(response).filter(([k]) => !RESPONSE_EXCLUDE.has(k)) + ) + : {}; + + const responseError = response?.error as + | { status_code?: number; message?: string; [k: string]: unknown } + | undefined + | null; + const responseStatusCode = responseError + ? ((responseError.status_code as number | undefined) ?? 500) + : undefined; + + return { + input: llmSerializeToString(input), + output: llmSerializeToString(response?.output), + model, + temperature, + tools: tools !== undefined ? tools : undefined, + modelParameters, + numInputTokens: usage.inputTokens, + numOutputTokens: usage.outputTokens, + totalTokens: usage.totalTokens ?? undefined, + numReasoningTokens: usage.reasoningTokens, + numCachedInputTokens: usage.cachedTokens, + ...(responseStatusCode !== undefined + ? { statusCode: responseStatusCode } + : {}), + metadata: { + gen_ai_system: 'openai', + ...(Object.keys(responseMetadata).length > 0 + ? { response_metadata: responseMetadata } + : {}), + ...(response?.instructions !== undefined + ? { instructions: response.instructions } + : {}), + ...(responseError ? { error_details: responseError } : {}) + }, + _responseObject: response + }; + } + + return {}; +} + +/** + * Extracts tool-relevant fields from a FunctionSpanData or GuardrailSpanData. + * @param spanData - The span data object (must have type 'function' or 'guardrail'). + * @returns A flat record of tool span parameters. 
/**
 * Builds the tool-span parameters (input, output, metadata) for span data that
 * is mapped to a Galileo tool span.
 *
 * - `function`: input/output are passed through when they are strings and
 *   JSON-encoded otherwise; a missing input becomes `''`, a missing output
 *   stays `undefined`; `mcp_data`, when present, is JSON-encoded into metadata.
 * - `guardrail`: the output is the JSON form of `{ triggered }`, and metadata
 *   carries `triggered` plus `status: 'warning'` when the guardrail fired.
 * - any other type: an empty input, no output and empty metadata.
 * @param spanData - The span data object.
 * @returns A flat record of tool span parameters.
 */
export function extractToolData(
  spanData: Record<string, unknown>
): Record<string, unknown> {
  // Strings pass through untouched; any other defined value is JSON-encoded.
  const encode = (raw: unknown): string =>
    typeof raw === 'string' ? raw : JSON.stringify(raw);

  switch (spanData.type) {
    case 'function': {
      const { input: rawInput, output: rawOutput, mcp_data: mcpData } = spanData;
      return {
        input: rawInput === undefined ? '' : encode(rawInput),
        output: rawOutput === undefined ? undefined : encode(rawOutput),
        metadata:
          mcpData === undefined ? {} : { mcp_data: JSON.stringify(mcpData) }
      };
    }
    case 'guardrail': {
      const triggered = Boolean(spanData.triggered);
      const metadata: Record<string, unknown> = { triggered };
      if (triggered) {
        metadata.status = 'warning';
      }
      return { input: '', output: JSON.stringify({ triggered }), metadata };
    }
    default:
      // Transcription / Speech / speech_group / mcp_tools — mapped to tool
      // spans without any deep extraction.
      return { input: '', output: undefined, metadata: {} };
  }
}
const VALID_GALILEO_NODE_TYPES: readonly string[] = [
  'tool',
  'workflow',
  'agent'
];

/**
 * Extracts span parameters from a GalileoCustomSpanData, delegating to the
 * embedded Galileo span for input, output, metadata, tags, statusCode, and type.
 *
 * The embedded span is looked up at `data.galileoSpan`, which is where
 * createGalileoCustomSpanData() places it. A top-level `_galileoSpan` key is
 * also honoured (and wins when both are present) so callers that relied on the
 * previous lookup keep working. When no embedded span object is found the span
 * is treated as a plain custom span and handled by extractWorkflowData().
 *
 * @param spanData - The span data object (must have __galileoCustom: true).
 * @returns The effective node type and extracted parameters. The node type is
 *   the embedded span's `type` when it is one of 'tool', 'workflow' or 'agent',
 *   and 'workflow' otherwise.
 */
export function extractGalileoCustomData(spanData: Record<string, unknown>): {
  nodeType: NodeType;
  params: Record<string, unknown>;
} {
  const payload = spanData.data;
  const nestedSpan =
    typeof payload === 'object' && payload !== null
      ? (payload as Record<string, unknown>).galileoSpan
      : undefined;
  const galileoSpan = (spanData._galileoSpan ?? nestedSpan) as
    | GalileoSpanLike
    | undefined;

  if (!galileoSpan || typeof galileoSpan !== 'object') {
    return { nodeType: 'workflow', params: extractWorkflowData(spanData) };
  }

  // Strings pass through; other defined values are JSON-encoded.
  const input =
    galileoSpan.input !== undefined
      ? typeof galileoSpan.input === 'string'
        ? galileoSpan.input
        : JSON.stringify(galileoSpan.input)
      : '';
  const output =
    galileoSpan.output !== undefined
      ? typeof galileoSpan.output === 'string'
        ? galileoSpan.output
        : JSON.stringify(galileoSpan.output)
      : undefined;
  const metadata = galileoSpan.metadata ?? {};
  const tags = galileoSpan.tags;
  const statusCode = galileoSpan.statusCode;

  const nodeType: NodeType =
    typeof galileoSpan.type === 'string' &&
    VALID_GALILEO_NODE_TYPES.includes(galileoSpan.type)
      ? (galileoSpan.type as NodeType)
      : 'workflow';

  return {
    nodeType,
    params: {
      input,
      output,
      metadata,
      // tags / statusCode are only emitted when the embedded span defines them.
      ...(tags !== undefined ? { tags } : {}),
      ...(statusCode !== undefined ? { statusCode } : {})
    }
  };
}
/**
 * Returns true for values that carry no information (`null` or `undefined`).
 * Response items may set a field to null rather than omit it; such fields must
 * not be stringified to the literal text "null".
 */
function isAbsent(value: unknown): value is null | undefined {
  return value === undefined || value === null;
}

/**
 * Extracts the input field from an embedded tool call item.
 *
 * The field that is read depends on the tool call type: `code` for
 * code_interpreter_call, `queries` for file_search_call, `action.query` for
 * web_search_call, `action` for computer_call and `input` for custom_tool_call.
 * @param item - The raw output item from the response.
 * @param type - The tool call type string.
 * @returns The extracted input as a string, or null if the field is missing,
 *   null, or the type is not recognised.
 */
export function extractToolInput(
  item: Record<string, unknown>,
  type: string
): string | null {
  switch (type) {
    case 'code_interpreter_call': {
      const code = item.code;
      return isAbsent(code) ? null : String(code);
    }
    case 'file_search_call': {
      const queries = item.queries;
      if (isAbsent(queries)) return null;
      return Array.isArray(queries) ? JSON.stringify(queries) : String(queries);
    }
    case 'web_search_call': {
      const action = item.action as Record<string, unknown> | null | undefined;
      const query = action?.query;
      return isAbsent(query) ? null : String(query);
    }
    case 'computer_call': {
      const action = item.action;
      return isAbsent(action) ? null : JSON.stringify(action);
    }
    case 'custom_tool_call': {
      const input = item.input;
      if (isAbsent(input)) return null;
      return typeof input === 'string' ? input : JSON.stringify(input);
    }
    default:
      return null;
  }
}

/**
 * Extracts the output field from an embedded tool call item.
 *
 * code_interpreter_call joins the `logs` / `url` of every entry in `outputs`
 * with newlines; file_search_call serializes `results`; web_search_call
 * serializes `action`; custom_tool_call returns `output`; computer_call has no
 * output.
 * @param item - The raw output item from the response.
 * @param type - The tool call type string.
 * @returns The extracted output as a string, or null if the field is missing,
 *   null, empty, or the type is not recognised.
 */
export function extractToolOutput(
  item: Record<string, unknown>,
  type: string
): string | null {
  switch (type) {
    case 'code_interpreter_call': {
      const outputs = item.outputs;
      if (!Array.isArray(outputs) || outputs.length === 0) return null;
      const parts: string[] = [];
      for (const entry of outputs as unknown[]) {
        // Skip anything that is not an object so a null entry cannot throw.
        if (typeof entry !== 'object' || entry === null) continue;
        const { logs, url } = entry as Record<string, unknown>;
        if (!isAbsent(logs)) {
          parts.push(String(logs));
        } else if (!isAbsent(url)) {
          parts.push(String(url));
        }
      }
      return parts.length > 0 ? parts.join('\n') : null;
    }
    case 'file_search_call': {
      const results = item.results;
      if (isAbsent(results)) return null;
      return Array.isArray(results) ? JSON.stringify(results) : String(results);
    }
    case 'web_search_call': {
      const action = item.action;
      return isAbsent(action) ? null : JSON.stringify(action);
    }
    case 'computer_call':
      return null;
    case 'custom_tool_call': {
      const output = item.output;
      if (isAbsent(output)) return null;
      return typeof output === 'string' ? output : JSON.stringify(output);
    }
    default:
      return null;
  }
}
'completed'; + + results.push({ + type: 'function', + function: { name: toolName }, + tool_call_id: toolCallId, + tool_call_type: itemType, + tool_call_input: extractToolInput(typedItem, itemType), + tool_call_output: extractToolOutput(typedItem, itemType), + tool_call_status: status + }); + } + + return results; +} diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts new file mode 100644 index 00000000..818c6c4e --- /dev/null +++ b/src/handlers/openai-agents/index.ts @@ -0,0 +1,608 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { GalileoLogger } from '../../utils/galileo-logger'; +import { GalileoSingleton } from '../../singleton'; +import { calculateDurationNs } from '../../utils/utils'; +import { toStringRecord } from '../../utils/serialization'; +import type { JsonObject } from '../../types/base.types'; +import { AgentType } from '../../types/new-api.types'; +import { type Node, createNode } from './node'; +import { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping'; +import { + extractLlmData, + extractToolData, + extractWorkflowData, + extractGalileoCustomData +} from './data-extraction'; +import { extractEmbeddedToolCalls } from './embedded-tools'; +import type { GalileoSpanLike } from './custom-span'; +import { getSdkLogger } from 'galileo-generated'; +const sdkLogger = getSdkLogger(); + +/** + * Minimal interface for an OpenAI Agents SDK Trace object. + */ +export interface AgentTrace { + traceId: string; + name?: string; + metadata?: Record; + startedAt?: string | null; + endedAt?: string | null; +} + +/** + * Minimal interface for an OpenAI Agents SDK Span object. 
/**
 * Returns a copy of the existing span metadata with the error fields added.
 *
 * `error_message` falls back to 'Unknown error' when the message is empty,
 * `error_type` falls back to 'SpanError', and `error_details` holds the
 * JSON-encoded error data when there is any, otherwise the error message.
 * The `existing` record itself is not modified.
 */
function buildErrorMetadata(
  error: { message: string; data?: Record<string, unknown>; type?: string },
  existing: Record<string, unknown>
): Record<string, unknown> {
  const { message, type, data } = error;
  const readableMessage = message || 'Unknown error';

  const merged: Record<string, unknown> = { ...existing };
  merged.error_message = readableMessage;
  merged.error_type = type ?? 'SpanError';
  merged.error_details = data ? JSON.stringify(data) : readableMessage;
  return merged;
}
/**
 * Maps an OpenAI agent type string to a Galileo AgentType enum value.
 *
 * The lookup is case-insensitive. Returns undefined when `agentType` is not a
 * non-empty string so the caller can apply its own default, and
 * AgentType.DEFAULT for any string that is not a known agent type.
 *
 * Currently unused: per the original note, agent spans are kept in parity with
 * galileo-python, and the TypeScript and Python SDKs have to be updated
 * together before this is wired in.
 * @param spanParams - The span parameters that may carry an `agentType` string.
 * @returns The matching AgentType, AgentType.DEFAULT, or undefined.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
function extractAgentType(
  spanParams: Record<string, unknown>
): AgentType | undefined {
  const raw = spanParams.agentType;
  if (typeof raw !== 'string' || !raw) {
    return undefined;
  }

  // A Map instead of an object literal: indexing a plain object with a name
  // such as "constructor" returns an inherited member rather than undefined,
  // which would bypass the AgentType.DEFAULT fallback.
  const typeMap = new Map<string, AgentType>([
    ['classifier', AgentType.CLASSIFIER],
    ['planner', AgentType.PLANNER],
    ['react', AgentType.REACT],
    ['reflection', AgentType.REFLECTION],
    ['router', AgentType.ROUTER],
    ['supervisor', AgentType.SUPERVISOR],
    ['judge', AgentType.JUDGE],
    ['default', AgentType.DEFAULT]
  ]);

  return typeMap.get(raw.toLowerCase()) ?? AgentType.DEFAULT;
}
/**
 * Called when a trace ends. Stamps the root node with its end time and
 * duration, commits the span tree, concludes the logger and optionally
 * flushes it.
 *
 * The processor's per-trace state (nodes, last output, last status code and
 * first input) is reset in a `finally` block, so a failure while committing,
 * concluding or flushing cannot leak one trace's data into the next. The
 * error itself still propagates to the caller.
 * @param trace - The trace that ended.
 */
async onTraceEnd(trace: AgentTrace): Promise<void> {
  try {
    const rootNode = this._nodes.get(trace.traceId);
    if (rootNode) {
      const startedAt = rootNode.spanParams.startedAt as string | undefined;
      // Fall back to "now" when the SDK did not record an end time.
      const endedAt = trace.endedAt || new Date().toISOString();
      rootNode.spanParams.durationNs = startedAt
        ? calculateDurationNs(new Date(startedAt), new Date(endedAt))
        : 0;
      rootNode.spanParams.endedAt = endedAt;
    }

    this._commitTrace(trace);
    this._galileoLogger.conclude({
      concludeAll: true,
      statusCode: this._lastStatusCode ?? undefined
    });

    if (this._flushOnTraceEnd) {
      await this._galileoLogger.flush();
    }
  } finally {
    this._nodes.clear();
    this._lastOutput = null;
    this._lastStatusCode = null;
    this._firstInput = null;
  }
}
/**
 * Called when a span ends. Records the span duration, re-extracts the span
 * data, attaches error information, and updates the trace-level first input
 * and last output trackers. Spans without a registered node are ignored.
 * @param span - The span that ended.
 */
async onSpanEnd(span: AgentSpan): Promise<void> {
  const node = this._nodes.get(span.spanId);
  if (node === undefined) {
    return;
  }

  // Duration runs from the start recorded on the node to the reported end
  // time, or to "now" when the span carries no end time.
  const spanStart = node.spanParams.startedAt as string | undefined;
  const spanEnd = span.endedAt || new Date().toISOString();
  node.spanParams.durationNs = spanStart
    ? calculateDurationNs(new Date(spanStart), new Date(spanEnd))
    : 0;

  // May replace node.spanParams, so it is always read through `node` below.
  this._refreshSpanData(node, span.spanData);

  const { error } = span;
  if (error) {
    const priorMetadata =
      (node.spanParams.metadata as Record<string, unknown> | undefined) ?? {};
    node.spanParams.statusCode = 500;
    node.spanParams.error = error;
    node.spanParams.metadata = buildErrorMetadata(error, priorMetadata);
  }

  const { nodeType } = node;
  if (nodeType === 'workflow' || nodeType === 'agent') {
    this._lastOutput = this._computeWorkflowOutput(node);
    return;
  }

  // Trace-level input comes from the first LLM or tool span that ends with a
  // meaningful input; once captured it is never overwritten.
  const isLeafCall = nodeType === 'llm' || nodeType === 'tool';
  if (
    isLeafCall &&
    this._firstInput === null &&
    isMeaningfulInput(node.spanParams.input)
  ) {
    this._firstInput = node.spanParams.input;
  }
}
/**
 * Shuts down the processor, flushing any pending data.
 * @param timeout - (Optional) Shutdown timeout in milliseconds. Accepted for
 *   interface compatibility; it is not used by this implementation.
 */
async shutdown(timeout?: number): Promise<void> {
  void timeout;
  await this._galileoLogger.flush();
}

/**
 * Forces a flush of any pending data.
 */
async forceFlush(): Promise<void> {
  await this._galileoLogger.flush();
}

/**
 * Re-extracts span data at span-end time to capture fields that are populated
 * after span-start (usage counters, response objects, to_agent for handoffs,
 * and mutations made inside custom-span callbacks).
 *
 * Span types other than response, generation, handoff and Galileo custom spans
 * are left untouched.
 * @param node - The node whose spanParams are updated in place.
 * @param spanData - The span data as it looks when the span ends.
 */
private _refreshSpanData(node: Node, spanData: AgentSpan['spanData']): void {
  if (spanData.type === 'response') {
    const finalData = extractLlmData(spanData);
    const responseObj = finalData._responseObject as
      | Record<string, unknown>
      | undefined;
    // _responseObject is only needed here to derive embedded tool calls; it is
    // stripped so it does not end up in the node's span parameters.
    const { _responseObject: _removed, ...rest } = finalData;
    void _removed;
    node.spanParams = { ...node.spanParams, ...rest };
    if (responseObj) {
      const embeddedTools = extractEmbeddedToolCalls(responseObj);
      if (embeddedTools.length > 0) {
        const existingTools =
          (node.spanParams.tools as unknown[] | undefined) ?? [];
        node.spanParams.tools = [...existingTools, ...embeddedTools];
      }
    }
  } else if (spanData.type === 'generation') {
    node.spanParams = { ...node.spanParams, ...extractLlmData(spanData) };
  } else if (spanData.type === 'handoff') {
    // to_agent is populated inside withHandoffSpan's callback, after onSpanStart
    // fires, so both the data and the display name are recomputed here.
    node.spanParams = {
      ...node.spanParams,
      ...extractWorkflowData(spanData),
      name: mapSpanName(spanData, 'workflow')
    };
  } else if (spanData.__galileoCustom === true) {
    const refreshed = extractGalileoCustomData(spanData);
    node.spanParams = { ...node.spanParams, ...refreshed.params };
  }
}
/**
 * Computes the effective output for a workflow or agent node.
 * Prefers the node's own output, falls back to the last child's output,
 * and overrides with the serialized error when one is present.
 * @param node - The workflow or agent node.
 * @returns The resolved output, or null when none is available.
 */
private _computeWorkflowOutput(node: Node): unknown {
  let result: unknown = node.spanParams.output;
  if (result === undefined && node.children.length > 0) {
    const lastChildId = node.children[node.children.length - 1];
    const lastChild = this._nodes.get(lastChildId);
    if (lastChild?.spanParams.output !== undefined) {
      result = lastChild.spanParams.output;
    }
  }
  if (node.spanParams.error) {
    result = JSON.stringify(node.spanParams.error);
  }
  return result !== undefined ? result : null;
}

/**
 * Finds the root node for the trace and recursively logs the span tree.
 * Does nothing when no node is registered under the trace id.
 * @param trace - The trace to commit.
 */
private _commitTrace(trace: AgentTrace): void {
  const rootNode = this._nodes.get(trace.traceId);
  if (!rootNode) return;
  this._logNodeTree(rootNode, true);
}

/**
 * Emits a single node to GalileoLogger (startTrace, addLlmSpan, addToolSpan, or addWorkflowSpan).
 * @param node - The node to emit.
 * @param firstNode - True when this is the root trace node.
 */
private _logNode(node: Node, firstNode: boolean): void {
  const params = node.spanParams;

  // String(value) turns plain objects into "[object Object]"; serialise non-string
  // values as JSON instead and only fall back to String() when that is impossible.
  const asText = (value: unknown): string => {
    if (typeof value === 'string') return value;
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  };

  const name = (params.name as string | undefined) ?? 'Agent Run';
  const durationNs = (params.durationNs as number | undefined) ?? 0;
  const metadata = toStringRecord(
    (params.metadata as Record<string, unknown> | undefined) ?? {}
  );
  // `?? undefined` normalises a stored null to undefined for the logger.
  const tags = (params.tags as string[] | undefined) ?? undefined;
  const statusCode = (params.statusCode as number | undefined) ?? 200;
  const input = params.input !== undefined ? asText(params.input) : '';
  const output =
    params.output !== undefined ? asText(params.output) : undefined;
  const startedAt =
    params.startedAt !== undefined
      ? new Date(params.startedAt as string)
      : undefined;

  if (firstNode) {
    // The root node becomes the trace; prefer the input/output captured from
    // the spans over the root node's own values.
    const traceInput =
      this._firstInput !== null ? asText(this._firstInput) : input;
    const traceOutput =
      this._lastOutput !== null ? asText(this._lastOutput) : output;
    this._galileoLogger.startTrace({
      input: traceInput || name,
      output: traceOutput,
      name,
      createdAt: startedAt,
      durationNs,
      metadata
    });
  } else if (node.nodeType === 'llm') {
    this._galileoLogger.addLlmSpan({
      input,
      output: output ?? '',
      name,
      model: (params.model as string | undefined) ?? 'unknown',
      durationNs,
      numInputTokens:
        (params.numInputTokens as number | undefined) ?? undefined,
      numOutputTokens:
        (params.numOutputTokens as number | undefined) ?? undefined,
      totalTokens: (params.totalTokens as number | undefined) ?? undefined,
      numReasoningTokens:
        (params.numReasoningTokens as number | undefined) ?? undefined,
      numCachedInputTokens:
        (params.numCachedInputTokens as number | undefined) ?? undefined,
      temperature: (params.temperature as number | undefined) ?? undefined,
      statusCode,
      metadata,
      tools: (params.tools as JsonObject[] | undefined) ?? undefined,
      createdAt: startedAt
    });
  } else if (node.nodeType === 'tool') {
    this._galileoLogger.addToolSpan({
      input,
      output,
      name,
      durationNs,
      statusCode,
      metadata,
      tags,
      createdAt: startedAt
    });
  } else {
    // agent, workflow, and any other parent node types
    this._galileoLogger.addWorkflowSpan({
      input: input || 'Workflow Step',
      output,
      name,
      durationNs,
      metadata,
      tags,
      createdAt: startedAt,
      statusCode
    });
  }
}
/**
 * Recursively emits nodes to GalileoLogger in parent→child order,
 * then concludes workflow/agent spans after all their children are logged.
 * @param node - The node to log.
 * @param firstNode - True when this is the root trace node.
 */
private _logNodeTree(node: Node, firstNode = false): void {
  this._logNode(node, firstNode);

  for (const childId of node.children) {
    const childNode = this._nodes.get(childId);
    if (childNode) {
      this._logNodeTree(childNode, false);
    }
  }

  // Workflow/agent spans stay open while their children are added and are
  // concluded here; the root node is the trace itself and is not concluded here.
  if (
    !firstNode &&
    (node.nodeType === 'workflow' || node.nodeType === 'agent')
  ) {
    const params = node.spanParams;
    const durationNs = (params.durationNs as number | undefined) ?? 0;
    const statusCode = (params.statusCode as number | undefined) ?? 200;
    const concludeOutput = this._computeWorkflowOutput(node);

    // String(value) would log plain objects as "[object Object]"; serialise
    // non-string outputs as JSON and fall back to String() if that fails.
    let outputText: string | undefined;
    if (concludeOutput === null) {
      outputText = undefined;
    } else if (typeof concludeOutput === 'string') {
      outputText = concludeOutput;
    } else {
      try {
        outputText = JSON.stringify(concludeOutput) ?? String(concludeOutput);
      } catch {
        outputText = String(concludeOutput);
      }
    }

    this._galileoLogger.conclude({
      output: outputText,
      durationNs,
      statusCode
    });
    this._lastStatusCode = statusCode;
  }
}

/**
 * Runs a callback under a custom Galileo span that is registered with the OpenAI Agents SDK
 * trace provider and properly nested under the currently active span.
 *
 * The callback is the scope of the span's lifetime — it starts when the callback starts and
 * ends when it returns or throws. Any SDK spans created inside the callback are automatically
 * nested as children of this custom span.
 *
 * When `@openai/agents-core` cannot be loaded, a warning is logged and the callback runs
 * once without tracing. Errors thrown by the callback itself always propagate to the
 * caller; they never trigger the untraced fallback and never cause a second invocation.
 *
 * @param galileoSpan - Galileo span metadata (type, input, output, metadata, tags, statusCode).
 *   Mutable — update galileoSpan.output inside the callback to capture results.
 * @param callback - The work to run under this span. Return value is passed through.
 * @param options.name - Display name in Galileo. Overrides galileoSpan.name.
 * @param options.extraData - Additional data to attach to the span payload.
 * @returns A promise that resolves to the callback's return value.
 */
static async addGalileoCustomSpan<T>(
  galileoSpan: GalileoSpanLike,
  callback: () => T | Promise<T>,
  options?: { name?: string; extraData?: Record<string, unknown> }
): Promise<T> {
  const spanName = options?.name ?? galileoSpan.name ?? 'Galileo Custom';
  const spanOptions: Record<string, unknown> = {
    data: {
      name: spanName,
      _galileoSpan: galileoSpan,
      __galileoCustom: true,
      ...(options?.extraData ?? {})
    }
  };

  type WithCustomSpan = (
    fn: (span: unknown) => Promise<T>,
    options: Record<string, unknown>
  ) => Promise<T>;

  // Only the dynamic import is guarded. Wrapping the callback in the same
  // try/catch would swallow its errors and run it a second time.
  let withCustomSpan: WithCustomSpan;
  try {
    ({ withCustomSpan } = (await import('@openai/agents-core' as string)) as {
      withCustomSpan: WithCustomSpan;
    });
  } catch {
    sdkLogger.warn(
      '@openai/agents package is not installed. addGalileoCustomSpan will execute callback without tracing.'
    );
    return await callback();
  }

  return await withCustomSpan(async () => callback(), spanOptions);
}
/**
 * Creates a GalileoTracingProcessor and registers it with the OpenAI Agents SDK.
 * Requires @openai/agents-core to be installed; the returned promise rejects when
 * the package cannot be loaded.
 * @param options - (Optional) Registration options.
 * @param options.galileoLogger - (Optional) The GalileoLogger instance to use.
 * @param options.flushOnTraceEnd - (Optional) Whether to flush after each trace ends.
 * @returns The created GalileoTracingProcessor instance.
 */
export async function registerGalileoTraceProcessor(options?: {
  galileoLogger?: GalileoLogger;
  flushOnTraceEnd?: boolean;
}): Promise<GalileoTracingProcessor> {
  // Resolve the SDK first so a missing dependency fails before a processor
  // is constructed that could never be registered.
  const { addTraceProcessor } = (await import(
    '@openai/agents-core' as string
  )) as {
    addTraceProcessor: (processor: TracingProcessor) => void;
  };

  const processor = new GalileoTracingProcessor(
    options?.galileoLogger,
    options?.flushOnTraceEnd
  );
  addTraceProcessor(processor);

  return processor;
}

export { createGalileoCustomSpanData as GalileoCustomSpan } from './custom-span';
export type { GalileoCustomSpanData, GalileoSpanLike } from './custom-span';
export type { Node, NodeType } from './node';
export { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping';
export {
  extractLlmData,
  extractToolData,
  extractWorkflowData,
  extractGalileoCustomData,
  parseUsage
} from './data-extraction';
export {
  extractEmbeddedToolCalls,
  getToolNameFromType,
  extractToolInput,
  extractToolOutput
} from './embedded-tools';
/* eslint-disable @typescript-eslint/no-explicit-any */
import type { NodeType } from './node';

/**
 * The sentinel type string used to identify GalileoCustomSpan instances.
 */
export const GALILEO_CUSTOM_TYPE = 'galileo_custom';

/**
 * Maps an OpenAI Agents SDK SpanData type string to a Galileo node type.
 *
 * Span data flagged with `__galileoCustom === true` (strictly the boolean `true`)
 * takes precedence over its `type` field. Unrecognised types map to 'workflow'.
 * @param spanData - The span data object with a type discriminant.
 * @returns The corresponding Galileo node type or 'galileo_custom'.
 */
export function mapSpanType(spanData: {
  type: string;
  [key: string]: unknown;
}): NodeType | typeof GALILEO_CUSTOM_TYPE {
  // The index signature already permits reading the sentinel; no cast is needed.
  if (spanData.__galileoCustom === true) {
    return GALILEO_CUSTOM_TYPE;
  }

  switch (spanData.type) {
    case 'generation':
    case 'response':
      return 'llm';

    case 'function':
    case 'guardrail':
    case 'transcription':
    case 'speech':
    case 'speech_group':
    case 'mcp_tools':
      return 'tool';

    case 'agent':
      return 'agent';

    // 'handoff' and 'custom' are listed explicitly for readability; they share
    // the result of the default branch.
    case 'handoff':
    case 'custom':
    default:
      return 'workflow';
  }
}
/**
 * Derives a display name for a span.
 *
 * A truthy `spanData.name` always wins. Otherwise Galileo custom spans are named
 * 'Galileo Custom' and every other span gets a label based on its `type`.
 * @param spanData - The span data object.
 * @param spanType - The resolved node type.
 * @returns A human-readable display name for the span.
 */
export function mapSpanName(
  spanData: { type: string; name?: string; [key: string]: unknown },
  spanType: NodeType | typeof GALILEO_CUSTOM_TYPE
): string {
  if (spanData.name) {
    return String(spanData.name);
  }

  // Handle the galileo_custom sentinel before looking at the SDK span type.
  if (spanType === GALILEO_CUSTOM_TYPE) {
    return 'Galileo Custom';
  }

  // From here on spanData.name is known to be empty, so each case returns its
  // fixed label directly.
  switch (spanData.type) {
    case 'generation':
      return 'Generation';
    case 'response':
      return 'Response';
    case 'function':
      return 'Function';
    case 'guardrail':
      return 'Guardrail';
    case 'agent':
      return 'Agent';
    case 'handoff': {
      // Both snake_case and camelCase field names are accepted.
      const from = spanData.from_agent || spanData.fromAgent || '';
      const to = spanData.to_agent || spanData.toAgent || '';
      if (from || to) {
        return `Handoff: ${from} → ${to}`;
      }
      return 'Handoff';
    }
    case 'custom':
      return 'Custom';
    case 'transcription':
      return 'Transcription';
    case 'speech':
      return 'Speech';
    case 'speech_group':
      return 'Speech Group';
    case 'mcp_tools':
      return 'MCP Tools';
    default:
      return 'Span';
  }
}
usage.prompt_tokens_details; + usage.input_tokens_details ?? usage.prompt_tokens_details ?? legacyDetails; if (inputDetails != null && typeof inputDetails === 'object') { const details = inputDetails as Record; const cached = details.cached_tokens; @@ -70,7 +76,9 @@ export function parseUsage(usageData: unknown): ParsedUsage { } const outputDetails = - usage.output_tokens_details ?? usage.completion_tokens_details; + usage.output_tokens_details ?? + usage.completion_tokens_details ?? + legacyDetails; if (outputDetails != null && typeof outputDetails === 'object') { const details = outputDetails as Record; const reasoning = details.reasoning_tokens; diff --git a/src/index.ts b/src/index.ts index 8709bf09..1b8bd2b2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -141,6 +141,11 @@ import { import { log } from './wrappers'; import { wrapOpenAI, wrapAzureOpenAI } from './handlers/openai'; import { GalileoCallback } from './handlers/langchain'; +import { + GalileoTracingProcessor, + GalileoCustomSpan, + registerGalileoTraceProcessor +} from './handlers/openai-agents'; import { getSessions, getSpans, getTraces, RecordType } from './utils/search'; export { // Legacy clients @@ -160,6 +165,10 @@ export { // OpenAI wrapOpenAI, wrapAzureOpenAI, + // OpenAI Agents + GalileoTracingProcessor, + GalileoCustomSpan, + registerGalileoTraceProcessor, // Datasets Dataset, Datasets, diff --git a/src/types/logging/logger.types.ts b/src/types/logging/logger.types.ts index df3ea0cb..8dd3efeb 100644 --- a/src/types/logging/logger.types.ts +++ b/src/types/logging/logger.types.ts @@ -416,6 +416,7 @@ export interface IGalileoLoggerSpan { tags?: string[]; agentType?: AgentType; stepNumber?: number; + statusCode?: number; }): AgentSpan; } diff --git a/src/utils/galileo-logger.ts b/src/utils/galileo-logger.ts index 5029d5f6..ad7a3b85 100644 --- a/src/utils/galileo-logger.ts +++ b/src/utils/galileo-logger.ts @@ -1115,6 +1115,7 @@ class GalileoLogger implements IGalileoLogger { * @param 
options.createdAt - (Optional) The timestamp when the span was created. * @param options.metadata - (Optional) Additional metadata as key-value pairs. * @param options.tags - (Optional) Array of tags to categorize the span. + * @param options.statusCode - (Optional) HTTP status code or execution status (e.g., 200 for success, 500 for error). * @param options.stepNumber - (Optional) The step number in a multi-step process. * @returns The created workflow span. */ @@ -1128,6 +1129,7 @@ class GalileoLogger implements IGalileoLogger { createdAt?: Date; metadata?: Record; tags?: string[]; + statusCode?: number; stepNumber?: number; }): WorkflowSpan { const span = new WorkflowSpan({ @@ -1139,6 +1141,7 @@ class GalileoLogger implements IGalileoLogger { createdAt: options.createdAt || GalileoApiClient.getTimestampRecord(), metadata: options.metadata, tags: options.tags, + statusCode: options.statusCode, metrics: new Metrics({ durationNs: options.durationNs }), stepNumber: options.stepNumber }); @@ -1184,6 +1187,7 @@ class GalileoLogger implements IGalileoLogger { tags?: string[]; agentType?: AgentType; stepNumber?: number; + statusCode?: number; }): AgentSpan { const span = new AgentSpan({ input: options.input, @@ -1196,7 +1200,8 @@ class GalileoLogger implements IGalileoLogger { tags: options.tags, metrics: new Metrics({ durationNs: options.durationNs }), agentType: options.agentType, - stepNumber: options.stepNumber + stepNumber: options.stepNumber, + statusCode: options.statusCode }); this.addChildSpanToParent(span); diff --git a/src/utils/serialization.ts b/src/utils/serialization.ts index b42846a0..a5334dc3 100644 --- a/src/utils/serialization.ts +++ b/src/utils/serialization.ts @@ -70,7 +70,7 @@ export const extractParamsInfo = ( // This is simplistic and may not work for complex expressions defaultValue = defaultValueStr; } - } catch (e) { + } catch (_) { defaultValue = defaultValueStr; // Fallback to string representation } @@ -112,8 +112,8 @@ export const 
argsToDict = ( * @param metadata - The metadata object with potentially complex values * @returns A new object with all values converted to strings */ -export const convertToStringDict = ( - metadata: Record +export const toStringRecord = ( + metadata: Record ): Record => { const result: Record = {}; diff --git a/src/wrappers.ts b/src/wrappers.ts index 729440bb..c569b643 100644 --- a/src/wrappers.ts +++ b/src/wrappers.ts @@ -7,7 +7,7 @@ import { import { serializeToStr } from './entities/serialization'; import { argsToDict, - convertToStringDict, + toStringRecord, extractParamsInfo, toStringValue } from './utils/serialization'; @@ -215,7 +215,7 @@ export function log( spanParams.metadata && typeof spanParams.metadata === 'object' && spanParams.metadata !== null - ? convertToStringDict(spanParams.metadata as Record) + ? toStringRecord(spanParams.metadata as Record) : undefined; const tags = Array.isArray(spanParams.tags) ? spanParams.tags.map((tag) => toStringValue(tag)) diff --git a/tests/entities/serialization.test.ts b/tests/entities/serialization.test.ts index 07003a54..053b74a8 100644 --- a/tests/entities/serialization.test.ts +++ b/tests/entities/serialization.test.ts @@ -2,7 +2,7 @@ import { EventSerializer, serializeToStr } from '../../src/entities/serialization'; -import { convertToStringDict } from '../../src/utils/serialization'; +import { toStringRecord } from '../../src/utils/serialization'; describe('EventSerializer', () => { let serializer: EventSerializer; @@ -591,13 +591,13 @@ describe('EventSerializer', () => { }); }); - describe('convertToStringDict', () => { + describe('toStringRecord', () => { it('should use serializeToStr for object values', () => { const input = { metadata: { nested: { value: 'test' } } }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.metadata).toBe('string'); expect(result.metadata).toContain('test'); @@ -608,9 +608,9 @@ describe('EventSerializer', () => { const obj: 
Record = { key: 'value' }; obj.circular = obj; - expect(() => convertToStringDict({ meta: obj })).not.toThrow(); + expect(() => toStringRecord({ meta: obj })).not.toThrow(); - const result = convertToStringDict({ meta: obj }); + const result = toStringRecord({ meta: obj }); expect(typeof result.meta).toBe('string'); expect(result.meta).toContain('key'); }); @@ -622,7 +622,7 @@ describe('EventSerializer', () => { bool: true }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(result.str).toBe('hello'); expect(result.num).toBe('42'); @@ -635,7 +635,7 @@ describe('EventSerializer', () => { undefinedValue: undefined }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(result.nullValue).toBe(''); expect(result.undefinedValue).toBe(''); @@ -646,7 +646,7 @@ describe('EventSerializer', () => { items: [1, 2, [3, 4]] }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.items).toBe('string'); expect(result.items).toContain('1'); @@ -660,7 +660,7 @@ describe('EventSerializer', () => { empty: {} }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.empty).toBe('string'); expect(result.empty).toBe('{}'); @@ -677,7 +677,7 @@ describe('EventSerializer', () => { } }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.user).toBe('string'); expect(result.user).toContain('John'); @@ -693,13 +693,13 @@ describe('EventSerializer', () => { obj2.ref = obj1; expect(() => - convertToStringDict({ + toStringRecord({ first: obj1, second: obj2 }) ).not.toThrow(); - const result = convertToStringDict({ + const result = toStringRecord({ first: obj1, second: obj2 }); @@ -715,7 +715,7 @@ describe('EventSerializer', () => { timestamp: new Date('2024-01-01T12:00:00Z') }; - const result = convertToStringDict(input); + const result = toStringRecord(input); 
expect(typeof result.timestamp).toBe('string'); expect(result.timestamp).toContain('2024-01-01'); @@ -726,7 +726,7 @@ describe('EventSerializer', () => { error: new Error('Test error') }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.error).toBe('string'); expect(result.error).toContain('Error'); @@ -738,7 +738,7 @@ describe('EventSerializer', () => { tags: new Set(['tag1', 'tag2', 'tag3']) }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.tags).toBe('string'); expect(result.tags).toContain('tag1'); @@ -754,7 +754,7 @@ describe('EventSerializer', () => { ]) }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(typeof result.config).toBe('string'); expect(result.config).toContain('key1'); @@ -773,12 +773,12 @@ describe('EventSerializer', () => { grandchild.root = obj; // Circular reference expect(() => - convertToStringDict({ + toStringRecord({ tree: obj }) ).not.toThrow(); - const result = convertToStringDict({ + const result = toStringRecord({ tree: obj }); @@ -804,7 +804,7 @@ describe('EventSerializer', () => { } }; - const result = convertToStringDict(input); + const result = toStringRecord(input); // All values should be strings Object.values(result).forEach((value) => { @@ -821,7 +821,7 @@ describe('EventSerializer', () => { }); it('should handle empty input object', () => { - const result = convertToStringDict({}); + const result = toStringRecord({}); expect(result).toEqual({}); }); @@ -832,7 +832,7 @@ describe('EventSerializer', () => { key3: 'value3' }; - const result = convertToStringDict(input); + const result = toStringRecord(input); expect(Object.keys(result)).toEqual(['key1', 'key2', 'key3']); }); diff --git a/tests/handlers/openai-agents/custom-span.test.ts b/tests/handlers/openai-agents/custom-span.test.ts new file mode 100644 index 00000000..5ef81153 --- /dev/null +++ 
b/tests/handlers/openai-agents/custom-span.test.ts @@ -0,0 +1,188 @@ +import { + createGalileoCustomSpanData, + isGalileoCustomSpanData, + type GalileoCustomSpanData +} from '../../../src/handlers/openai-agents/custom-span'; + +describe('createGalileoCustomSpanData()', () => { + test('test creates span with galileoSpan only', () => { + const galileoSpan = { type: 'custom', data: 'test' }; + const result = createGalileoCustomSpanData(galileoSpan); + + expect(result.type).toBe('custom'); + expect(result.__galileoCustom).toBe(true); + expect(result.data.galileoSpan).toBe(galileoSpan); + expect(result.name).toBeUndefined(); + }); + + test('test creates span with name parameter', () => { + const galileoSpan = { type: 'tool', input: 'data' }; + const result = createGalileoCustomSpanData(galileoSpan, 'My Custom Span'); + + expect(result.name).toBe('My Custom Span'); + expect(result.data.galileoSpan).toBe(galileoSpan); + }); + + test('test creates span with extraData', () => { + const galileoSpan = { type: 'tool', input: 'data' }; + const extraData = { key1: 'value1', key2: 42 }; + const result = createGalileoCustomSpanData( + galileoSpan, + undefined, + extraData + ); + + expect(result.data.key1).toBe('value1'); + expect(result.data.key2).toBe(42); + expect(result.data.galileoSpan).toBe(galileoSpan); + }); + + test('test creates span with all parameters', () => { + const galileoSpan = { type: 'custom', nested: { data: true } }; + const extraData = { metadata: 'info', count: 5 }; + const result = createGalileoCustomSpanData( + galileoSpan, + 'Full Span', + extraData + ); + + expect(result.type).toBe('custom'); + expect(result.name).toBe('Full Span'); + expect(result.__galileoCustom).toBe(true); + expect(result.data.galileoSpan).toBe(galileoSpan); + expect(result.data.metadata).toBe('info'); + expect(result.data.count).toBe(5); + }); + + test('test sets type field to custom', () => { + const result = createGalileoCustomSpanData({}); + expect(result.type).toBe('custom'); + 
}); + + test('test sets __galileoCustom sentinel to true', () => { + const result = createGalileoCustomSpanData({}); + expect(result.__galileoCustom).toBe(true); + }); + + test('test extraData merges correctly with galileoSpan', () => { + const galileoSpan = { name: 'span-1' }; + const extraData = { tag1: 'tag', tag2: 'meta' }; + const result = createGalileoCustomSpanData( + galileoSpan, + undefined, + extraData + ); + + expect(result.data).toEqual({ + tag1: 'tag', + tag2: 'meta', + galileoSpan: { name: 'span-1' } + }); + }); + + test('test handles empty extraData', () => { + const galileoSpan = { type: 'tool', input: 'data' }; + const result = createGalileoCustomSpanData(galileoSpan, undefined, {}); + + expect(result.data.galileoSpan).toBe(galileoSpan); + expect(Object.keys(result.data)).toEqual(['galileoSpan']); + }); + + test('test handles null galileoSpan', () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const result = createGalileoCustomSpanData(null as any); + expect(result.data.galileoSpan).toBe(null); + }); + + test('test handles undefined name parameter', () => { + const result = createGalileoCustomSpanData({}, undefined, { meta: 'data' }); + expect(result.name).toBeUndefined(); + }); +}); + +describe('isGalileoCustomSpanData() type guard', () => { + test('test returns true for valid GalileoCustomSpanData', () => { + const spanData: GalileoCustomSpanData = { + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: true + }; + + expect(isGalileoCustomSpanData(spanData)).toBe(true); + }); + + test('test returns false for null', () => { + expect(isGalileoCustomSpanData(null)).toBe(false); + }); + + test('test returns false for undefined', () => { + expect(isGalileoCustomSpanData(undefined)).toBe(false); + }); + + test('test returns false for plain object without __galileoCustom', () => { + const plainObj = { + type: 'custom', + data: { galileoSpan: {} } + }; + + expect(isGalileoCustomSpanData(plainObj)).toBe(false); + }); + 
+ test('test returns false for object with __galileoCustom false', () => { + const spanData = { + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: false + }; + + expect(isGalileoCustomSpanData(spanData)).toBe(false); + }); + + test('test returns false for non-objects', () => { + expect(isGalileoCustomSpanData('string')).toBe(false); + expect(isGalileoCustomSpanData(123)).toBe(false); + expect(isGalileoCustomSpanData(true)).toBe(false); + expect(isGalileoCustomSpanData([])).toBe(false); + }); + + test('test requires __galileoCustom to be true', () => { + expect( + isGalileoCustomSpanData({ + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: true + }) + ).toBe(true); + + expect( + isGalileoCustomSpanData({ + type: 'custom', + data: { galileoSpan: {} }, + __galileoCustom: 1 // truthy but not true + }) + ).toBe(false); + }); + + test('test type guard narrows type correctly', () => { + const unknownData: unknown = createGalileoCustomSpanData({}); + + if (isGalileoCustomSpanData(unknownData)) { + // TypeScript should allow these properties + const spanData: GalileoCustomSpanData = unknownData; + expect(spanData.type).toBe('custom'); + expect(spanData.__galileoCustom).toBe(true); + } + }); + + test('test requires all required fields', () => { + const partialWithoutData = { + type: 'custom', + __galileoCustom: true + // missing data field + }; + + // Type guard should handle this gracefully (either true if it doesn't check data, or false if it does) + const result = isGalileoCustomSpanData(partialWithoutData); + expect(typeof result).toBe('boolean'); + }); +}); diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts new file mode 100644 index 00000000..3de9a542 --- /dev/null +++ b/tests/handlers/openai-agents/data-extraction.test.ts @@ -0,0 +1,675 @@ +import { + extractLlmData, + extractToolData, + extractWorkflowData, + extractGalileoCustomData, + parseUsage +} from 
'../../../src/handlers/openai-agents/data-extraction'; + +describe('parseUsage', () => { + test('test parse usage null returns zeros', () => { + const result = parseUsage(null); + expect(result).toEqual({ + inputTokens: 0, + outputTokens: 0, + totalTokens: null, + reasoningTokens: 0, + cachedTokens: 0, + rejectedPredictionTokens: 0 + }); + }); + + test('test parse usage undefined returns zeros', () => { + const result = parseUsage(undefined); + expect(result).toEqual({ + inputTokens: 0, + outputTokens: 0, + totalTokens: null, + reasoningTokens: 0, + cachedTokens: 0, + rejectedPredictionTokens: 0 + }); + }); + + test('test parse usage with input_tokens and output_tokens', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 20, + total_tokens: 30 + }); + expect(result.inputTokens).toBe(10); + expect(result.outputTokens).toBe(20); + expect(result.totalTokens).toBe(30); + }); + + test('test parse usage with legacy prompt_tokens and completion_tokens', () => { + const result = parseUsage({ prompt_tokens: 5, completion_tokens: 15 }); + expect(result.inputTokens).toBe(5); + expect(result.outputTokens).toBe(15); + }); + + test('test parse usage extracts reasoning_tokens from output_tokens_details', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + output_tokens_details: { reasoning_tokens: 3 }, + input_tokens_details: { cached_tokens: 2 } + }); + expect(result.reasoningTokens).toBe(3); + expect(result.cachedTokens).toBe(2); + }); + + test('test parse usage extracts reasoning_tokens from details (legacy shape)', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + details: { reasoning_tokens: 3, cached_tokens: 2 } + }); + expect(result.reasoningTokens).toBe(3); + expect(result.cachedTokens).toBe(2); + }); + + test('test parse usage extracts reasoning_tokens at top level', () => { + const result = parseUsage({ + input_tokens: 10, + output_tokens: 5, + reasoning_tokens: 4 + }); + 
expect(result.reasoningTokens).toBe(4); + }); +}); + +describe('extractLlmData generation', () => { + test('test extract generation span data', () => { + const spanData = { + type: 'generation', + input: [{ role: 'user', content: 'Hello' }], + output: [{ role: 'assistant', content: 'Hi' }], + model: 'gpt-4o', + model_config: { temperature: 0.7, max_tokens: 100 }, + usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-4o'); + expect(result.temperature).toBe(0.7); + expect(result.numInputTokens).toBe(10); + expect(result.numOutputTokens).toBe(5); + expect(result.totalTokens).toBe(15); + expect(result.input).toBe(JSON.stringify(spanData.input)); + expect(result.output).toBe(JSON.stringify(spanData.output)); + }); + + test('test extract generation span with null usage', () => { + const spanData = { type: 'generation', model: 'gpt-4o' }; + const result = extractLlmData(spanData); + expect(result.numInputTokens).toBe(0); + expect(result.numOutputTokens).toBe(0); + expect(result.totalTokens).toBeUndefined(); + }); + + test('test extract generation metadata includes gen_ai_system openai', () => { + const spanData = { type: 'generation' }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.gen_ai_system).toBe('openai'); + }); + + test('test extract generation metadata model_config is raw dict', () => { + const spanData = { + type: 'generation', + model_config: { temperature: 0.5, max_tokens: 200 } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.model_config).toEqual({ temperature: 0.5, max_tokens: 200 }); + }); + + test('test extract generation metadata includes token detail objects', () => { + const spanData = { + type: 'generation', + usage: { + input_tokens: 10, + output_tokens: 5, + input_tokens_details: { cached_tokens: 3 }, + output_tokens_details: { reasoning_tokens: 2 } + } 
+ }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.input_tokens_details).toEqual({ cached_tokens: 3 }); + expect(meta.output_tokens_details).toEqual({ reasoning_tokens: 2 }); + }); + + test('test extract generation metadata omits absent token details', () => { + const spanData = { + type: 'generation', + usage: { input_tokens: 10, output_tokens: 5 } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta).not.toHaveProperty('input_tokens_details'); + expect(meta).not.toHaveProperty('output_tokens_details'); + }); + + test('test extract generation span with string input and output not double encoded', () => { + const spanData = { + type: 'generation', + input: 'What is the weather?', + output: 'It is sunny.' + }; + const result = extractLlmData(spanData); + expect(result.input).toBe('What is the weather?'); + expect(result.output).toBe('It is sunny.'); + }); +}); + +describe('extractLlmData response', () => { + test('test extract response span data with _input and _response', () => { + const spanData = { + type: 'response', + _input: [{ role: 'user', content: 'Hello' }], + _response: { + model: 'gpt-4o', + usage: { input_tokens: 8, output_tokens: 4 }, + temperature: 0.5, + output: [{ type: 'message', content: 'Hi' }] + } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-4o'); + expect(result.temperature).toBe(0.5); + expect(result.numInputTokens).toBe(8); + expect(result.numOutputTokens).toBe(4); + }); + + test('test extract response span data with fallback input/response keys', () => { + const spanData = { + type: 'response', + input: 'some input', + response: { + model: 'gpt-3.5-turbo', + usage: { input_tokens: 2, output_tokens: 1 } + } + }; + const result = extractLlmData(spanData); + expect(result.model).toBe('gpt-3.5-turbo'); + expect(result.numInputTokens).toBe(2); + }); + + test('test extract response span with null response 
returns unknown model', () => { + const spanData = { type: 'response' }; + const result = extractLlmData(spanData); + expect(result.model).toBe('unknown'); + expect(result.numInputTokens).toBe(0); + }); + + test('test extract response tools returned as raw array not JSON string', () => { + const toolsArray = [{ type: 'function', name: 'search' }]; + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + tools: toolsArray, + output: [] + } + }; + const result = extractLlmData(spanData); + expect(result.tools).toEqual(toolsArray); + expect(typeof result.tools).not.toBe('string'); + }); + + test('test extract response model_parameters from whitelist', () => { + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + temperature: 0.7, + max_output_tokens: 512, + top_p: 1, + tool_choice: 'auto', + seed: 42, + irrelevant_field: 'ignored', + output: [] + } + }; + const result = extractLlmData(spanData); + const mp = result.modelParameters as Record; + expect(mp.temperature).toBe(0.7); + expect(mp.max_output_tokens).toBe(512); + expect(mp.top_p).toBe(1); + expect(mp.tool_choice).toBe('auto'); + expect(mp.seed).toBe(42); + expect(mp).not.toHaveProperty('irrelevant_field'); + }); + + test('test extract response metadata includes response_metadata', () => { + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + temperature: 0.5, + object: 'response', + output: [] + } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.gen_ai_system).toBe('openai'); + const rm = meta.response_metadata as Record; + expect(rm.model).toBe('gpt-4o'); + expect(rm.temperature).toBe(0.5); + expect(rm).not.toHaveProperty('usage'); + expect(rm).not.toHaveProperty('output'); + }); + + test('test extract response metadata includes instructions when present', () => { + const spanData = { + type: 'response', + _response: { + model: 'gpt-4o', + usage: {}, + 
instructions: 'You are a helpful assistant.', + output: [] + } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta.instructions).toBe('You are a helpful assistant.'); + }); + + test('test extract response metadata omits instructions when absent', () => { + const spanData = { + type: 'response', + _response: { model: 'gpt-4o', usage: {}, output: [] } + }; + const result = extractLlmData(spanData); + const meta = result.metadata as Record; + expect(meta).not.toHaveProperty('instructions'); + }); + + test('test extract response span with string input not double encoded', () => { + const spanData = { + type: 'response', + _input: 'Hello', + _response: { model: 'gpt-4o', usage: {}, output: [] } + }; + const result = extractLlmData(spanData); + expect(result.input).toBe('Hello'); + }); + + test('test extractLlmData response span with response.error sets statusCode and error_details', () => { + const error = { status_code: 429, message: 'Rate limit' }; + const spanData = { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: {}, + output: [], + error + } + }; + const result = extractLlmData(spanData); + expect(result.statusCode).toBe(429); + const meta = result.metadata as Record; + expect(meta.error_details).toEqual(error); + }); + + test('test extractLlmData response span with response.error missing status_code falls back to 500', () => { + const spanData = { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: {}, + output: [], + error: { message: 'Unknown error' } + } + }; + const result = extractLlmData(spanData); + expect(result.statusCode).toBe(500); + }); + + test('test extractLlmData response span with no response.error has no statusCode', () => { + const spanData = { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: {}, + output: [] + } + }; + const result = extractLlmData(spanData); + 
expect(result.statusCode).toBeUndefined(); + }); +}); + +describe('extractLlmData unknown type', () => { + test('test extract returns empty record for unknown type', () => { + const result = extractLlmData({ type: 'unknown' }); + expect(Object.keys(result).length).toBe(0); + }); +}); + +describe('extractToolData', () => { + test('test extract function span data string input/output', () => { + const spanData = { + type: 'function', + input: '{"query":"hello"}', + output: 'result text' + }; + const result = extractToolData(spanData); + expect(result.input).toBe('{"query":"hello"}'); + expect(result.output).toBe('result text'); + }); + + test('test extract function span data object input serialised', () => { + const spanData = { + type: 'function', + input: { query: 'hello' }, + output: { answer: 'world' } + }; + const result = extractToolData(spanData); + expect(result.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test extract function span data missing output', () => { + const spanData = { type: 'function', input: 'test' }; + const result = extractToolData(spanData); + expect(result.output).toBeUndefined(); + }); + + test('test extract function span with mcp_data in metadata', () => { + const spanData = { + type: 'function', + input: 'test', + mcp_data: { server: 'my-server', tool: 'my-tool' } + }; + const result = extractToolData(spanData); + const meta = result.metadata as Record; + expect(meta.mcp_data).toBe( + JSON.stringify({ server: 'my-server', tool: 'my-tool' }) + ); + }); + + test('test extract guardrail span triggered', () => { + const spanData = { type: 'guardrail', triggered: true, name: 'PII Filter' }; + const result = extractToolData(spanData); + expect(result.input).toBe(''); + expect(result.output).toBe('{"triggered":true}'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe(true); + expect(meta.status).toBe('warning'); + 
expect(meta).not.toHaveProperty('guardrail_name'); + }); + + test('test extract guardrail span not triggered', () => { + const spanData = { type: 'guardrail', triggered: false, name: 'Safety' }; + const result = extractToolData(spanData); + expect(result.output).toBe('{"triggered":false}'); + const meta = result.metadata as Record; + expect(meta.triggered).toBe(false); + expect(meta).not.toHaveProperty('status'); + expect(meta).not.toHaveProperty('guardrail_name'); + }); + + test('test extract tool data for transcription returns empty', () => { + const result = extractToolData({ type: 'transcription' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); + + test('test extract tool data for mcp_tools returns empty', () => { + const result = extractToolData({ type: 'mcp_tools' }); + expect(result.input).toBe(''); + }); +}); + +describe('extractWorkflowData', () => { + test('test extract agent span data with tools and handoffs', () => { + const spanData = { + type: 'agent', + name: 'PlannerAgent', + tools: ['search', 'calculator'], + handoffs: ['ReviewAgent'], + output_type: 'string' + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe(''); + const meta = result.metadata as Record; + expect(meta.tools).toEqual(['search', 'calculator']); + expect(meta.handoffs).toEqual(['ReviewAgent']); + expect(meta.output_type).toBe('string'); + }); + + test('test extract agent span data without optional fields', () => { + const result = extractWorkflowData({ type: 'agent' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + const meta = result.metadata as Record; + expect(Object.keys(meta).length).toBe(0); + }); + + test('test extract handoff span data', () => { + const spanData = { + type: 'handoff', + from_agent: 'AgentA', + to_agent: 'AgentB' + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe('{"from_agent":"AgentA"}'); + 
expect(result.output).toBe('{"to_agent":"AgentB"}'); + const meta = result.metadata as Record; + expect(meta.from_agent).toBe('AgentA'); + expect(meta.to_agent).toBe('AgentB'); + }); + + test('test extract handoff span data with missing agents', () => { + const result = extractWorkflowData({ type: 'handoff' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); + + test('test extract custom span data with input and output', () => { + const spanData = { + type: 'custom', + data: { + input: 'custom input', + output: 'custom output', + extra_key: 'extra value' + } + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe('custom input'); + expect(result.output).toBe('custom output'); + const meta = result.metadata as Record; + expect(meta.extra_key).toBe('extra value'); + expect(meta.input).toBeUndefined(); + expect(meta.output).toBeUndefined(); + }); + + test('test extract custom span data with object metadata value kept as-is', () => { + const spanData = { + type: 'custom', + data: { + input: 'in', + output: 'out', + config: { retries: 3, timeout: 5000 } + } + }; + const result = extractWorkflowData(spanData); + const meta = result.metadata as Record; + expect(meta.config).toEqual({ retries: 3, timeout: 5000 }); + }); + + test('test extract custom span data with object input serialised', () => { + const spanData = { + type: 'custom', + data: { input: { query: 'hello' }, output: { answer: 'world' } } + }; + const result = extractWorkflowData(spanData); + expect(result.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test extract custom span data with no data field', () => { + const result = extractWorkflowData({ type: 'custom' }); + expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); + + test('test extract unknown span type returns empty', () => { + const result = extractWorkflowData({ type: 'future_type' }); 
+ expect(result.input).toBe(''); + expect(result.output).toBeUndefined(); + }); +}); + +describe('extractGalileoCustomData', () => { + test('test extracts tool type from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'tool', + input: 'tool input', + output: 'tool output', + metadata: { key: 'val' }, + tags: ['tag1'], + statusCode: 201 + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('tool'); + expect(result.params.input).toBe('tool input'); + expect(result.params.output).toBe('tool output'); + expect(result.params.metadata).toEqual({ key: 'val' }); + expect(result.params.tags).toEqual(['tag1']); + expect(result.params.statusCode).toBe(201); + }); + + test('test extracts workflow type from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'workflow', + input: 'wf in', + output: 'wf out' + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + expect(result.params.input).toBe('wf in'); + expect(result.params.output).toBe('wf out'); + }); + + test('test extracts agent type from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'agent', + input: 'agent in' + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('agent'); + expect(result.params.input).toBe('agent in'); + }); + + test('test falls back to workflow for unrecognized galileoSpan type', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { type: 'future_type', input: 'x' } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + expect(result.params.input).toBe('x'); + }); + + test('test falls back to workflow for llm type (not delegated)', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + 
_galileoSpan: { type: 'llm', input: 'prompt' } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + }); + + test('test falls back to extractWorkflowData when no galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + data: { input: 'plain input', output: 'plain output' } + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + expect(result.params.input).toBe('plain input'); + expect(result.params.output).toBe('plain output'); + }); + + test('test falls back to extractWorkflowData when galileoSpan is not an object', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + _galileoSpan: 'not-an-object' + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + }); + + test('test serializes object input/output from galileoSpan', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'tool', + input: { query: 'hello' }, + output: { answer: 'world' } + } + }; + const result = extractGalileoCustomData(spanData); + expect(result.params.input).toBe(JSON.stringify({ query: 'hello' })); + expect(result.params.output).toBe(JSON.stringify({ answer: 'world' })); + }); + + test('test omits tags and statusCode when not provided', () => { + const spanData = { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { type: 'tool', input: 'in' } + }; + const result = extractGalileoCustomData(spanData); + expect(result.params).not.toHaveProperty('tags'); + expect(result.params).not.toHaveProperty('statusCode'); + }); + + test('test handles missing galileoSpan gracefully', () => { + const spanData = { + type: 'custom', + __galileoCustom: true + }; + const result = extractGalileoCustomData(spanData); + expect(result.nodeType).toBe('workflow'); + }); + + test('test defaults to empty input when galileoSpan has no input', () => { + const spanData = { + 
type: 'custom', + __galileoCustom: true, + _galileoSpan: { type: 'tool' } + }; + const result = extractGalileoCustomData(spanData); + expect(result.params.input).toBe(''); + expect(result.params.output).toBeUndefined(); + expect(result.params.metadata).toEqual({}); + }); +}); diff --git a/tests/handlers/openai-agents/embedded-tool.test.ts b/tests/handlers/openai-agents/embedded-tool.test.ts new file mode 100644 index 00000000..7e4e23c1 --- /dev/null +++ b/tests/handlers/openai-agents/embedded-tool.test.ts @@ -0,0 +1,258 @@ +import { + extractEmbeddedToolCalls, + getToolNameFromType, + extractToolInput, + extractToolOutput +} from '../../../src/handlers/openai-agents/embedded-tools'; + +describe('getToolNameFromType', () => { + test('test maps code_interpreter_call to code_interpreter', () => { + expect(getToolNameFromType('code_interpreter_call')).toBe( + 'code_interpreter' + ); + }); + + test('test maps file_search_call to file_search', () => { + expect(getToolNameFromType('file_search_call')).toBe('file_search'); + }); + + test('test maps web_search_call to web_search', () => { + expect(getToolNameFromType('web_search_call')).toBe('web_search'); + }); + + test('test maps computer_call to computer', () => { + expect(getToolNameFromType('computer_call')).toBe('computer'); + }); + + test('test maps custom_tool_call to custom_tool', () => { + expect(getToolNameFromType('custom_tool_call')).toBe('custom_tool'); + }); + + test('test returns original string for unknown type', () => { + expect(getToolNameFromType('unknown_type')).toBe('unknown_type'); + }); +}); + +describe('extractToolInput', () => { + test('test code_interpreter_call extracts code field', () => { + const result = extractToolInput( + { code: 'print("hello")' }, + 'code_interpreter_call' + ); + expect(result).toBe('print("hello")'); + }); + + test('test code_interpreter_call returns null when no code', () => { + expect(extractToolInput({}, 'code_interpreter_call')).toBeNull(); + }); + + test('test 
file_search_call extracts queries', () => { + const result = extractToolInput( + { queries: ['find docs', 'search code'] }, + 'file_search_call' + ); + expect(result).toBe(JSON.stringify(['find docs', 'search code'])); + }); + + test('test web_search_call extracts action.query', () => { + const result = extractToolInput( + { action: { query: 'latest news' } }, + 'web_search_call' + ); + expect(result).toBe('latest news'); + }); + + test('test web_search_call returns null when no action', () => { + expect(extractToolInput({}, 'web_search_call')).toBeNull(); + }); + + test('test computer_call extracts action object', () => { + const action = { type: 'click', coordinate: [100, 200] }; + const result = extractToolInput({ action }, 'computer_call'); + expect(result).toBe(JSON.stringify(action)); + }); + + test('test custom_tool_call extracts input string', () => { + const result = extractToolInput({ input: 'my input' }, 'custom_tool_call'); + expect(result).toBe('my input'); + }); + + test('test custom_tool_call serialises object input', () => { + const result = extractToolInput( + { input: { key: 'val' } }, + 'custom_tool_call' + ); + expect(result).toBe(JSON.stringify({ key: 'val' })); + }); +}); + +describe('extractToolOutput', () => { + test('test code_interpreter_call concatenates log outputs', () => { + const result = extractToolOutput( + { outputs: [{ logs: 'line1' }, { logs: 'line2' }] }, + 'code_interpreter_call' + ); + expect(result).toBe('line1\nline2'); + }); + + test('test code_interpreter_call extracts url output', () => { + const result = extractToolOutput( + { outputs: [{ url: 'https://example.com/file.png' }] }, + 'code_interpreter_call' + ); + expect(result).toBe('https://example.com/file.png'); + }); + + test('test code_interpreter_call returns null for empty outputs', () => { + expect( + extractToolOutput({ outputs: [] }, 'code_interpreter_call') + ).toBeNull(); + }); + + test('test file_search_call extracts results', () => { + const results = [{ id: 
'1', content: 'doc' }]; + const result = extractToolOutput({ results }, 'file_search_call'); + expect(result).toBe(JSON.stringify(results)); + }); + + test('test file_search_call returns null when no results', () => { + expect(extractToolOutput({}, 'file_search_call')).toBeNull(); + }); + + test('test web_search_call returns action as json', () => { + const action = { query: 'news', status: 'done' }; + const result = extractToolOutput({ action }, 'web_search_call'); + expect(result).toBe(JSON.stringify(action)); + }); + + test('test computer_call returns null', () => { + expect( + extractToolOutput({ result: 'screenshot' }, 'computer_call') + ).toBeNull(); + }); + + test('test custom_tool_call extracts output string', () => { + const result = extractToolOutput({ output: 'done' }, 'custom_tool_call'); + expect(result).toBe('done'); + }); +}); + +describe('extractEmbeddedToolCalls', () => { + test('test returns empty array for null response', () => { + expect(extractEmbeddedToolCalls(null)).toEqual([]); + }); + + test('test returns empty array for response without output', () => { + expect(extractEmbeddedToolCalls({})).toEqual([]); + }); + + test('test skips non-embedded-tool output items', () => { + const response = { + output: [{ type: 'message', content: 'hello' }] + }; + expect(extractEmbeddedToolCalls(response)).toEqual([]); + }); + + test('test extracts code_interpreter_call', () => { + const response = { + output: [ + { + type: 'code_interpreter_call', + id: 'ci_001', + code: 'x = 1', + outputs: [{ logs: 'output log' }], + status: 'completed' + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + expect(result[0].type).toBe('function'); + expect(result[0].function.name).toBe('code_interpreter'); + expect(result[0].tool_call_id).toBe('ci_001'); + expect(result[0].tool_call_input).toBe('x = 1'); + expect(result[0].tool_call_output).toBe('output log'); + expect(result[0].tool_call_status).toBe('completed'); + }); + + 
test('test tool_call_id falls back to call_id when id absent', () => { + const response = { + output: [ + { + type: 'web_search_call', + call_id: 'fallback_id_001', + action: { query: 'test' } + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result[0].tool_call_id).toBe('fallback_id_001'); + }); + + test('test extracts file_search_call', () => { + const response = { + output: [ + { + type: 'file_search_call', + id: 'fs_001', + queries: ['find docs'], + results: [{ id: 'doc1', content: 'text' }] + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + expect(result[0].function.name).toBe('file_search'); + expect(result[0].tool_call_input).toBe(JSON.stringify(['find docs'])); + }); + + test('test extracts web_search_call', () => { + const response = { + output: [ + { + type: 'web_search_call', + id: 'ws_001', + action: { query: 'latest AI news' } + } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + expect(result[0].function.name).toBe('web_search'); + expect(result[0].tool_call_input).toBe('latest AI news'); + }); + + test('test extracts multiple embedded tool calls', () => { + const response = { + output: [ + { type: 'code_interpreter_call', code: 'x=1', outputs: [] }, + { type: 'message', content: 'hi' }, + { type: 'web_search_call', action: { query: 'test' } } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(2); + expect(result[0].type).toBe('function'); + expect(result[1].type).toBe('function'); + }); + + test('test tool_call_status defaults to completed when absent', () => { + const response = { + output: [{ type: 'web_search_call', action: { query: 'q' } }] + }; + const result = extractEmbeddedToolCalls(response); + expect(result[0].tool_call_status).toBe('completed'); + }); + + test('test handles null output items gracefully', () => { + const response = { + output: [ + null, + undefined, + { type: 
'web_search_call', action: { query: 'q' } } + ] + }; + const result = extractEmbeddedToolCalls(response); + expect(result.length).toBe(1); + }); +}); diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts new file mode 100644 index 00000000..a410906a --- /dev/null +++ b/tests/handlers/openai-agents/integration.test.ts @@ -0,0 +1,728 @@ +import { GalileoTracingProcessor } from '../../../src/handlers/openai-agents'; +import type { + AgentTrace, + AgentSpan +} from '../../../src/handlers/openai-agents'; + +function createMockLogger() { + return { + startTrace: jest.fn().mockReturnValue({}), + addLlmSpan: jest.fn().mockReturnValue({}), + addToolSpan: jest.fn().mockReturnValue({}), + addWorkflowSpan: jest.fn().mockReturnValue({}), + addAgentSpan: jest.fn().mockReturnValue({}), + conclude: jest.fn().mockReturnValue(undefined), + flush: jest.fn().mockResolvedValue(undefined) + }; +} + +function makeTrace(overrides: Partial = {}): AgentTrace { + return { + traceId: 'trace-001', + name: 'Multi-Agent Flow', + metadata: {}, + startedAt: new Date('2024-01-01T00:00:00Z').toISOString(), + endedAt: new Date('2024-01-01T00:00:05Z').toISOString(), + ...overrides + }; +} + +function makeSpan( + overrides: Partial & { spanData: AgentSpan['spanData'] } +): AgentSpan { + return { + spanId: 'span-001', + traceId: 'trace-001', + parentId: 'trace-001', + startedAt: new Date('2024-01-01T00:00:01Z').toISOString(), + endedAt: new Date('2024-01-01T00:00:02Z').toISOString(), + error: null, + ...overrides + }; +} + +describe('Multi-agent integration flows', () => { + test('test multiple agents with handoff', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // PlannerAgent + const planner = makeSpan({ + spanId: 'agent-planner', + parentId: 'trace-001', + spanData: { type: 'agent', 
name: 'PlannerAgent' } + }); + + // Handoff to ExecutorAgent + const handoff = makeSpan({ + spanId: 'handoff-001', + parentId: 'agent-planner', + spanData: { + type: 'handoff', + from_agent: 'PlannerAgent', + to_agent: 'ExecutorAgent' + } + }); + + // ExecutorAgent + const executor = makeSpan({ + spanId: 'agent-executor', + parentId: 'handoff-001', + spanData: { type: 'agent', name: 'ExecutorAgent' } + }); + + await processor.onSpanStart(planner); + await processor.onSpanStart(handoff); + await processor.onSpanStart(executor); + await processor.onSpanEnd(executor); + await processor.onSpanEnd(handoff); + await processor.onSpanEnd(planner); + await processor.onTraceEnd(trace); + + // Verify all spans logged: 2 agents + 1 handoff all use addWorkflowSpan + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); + expect(mockLogger.addAgentSpan).not.toHaveBeenCalled(); + }); + + test('test agent->tool->llm->tool flow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const tool1 = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'search' } + }); + + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + const tool2 = makeSpan({ + spanId: 'tool-002', + parentId: 'agent-001', + spanData: { type: 'function', name: 'calculate' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(tool1); + await processor.onSpanEnd(tool1); + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanStart(tool2); + await processor.onSpanEnd(tool2); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + 
expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan) + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(2); // 2 tools + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // 1 llm + }); + + test('test guardrail triggered in flow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const guardrail = makeSpan({ + spanId: 'guardrail-001', + parentId: 'agent-001', + spanData: { type: 'guardrail', name: 'PII Filter', triggered: true } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(guardrail); + await processor.onSpanEnd(guardrail); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.output).toBe('{"triggered":true}'); + }); + + test('test embedded tool calls from OpenAI response', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const response = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { + type: 'response', + model: 'gpt-4o', + _input: [{ role: 'user', content: 'search for python' }], + _response: { + model: 'gpt-4o', + output: [ + { + type: 'web_search_call', + action: { query: 'python programming' }, + id: 'search-1' + }, + { + type: 'code_interpreter_call', + code: 'print("result")', + outputs: [{ logs: 'result' }], + id: 'code-1' + } + ] + } + } + }); + + await processor.onSpanStart(response); + await processor.onSpanEnd(response); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const 
llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(Array.isArray(llmCall.tools)).toBe(true); + expect(llmCall.tools.length).toBe(2); + expect(llmCall.tools[0].type).toBe('function'); + expect(llmCall.tools[1].type).toBe('function'); + }); + + test('test galileo_custom span delegates to inner galileoSpan as tool', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'agent-001', + spanData: { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'tool', + input: 'custom tool input', + output: 'custom tool output', + metadata: { source: 'test' }, + tags: ['custom-tag'], + statusCode: 200 + } + } + }); + + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan) + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.input).toBe('custom tool input'); + expect(toolCall.output).toBe('custom tool output'); + expect(toolCall.metadata).toEqual({ source: 'test' }); + expect(toolCall.tags).toEqual(['custom-tag']); + }); + + test('test galileo_custom span with workflow type', async () => { + const mockLogger = createMockLogger(); + const processor = new 
GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'agent-001', + spanData: { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'workflow', + input: 'wf input', + output: 'wf output' + } + } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + // addWorkflowSpan called twice: once for the agent container, once for the custom workflow span + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(2); + // The custom workflow span is the first child logged (index 1 after agent at index 0) + const wfCall = mockLogger.addWorkflowSpan.mock.calls[1][0]; + expect(wfCall.input).toBe('wf input'); + expect(wfCall.output).toBe('wf output'); + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test galileo_custom span with agent type', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'trace-001', + spanData: { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'agent', + input: 'agent input', + output: 'agent output', + metadata: { role: 'planner' } + } + } + }); + + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentCall.input).toBe('agent input'); + expect(agentCall.output).toBe('agent output'); + 
expect(agentCall.metadata).toEqual({ role: 'planner' }); + }); + + test('test galileo_custom span without galileoSpan falls back to workflow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'trace-001', + spanData: { + type: 'custom', + __galileoCustom: true, + data: { input: 'fallback input', output: 'fallback output' } + } + }); + + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(wfCall.input).toBe('fallback input'); + expect(wfCall.output).toBe('fallback output'); + }); + + test('test galileo_custom span with unrecognized type falls back to workflow', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const customSpan = makeSpan({ + spanId: 'custom-001', + parentId: 'trace-001', + spanData: { + type: 'custom', + __galileoCustom: true, + _galileoSpan: { + type: 'unknown_future_type', + input: 'some input' + } + } + }); + + await processor.onSpanStart(customSpan); + await processor.onSpanEnd(customSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(wfCall.input).toBe('some input'); + }); + + test('test error in middle of flow handled', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const agent = makeSpan({ 
+ spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const tool1 = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'search' } + }); + + const errorTool = makeSpan({ + spanId: 'tool-002', + parentId: 'agent-001', + error: { message: 'Connection timeout' }, + spanData: { type: 'function', name: 'fetch' } + }); + + const tool3 = makeSpan({ + spanId: 'tool-003', + parentId: 'agent-001', + spanData: { type: 'function', name: 'parse' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(tool1); + await processor.onSpanEnd(tool1); + await processor.onSpanStart(errorTool); + await processor.onSpanEnd(errorTool); // Ends with error + await processor.onSpanStart(tool3); + await processor.onSpanEnd(tool3); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + // Verify error tool has error status + const errorToolCall = mockLogger.addToolSpan.mock.calls[1][0]; + expect(errorToolCall.statusCode).toBe(500); + const errorMeta = errorToolCall.metadata as Record; + expect(errorMeta.error_message).toBe('Connection timeout'); + + // Verify all tools logged + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(3); + }); + + test('test complex nested structure with multiple agents', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Root agent + const rootAgent = makeSpan({ + spanId: 'root-agent', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'RootAgent' } + }); + + // First branch: planning + const planningAgent = makeSpan({ + spanId: 'planning-agent', + parentId: 'root-agent', + spanData: { type: 'agent', name: 'PlanningAgent' } + }); + + const planLLM = makeSpan({ + spanId: 'plan-llm', + parentId: 'planning-agent', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + // Second 
branch: execution + const executionAgent = makeSpan({ + spanId: 'execution-agent', + parentId: 'root-agent', + spanData: { type: 'agent', name: 'ExecutionAgent' } + }); + + const executionTool = makeSpan({ + spanId: 'exec-tool', + parentId: 'execution-agent', + spanData: { type: 'function', name: 'execute' } + }); + + await processor.onSpanStart(rootAgent); + await processor.onSpanStart(planningAgent); + await processor.onSpanStart(planLLM); + await processor.onSpanEnd(planLLM); + await processor.onSpanEnd(planningAgent); + await processor.onSpanStart(executionAgent); + await processor.onSpanStart(executionTool); + await processor.onSpanEnd(executionTool); + await processor.onSpanEnd(executionAgent); + await processor.onSpanEnd(rootAgent); + await processor.onTraceEnd(trace); + + // Verify all spans logged: 3 agents use addWorkflowSpan + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3); + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + // conclude is called for all non-root workflow/agent spans + expect(mockLogger.conclude).toHaveBeenCalled(); + }); +}); + +describe('Output tracking integration', () => { + test('test last output only set by workflow/agent spans, not llm spans', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + const llm1 = makeSpan({ + spanId: 'llm-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'First output' + } + }); + + const llm2 = makeSpan({ + spanId: 'llm-002', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'Final output' + } + }); + + await processor.onSpanStart(llm1); + await processor.onSpanEnd(llm1); + await processor.onSpanStart(llm2); + await processor.onSpanEnd(llm2); + await 
processor.onTraceEnd(trace); + + // _lastOutput is only updated by workflow/agent spans (parity with Python). + // Bare LLM spans do not set _lastOutput, so trace output falls back to undefined. + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.output).toBeUndefined(); + }); + + test('test last output set by workflow span conclude', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'MyAgent' } + }); + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'Final output' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // _lastOutput is set from the workflow/agent conclude output (last child's output). 
+ const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.output).toBe('Final output'); + }); +}); + +describe('Workflow span statusCode propagation', () => { + test('test workflow span statusCode passed to addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create a workflow span (handoff type maps to workflow nodeType) + const workflow = makeSpan({ + spanId: 'workflow-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' } + }); + + // Create a successful child LLM span + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'workflow-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'successful response' + }, + error: null + }); + + await processor.onSpanStart(workflow); + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(workflow); + await processor.onTraceEnd(trace); + + // Verify addWorkflowSpan was called (note: statusCode may be 200 by default) + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const workflowSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + // Verify statusCode parameter is being passed through (defaults to 200 for success) + expect(workflowSpanCall.statusCode).toBe(200); + }); + + test('test workflow span with direct error has statusCode 500', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create a workflow span that itself has an error + const workflowWithError = makeSpan({ + spanId: 'workflow-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' }, + error: { + message: 'Workflow 
execution failed', + data: { reason: 'timeout' } + } + }); + + await processor.onSpanStart(workflowWithError); + await processor.onSpanEnd(workflowWithError); + await processor.onTraceEnd(trace); + + // Verify addWorkflowSpan was called with statusCode 500 + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const workflowSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(workflowSpanCall.statusCode).toBe(500); + }); + + test('test agent span statusCode passed to addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create an agent span + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'TestAgent' } + }); + + // Create a child LLM span + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'test output' + }, + error: null + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + // Verify addWorkflowSpan was called with statusCode parameter + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const agentSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentSpanCall.statusCode).toBe(200); + }); + + test('test conclude called with statusCode for workflow spans', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create nested workflow spans to test conclude calls + const outerWorkflow = makeSpan({ + spanId: 'workflow-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'Agent1', 
to_agent: 'Agent2' } + }); + + const innerWorkflow = makeSpan({ + spanId: 'workflow-002', + parentId: 'workflow-001', + spanData: { type: 'custom', name: 'InnerWorkflow' } + }); + + await processor.onSpanStart(outerWorkflow); + await processor.onSpanStart(innerWorkflow); + await processor.onSpanEnd(innerWorkflow); + await processor.onSpanEnd(outerWorkflow); + await processor.onTraceEnd(trace); + + // Verify conclude was called for the workflow spans + expect(mockLogger.conclude).toHaveBeenCalled(); + // Find calls that pass statusCode + const concludeCalls = mockLogger.conclude.mock.calls; + const callsWithStatusCode = concludeCalls.filter( + (call) => call[0]?.statusCode !== undefined + ); + expect(callsWithStatusCode.length).toBeGreaterThan(0); + }); +}); diff --git a/tests/handlers/openai-agents/node.test.ts b/tests/handlers/openai-agents/node.test.ts new file mode 100644 index 00000000..4a1aad87 --- /dev/null +++ b/tests/handlers/openai-agents/node.test.ts @@ -0,0 +1,133 @@ +import { createNode } from '../../../src/handlers/openai-agents/node'; + +describe('createNode()', () => { + test('test creates node with correct nodeType llm', () => { + const node = createNode({ + nodeType: 'llm', + spanParams: { name: 'GPT Call' }, + runId: 'span-001', + parentRunId: 'trace-001' + }); + + expect(node.nodeType).toBe('llm'); + expect(node.spanParams).toEqual({ name: 'GPT Call' }); + expect(node.runId).toBe('span-001'); + expect(node.parentRunId).toBe('trace-001'); + }); + + test('test creates node with correct nodeType tool', () => { + const node = createNode({ + nodeType: 'tool', + spanParams: { name: 'Search Tool' }, + runId: 'span-002', + parentRunId: 'span-001' + }); + + expect(node.nodeType).toBe('tool'); + }); + + test('test creates node with correct nodeType workflow', () => { + const node = createNode({ + nodeType: 'workflow', + spanParams: {}, + runId: 'span-003', + parentRunId: null + }); + + expect(node.nodeType).toBe('workflow'); + }); + + test('test creates 
node with correct nodeType agent', () => { + const node = createNode({ + nodeType: 'agent', + spanParams: { name: 'Planning Agent' }, + runId: 'span-004', + parentRunId: 'trace-001' + }); + + expect(node.nodeType).toBe('agent'); + }); + + test('test initializes children as empty array', () => { + const node = createNode({ + nodeType: 'llm', + spanParams: {}, + runId: 'span-001', + parentRunId: null + }); + + expect(Array.isArray(node.children)).toBe(true); + expect(node.children.length).toBe(0); + }); + + test('test preserves all spanParams fields', () => { + const spanParams = { + name: 'Test Span', + input: 'test input', + output: 'test output', + model: 'gpt-4', + metadata: { key: 'value' } + }; + + const node = createNode({ + nodeType: 'llm', + spanParams, + runId: 'span-001', + parentRunId: 'trace-001' + }); + + expect(node.spanParams).toEqual(spanParams); + }); + + test('test children array is mutable', () => { + const node = createNode({ + nodeType: 'agent', + spanParams: {}, + runId: 'span-001', + parentRunId: null + }); + + node.children.push('child-001'); + node.children.push('child-002'); + + expect(node.children).toEqual(['child-001', 'child-002']); + }); + + test('test node has required Node interface properties', () => { + const node = createNode({ + nodeType: 'llm', + spanParams: { name: 'Test' }, + runId: 'span-001', + parentRunId: 'parent-001' + }); + + // Verify all required properties exist + expect('nodeType' in node).toBe(true); + expect('spanParams' in node).toBe(true); + expect('runId' in node).toBe(true); + expect('parentRunId' in node).toBe(true); + expect('children' in node).toBe(true); + }); + + test('test empty spanParams preserved correctly', () => { + const node = createNode({ + nodeType: 'tool', + spanParams: {}, + runId: 'span-001', + parentRunId: 'trace-001' + }); + + expect(Object.keys(node.spanParams).length).toBe(0); + }); + + test('test parentRunId can be null', () => { + const node = createNode({ + nodeType: 'agent', + 
spanParams: {}, + runId: 'trace-001', + parentRunId: null + }); + + expect(node.parentRunId).toBeNull(); + }); +}); diff --git a/tests/handlers/openai-agents/span-mapping.test.ts b/tests/handlers/openai-agents/span-mapping.test.ts new file mode 100644 index 00000000..a7fe11f2 --- /dev/null +++ b/tests/handlers/openai-agents/span-mapping.test.ts @@ -0,0 +1,194 @@ +import { + mapSpanType, + mapSpanName, + GALILEO_CUSTOM_TYPE +} from '../../../src/handlers/openai-agents/span-mapping'; +import type { NodeType } from '../../../src/handlers/openai-agents/node'; + +describe('mapSpanType', () => { + test('test maps generation to llm', () => { + expect(mapSpanType({ type: 'generation' })).toBe('llm'); + }); + + test('test maps response to llm', () => { + expect(mapSpanType({ type: 'response' })).toBe('llm'); + }); + + test('test maps function to tool', () => { + expect(mapSpanType({ type: 'function' })).toBe('tool'); + }); + + test('test maps guardrail to tool', () => { + expect(mapSpanType({ type: 'guardrail' })).toBe('tool'); + }); + + test('test maps transcription to tool', () => { + expect(mapSpanType({ type: 'transcription' })).toBe('tool'); + }); + + test('test maps speech to tool', () => { + expect(mapSpanType({ type: 'speech' })).toBe('tool'); + }); + + test('test maps speech_group to tool', () => { + expect(mapSpanType({ type: 'speech_group' })).toBe('tool'); + }); + + test('test maps mcp_tools to tool', () => { + expect(mapSpanType({ type: 'mcp_tools' })).toBe('tool'); + }); + + test('test maps agent to agent', () => { + expect(mapSpanType({ type: 'agent' })).toBe('agent'); + }); + + test('test maps handoff to workflow', () => { + expect(mapSpanType({ type: 'handoff' })).toBe('workflow'); + }); + + test('test maps custom to workflow', () => { + expect(mapSpanType({ type: 'custom' })).toBe('workflow'); + }); + + test('test maps galileo_custom sentinel to galileo_custom', () => { + expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe( + 
GALILEO_CUSTOM_TYPE + ); + }); + + test('test maps unknown type to workflow fallback', () => { + expect(mapSpanType({ type: 'unknown_future_type' })).toBe('workflow'); + }); +}); + +describe('mapSpanName', () => { + test('test returns spanData.name when present', () => { + expect(mapSpanName({ type: 'generation', name: 'MySpan' }, 'llm')).toBe( + 'MySpan' + ); + }); + + test('test generation fallback is Generation', () => { + expect(mapSpanName({ type: 'generation' }, 'llm')).toBe('Generation'); + }); + + test('test response fallback is Response', () => { + expect(mapSpanName({ type: 'response' }, 'llm')).toBe('Response'); + }); + + test('test function fallback uses spanData.name or Function', () => { + expect(mapSpanName({ type: 'function', name: 'my_tool' }, 'tool')).toBe( + 'my_tool' + ); + expect(mapSpanName({ type: 'function' }, 'tool')).toBe('Function'); + }); + + test('test guardrail fallback uses spanData.name or Guardrail', () => { + expect( + mapSpanName({ type: 'guardrail', name: 'content_filter' }, 'tool') + ).toBe('content_filter'); + expect(mapSpanName({ type: 'guardrail' }, 'tool')).toBe('Guardrail'); + }); + + test('test agent fallback uses spanData.name or Agent', () => { + expect(mapSpanName({ type: 'agent', name: 'PlannerAgent' }, 'agent')).toBe( + 'PlannerAgent' + ); + expect(mapSpanName({ type: 'agent' }, 'agent')).toBe('Agent'); + }); + + test('test handoff formats from-to arrow', () => { + expect( + mapSpanName( + { type: 'handoff', from_agent: 'AgentA', to_agent: 'AgentB' }, + 'workflow' + ) + ).toBe('Handoff: AgentA → AgentB'); + }); + + test('test handoff fallback when no agents', () => { + expect(mapSpanName({ type: 'handoff' }, 'workflow')).toBe('Handoff'); + }); + + test('test custom fallback is Custom', () => { + expect(mapSpanName({ type: 'custom' }, 'workflow')).toBe('Custom'); + }); + + test('test galileo_custom sentinel fallback is Galileo Custom', () => { + expect(mapSpanName({ type: 'custom' }, GALILEO_CUSTOM_TYPE)).toBe( + 
'Galileo Custom' + ); + }); + + test('test transcription fallback is Transcription', () => { + expect(mapSpanName({ type: 'transcription' }, 'tool')).toBe( + 'Transcription' + ); + }); + + test('test speech fallback is Speech', () => { + expect(mapSpanName({ type: 'speech' }, 'tool')).toBe('Speech'); + }); + + test('test speech_group fallback is Speech Group', () => { + expect(mapSpanName({ type: 'speech_group' }, 'tool')).toBe('Speech Group'); + }); + + test('test mcp_tools fallback is MCP Tools', () => { + expect(mapSpanName({ type: 'mcp_tools' }, 'tool')).toBe('MCP Tools'); + }); +}); + +describe('agent span type distinction', () => { + test('test agent maps to agent not workflow', () => { + const result = mapSpanType({ type: 'agent' }); + expect(result).toBe('agent'); + expect(result).not.toBe('workflow'); + }); + + test('test handoff still maps to workflow', () => { + expect(mapSpanType({ type: 'handoff' })).toBe('workflow'); + }); + + test('test custom still maps to workflow', () => { + expect(mapSpanType({ type: 'custom' })).toBe('workflow'); + }); + + test('test galileo_custom sentinel is unaffected', () => { + expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe( + GALILEO_CUSTOM_TYPE + ); + }); + + test('test mapSpanType returns NodeType or GALILEO_CUSTOM_TYPE for all known types', () => { + const knownTypes: Array<{ + type: string; + expected: NodeType | typeof GALILEO_CUSTOM_TYPE; + }> = [ + { type: 'generation', expected: 'llm' }, + { type: 'response', expected: 'llm' }, + { type: 'function', expected: 'tool' }, + { type: 'guardrail', expected: 'tool' }, + { type: 'transcription', expected: 'tool' }, + { type: 'speech', expected: 'tool' }, + { type: 'speech_group', expected: 'tool' }, + { type: 'mcp_tools', expected: 'tool' }, + { type: 'agent', expected: 'agent' }, + { type: 'handoff', expected: 'workflow' }, + { type: 'custom', expected: 'workflow' } + ]; + for (const { type, expected } of knownTypes) { + expect(mapSpanType({ type 
})).toBe(expected); + } + }); + + test('test mapSpanName returns Agent for agent type without name', () => { + expect(mapSpanName({ type: 'agent' }, 'agent')).toBe('Agent'); + }); + + test('test mapSpanName returns spanData.name for agent type with name', () => { + expect(mapSpanName({ type: 'agent', name: 'RouterAgent' }, 'agent')).toBe( + 'RouterAgent' + ); + }); +}); diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts new file mode 100644 index 00000000..6e718531 --- /dev/null +++ b/tests/handlers/openai-agents/tracing-processor.test.ts @@ -0,0 +1,1702 @@ +import { GalileoTracingProcessor } from '../../../src/handlers/openai-agents'; +import type { + AgentTrace, + AgentSpan +} from '../../../src/handlers/openai-agents'; + +// Helper to build a mock AgentTrace +function makeTrace(overrides: Partial = {}): AgentTrace { + return { + traceId: 'trace-001', + name: 'Test Agent Run', + metadata: {}, + startedAt: new Date('2024-01-01T00:00:00Z').toISOString(), + endedAt: new Date('2024-01-01T00:00:10Z').toISOString(), + ...overrides + }; +} + +// Helper to build a mock AgentSpan +function makeSpan( + overrides: Partial & { spanData: AgentSpan['spanData'] } +): AgentSpan { + return { + spanId: 'span-001', + traceId: 'trace-001', + parentId: 'trace-001', + startedAt: new Date('2024-01-01T00:00:01Z').toISOString(), + endedAt: new Date('2024-01-01T00:00:05Z').toISOString(), + error: null, + ...overrides + }; +} + +// Create a mock GalileoLogger for testing +function createMockLogger() { + return { + startTrace: jest.fn().mockReturnValue({}), + addLlmSpan: jest.fn().mockReturnValue({}), + addToolSpan: jest.fn().mockReturnValue({}), + addWorkflowSpan: jest.fn().mockReturnValue({}), + addAgentSpan: jest.fn().mockReturnValue({}), + conclude: jest.fn().mockReturnValue(undefined), + flush: jest.fn().mockResolvedValue(undefined) + }; +} + +describe('GalileoTracingProcessor lifecycle', () => { + test('test 
onTraceStart creates root node', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + // No external observable yet — verify no calls to logger + expect(mockLogger.startTrace).not.toHaveBeenCalled(); + }); + + test('test full trace lifecycle calls startTrace', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.name).toBe('Test Agent Run'); + }); + + test('test full trace with llm span calls addLlmSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-gen-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4o', + input: [{ role: 'user', content: 'hello' }], + output: [{ role: 'assistant', content: 'hi' }], + usage: { input_tokens: 5, output_tokens: 3 } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(llmCall.model).toBe('gpt-4o'); + expect(llmCall.numInputTokens).toBe(5); + expect(llmCall.numOutputTokens).toBe(3); + }); + + test('test full trace with tool span calls addToolSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 
'span-func-001', + parentId: 'trace-001', + spanData: { + type: 'function', + name: 'search_tool', + input: '{"query":"hello"}', + output: 'results' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.name).toBe('search_tool'); + }); + + test('test full trace with agent span calls addWorkflowSpan and conclude', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-agent-001', + parentId: 'trace-001', + spanData: { + type: 'agent', + name: 'PlannerAgent' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentCall.name).toBe('PlannerAgent'); + // conclude is called for agent spans + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test error span sets status 500 in metadata', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-err-001', + parentId: 'trace-001', + error: { message: 'Something went wrong', data: { code: 'ERR_001' } }, + spanData: { type: 'function', name: 'failing_tool', input: 'x' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + 
expect(toolCall.statusCode).toBe(500); + expect(toolCall.metadata.error_message).toBe('Something went wrong'); + expect(toolCall.metadata.error_type).toBe('SpanError'); + }); + + test('test flushOnTraceEnd true calls flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, true); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.flush).toHaveBeenCalledTimes(1); + }); + + test('test flushOnTraceEnd false does not call flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.flush).not.toHaveBeenCalled(); + }); + + test('test shutdown calls flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + await processor.shutdown(); + + expect(mockLogger.flush).toHaveBeenCalledTimes(1); + }); + + test('test forceFlush calls flush', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + await processor.forceFlush(); + + expect(mockLogger.flush).toHaveBeenCalledTimes(1); + }); + + test('test nested agent span is logged as child', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'span-agent-outer', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'OuterAgent' } + }); + + const llmSpan = makeSpan({ + spanId: 'span-llm-inner', + parentId: 'span-agent-outer', + spanData: { + type: 'generation', + model: 'gpt-4o', + usage: { input_tokens: 2, output_tokens: 1 } + } + }); + + await 
processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + // conclude called for agent span + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test response span extracts embedded tool calls', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + const span = makeSpan({ + spanId: 'span-resp-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _input: 'test input', + _response: { + model: 'gpt-4o', + usage: { input_tokens: 10, output_tokens: 5 }, + output: [ + { + type: 'web_search_call', + id: 'ws_001', + action: { query: 'latest news' } + } + ] + } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(Array.isArray(llmCall.tools)).toBe(true); + expect(llmCall.tools.length).toBe(1); + expect(llmCall.tools[0].type).toBe('function'); + }); + + test('test metadata values are stringified', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace({ + metadata: { run_id: 'abc123', count: 5 as unknown as string } + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); + const startCall = mockLogger.startTrace.mock.calls[0][0]; + // metadata values should all be strings + if (startCall.metadata) { + for (const v of 
Object.values(startCall.metadata)) { + expect(typeof v).toBe('string'); + } + } + }); + + test('test addGalileoCustomSpan invokes callback and returns its value', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + void processor; + + const galileoSpan = { type: 'tool' as const, input: 'query' }; + const result = await GalileoTracingProcessor.addGalileoCustomSpan( + galileoSpan, + async () => 'callback-result', + { name: 'My Custom Span' } + ); + + expect(result).toBe('callback-result'); + }); + + test('test addGalileoCustomSpan fallback calls callback when SDK unavailable', async () => { + const callbackFn = jest.fn().mockResolvedValue('fallback-result'); + const galileoSpan = { type: 'tool' as const, input: 'query' }; + + // The SDK is not installed in the test environment; the fallback path runs. + const result = await GalileoTracingProcessor.addGalileoCustomSpan( + galileoSpan, + callbackFn, + { name: 'Fallback Span' } + ); + + expect(callbackFn).toHaveBeenCalledTimes(1); + expect(result).toBe('fallback-result'); + }); +}); + +describe('Span tree construction edge cases', () => { + test('test multiple children linked to single parent', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create parent span + const parentSpan = makeSpan({ + spanId: 'parent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'Parent Agent' } + }); + await processor.onSpanStart(parentSpan); + + // Create multiple child spans + const child1 = makeSpan({ + spanId: 'child-001', + parentId: 'parent-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: 'result 1' + } + }); + const child2 = makeSpan({ + spanId: 'child-002', + parentId: 'parent-001', + spanData: { + type: 'function', + name: 'search', + input: 
'query', + output: 'result 2' + } + }); + + await processor.onSpanStart(child1); + await processor.onSpanStart(child2); + await processor.onSpanEnd(child1); + await processor.onSpanEnd(child2); + await processor.onSpanEnd(parentSpan); + await processor.onTraceEnd(trace); + + // Verify both children were logged + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + // conclude is called for all non-root workflow/agent spans + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test deeply nested spans (3 levels)', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Level 1: Agent + const agent = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + // Level 2: LLM under agent + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4' } + }); + + // Level 3: Tool under LLM + const tool = makeSpan({ + spanId: 'tool-001', + parentId: 'llm-001', + spanData: { type: 'function', name: 'calc' } + }); + + await processor.onSpanStart(agent); + await processor.onSpanStart(llm); + await processor.onSpanStart(tool); + await processor.onSpanEnd(tool); + await processor.onSpanEnd(llm); + await processor.onSpanEnd(agent); + await processor.onTraceEnd(trace); + + // All should be logged + expect(mockLogger.startTrace).toHaveBeenCalledTimes(1); + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan) + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + }); + + test('test span with no parentId defaults to trace', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + 
const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Span with parentId undefined (should default to traceId) + const span = makeSpan({ + spanId: 'span-001', + parentId: undefined, + spanData: { type: 'function', name: 'tool' } + }); + + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + }); + + test('test span parent link defaults to trace when parent not found', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Create a parent agent first + const parentAgent = makeSpan({ + spanId: 'parent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + await processor.onSpanStart(parentAgent); + + // Create a span with explicit parentId pointing to parent + const span = makeSpan({ + spanId: 'child-001', + parentId: 'parent-001', + spanData: { + type: 'function', + name: 'tool', + input: 'test', + output: 'result' + } + }); + + // Should not throw + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onSpanEnd(parentAgent); + await processor.onTraceEnd(trace); + + // Span is logged correctly + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + }); +}); + +describe('Response span data merging', () => { + test('test response span merges embedded tools at end', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _input: [{ role: 'user' }], + _response: { + model: 'gpt-4o', + output: [ + { + type: 'code_interpreter_call', + code: 'print("hello")', + outputs: [{ logs: 'hello' }], + id: 'call-1', + status: 
'completed' + } + ] + } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + // addLlmSpan should be called for response type + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(Array.isArray(llmCall.tools)).toBe(true); + expect(llmCall.tools[0].type).toBe('function'); + }); + + test('test _responseObject removed from final params', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _response: { output: [] } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + // _responseObject should not be in the final logged data + expect(llmCall._responseObject).toBeUndefined(); + }); + + test('test generation span updates usage on end', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'gen-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: [], + output: [], + usage: { input_tokens: 10, output_tokens: 5 } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(llmCall.numInputTokens).toBe(10); + expect(llmCall.numOutputTokens).toBe(5); + }); + + test('test response span with response-level error sets statusCode and error_details in metadata', async () 
=> { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const responseError = { status_code: 500, message: 'Server error' }; + const span = makeSpan({ + spanId: 'response-err-001', + parentId: 'trace-001', + spanData: { + type: 'response', + _input: 'hello', + _response: { + model: 'gpt-4o', + usage: { input_tokens: 5, output_tokens: 0 }, + output: [], + error: responseError + } + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + const llmCall = mockLogger.addLlmSpan.mock.calls[0][0]; + expect(llmCall.statusCode).toBe(500); + const meta = llmCall.metadata as Record; + expect(meta.error_details).toBe(JSON.stringify(responseError)); + }); + + test('test response span with no _responseObject handles gracefully', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'response-001', + parentId: 'trace-001', + spanData: { type: 'response' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + // Should not throw + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); + }); +}); + +describe('Error handling and recovery', () => { + test('test span error with message only', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Test error' }, + spanData: { type: 'function', name: 'tool' } + }); + + await processor.onTraceStart(trace); + await 
processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + expect(toolCall.statusCode).toBe(500); + const meta = toolCall.metadata as Record; + expect(meta.error_message).toBe('Test error'); + expect(meta.error_type).toBe('SpanError'); + }); + + test('test span error with type field uses error.type value', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Agent failed', type: 'AgentError' }, + spanData: { type: 'function', name: 'tool' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_type).toBe('AgentError'); + }); + + test('test span error without type field falls back to SpanError', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Something broke' }, + spanData: { type: 'function', name: 'tool' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_type).toBe('SpanError'); + }); + + test('test span error with message and data', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const 
errorData = { code: 'TOOL_ERROR', details: 'Connection failed' }; + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + error: { message: 'Tool failed', data: errorData }, + spanData: { + type: 'function', + name: 'failing_tool', + input: '', + output: undefined + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_details).toBe(JSON.stringify(errorData)); + }); + + test('test onSpanEnd without onSpanStart handled gracefully', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'orphan-span', + parentId: 'trace-001', + spanData: { type: 'tool' } + }); + + await processor.onTraceStart(trace); + // Skip onSpanStart + // Should not throw + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).not.toHaveBeenCalled(); + }); + + test('test error metadata merged with existing metadata', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + spanData: { + type: 'agent', + data: { user_id: '123' } // Will go to metadata + }, + error: { message: 'Error occurred' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + const meta = agentCall.metadata as Record; + expect(meta.error_message).toBe('Error occurred'); + }); + + 
test('test workflow span with error uses serialized error as conclude output', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'span-agent', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'MyAgent' }, + error: { message: 'Agent failed', type: 'AgentError', data: { code: 42 } } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBe( + JSON.stringify({ + message: 'Agent failed', + type: 'AgentError', + data: { code: 42 } + }) + ); + expect(concludeCall.statusCode).toBe(500); + }); + + test('test error on non-existent span ignored gracefully', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'never-started-span', + parentId: 'trace-001', + error: { message: 'This should be ignored' }, + spanData: { type: 'tool' } + }); + + await processor.onTraceStart(trace); + // Skip onSpanStart - span doesn't exist in processor + // Should not throw + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).not.toHaveBeenCalled(); + }); +}); + +describe('Date and duration handling', () => { + test('test valid startedAt and endedAt calculate durationNs', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + const startTime = new Date('2024-01-01T00:00:00Z'); + const endTime = new Date('2024-01-01T00:00:05Z'); + + const trace = makeTrace({ + startedAt: startTime.toISOString(), + endedAt: endTime.toISOString() + }); + + await 
processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + // 5 seconds = 5,000,000,000 nanoseconds + expect(startTraceCall.durationNs).toBeGreaterThan(0); + expect(startTraceCall.durationNs).toBeCloseTo(5_000_000_000, -4); + }); + + test('test missing startedAt sets durationNs to 0', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace({ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + startedAt: undefined as any, // missing + endedAt: new Date().toISOString() + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.durationNs).toBe(0); + }); + + test('test missing endedAt uses current time', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const startTime = new Date('2024-01-01T00:00:00Z'); + + const trace = makeTrace({ + startedAt: startTime.toISOString(), + // eslint-disable-next-line @typescript-eslint/no-explicit-any + endedAt: undefined as any // missing + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + // Should calculate using current time, so durationNs >= 0 + expect(startTraceCall.durationNs).toBeGreaterThanOrEqual(0); + }); +}); + +describe('Metadata handling and serialization', () => { + test('test non-string metadata values stringified at trace start', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + const trace = makeTrace({ + metadata: { + user_id: '123', + request_count: 5, + flags: true, + config: { nested: 'value' } + } + }); + + await 
processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + const meta = startTraceCall.metadata as Record; + expect(meta.request_count).toBe('5'); + expect(meta.flags).toBe('true'); + expect(JSON.parse(meta.config)).toEqual({ nested: 'value' }); + }); + + test('test unicode characters preserved in metadata', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + + const trace = makeTrace({ + metadata: { message: 'Hello 世界 🌍' } + }); + + await processor.onTraceStart(trace); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + const meta = startTraceCall.metadata as Record; + expect(meta.message).toBe('Hello 世界 🌍'); + }); + + test('test error overwrites specific metadata keys', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'span-001', + parentId: 'trace-001', + spanData: { + type: 'function', + name: 'tool', + input: '', + output: undefined + }, + error: { message: 'Tool error', data: { code: 'ECONNREFUSED' } } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; + const meta = toolCall.metadata as Record; + expect(meta.error_message).toBe('Tool error'); + expect(meta.error_type).toBe('SpanError'); + }); +}); + +describe('Agent span emission', () => { + test('test agent span uses addWorkflowSpan not addAgentSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = 
makeSpan({ + spanId: 'agent-span-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'TestAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addAgentSpan).not.toHaveBeenCalled(); + }); + + test('test agent span passes name correctly', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-span-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'RouterAgent', output: 'routed' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentCall.name).toBe('RouterAgent'); + }); + + test('test agent span conclude is called after children', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const toolSpan = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'my_tool' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(toolSpan); + await processor.onSpanEnd(toolSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.conclude).toHaveBeenCalled(); + }); + + test('test agent span conclude receives last child output as 
fallback', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'MyAgent' } + }); + + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { + type: 'generation', + model: 'gpt-4o', + output: 'Final answer from LLM' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // addWorkflowSpan is called before children — output is undefined at that point + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentCall.output).toBeUndefined(); + + // conclude for the agent span (first conclude call) should carry the LLM child's output + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBe('Final answer from LLM'); + }); + + test('test agent span conclude receives last of multiple children outputs', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'MyAgent' } + }); + + const toolSpan = makeSpan({ + spanId: 'tool-001', + parentId: 'agent-001', + spanData: { type: 'function', name: 'my_tool', output: 'Tool result' } + }); + + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { + type: 'generation', + model: 'gpt-4o', + output: 'LLM final response' + } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(toolSpan); + await 
processor.onSpanEnd(toolSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // The conclude for the agent span should use the last child (LLM), not the tool + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBe('LLM final response'); + }); + + test('test agent span conclude uses undefined when no children have output', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'EmptyAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + const concludeCall = mockLogger.conclude.mock.calls[0][0]; + expect(concludeCall.output).toBeUndefined(); + }); + + test('test agent span error passes statusCode 500 as direct field', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-err-001', + parentId: 'trace-001', + error: { message: 'Agent failed' }, + spanData: { type: 'agent', name: 'FailingAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + // statusCode is passed as a direct field, not folded into metadata + expect(agentCall.statusCode).toBe(500); + const meta = agentCall.metadata as Record; + expect(meta.error_message).toBe('Agent failed'); + expect(meta.status_code).toBeUndefined(); + }); + + test('test agent span without error passes statusCode 200', 
async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const span = makeSpan({ + spanId: 'agent-ok-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'HappyAgent' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(span); + await processor.onSpanEnd(span); + await processor.onTraceEnd(trace); + + const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0]; + expect(agentCall.statusCode).toBe(200); + const meta = agentCall.metadata as Record; + expect(meta.status_code).toBeUndefined(); + }); +}); + +describe('Span hierarchy correctness', () => { + test('test trace with agent child maintains correct parent-child order', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'RootAgent' } + }); + + const llmSpan = makeSpan({ + spanId: 'llm-001', + parentId: 'agent-001', + spanData: { type: 'generation', model: 'gpt-4o' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(llmSpan); + await processor.onSpanEnd(llmSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // startTrace is called first, then addWorkflowSpan (agent), then addLlmSpan, then conclude + const callOrder = mockLogger.startTrace.mock.invocationCallOrder[0]; + const agentOrder = mockLogger.addWorkflowSpan.mock.invocationCallOrder[0]; + const llmOrder = mockLogger.addLlmSpan.mock.invocationCallOrder[0]; + const concludeOrder = mockLogger.conclude.mock.invocationCallOrder[0]; + + expect(callOrder).toBeLessThan(agentOrder); + expect(agentOrder).toBeLessThan(llmOrder); + expect(llmOrder).toBeLessThan(concludeOrder); + }); + + test('test 
workflow span type still uses addWorkflowSpan', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const handoffSpan = makeSpan({ + spanId: 'handoff-001', + parentId: 'trace-001', + spanData: { type: 'handoff', from_agent: 'A', to_agent: 'B' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(handoffSpan); + await processor.onSpanEnd(handoffSpan); + await processor.onTraceEnd(trace); + + expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); + expect(mockLogger.addAgentSpan).not.toHaveBeenCalled(); + }); + + test('test agent and workflow spans both call conclude', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent' } + }); + + const handoffSpan = makeSpan({ + spanId: 'handoff-001', + parentId: 'agent-001', + spanData: { type: 'handoff' } + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(handoffSpan); + await processor.onSpanEnd(handoffSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // conclude is called 3 times: once for handoff (workflow), once for agent, once for concludeAll in onTraceEnd + expect(mockLogger.conclude).toHaveBeenCalledTimes(3); + }); + + test('test handoff span refreshes to_agent at onSpanEnd (late binding)', async () => { + // In the OpenAI Agents SDK, to_agent is set on handoffSpan.spanData AFTER span.start() fires + // (inside withHandoffSpan's fn callback). So onSpanStart sees to_agent = undefined. + // onSpanEnd must re-extract to capture the final populated to_agent value. 
+ const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + const agentSpan = makeSpan({ + spanId: 'agent-001', + parentId: 'trace-001', + spanData: { type: 'agent', name: 'TriageAgent' } + }); + + // Simulate SDK behaviour: to_agent is absent at start, present at end + const handoffSpanData: AgentSpan['spanData'] = { + type: 'handoff', + from_agent: 'TriageAgent' + // to_agent not yet set + }; + const handoffSpan = makeSpan({ + spanId: 'handoff-001', + parentId: 'agent-001', + spanData: handoffSpanData + }); + + await processor.onTraceStart(trace); + await processor.onSpanStart(agentSpan); + await processor.onSpanStart(handoffSpan); + + // Simulate SDK setting to_agent after start + handoffSpanData.to_agent = 'WeatherAgent'; + + await processor.onSpanEnd(handoffSpan); + await processor.onSpanEnd(agentSpan); + await processor.onTraceEnd(trace); + + // The handoff workflow span should receive the JSON dict output (not empty string) + const wfCall = mockLogger.addWorkflowSpan.mock.calls.find( + (c: [Record]) => + c[0].name === 'Handoff: TriageAgent → WeatherAgent' + ); + expect(wfCall).toBeDefined(); + expect(wfCall?.[0].output).toBe('{"to_agent":"WeatherAgent"}'); + + // The agent conclude should also get the JSON dict via last-child fallback + const concludeCalls = mockLogger.conclude.mock.calls as [ + Record + ][]; + const agentConclude = concludeCalls.find( + (c) => c[0].output === '{"to_agent":"WeatherAgent"}' + ); + expect(agentConclude).toBeDefined(); + }); +}); + +describe('_firstInput population (trace-level input handling)', () => { + test('captures first input from LLM span', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // LLM span with input + const llm = makeSpan({ + spanId: 'llm-001', + parentId: 
'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: 'What is the weather in NYC?', + output: 'It is sunny...' + } + }); + + await processor.onSpanStart(llm); + await processor.onSpanEnd(llm); + await processor.onTraceEnd(trace); + + // Verify startTrace was called with the LLM input + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.input).toBe('What is the weather in NYC?'); + }); + + test('captures first input from tool span if LLM input unavailable', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // Tool span (with input, no LLM) + const tool = makeSpan({ + spanId: 'tool-001', + parentId: 'trace-001', + spanData: { + type: 'function', + name: 'search', + input: 'NYC weather forecast', + output: 'Sunny, 72F' + } + }); + + await processor.onSpanStart(tool); + await processor.onSpanEnd(tool); + await processor.onTraceEnd(trace); + + const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; + expect(startTraceCall.input).toBe('NYC weather forecast'); + }); + + test('skips empty or null inputs, uses first meaningful one', async () => { + const mockLogger = createMockLogger(); + const processor = new GalileoTracingProcessor(mockLogger as never, false); + const trace = makeTrace(); + + await processor.onTraceStart(trace); + + // First LLM with empty input + const llm1 = makeSpan({ + spanId: 'llm-001', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: '', + output: 'response' + } + }); + + // Second LLM with actual input + const llm2 = makeSpan({ + spanId: 'llm-002', + parentId: 'trace-001', + spanData: { + type: 'generation', + model: 'gpt-4', + input: 'Real question', + output: 'Real answer' + } + }); + + await processor.onSpanStart(llm1); + await processor.onSpanEnd(llm1); + await 
processor.onSpanStart(llm2);
    await processor.onSpanEnd(llm2);
    await processor.onTraceEnd(trace);

    // Should use input from llm2, not llm1
    const startTraceCall = mockLogger.startTrace.mock.calls[0][0];
    expect(startTraceCall.input).toBe('Real question');
  });

  // A trace that ends without any spans is expected to report the trace name
  // as the startTrace input.
  test('falls back to trace name if no meaningful input captured', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace({ name: 'Agent Workflow' });

    await processor.onTraceStart(trace);
    await processor.onTraceEnd(trace); // No spans at all

    const startTraceCall = mockLogger.startTrace.mock.calls[0][0];
    // Should fall back to trace name
    expect(startTraceCall.input).toBe('Agent Workflow');
  });

  // Two generation spans with distinct inputs are ended in order; the input
  // passed to startTrace is expected to be the one from the first span.
  test('only captures input from first meaningful span, ignores later ones', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    await processor.onTraceStart(trace);

    const llm1 = makeSpan({
      spanId: 'llm-001',
      parentId: 'trace-001',
      spanData: {
        type: 'generation',
        model: 'gpt-4',
        input: 'First query',
        output: 'First answer'
      }
    });

    const llm2 = makeSpan({
      spanId: 'llm-002',
      parentId: 'trace-001',
      spanData: {
        type: 'generation',
        model: 'gpt-4',
        input: 'Second query',
        output: 'Second answer'
      }
    });

    await processor.onSpanStart(llm1);
    await processor.onSpanEnd(llm1);
    await processor.onSpanStart(llm2);
    await processor.onSpanEnd(llm2);
    await processor.onTraceEnd(trace);

    const startTraceCall = mockLogger.startTrace.mock.calls[0][0];
    // Should use first input, not second
    expect(startTraceCall.input).toBe('First query');
  });
});

describe('GalileoCustomSpan integration via onSpanStart/onSpanEnd', () => {
  /**
   * Builds a span whose `spanData` is marked as a Galileo custom span.
   *
   * Simulate the spanData shape that withCustomSpan produces:
   * the SDK spreads options.data fields onto the top level of spanData.
   *
   * @param galileoSpan - Object stored by reference under
   *   `spanData._galileoSpan`; its `name` (when set) becomes `spanData.name`,
   *   otherwise `'Galileo Custom'` is used.
   * @param overrides - Span fields spread after the defaults, so they replace
   *   `spanId`, `parentId` or the whole `spanData` when provided.
   * @returns The span produced by `makeSpan`.
   */
  function makeCustomSpan(
    galileoSpan: Record<string, unknown>,
    overrides: Partial<AgentSpan> = {}
  ): AgentSpan {
    return makeSpan({
      spanId: 'custom-001',
      parentId: 'trace-001',
      spanData: {
        type: 'custom',
        __galileoCustom: true,
        _galileoSpan: galileoSpan,
        name: (galileoSpan.name as string | undefined) ?? 'Galileo Custom'
      },
      ...overrides
    });
  }

  test('test custom tool span calls addToolSpan', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const galileoSpan = {
      type: 'tool',
      input: 'my input',
      output: 'my output'
    };
    const span = makeCustomSpan(galileoSpan);

    await processor.onTraceStart(trace);
    await processor.onSpanStart(span);
    await processor.onSpanEnd(span);
    await processor.onTraceEnd(trace);

    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
    const call = mockLogger.addToolSpan.mock.calls[0][0];
    expect(call.input).toBe('my input');
    expect(call.output).toBe('my output');
  });

  test('test custom workflow span calls addWorkflowSpan', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const galileoSpan = { type: 'workflow', input: 'wf in', output: 'wf out' };
    const span = makeCustomSpan(galileoSpan);

    await processor.onTraceStart(trace);
    await processor.onSpanStart(span);
    await processor.onSpanEnd(span);
    await processor.onTraceEnd(trace);

    // root is logged via startTrace, not addWorkflowSpan; custom workflow span = 1 call
    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
    const customCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
    expect(customCall.input).toBe('wf in');
  });

  test('test output mutation inside callback is captured at onSpanEnd', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    // galileoSpan starts with no output — simulates a user who will set it later
    const galileoSpan: Record<string, unknown> = {
      type: 'tool',
      input: 'query',
      output: undefined
    };
    const span = makeCustomSpan(galileoSpan);

    await processor.onTraceStart(trace);
    await processor.onSpanStart(span);

    // Simulate the user mutating galileoSpan.output inside the callback before it returns
    galileoSpan.output = 'result after work';

    await processor.onSpanEnd(span);
    await processor.onTraceEnd(trace);

    // Guard the index below: fail with a clear assertion message instead of
    // a TypeError if the tool span was never logged.
    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);

    // Re-extraction at onSpanEnd should have picked up the mutation
    const call = mockLogger.addToolSpan.mock.calls[0][0];
    expect(call.output).toBe('result after work');
  });

  test('test custom span with metadata and tags', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const galileoSpan = {
      type: 'tool',
      input: 'in',
      metadata: { source: 'db' },
      tags: ['tag-a'],
      statusCode: 201
    };
    const span = makeCustomSpan(galileoSpan);

    await processor.onTraceStart(trace);
    await processor.onSpanStart(span);
    await processor.onSpanEnd(span);
    await processor.onTraceEnd(trace);

    // Guard the index below so a missing call is reported as an assertion
    // failure rather than a TypeError.
    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);

    const call = mockLogger.addToolSpan.mock.calls[0][0];
    expect(call.metadata).toEqual({ source: 'db' });
    expect(call.tags).toEqual(['tag-a']);
    expect(call.statusCode).toBe(201);
  });

  test('test custom span with unknown type falls back to addWorkflowSpan', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const galileoSpan = { type: 'future_type', input: 'in' };
    const span = makeCustomSpan(galileoSpan);

    await processor.onTraceStart(trace);
    await processor.onSpanStart(span);
    await processor.onSpanEnd(span);
    await processor.onTraceEnd(trace);

    // root is logged via startTrace; unknown type custom span → 1 addWorkflowSpan call
    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
    expect(mockLogger.addToolSpan).not.toHaveBeenCalled();
  });
});

describe('Trace-level statusCode propagation (_lastStatusCode)', () => {
  /**
   * Looks up the `conclude` call that was made with `concludeAll: true`.
   *
   * @param mockLogger - Logger created by `createMockLogger()` for the test.
   * @returns The argument object of the first matching `conclude` call, or
   *   `undefined` when no call carried `concludeAll === true`.
   */
  function findConcludeAllArgs(
    mockLogger: ReturnType<typeof createMockLogger>
  ): Record<string, unknown> | undefined {
    const concludeCalls = mockLogger.conclude.mock.calls as [
      Record<string, unknown>
    ][];
    return concludeCalls.find(([args]) => args.concludeAll === true)?.[0];
  }

  test('test concludeAll receives statusCode from errored agent span', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const span = makeSpan({
      spanId: 'agent-err-001',
      parentId: 'trace-001',
      error: { message: 'Agent crashed' },
      spanData: { type: 'agent', name: 'CrashingAgent' }
    });

    await processor.onTraceStart(trace);
    await processor.onSpanStart(span);
    await processor.onSpanEnd(span);
    await processor.onTraceEnd(trace);

    // The concludeAll call is the last conclude call
    const concludeAll = findConcludeAllArgs(mockLogger);
    expect(concludeAll).toBeDefined();
    expect(concludeAll?.statusCode).toBe(500);
  });

  test('test concludeAll receives statusCode 200 when no errors', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const span = makeSpan({
      spanId: 'agent-ok-001',
      parentId: 'trace-001',
      spanData: { type: 'agent', name: 'HappyAgent' }
    });

    await processor.onTraceStart(trace);
    await processor.onSpanStart(span);
    await processor.onSpanEnd(span);
    await processor.onTraceEnd(trace);

    const concludeAll = findConcludeAllArgs(mockLogger);
    expect(concludeAll).toBeDefined();
    expect(concludeAll?.statusCode).toBe(200);
  });

  test('test concludeAll uses last workflow statusCode when multiple agents', async () => {
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const agent1 = makeSpan({
      spanId: 'agent-001',
      parentId: 'trace-001',
      spanData: { type: 'agent', name: 'FirstAgent' }
    });
    const agent2 = makeSpan({
      spanId: 'agent-002',
      parentId: 'trace-001',
      error: { message: 'Second agent failed' },
      spanData: { type: 'agent', name: 'SecondAgent' }
    });

    await processor.onTraceStart(trace);
    await processor.onSpanStart(agent1);
    await processor.onSpanEnd(agent1);
    await processor.onSpanStart(agent2);
    await processor.onSpanEnd(agent2);
    await processor.onTraceEnd(trace);

    // concludeAll should carry the last agent's statusCode (500 from agent2)
    const concludeAll = findConcludeAllArgs(mockLogger);
    expect(concludeAll).toBeDefined();
    expect(concludeAll?.statusCode).toBe(500);
  });

  test('test concludeAll has no statusCode when trace has only LLM spans', async () => {
    // LLM/tool spans do not update _lastStatusCode — only workflow/agent concludes do.
    // When there are no workflow/agent spans, concludeAll statusCode should be undefined.
    const mockLogger = createMockLogger();
    const processor = new GalileoTracingProcessor(mockLogger as never, false);
    const trace = makeTrace();

    const llmSpan = makeSpan({
      spanId: 'llm-001',
      parentId: 'trace-001',
      spanData: { type: 'generation', model: 'gpt-4o' }
    });

    await processor.onTraceStart(trace);
    await processor.onSpanStart(llmSpan);
    await processor.onSpanEnd(llmSpan);
    await processor.onTraceEnd(trace);

    const concludeAll = findConcludeAllArgs(mockLogger);
    expect(concludeAll).toBeDefined();
    expect(concludeAll?.statusCode).toBeUndefined();
  });
});