diff --git a/examples/openai/agents.js b/examples/openai/agents.js
new file mode 100644
index 00000000..6e1e4174
--- /dev/null
+++ b/examples/openai/agents.js
@@ -0,0 +1,75 @@
+import dotenv from 'dotenv';
+import { z } from 'zod';
+import { Agent, run, tool } from '@openai/agents'; // `tool` builds the get_weather tool below
+import {
+  init,
+  flush,
+  registerGalileoTraceProcessor
+} from '../../dist/index.js';
+
+dotenv.config(); // load environment variables from a local .env file, if present
+
+await init({
+  projectName: 'openai-agents-example'
+});
+
+await registerGalileoTraceProcessor(); // registered before the agent runs in main()
+
+// Specialist agent: answers weather questions through the get_weather tool.
+// It is declared before the Triage Agent so it can be listed in `handoffs`
+// directly instead of being pushed onto the array afterwards.
+const weatherAgent = new Agent({
+  name: 'Weather Agent',
+  instructions:
+    'You provide weather information. ' +
+    'Given a city name, respond with a short, friendly weather summary. ' +
+    'Make up plausible weather data for demonstration purposes.',
+  tools: [
+    tool({
+      name: 'get_weather',
+      description: 'Get the current weather for a city',
+      parameters: z.object({
+        city: z.string().describe('The city to get weather for')
+      }),
+      // Demo data: fixed temperatures for a few cities, a random 0-29 otherwise.
+      execute: async ({ city }) => {
+        const knownTemps = { london: 14, tokyo: 22, 'new york': 18, paris: 16 };
+        const temperature_c =
+          knownTemps[city.toLowerCase()] ?? Math.floor(Math.random() * 30);
+        const condition = temperature_c > 20 ? 'Sunny' : 'Partly cloudy';
+        return JSON.stringify({ city, temperature_c, condition });
+      }
+    })
+  ]
+});
+
+// Entry-point agent: answers general questions itself and hands weather
+// questions off to the Weather Agent.
+const triageAgent = new Agent({
+  name: 'Triage Agent',
+  instructions:
+    'You determine which agent should handle the user request. ' +
+    'If the question is about weather, hand off to the Weather Agent. ' +
+    'Otherwise, answer the question yourself.',
+  handoffs: [weatherAgent]
+});
+
+async function main() { // runs two demo prompts; traces are flushed even when a run fails
+  console.log('=== OpenAI Agents SDK + Galileo Tracing ===\n');
+  try {
+    console.log('--- Simple single-agent run ---');
+    const simpleResult = await run(triageAgent, 'What is 2 + 2?');
+    console.log('Response:', simpleResult.finalOutput, '\n');
+    console.log('--- Handoff + tool call run ---');
+    const weatherResult = await run(triageAgent, "What's the weather in Tokyo?");
+    console.log('Response:', weatherResult.finalOutput, '\n');
+  } finally {
+    await flush(); // runs on success and on error, so a failed run's trace is still sent
+  }
+  console.log('Done — traces flushed to Galileo.');
+}
+
+main().catch((err) => { // last-resort handler for anything main() rejects with
+  console.error('Unhandled error:', err);
+  process.exit(1); // exit non-zero so the failure is visible to the caller
+});
diff --git a/examples/package.json b/examples/package.json
index bd6ef0ce..5eabced4 100644
--- a/examples/package.json
+++ b/examples/package.json
@@ -12,8 +12,11 @@
     "@langchain/community": "^0.3.18",
     "@langchain/core": "^0.3.13",
     "@langchain/openai": "^0.3.11",
+    "@openai/agents": "^0.7.0",
     "@rungalileo/galileo": "file:..",
     "dotenv": "^16.4.5",
-    "typecript": "^0.0.1-security"
+    "openai": "^6.26.0",
+    "typecript": "^0.0.1-security",
+    "zod": "^4.0.0"
   }
 }
diff --git a/package-lock.json b/package-lock.json
index 88b4a34d..d361912f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -48,6 +48,18 @@
       "optionalDependencies": {
         "@langchain/openai": "^0.3.11",
         "tiktoken": "^1.0.13"
+      },
+      "peerDependencies": {
+        "@openai/agents": ">=0.4.0",
+        "openai": ">=4.0.0"
+      },
+      "peerDependenciesMeta": {
+        "@openai/agents": {
+          "optional": true
+        },
+        "openai": {
+          "optional": true
+        }
       }
     },
     "node_modules/@ampproject/remapping": {
@@ -9261,15 +9273,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/undici": {
-      "version": "7.23.0",
-      "resolved": "https://registry.npmjs.org/undici/-/undici-7.23.0.tgz",
-      "integrity": "sha512-HVMxHKZKi+eL2mrUZDzDkKW3XvCjynhbtpSq20xQp4ePDFeSFuAfnvM0GIwZIv8fiKHjXFQ5WjxhCt15KRNj+g==",
-      "optional": true,
-      "engines": {
-        "node": ">=20.18.1"
-      }
-    },
     "node_modules/undici-types": {
       "version": "6.21.0",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
diff --git a/package.json b/package.json
index 50b4f61a..eb2281c5 100644
--- a/package.json
+++ b/package.json
@@ -48,6 +48,18 @@
     "@langchain/openai": "^0.3.11",
     "tiktoken": "^1.0.13"
   },
+  "peerDependencies": {
+    "@openai/agents": ">=0.4.0",
+    "openai": ">=4.0.0"
+  },
+  "peerDependenciesMeta": {
+    "@openai/agents": {
+      "optional": true
+    },
+    "openai": {
+      "optional": true
+    }
+  },
   "devDependencies": {
     "@hey-api/openapi-ts": "^0.88.0",
     "@types/jest": "^29.5.14",
diff --git a/src/handlers/langchain.ts b/src/handlers/langchain.ts
index 3dd22e91..e8ba1e9f 100644
--- a/src/handlers/langchain.ts
+++ b/src/handlers/langchain.ts
@@ -12,7 +12,7 @@ import { AgentFinish } from '@langchain/core/agents';
 import { Document, DocumentInterface } from '@langchain/core/documents';
 import { GalileoSingleton } from '../singleton';
 import { GalileoLogger } from '../utils/galileo-logger';
-import { toStringValue, convertToStringDict } from '../utils/serialization';
+import { toStringValue, toStringRecord } from '../utils/serialization';
 import { getSdkLogger } from 'galileo-generated';
 import { Serialized } from '@langchain/core/load/serializable.js';
 
@@ -151,8 +151,8 @@ export class GalileoCallback
     let metadata: Record<string, string> | undefined = undefined;
     if (node.spanParams.metadata) {
       try {
-        metadata = convertToStringDict(
-          node.spanParams.metadata as Record<string, any>
+        metadata = toStringRecord(
+          node.spanParams.metadata as Record<string, unknown>
         );
       } catch (e) {
         sdkLogger.warn('Unable to convert metadata to a string dictionary', e);
@@ -485,7 +485,7 @@ export class GalileoCallback
       | undefined;
 
     // Serialize messages safely
-    let serializedMessages;
+    let serializedMessages: unknown;
     try {
       const flattenedMessages = messages.flat().map((msg) => ({
         content: msg.content,
@@ -516,7 +516,7 @@ export class GalileoCallback
   public async handleLLMEnd(output: LLMResult, runId: string): Promise<void> {
     const tokenUsage = output.llmOutput?.tokenUsage || {};
 
-    let serializedOutput;
+    let serializedOutput: unknown;
     try {
       const flattenedOutput = output.generations.flat().map((g) => ({
         text: g.text,
@@ -605,7 +605,7 @@ export class GalileoCallback
     documents: DocumentInterface<Record<string, unknown>>[],
     runId: string
   ): Promise<void> {
-    let serializedResponse;
+    let serializedResponse: unknown;
     try {
       serializedResponse = documents.map((doc) => ({
         pageContent: doc.pageContent,
diff --git a/src/handlers/openai-agents/custom-span.ts b/src/handlers/openai-agents/custom-span.ts
new file mode 100644
index 00000000..41a035bf
--- /dev/null
+++ b/src/handlers/openai-agents/custom-span.ts
@@ -0,0 +1,74 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+
+/**
+ * Duck-typed interface describing the expected shape of a Galileo span object
+ * that can be injected into the OpenAI Agents tracing flow.
+ *
+ * Mirrors the fields extracted by galileo-python's GalileoCustomSpan handler:
+ * input, output, metadata (user_metadata), tags, status_code, and type.
+ */
+export interface GalileoSpanLike {
+  type?: string; // extractGalileoCustomData accepts 'tool' | 'workflow' | 'agent'; anything else becomes 'workflow'
+  input?: unknown; // non-string values are JSON-serialized on extraction
+  output?: unknown; // non-string values are JSON-serialized on extraction
+  name?: string;
+  metadata?: Record<string, string>; // extraction substitutes {} when absent
+  tags?: string[]; // forwarded only when defined
+  statusCode?: number; // forwarded only when defined
+}
+
+/**
+ * A lightweight subtype of CustomSpanData that carries a reference to a
+ * pre-configured GalileoSpan so it can be injected into the agent tracing flow.
+ *
+ * The __galileoCustom flag is what isGalileoCustomSpanData() tests; mapSpanType()
+ * uses it to distinguish this from ordinary CustomSpanData objects.
+ */
+export interface GalileoCustomSpanData {
+  /** Always 'custom' to satisfy the SDK's SpanData union discriminant. */
+  type: 'custom';
+  /** (Optional) Display name for the span. */
+  name?: string;
+  /** Data payload; createGalileoCustomSpanData() stores the span under the 'galileoSpan' key. */
+  data: Record<string, unknown> & { galileoSpan: GalileoSpanLike };
+  /** Sentinel flag, always the literal `true`, that identifies this type at runtime. */
+  __galileoCustom: true;
+}
+
+/**
+ * Wraps a Galileo span in a custom span-data envelope.
+ * `galileoSpan` is written after `extraData` is spread, so it overrides any same-named key.
+ * @param galileoSpan - The Galileo span object to embed.
+ * @param name - (Optional) Display name for the span.
+ * @param extraData - (Optional) Additional entries merged into the data payload.
+ * @returns The envelope, tagged with `__galileoCustom: true`.
+ */
+export function createGalileoCustomSpanData(
+  galileoSpan: GalileoSpanLike,
+  name?: string,
+  extraData?: Record<string, unknown>
+): GalileoCustomSpanData {
+  const payload = { ...extraData, galileoSpan };
+  const wrapped: GalileoCustomSpanData = {
+    type: 'custom',
+    name,
+    data: payload,
+    __galileoCustom: true
+  };
+  return wrapped;
+}
+
+/**
+ * Type guard that recognises a GalileoCustomSpanData by its `__galileoCustom` sentinel.
+ * @param spanData - The span data to check.
+ * @returns True only for a non-null object whose `__galileoCustom` is strictly `true`.
+ */
+export function isGalileoCustomSpanData(
+  spanData: unknown
+): spanData is GalileoCustomSpanData {
+  if (typeof spanData !== 'object' || spanData === null) {
+    return false;
+  }
+  const { __galileoCustom: marker } = spanData as { __galileoCustom?: unknown };
+  return marker === true;
+}
diff --git a/src/handlers/openai-agents/data-extraction.ts b/src/handlers/openai-agents/data-extraction.ts
new file mode 100644
index 00000000..218121e0
--- /dev/null
+++ b/src/handlers/openai-agents/data-extraction.ts
@@ -0,0 +1,326 @@
+import type { GalileoSpanLike } from './custom-span';
+import type { NodeType } from './node';
+import { parseUsage } from '../openai/usage';
+export { parseUsage, type ParsedUsage } from '../openai/usage';
+
+const MODEL_PARAM_KEYS = [ // response fields copied into `modelParameters` by extractLlmData
+  'temperature',
+  'max_output_tokens',
+  'top_p',
+  'tool_choice',
+  'parallel_tool_calls',
+  'truncation',
+  'seed',
+  'frequency_penalty',
+  'presence_penalty'
+];
+
+const RESPONSE_EXCLUDE = new Set([ // response keys omitted from `metadata.response_metadata`
+  'input',
+  'output',
+  'usage',
+  'tools',
+  'error',
+  'status'
+]);
+
+/** Stringify a value for LLM span input/output: strings as-is, null/undefined → '', else JSON (String() fallback). */
+function llmSerializeToString(value: unknown): string {
+  if (typeof value === 'string') return value;
+  try {
+    return value == null ? '' : (JSON.stringify(value) ?? String(value));
+  } catch {
+    return String(value); // JSON.stringify throws on cyclic values and BigInt
+  }
+}
+
+/**
+ * Builds LLM span parameters from a `generation` or `response` span.
+ *
+ * For `generation`, everything is read straight off the span data. For
+ * `response`, the request input and the response object come from the
+ * underscore-prefixed fields (`_input`, `_response`) when set, falling back to
+ * `input` / `response`.
+ *
+ * @param spanData - The span data object (must have type 'generation' or 'response').
+ * @returns A flat record of LLM span parameters; `{}` for any other span type.
+ */
+export function extractLlmData(
+  spanData: Record<string, unknown>
+): Record<string, unknown> {
+  type Dict = Record<string, unknown>;
+
+  if (spanData.type === 'generation') {
+    const rawUsage = (spanData.usage as Dict | undefined) ?? {};
+    const genTokens = parseUsage(rawUsage);
+    const config = (spanData.model_config as Dict | undefined) ?? {};
+    const inDetails =
+      (rawUsage.input_tokens_details as Dict | undefined) ?? null;
+    const outDetails =
+      (rawUsage.output_tokens_details as Dict | undefined) ?? null;
+
+    // Token detail objects are attached only when the usage payload has them.
+    const genMetadata: Dict = { gen_ai_system: 'openai', model_config: config };
+    if (inDetails) genMetadata.input_tokens_details = inDetails;
+    if (outDetails) genMetadata.output_tokens_details = outDetails;
+
+    return {
+      input: llmSerializeToString(spanData.input),
+      output: llmSerializeToString(spanData.output),
+      model: (spanData.model as string | undefined) ?? 'unknown',
+      temperature: (config.temperature as number | undefined) ?? undefined,
+      modelParameters: config,
+      numInputTokens: genTokens.inputTokens,
+      numOutputTokens: genTokens.outputTokens,
+      totalTokens: genTokens.totalTokens ?? undefined,
+      numReasoningTokens: genTokens.reasoningTokens,
+      numCachedInputTokens: genTokens.cachedTokens,
+      metadata: genMetadata
+    };
+  }
+
+  if (spanData.type !== 'response') {
+    return {};
+  }
+
+  // ResponseSpanData uses underscore-prefixed fields in TypeScript SDK
+  const requestInput = spanData._input ?? spanData.input;
+  const response = (spanData._response ?? spanData.response) as Dict | undefined;
+  const tokens = parseUsage((response?.usage as Dict | undefined) ?? null);
+
+  // Whitelisted model settings that are actually set on the response.
+  const paramEntries = response
+    ? MODEL_PARAM_KEYS.filter((key) => response[key] !== undefined).map(
+        (key): [string, unknown] => [key, response[key]]
+      )
+    : [];
+  const modelParameters: Dict = Object.fromEntries(paramEntries);
+
+  // Every other response field except the ones listed in RESPONSE_EXCLUDE.
+  const metaEntries = response
+    ? Object.entries(response).filter(([key]) => !RESPONSE_EXCLUDE.has(key))
+    : [];
+  const responseMetadata: Dict = Object.fromEntries(metaEntries);
+
+  // An error object on the response yields a status code (500 when it has none).
+  const responseError = response?.error as
+    | { status_code?: number; message?: string; [k: string]: unknown }
+    | undefined
+    | null;
+  let statusCode: number | undefined;
+  if (responseError) {
+    statusCode = responseError.status_code ?? 500;
+  }
+
+  const metadata: Dict = { gen_ai_system: 'openai' };
+  if (Object.keys(responseMetadata).length > 0) {
+    metadata.response_metadata = responseMetadata;
+  }
+  if (response?.instructions !== undefined) {
+    metadata.instructions = response.instructions;
+  }
+  if (responseError) {
+    metadata.error_details = responseError;
+  }
+
+  // Keys are added in a fixed order; `statusCode` exists only for error responses.
+  const params: Dict = {
+    input: llmSerializeToString(requestInput),
+    output: llmSerializeToString(response?.output),
+    model:
+      (response?.model as string | undefined) ??
+      (spanData.model as string | undefined) ??
+      'unknown',
+    temperature: (response?.temperature as number | undefined) ?? undefined,
+    tools: response?.tools,
+    modelParameters,
+    numInputTokens: tokens.inputTokens,
+    numOutputTokens: tokens.outputTokens,
+    totalTokens: tokens.totalTokens ?? undefined,
+    numReasoningTokens: tokens.reasoningTokens,
+    numCachedInputTokens: tokens.cachedTokens
+  };
+  if (statusCode !== undefined) {
+    params.statusCode = statusCode;
+  }
+  params.metadata = metadata;
+  params._responseObject = response;
+  return params;
+}
+
+/**
+ * Builds tool span parameters from a `function` or `guardrail` span.
+ *
+ * - `function`: input and output are passed through when they are strings and
+ *   JSON-serialized otherwise; a missing input becomes '' while a missing
+ *   output stays undefined. `mcp_data`, when defined, is stored in metadata as
+ *   a JSON string.
+ * - `guardrail`: the output is `{"triggered": <boolean>}`; metadata repeats the
+ *   flag and adds `status: 'warning'` when the guardrail was triggered.
+ * - any other type (transcription, speech, speech_group, mcp_tools, ...): empty
+ *   input, no output and empty metadata.
+ *
+ * @param spanData - The span data object (must have type 'function' or 'guardrail').
+ * @returns A flat record of tool span parameters.
+ */
+export function extractToolData(
+  spanData: Record<string, unknown>
+): Record<string, unknown> {
+  const asText = (value: unknown): string =>
+    typeof value === 'string' ? value : JSON.stringify(value);
+
+  switch (spanData.type) {
+    case 'function': {
+      const { input, output, mcp_data: mcpData } = spanData;
+      return {
+        input: input === undefined ? '' : asText(input),
+        output: output === undefined ? undefined : asText(output),
+        metadata:
+          mcpData === undefined ? {} : { mcp_data: JSON.stringify(mcpData) }
+      };
+    }
+    case 'guardrail': {
+      const triggered = Boolean(spanData.triggered);
+      const metadata: Record<string, unknown> = { triggered };
+      if (triggered) {
+        metadata.status = 'warning';
+      }
+      return {
+        input: '',
+        output: JSON.stringify({ triggered }),
+        metadata
+      };
+    }
+    default:
+      // Transcription / Speech / speech_group / mcp_tools — map to tool but no deep extraction
+      return { input: '', output: undefined, metadata: {} };
+  }
+}
+
+/**
+ * Builds workflow span parameters from an `agent`, `handoff` or `custom` span.
+ *
+ * - `agent`: no input/output; `tools`, `handoffs` and `output_type` are copied
+ *   into metadata when defined, and a string `agentType` is passed through.
+ * - `handoff`: `from_agent` / `to_agent` become the JSON input / output (when
+ *   non-empty) and are both recorded in metadata.
+ * - `custom`: `data.input` / `data.output` become input / output; every other
+ *   entry of `data` whose value is not null/undefined is kept in metadata.
+ *
+ * @param spanData - The span data object (must have type 'agent', 'handoff', or 'custom').
+ * @returns A flat record of workflow span parameters.
+ */
+export function extractWorkflowData(
+  spanData: Record<string, unknown>
+): Record<string, unknown> {
+  const asText = (value: unknown): string =>
+    typeof value === 'string' ? value : JSON.stringify(value);
+
+  switch (spanData.type) {
+    case 'agent': {
+      const metadata: Record<string, unknown> = {};
+      if (spanData.tools !== undefined) metadata.tools = spanData.tools;
+      if (spanData.handoffs !== undefined) metadata.handoffs = spanData.handoffs;
+      if (spanData.output_type !== undefined) {
+        metadata.output_type = spanData.output_type;
+      }
+      const params: Record<string, unknown> = { input: '', output: undefined };
+      if (typeof spanData.agentType === 'string') {
+        params.agentType = spanData.agentType;
+      }
+      params.metadata = metadata;
+      return params;
+    }
+
+    case 'handoff': {
+      const source = String((spanData.from_agent as string | undefined) ?? '');
+      const target = String((spanData.to_agent as string | undefined) ?? '');
+      return {
+        input: source ? JSON.stringify({ from_agent: source }) : '',
+        output: target ? JSON.stringify({ to_agent: target }) : undefined,
+        metadata: { from_agent: source, to_agent: target }
+      };
+    }
+
+    case 'custom': {
+      const payload =
+        (spanData.data as Record<string, unknown> | undefined) ?? {};
+
+      // Everything except input/output goes to metadata; values are kept as-is
+      const metadata: Record<string, unknown> = {};
+      for (const [key, value] of Object.entries(payload)) {
+        const reserved = key === 'input' || key === 'output';
+        if (!reserved && value !== null && value !== undefined) {
+          metadata[key] = value;
+        }
+      }
+
+      return {
+        input: payload.input === undefined ? '' : asText(payload.input),
+        output:
+          payload.output === undefined ? undefined : asText(payload.output),
+        metadata
+      };
+    }
+
+    default:
+      return { input: '', output: undefined, metadata: {} };
+  }
+}
+
+const VALID_GALILEO_NODE_TYPES: readonly string[] = [
+  'tool',
+  'workflow',
+  'agent'
+];
+
+/**
+ * Extracts span parameters from a GalileoCustomSpanData, delegating to the
+ * inner galileoSpan for input, output, metadata, tags, statusCode, and type.
+ *
+ * The inner span is looked up at `spanData._galileoSpan` first and then at
+ * `spanData.data.galileoSpan`, which is where createGalileoCustomSpanData()
+ * stores it. When neither holds an object, the span data is handled as an
+ * ordinary workflow span via extractWorkflowData().
+ *
+ * @param spanData - The span data object (must have __galileoCustom: true).
+ * @returns The effective node type and extracted parameters.
+ */
+export function extractGalileoCustomData(spanData: Record<string, unknown>): {
+  nodeType: NodeType;
+  params: Record<string, unknown>;
+} {
+  // Accept both locations so spans built by the factory are not silently
+  // downgraded to the workflow fallback.
+  const payload = spanData.data as Record<string, unknown> | null | undefined;
+  const galileoSpan = (spanData._galileoSpan ?? payload?.galileoSpan) as
+    | GalileoSpanLike
+    | undefined;
+
+  if (!galileoSpan || typeof galileoSpan !== 'object') {
+    return { nodeType: 'workflow', params: extractWorkflowData(spanData) };
+  }
+
+  // Non-string input/output values are JSON-serialized.
+  const asText = (value: unknown): string =>
+    typeof value === 'string' ? value : JSON.stringify(value);
+
+  // Only 'tool', 'workflow' and 'agent' are accepted; anything else is a workflow.
+  const nodeType: NodeType =
+    typeof galileoSpan.type === 'string' &&
+    VALID_GALILEO_NODE_TYPES.includes(galileoSpan.type)
+      ? (galileoSpan.type as NodeType)
+      : 'workflow';
+
+  const { input, output, tags, statusCode } = galileoSpan;
+  return {
+    nodeType,
+    params: {
+      input: input === undefined ? '' : asText(input),
+      output: output === undefined ? undefined : asText(output),
+      metadata: galileoSpan.metadata ?? {},
+      ...(tags !== undefined ? { tags } : {}),
+      ...(statusCode !== undefined ? { statusCode } : {})
+    }
+  };
+}
diff --git a/src/handlers/openai-agents/embedded-tools.ts b/src/handlers/openai-agents/embedded-tools.ts
new file mode 100644
index 00000000..3163750f
--- /dev/null
+++ b/src/handlers/openai-agents/embedded-tools.ts
@@ -0,0 +1,172 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+
+/**
+ * A single embedded tool call record extracted from a ResponseSpanData output array.
+ */
+export interface EmbeddedToolCall {
+  type: string; // extractEmbeddedToolCalls always sets 'function'
+  function: { name: string }; // display name from getToolNameFromType
+  tool_call_id: string | null; // item `id`, else `call_id`, else null
+  tool_call_type: string; // the item's original `type`, e.g. 'web_search_call'
+  tool_call_input: string | null; // from extractToolInput
+  tool_call_output: string | null; // from extractToolOutput
+  tool_call_status: string | null; // item `status`; extractEmbeddedToolCalls defaults it to 'completed'
+}
+
+const EMBEDDED_TOOL_TYPES = new Set([ // output item types that extractEmbeddedToolCalls picks up
+  'code_interpreter_call',
+  'file_search_call',
+  'web_search_call',
+  'computer_call',
+  'custom_tool_call'
+]);
+
+/** Display names for the known embedded tool call types, keyed by call type. */
+const TOOL_DISPLAY_NAMES: ReadonlyMap<string, string> = new Map([
+  ['code_interpreter_call', 'code_interpreter'],
+  ['file_search_call', 'file_search'],
+  ['web_search_call', 'web_search'],
+  ['computer_call', 'computer'],
+  ['custom_tool_call', 'custom_tool']
+]);
+
+/**
+ * Maps an OpenAI embedded tool call type to a display name.
+ * @param type - The tool call type string.
+ * @returns The mapped display name, or `type` itself when it is not a known type.
+ */
+export function getToolNameFromType(type: string): string {
+  const displayName = TOOL_DISPLAY_NAMES.get(type);
+  if (displayName === undefined) {
+    return type;
+  }
+  return displayName;
+}
+
+/**
+ * Extracts the input field from an embedded tool call item.
+ *
+ * The field consulted depends on the call type: `code` for code interpreter,
+ * `queries` for file search, `action.query` for web search, `action` for
+ * computer calls and `input` for custom tools.
+ *
+ * @param item - The raw output item from the response.
+ * @param type - The tool call type string.
+ * @returns The extracted input as a string, or null if none.
+ */
+export function extractToolInput(
+  item: Record<string, unknown>,
+  type: string
+): string | null {
+  if (type === 'code_interpreter_call') {
+    return item.code === undefined ? null : String(item.code);
+  }
+  if (type === 'file_search_call') {
+    const { queries } = item;
+    if (queries === undefined) return null;
+    return Array.isArray(queries) ? JSON.stringify(queries) : String(queries);
+  }
+  if (type === 'web_search_call') {
+    const searchAction = item.action as Record<string, unknown> | undefined;
+    const searchQuery = searchAction?.query;
+    return searchQuery === undefined ? null : String(searchQuery);
+  }
+  if (type === 'computer_call') {
+    return item.action === undefined ? null : JSON.stringify(item.action);
+  }
+  if (type === 'custom_tool_call') {
+    const raw = item.input;
+    if (raw === undefined) return null;
+    return typeof raw === 'string' ? raw : JSON.stringify(raw);
+  }
+  return null;
+}
+
+/**
+ * Extracts the output field from an embedded tool call item.
+ *
+ * Code interpreter outputs are reduced to their `logs` (or, failing that,
+ * `url`) values joined by newlines; file search uses `results`, web search
+ * serializes `action`, custom tools use `output`, and computer calls have no
+ * output.
+ *
+ * @param item - The raw output item from the response.
+ * @param type - The tool call type string.
+ * @returns The extracted output as a string, or null if none.
+ */
+export function extractToolOutput(
+  item: Record<string, unknown>,
+  type: string
+): string | null {
+  if (type === 'code_interpreter_call') {
+    const entries = item.outputs as Array<Record<string, unknown>> | undefined;
+    if (!Array.isArray(entries) || entries.length === 0) return null;
+    // Concatenate all output logs and urls
+    const pieces: string[] = [];
+    entries.forEach((entry) => {
+      if (entry.logs !== undefined) {
+        pieces.push(String(entry.logs));
+      } else if (entry.url !== undefined) {
+        pieces.push(String(entry.url));
+      }
+    });
+    return pieces.length > 0 ? pieces.join('\n') : null;
+  }
+  if (type === 'file_search_call') {
+    const { results } = item;
+    if (results === undefined) return null;
+    return Array.isArray(results) ? JSON.stringify(results) : String(results);
+  }
+  if (type === 'web_search_call') {
+    return item.action === undefined ? null : JSON.stringify(item.action);
+  }
+  if (type === 'custom_tool_call') {
+    const raw = item.output;
+    if (raw === undefined) return null;
+    return typeof raw === 'string' ? raw : JSON.stringify(raw);
+  }
+  // 'computer_call' and unknown types carry no output.
+  return null;
+}
+
+/**
+ * Walks the _response.output array and returns all embedded tool call records.
+ *
+ * Entries that are not objects, or whose `type` is not one of the embedded
+ * tool call types, are skipped. Each record takes its id from `id` (falling
+ * back to `call_id`, then null) and reports a missing `status` as
+ * 'completed'. A missing response or a non-array `output` yields [].
+ *
+ * @param response - The response object from a ResponseSpanData span.
+ * @returns An array of EmbeddedToolCall records.
+ */
+export function extractEmbeddedToolCalls(
+  response: Record<string, unknown> | null | undefined
+): EmbeddedToolCall[] {
+  const outputItems: unknown = response ? response.output : undefined;
+  if (!Array.isArray(outputItems)) return [];
+
+  const isEmbeddedToolItem = (
+    candidate: unknown
+  ): candidate is Record<string, unknown> & { type: string } => {
+    if (typeof candidate !== 'object' || candidate === null) return false;
+    const kind = (candidate as Record<string, unknown>).type as
+      | string
+      | undefined;
+    return Boolean(kind) && EMBEDDED_TOOL_TYPES.has(kind as string);
+  };
+
+  // Keep only embedded tool call items, then shape each one into a record.
+  return outputItems.filter(isEmbeddedToolItem).map((toolItem) => ({
+    type: 'function',
+    function: { name: getToolNameFromType(toolItem.type) },
+    tool_call_id:
+      (toolItem.id as string | undefined) ??
+      (toolItem.call_id as string | undefined) ??
+      null,
+    tool_call_type: toolItem.type,
+    tool_call_input: extractToolInput(toolItem, toolItem.type),
+    tool_call_output: extractToolOutput(toolItem, toolItem.type),
+    tool_call_status: (toolItem.status as string | undefined) ?? 'completed'
+  }));
+}
diff --git a/src/handlers/openai-agents/index.ts b/src/handlers/openai-agents/index.ts
new file mode 100644
index 00000000..818c6c4e
--- /dev/null
+++ b/src/handlers/openai-agents/index.ts
@@ -0,0 +1,608 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import { GalileoLogger } from '../../utils/galileo-logger';
+import { GalileoSingleton } from '../../singleton';
+import { calculateDurationNs } from '../../utils/utils';
+import { toStringRecord } from '../../utils/serialization';
+import type { JsonObject } from '../../types/base.types';
+import { AgentType } from '../../types/new-api.types';
+import { type Node, createNode } from './node';
+import { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping';
+import {
+  extractLlmData,
+  extractToolData,
+  extractWorkflowData,
+  extractGalileoCustomData
+} from './data-extraction';
+import { extractEmbeddedToolCalls } from './embedded-tools';
+import type { GalileoSpanLike } from './custom-span';
+import { getSdkLogger } from 'galileo-generated';
+const sdkLogger = getSdkLogger();
+
+/**
+ * Minimal interface for an OpenAI Agents SDK Trace object (only the fields read by GalileoTracingProcessor).
+ */
+export interface AgentTrace {
+  traceId: string; // key of the root node in the processor's node map
+  name?: string; // root span name; 'Agent Run' is used when missing
+  metadata?: Record<string, unknown>; // stringified via toStringRecord
+  startedAt?: string | null; // date string; defaults to "now" when absent
+  endedAt?: string | null; // date string; defaults to "now" when absent
+}
+
+/**
+ * Minimal interface for an OpenAI Agents SDK Span object (only the fields read by GalileoTracingProcessor).
+ */
+export interface AgentSpan<
+  T extends Record<string, unknown> = Record<string, unknown>
+> {
+  spanId: string; // key of this span's node in the processor's node map
+  traceId: string; // used as the parent id when parentId is null/undefined
+  parentId?: string | null;
+  startedAt?: string | null; // date string; defaults to "now" when absent
+  endedAt?: string | null; // date string; defaults to "now" when absent
+  error?: {
+    message: string;
+    data?: Record<string, unknown>;
+    type?: string;
+  } | null; // when set, the node is logged with statusCode 500
+  spanData: T & { type: string }; // `type` discriminates the span kind
+}
+
+/**
+ * Minimal TracingProcessor interface from @openai/agents-core, redeclared locally (presumably to avoid a static import of the package — confirm).
+ */
+export interface TracingProcessor {
+  onTraceStart(trace: AgentTrace): Promise<void>;
+  onTraceEnd(trace: AgentTrace): Promise<void>;
+  onSpanStart(span: AgentSpan): Promise<void>;
+  onSpanEnd(span: AgentSpan): Promise<void>;
+  shutdown(timeout?: number): Promise<void>; // GalileoTracingProcessor ignores `timeout`
+  forceFlush(): Promise<void>;
+}
+
+/**
+ * True when a span input is non-nullish, non-blank, and not the string 'null'.
+ */
+function isMeaningfulInput(value: unknown): boolean {
+  if (value === undefined || value === null) return false;
+  const text = String(value).trim();
+  return text !== '' && text !== 'null';
+}
+
+/**
+ * Returns a copy of `existing` extended with error_message, error_type and
+ * error_details entries derived from the span error.
+ */
+function buildErrorMetadata(
+  error: { message: string; data?: Record<string, unknown>; type?: string },
+  existing: Record<string, string>
+): Record<string, string> {
+  const message = error.message || 'Unknown error';
+  const merged: Record<string, string> = { ...existing };
+  merged.error_message = message;
+  merged.error_type = error.type ?? 'SpanError';
+  merged.error_details = error.data ? JSON.stringify(error.data) : message;
+  return merged;
+}
+
+/**
+ * Maps an OpenAI agent type string to a Galileo AgentType enum value.
+ * Returns undefined when no agentType is present so addAgentSpan() can use its default.
+ *
+ * Currently not being used because of parity with galileo-python (which used workflow instead);
+ * the TypeScript and Python SDKs have to be updated simultaneously.
+ */
+// eslint-disable-next-line @typescript-eslint/no-unused-vars
+function extractAgentType(
+  spanParams: Record<string, unknown>
+): AgentType | undefined {
+  const raw = spanParams.agentType;
+  if (typeof raw !== 'string' || !raw) return undefined;
+
+  // A Map (not an object literal) keeps inherited keys such as 'constructor'
+  // or 'toString' from resolving to Object.prototype members; unknown names
+  // fall back to AgentType.DEFAULT.
+  const typeMap = new Map<string, AgentType>([
+    ['classifier', AgentType.CLASSIFIER],
+    ['planner', AgentType.PLANNER],
+    ['react', AgentType.REACT],
+    ['reflection', AgentType.REFLECTION],
+    ['router', AgentType.ROUTER],
+    ['supervisor', AgentType.SUPERVISOR],
+    ['judge', AgentType.JUDGE],
+    ['default', AgentType.DEFAULT]
+  ]);
+  return typeMap.get(raw.toLowerCase()) ?? AgentType.DEFAULT;
+}
+
+/**
+ * GalileoTracingProcessor implements the OpenAI Agents SDK TracingProcessor interface
+ * to capture agent runs and emit them to GalileoLogger.
+ *
+ * Trace Input Handling:
+ * - Trace-level input is populated from the first LLM or Tool span with non-empty input
+ * - This ensures user queries are preserved in trace metadata
+ * - Falls back to trace name if no meaningful input is captured
+ */
+export class GalileoTracingProcessor implements TracingProcessor {
+  private _nodes = new Map<string, Node>();
+  private _lastOutput: unknown = null;
+  private _lastStatusCode: number | null = null;
+  private _firstInput: unknown = null;
+  private static _depCheckDone = false;
+
+  /**
+   * Creates a new GalileoTracingProcessor.
+   * @param _galileoLogger - (Optional) The GalileoLogger instance to use. Defaults to singleton logger.
+   * @param _flushOnTraceEnd - (Optional) Whether to flush the logger after each trace ends. Defaults to true.
+   */
+  constructor(
+    private readonly _galileoLogger: GalileoLogger = GalileoSingleton.getInstance().getClient(),
+    private readonly _flushOnTraceEnd: boolean = true
+  ) {
+    // Lazily check for @openai/agents-core package only when processor is instantiated
+    if (!GalileoTracingProcessor._depCheckDone) {
+      GalileoTracingProcessor._depCheckDone = true;
+      import('@openai/agents-core' as string).catch(() => {
+        sdkLogger.warn(
+          '@openai/agents package is not installed. GalileoTracingProcessor will not function.'
+        );
+      });
+    }
+  }
+
+  /**
+   * Called when a trace starts. Creates a root agent node.
+   * @param trace - The trace that started.
+   */
+  async onTraceStart(trace: AgentTrace): Promise<void> {
+    const spanParams: Record<string, unknown> = {
+      name: trace.name || 'Agent Run',
+      startedAt: trace.startedAt || new Date().toISOString()
+    };
+
+    if (trace.metadata) {
+      spanParams.metadata = toStringRecord(trace.metadata);
+    }
+
+    const node = createNode({
+      nodeType: 'agent',
+      spanParams,
+      runId: trace.traceId,
+      parentRunId: null
+    });
+
+    this._nodes.set(trace.traceId, node);
+  }
+
+  /**
+   * Called when a trace ends. Commits the span tree, optionally flushes the logger, and always resets per-trace state.
+   * @param trace - The trace that ended.
+   */
+  async onTraceEnd(trace: AgentTrace): Promise<void> {
+    try {
+      const rootNode = this._nodes.get(trace.traceId);
+      if (rootNode) {
+        const startedAt = rootNode.spanParams.startedAt as string | undefined;
+        const endedAt = trace.endedAt || new Date().toISOString();
+        rootNode.spanParams.durationNs = startedAt
+          ? calculateDurationNs(new Date(startedAt), new Date(endedAt))
+          : 0;
+        rootNode.spanParams.endedAt = endedAt;
+      }
+      this._commitTrace(trace);
+      this._galileoLogger.conclude({
+        concludeAll: true,
+        statusCode: this._lastStatusCode ?? undefined
+      });
+      if (this._flushOnTraceEnd) {
+        await this._galileoLogger.flush();
+      }
+    } finally {
+      // Reset per-trace state even when committing or flushing throws, so a
+      // failed trace cannot leak nodes or input/output into the next one.
+      this._nodes.clear();
+      this._lastOutput = null;
+      this._lastStatusCode = null;
+      this._firstInput = null;
+    }
+  }
+
+  /**
+   * Called when a span starts. Maps span type, creates a Node, and links it to its parent.
+   * @param span - The span that started.
+   */
+  async onSpanStart(span: AgentSpan): Promise<void> {
+    const spanData = span.spanData;
+    const spanType = mapSpanType(spanData);
+    const spanName = mapSpanName(spanData, spanType);
+
+    let initialParams: Record<string, unknown> = {
+      name: spanName,
+      startedAt: span.startedAt || new Date().toISOString()
+    };
+
+    // Determine effective node type and extract data.
+    // galileo_custom delegates to the inner galileoSpan for type + fields.
+    let nodeType: Node['nodeType'];
+
+    if (spanType === GALILEO_CUSTOM_TYPE) {
+      const custom = extractGalileoCustomData(spanData);
+      nodeType = custom.nodeType;
+      initialParams = { ...initialParams, ...custom.params };
+    } else if (spanType === 'llm') {
+      nodeType = 'llm';
+      initialParams = { ...initialParams, ...extractLlmData(spanData) };
+    } else if (spanType === 'tool') {
+      nodeType = 'tool';
+      initialParams = { ...initialParams, ...extractToolData(spanData) };
+    } else if (spanType === 'agent') {
+      nodeType = 'agent';
+      initialParams = { ...initialParams, ...extractWorkflowData(spanData) };
+    } else {
+      nodeType = 'workflow';
+      initialParams = { ...initialParams, ...extractWorkflowData(spanData) };
+    }
+
+    // Determine parent ID (prefer explicit parentId, fallback to traceId)
+    const parentId = span.parentId ?? span.traceId;
+
+    // Validate that parent node exists before creating and linking this node
+    const parentNode = this._nodes.get(parentId);
+    if (!parentNode) {
+      sdkLogger.warn(
+        `Parent node ${parentId} not found for span ${span.spanId} in trace ${span.traceId}`
+      );
+      return;
+    }
+
+    const node = createNode({
+      nodeType,
+      spanParams: initialParams,
+      runId: span.spanId,
+      parentRunId: parentId
+    });
+
+    this._nodes.set(span.spanId, node);
+    parentNode.children.push(span.spanId);
+  }
+
+  /**
+   * Called when a span ends. Finalises duration, merges data, and handles errors.
+   * @param span - The span that ended.
+   */
+  async onSpanEnd(span: AgentSpan): Promise<void> {
+    const node = this._nodes.get(span.spanId);
+    if (!node) return;
+
+    const startedAt = node.spanParams.startedAt as string | undefined;
+    const endedAt = span.endedAt || new Date().toISOString();
+    const durationNs =
+      startedAt && endedAt
+        ? calculateDurationNs(new Date(startedAt), new Date(endedAt))
+        : 0;
+    node.spanParams.durationNs = durationNs;
+
+    this._refreshSpanData(node, span.spanData);
+
+    // Handle errors
+    if (span.error) {
+      const existingMeta =
+        (node.spanParams.metadata as Record<string, string> | undefined) ?? {};
+      node.spanParams.statusCode = 500;
+      node.spanParams.error = span.error;
+      node.spanParams.metadata = buildErrorMetadata(span.error, existingMeta);
+    }
+
+    if (node.nodeType === 'workflow' || node.nodeType === 'agent') {
+      this._lastOutput = this._computeWorkflowOutput(node);
+    }
+
+    // Track first input for trace-level input (capture from first meaningful span)
+    // Only capture from LLM or Tool spans (not workflow/agent), and only if we haven't captured yet
+    if (
+      this._firstInput === null &&
+      (node.nodeType === 'llm' || node.nodeType === 'tool') &&
+      isMeaningfulInput(node.spanParams.input)
+    ) {
+      this._firstInput = node.spanParams.input;
+    }
+  }
+
+  /**
+   * Shuts down the processor, flushing any pending data.
+   * @param timeout - (Optional) Shutdown timeout in milliseconds. Accepted for interface compatibility; currently unused.
+   */
+  async shutdown(timeout?: number): Promise<void> {
+    void timeout;
+    await this._galileoLogger.flush();
+  }
+
+  /**
+   * Forces a flush of any pending data.
+   */
+  async forceFlush(): Promise<void> {
+    await this._galileoLogger.flush();
+  }
+
+  /**
+   * Re-extracts span data at span-end time to capture fields that are populated
+   * after span-start (usage counters, response objects, tool input/output,
+   * to_agent for handoffs, and mutations made inside custom-span callbacks).
+   */
+  private _refreshSpanData(node: Node, spanData: AgentSpan['spanData']): void {
+    if (spanData.type === 'response') {
+      const finalData = extractLlmData(spanData);
+      const responseObj = finalData._responseObject as
+        | Record<string, unknown>
+        | undefined;
+      const { _responseObject: _removed, ...rest } = finalData;
+      void _removed;
+      node.spanParams = { ...node.spanParams, ...rest };
+      const embeddedTools = extractEmbeddedToolCalls(responseObj);
+      if (embeddedTools.length > 0) {
+        const existingTools =
+          (node.spanParams.tools as unknown[] | undefined) ?? [];
+        node.spanParams.tools = [...existingTools, ...embeddedTools];
+      }
+    } else if (spanData.type === 'generation') {
+      node.spanParams = { ...node.spanParams, ...extractLlmData(spanData) };
+    } else if (spanData.type === 'handoff') {
+      // to_agent is populated inside withHandoffSpan's callback, after onSpanStart fires.
+      node.spanParams = {
+        ...node.spanParams,
+        ...extractWorkflowData(spanData),
+        name: mapSpanName(spanData, 'workflow')
+      };
+    } else if (mapSpanType(spanData) === 'tool') {
+      node.spanParams = { ...node.spanParams, ...extractToolData(spanData) };
+    } else if (spanData.__galileoCustom === true) {
+      const refreshed = extractGalileoCustomData(spanData);
+      node.spanParams = { ...node.spanParams, ...refreshed.params };
+    }
+  }
+
+  /**
+   * Computes the effective output for a workflow or agent node.
+   * Prefers the node's own output, falls back to the last child's output,
+   * and overrides with the serialized error when one is present.
+   */
+  private _computeWorkflowOutput(node: Node): unknown {
+    let result: unknown = node.spanParams.output;
+    if (result === undefined && node.children.length > 0) {
+      const lastChildId = node.children[node.children.length - 1];
+      const lastChild = this._nodes.get(lastChildId);
+      if (lastChild?.spanParams.output !== undefined) {
+        result = lastChild.spanParams.output;
+      }
+    }
+    if (node.spanParams.error) {
+      result = JSON.stringify(node.spanParams.error);
+    }
+    return result !== undefined ? result : null;
+  }
+
+  /**
+   * Finds the root node for the trace and recursively logs the span tree.
+   * @param trace - The trace to commit.
+   */
+  private _commitTrace(trace: AgentTrace): void {
+    const rootNode = this._nodes.get(trace.traceId);
+    if (!rootNode) return;
+    this._logNodeTree(rootNode, true);
+  }
+
+  /**
+   * Emits a single node to GalileoLogger (startTrace, addLlmSpan, addToolSpan, or addWorkflowSpan).
+   * @param node - The node to emit.
+   * @param firstNode - True when this is the root trace node.
+   */
+  private _logNode(node: Node, firstNode: boolean): void {
+    const params = node.spanParams;
+    const name = (params.name as string | undefined) ?? 'Agent Run';
+    const durationNs = (params.durationNs as number | undefined) ?? 0;
+    const metadata = toStringRecord(
+      (params.metadata as Record<string, unknown> | undefined) ?? {}
+    );
+    const tags = (params.tags as string[] | undefined) ?? undefined;
+    const statusCode = (params.statusCode as number | undefined) ?? 200;
+    const input = params.input !== undefined ? String(params.input) : '';
+    const output =
+      params.output !== undefined ? String(params.output) : undefined;
+    const startedAt =
+      params.startedAt !== undefined
+        ? new Date(params.startedAt as string)
+        : undefined;
+
+    if (firstNode) {
+      const traceInput =
+        this._firstInput !== null ? String(this._firstInput) : input;
+      const traceOutput =
+        this._lastOutput !== null ? String(this._lastOutput) : output;
+      this._galileoLogger.startTrace({
+        input: traceInput || name,
+        output: traceOutput,
+        name,
+        createdAt: startedAt,
+        durationNs,
+        metadata
+      });
+    } else if (node.nodeType === 'llm') {
+      this._galileoLogger.addLlmSpan({
+        input,
+        output: output ?? '',
+        name,
+        model: (params.model as string | undefined) ?? 'unknown',
+        durationNs,
+        numInputTokens:
+          (params.numInputTokens as number | undefined) ?? undefined,
+        numOutputTokens:
+          (params.numOutputTokens as number | undefined) ?? undefined,
+        totalTokens: (params.totalTokens as number | undefined) ?? undefined,
+        numReasoningTokens:
+          (params.numReasoningTokens as number | undefined) ?? undefined,
+        numCachedInputTokens:
+          (params.numCachedInputTokens as number | undefined) ?? undefined,
+        temperature: (params.temperature as number | undefined) ?? undefined,
+        statusCode,
+        metadata,
+        tools: (params.tools as JsonObject[] | undefined) ?? undefined,
+        createdAt: startedAt
+      });
+    } else if (node.nodeType === 'tool') {
+      this._galileoLogger.addToolSpan({
+        input,
+        output,
+        name,
+        durationNs,
+        statusCode,
+        metadata,
+        tags,
+        createdAt: startedAt
+      });
+    } else {
+      // agent, workflow, and any other parent node types
+      this._galileoLogger.addWorkflowSpan({
+        input: input || 'Workflow Step',
+        output,
+        name,
+        durationNs,
+        metadata,
+        tags,
+        createdAt: startedAt,
+        statusCode
+      });
+    }
+  }
+
+  /**
+   * Recursively emits nodes to GalileoLogger in parent→child order,
+   * then concludes workflow/agent spans after all their children are logged.
+   * @param node - The node to log.
+   * @param firstNode - True when this is the root trace node.
+   */
+  private _logNodeTree(node: Node, firstNode = false): void {
+    this._logNode(node, firstNode);
+
+    for (const childId of node.children) {
+      const childNode = this._nodes.get(childId);
+      if (childNode) {
+        this._logNodeTree(childNode, false);
+      }
+    }
+
+    if (
+      !firstNode &&
+      (node.nodeType === 'workflow' || node.nodeType === 'agent')
+    ) {
+      const params = node.spanParams;
+      const durationNs = (params.durationNs as number | undefined) ?? 0;
+      const statusCode = (params.statusCode as number | undefined) ?? 200;
+      const concludeOutput = this._computeWorkflowOutput(node);
+      this._galileoLogger.conclude({
+        output: concludeOutput !== null ? String(concludeOutput) : undefined,
+        durationNs,
+        statusCode
+      });
+      this._lastStatusCode = statusCode;
+    }
+  }
+
+  /**
+   * Runs a callback under a custom Galileo span that is registered with the OpenAI Agents SDK
+   * trace provider and properly nested under the currently active span.
+   *
+   * The callback is the scope of the span's lifetime — it starts when the callback starts and
+   * ends when it returns or throws. Any SDK spans created inside the callback are automatically
+   * nested as children of this custom span.
+   *
+   * @param galileoSpan - Galileo span metadata (type, input, output, metadata, tags, statusCode).
+   *   Mutable — update galileoSpan.output inside the callback to capture results.
+   * @param callback - The work to run under this span. Return value is passed through.
+   * @param options.name - Display name in Galileo. Overrides galileoSpan.name.
+   * @param options.extraData - Additional data to attach to the span payload.
+   * @returns A promise that resolves to the callback's return value; callback errors are rethrown, never retried.
+   */
+  static async addGalileoCustomSpan<T>(
+    galileoSpan: GalileoSpanLike,
+    callback: () => T | Promise<T>,
+    options?: { name?: string; extraData?: Record<string, unknown> }
+  ): Promise<T> {
+    const spanName = options?.name ?? galileoSpan.name ?? 'Galileo Custom';
+    const spanOptions = {
+      data: {
+        name: spanName,
+        _galileoSpan: galileoSpan,
+        __galileoCustom: true,
+        ...(options?.extraData ?? {})
+      }
+    };
+
+    // Guard only the dynamic import. Errors thrown by the callback must reach
+    // the caller; catching them here would run the callback a second time.
+    type WithCustomSpanFn = <TOutput>(
+      fn: (span: unknown) => Promise<TOutput>,
+      options: Record<string, unknown>
+    ) => Promise<TOutput>;
+    let withCustomSpan: WithCustomSpanFn | undefined;
+    try {
+      ({ withCustomSpan } = (await import(
+        '@openai/agents-core' as string
+      )) as { withCustomSpan: WithCustomSpanFn });
+    } catch {
+      sdkLogger.warn(
+        '@openai/agents package is not installed. addGalileoCustomSpan will execute callback without tracing.'
+      );
+    }
+
+    if (!withCustomSpan) return await Promise.resolve(callback());
+    return await withCustomSpan(
+      async () => Promise.resolve(callback()),
+      spanOptions as Record<string, unknown>
+    );
+  }
+}
+
+/**
+ * Creates a GalileoTracingProcessor and registers it with the OpenAI Agents SDK.
+ * Requires @openai/agents-core to be installed; the dynamic import rejects otherwise.
+ * @param options.galileoLogger - (Optional) The GalileoLogger instance to use.
+ * @param options.flushOnTraceEnd - (Optional) Whether to flush after each trace ends.
+ * @returns The created GalileoTracingProcessor instance.
+ */
+export async function registerGalileoTraceProcessor(options?: {
+  galileoLogger?: GalileoLogger;
+  flushOnTraceEnd?: boolean;
+}): Promise<GalileoTracingProcessor> {
+  const { galileoLogger, flushOnTraceEnd } = options ?? {};
+  const processor = new GalileoTracingProcessor(
+    galileoLogger,
+    flushOnTraceEnd
+  );
+
+  const agentsCore = (await import('@openai/agents-core' as string)) as {
+    addTraceProcessor: (processor: TracingProcessor) => void;
+  };
+  const { addTraceProcessor } = agentsCore;
+  addTraceProcessor(processor);
+
+  return processor;
+}
+
+export { createGalileoCustomSpanData as GalileoCustomSpan } from './custom-span';
+export type { GalileoCustomSpanData, GalileoSpanLike } from './custom-span';
+export type { Node, NodeType } from './node';
+export { mapSpanType, mapSpanName, GALILEO_CUSTOM_TYPE } from './span-mapping';
+export {
+  extractLlmData,
+  extractToolData,
+  extractWorkflowData,
+  extractGalileoCustomData,
+  parseUsage
+} from './data-extraction';
+export {
+  extractEmbeddedToolCalls,
+  getToolNameFromType,
+  extractToolInput,
+  extractToolOutput
+} from './embedded-tools';
diff --git a/src/handlers/openai-agents/node.ts b/src/handlers/openai-agents/node.ts
new file mode 100644
index 00000000..92c5c79e
--- /dev/null
+++ b/src/handlers/openai-agents/node.ts
@@ -0,0 +1,29 @@
+/**
+ * Internal node data structure used to build an in-memory span tree
+ * during an OpenAI Agents run before committing to GalileoLogger.
+ */
+
+/**
+ * Span kind assigned to a node in the openai-agents span tree.
+ */
+export type NodeType = 'llm' | 'tool' | 'workflow' | 'agent';
+
+/**
+ * Represents a node in the span tree built during an OpenAI Agents run.
+ */
+export interface Node {
+  nodeType: NodeType; // span kind this node is logged as
+  spanParams: Record<string, unknown>; // accumulated span fields (name, input, output, ...)
+  runId: string; // this node's own id
+  parentRunId: string | null; // id of the parent node; null is allowed (presumably for the root — confirm)
+  children: string[]; // ids of child nodes
+}
+
+/**
+ * Builds a Node from the supplied fields and gives it no children yet.
+ * @param opts - The node configuration without the children field.
+ * @returns A new Node whose `children` array is empty.
+ */
+export function createNode(opts: Omit<Node, 'children'>): Node {
+  return Object.assign({}, opts, { children: [] as string[] });
+}
diff --git a/src/handlers/openai-agents/span-mapping.ts b/src/handlers/openai-agents/span-mapping.ts
new file mode 100644
index 00000000..9bbbb937
--- /dev/null
+++ b/src/handlers/openai-agents/span-mapping.ts
@@ -0,0 +1,106 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import type { NodeType } from './node';
+
+/**
+ * Sentinel type string returned by mapSpanType for span data flagged with `__galileoCustom === true`.
+ */
+export const GALILEO_CUSTOM_TYPE = 'galileo_custom';
+
+/**
+ * Resolves the Galileo node type for an OpenAI Agents SDK span payload.
+ * Span data flagged with `__galileoCustom === true` maps to
+ * GALILEO_CUSTOM_TYPE; everything else is classified by its `type`
+ * discriminant, with unrecognised types treated as 'workflow'.
+ * @param spanData - The span data object with a type discriminant.
+ * @returns The corresponding Galileo node type or 'galileo_custom'.
+ */
+export function mapSpanType(spanData: {
+  type: string;
+  [key: string]: unknown;
+}): NodeType | typeof GALILEO_CUSTOM_TYPE {
+  // Check for GalileoCustomSpan sentinel
+  if (spanData.__galileoCustom === true) {
+    return GALILEO_CUSTOM_TYPE;
+  }
+
+  const nodeTypeBySpanType = new Map<string, NodeType>([
+    // LLM calls
+    ['generation', 'llm'],
+    ['response', 'llm'],
+    // Tool-like spans
+    ['function', 'tool'],
+    ['guardrail', 'tool'],
+    ['transcription', 'tool'],
+    ['speech', 'tool'],
+    ['speech_group', 'tool'],
+    ['mcp_tools', 'tool'],
+    // Agents
+    ['agent', 'agent'],
+    // Explicit workflow steps
+    ['handoff', 'workflow'],
+    ['custom', 'workflow']
+  ]);
+
+  // Any span type not listed above is also treated as a workflow step.
+  return nodeTypeBySpanType.get(spanData.type) ?? 'workflow';
+}
+
+/**
+ * Formats the label for a handoff span from its source and target agents.
+ * Both snake_case and camelCase agent fields are accepted.
+ * @param spanData - Handoff span data.
+ * @returns 'Handoff' when neither agent is known, else 'Handoff: from → to'.
+ */
+function describeHandoff(spanData: Record<string, unknown>): string {
+  const source = spanData.from_agent || spanData.fromAgent || '';
+  const target = spanData.to_agent || spanData.toAgent || '';
+  if (!source && !target) {
+    return 'Handoff';
+  }
+  return `Handoff: ${source} → ${target}`;
+}
+
+/**
+ * Derives a display name for a span.
+ * A truthy `name` on the span data always wins. Otherwise the label comes
+ * from, in order: the custom-span sentinel ('Galileo Custom'), the handoff
+ * agents, or a fixed per-type label, with 'Span' used for unrecognised
+ * span types.
+ * @param spanData - The span data object.
+ * @param spanType - The resolved node type.
+ * @returns A human-readable display name for the span.
+ */
+export function mapSpanName(
+  spanData: { type: string; name?: string; [key: string]: unknown },
+  spanType: NodeType | typeof GALILEO_CUSTOM_TYPE
+): string {
+  if (spanData.name) {
+    return String(spanData.name);
+  }
+
+  // The galileo_custom sentinel takes precedence over the type lookup.
+  if (spanType === GALILEO_CUSTOM_TYPE) {
+    return 'Galileo Custom';
+  }
+
+  if (spanData.type === 'handoff') {
+    return describeHandoff(spanData);
+  }
+
+  // `name` is falsy at this point, so name-bearing span types (function,
+  // guardrail, agent) always resolve to their generic labels.
+  const fixedLabels = new Map<string, string>([
+    ['generation', 'Generation'],
+    ['response', 'Response'],
+    ['function', 'Function'],
+    ['guardrail', 'Guardrail'],
+    ['agent', 'Agent'],
+    ['custom', 'Custom'],
+    ['transcription', 'Transcription'],
+    ['speech', 'Speech'],
+    ['speech_group', 'Speech Group'],
+    ['mcp_tools', 'MCP Tools']
+  ]);
+
+  return fixedLabels.get(spanData.type) ?? 'Span';
+}
diff --git a/src/handlers/openai/usage.ts b/src/handlers/openai/usage.ts
index 4a0f1c01..8e0248e3 100644
--- a/src/handlers/openai/usage.ts
+++ b/src/handlers/openai/usage.ts
@@ -59,8 +59,14 @@ export function parseUsage(usageData: unknown): ParsedUsage {
   // Detailed token breakdowns (o1/o3/o4)
   // Responses API: input_tokens_details / output_tokens_details
   // Chat Completions: prompt_tokens_details / completion_tokens_details
+  // Agents SDK legacy: a single `details` object for both input and output
+  const legacyDetails =
+    typeof usage.details === 'object' && usage.details !== null
+      ? (usage.details as Record<string, unknown>)
+      : undefined;
+
   const inputDetails =
-    usage.input_tokens_details ?? usage.prompt_tokens_details;
+    usage.input_tokens_details ?? usage.prompt_tokens_details ?? legacyDetails;
   if (inputDetails != null && typeof inputDetails === 'object') {
     const details = inputDetails as Record<string, unknown>;
     const cached = details.cached_tokens;
@@ -70,7 +76,9 @@ export function parseUsage(usageData: unknown): ParsedUsage {
   }
 
   const outputDetails =
-    usage.output_tokens_details ?? usage.completion_tokens_details;
+    usage.output_tokens_details ??
+    usage.completion_tokens_details ??
+    legacyDetails;
   if (outputDetails != null && typeof outputDetails === 'object') {
     const details = outputDetails as Record<string, unknown>;
     const reasoning = details.reasoning_tokens;
diff --git a/src/index.ts b/src/index.ts
index 8709bf09..1b8bd2b2 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -141,6 +141,11 @@ import {
 import { log } from './wrappers';
 import { wrapOpenAI, wrapAzureOpenAI } from './handlers/openai';
 import { GalileoCallback } from './handlers/langchain';
+import {
+  GalileoTracingProcessor,
+  GalileoCustomSpan,
+  registerGalileoTraceProcessor
+} from './handlers/openai-agents';
 import { getSessions, getSpans, getTraces, RecordType } from './utils/search';
 export {
   // Legacy clients
@@ -160,6 +165,10 @@ export {
   // OpenAI
   wrapOpenAI,
   wrapAzureOpenAI,
+  // OpenAI Agents
+  GalileoTracingProcessor,
+  GalileoCustomSpan,
+  registerGalileoTraceProcessor,
   // Datasets
   Dataset,
   Datasets,
diff --git a/src/types/logging/logger.types.ts b/src/types/logging/logger.types.ts
index df3ea0cb..8dd3efeb 100644
--- a/src/types/logging/logger.types.ts
+++ b/src/types/logging/logger.types.ts
@@ -416,6 +416,7 @@ export interface IGalileoLoggerSpan {
     tags?: string[];
     agentType?: AgentType;
     stepNumber?: number;
+    statusCode?: number;
   }): AgentSpan;
 }
 
diff --git a/src/utils/galileo-logger.ts b/src/utils/galileo-logger.ts
index 5029d5f6..ad7a3b85 100644
--- a/src/utils/galileo-logger.ts
+++ b/src/utils/galileo-logger.ts
@@ -1115,6 +1115,7 @@ class GalileoLogger implements IGalileoLogger {
    * @param options.createdAt - (Optional) The timestamp when the span was created.
    * @param options.metadata - (Optional) Additional metadata as key-value pairs.
    * @param options.tags - (Optional) Array of tags to categorize the span.
+   * @param options.statusCode - (Optional) HTTP status code or execution status (e.g., 200 for success, 500 for error).
    * @param options.stepNumber - (Optional) The step number in a multi-step process.
    * @returns The created workflow span.
    */
@@ -1128,6 +1129,7 @@ class GalileoLogger implements IGalileoLogger {
     createdAt?: Date;
     metadata?: Record<string, string>;
     tags?: string[];
+    statusCode?: number;
     stepNumber?: number;
   }): WorkflowSpan {
     const span = new WorkflowSpan({
@@ -1139,6 +1141,7 @@ class GalileoLogger implements IGalileoLogger {
       createdAt: options.createdAt || GalileoApiClient.getTimestampRecord(),
       metadata: options.metadata,
       tags: options.tags,
+      statusCode: options.statusCode,
       metrics: new Metrics({ durationNs: options.durationNs }),
       stepNumber: options.stepNumber
     });
@@ -1184,6 +1187,7 @@ class GalileoLogger implements IGalileoLogger {
     tags?: string[];
     agentType?: AgentType;
     stepNumber?: number;
+    statusCode?: number;
   }): AgentSpan {
     const span = new AgentSpan({
       input: options.input,
@@ -1196,7 +1200,8 @@ class GalileoLogger implements IGalileoLogger {
       tags: options.tags,
       metrics: new Metrics({ durationNs: options.durationNs }),
       agentType: options.agentType,
-      stepNumber: options.stepNumber
+      stepNumber: options.stepNumber,
+      statusCode: options.statusCode
     });
 
     this.addChildSpanToParent(span);
diff --git a/src/utils/serialization.ts b/src/utils/serialization.ts
index b42846a0..a5334dc3 100644
--- a/src/utils/serialization.ts
+++ b/src/utils/serialization.ts
@@ -70,7 +70,7 @@ export const extractParamsInfo = <T extends unknown[], R>(
           // This is simplistic and may not work for complex expressions
           defaultValue = defaultValueStr;
         }
-      } catch (e) {
+      } catch (_) {
         defaultValue = defaultValueStr; // Fallback to string representation
       }
 
@@ -112,8 +112,8 @@ export const argsToDict = <T extends unknown[]>(
  * @param metadata - The metadata object with potentially complex values
  * @returns A new object with all values converted to strings
  */
-export const convertToStringDict = (
-  metadata: Record<string, any>
+export const toStringRecord = (
+  metadata: Record<string, unknown>
 ): Record<string, string> => {
   const result: Record<string, string> = {};
 
diff --git a/src/wrappers.ts b/src/wrappers.ts
index 729440bb..c569b643 100644
--- a/src/wrappers.ts
+++ b/src/wrappers.ts
@@ -7,7 +7,7 @@ import {
 import { serializeToStr } from './entities/serialization';
 import {
   argsToDict,
-  convertToStringDict,
+  toStringRecord,
   extractParamsInfo,
   toStringValue
 } from './utils/serialization';
@@ -215,7 +215,7 @@ export function log<T extends unknown[], R>(
         spanParams.metadata &&
         typeof spanParams.metadata === 'object' &&
         spanParams.metadata !== null
-          ? convertToStringDict(spanParams.metadata as Record<string, unknown>)
+          ? toStringRecord(spanParams.metadata as Record<string, unknown>)
           : undefined;
       const tags = Array.isArray(spanParams.tags)
         ? spanParams.tags.map((tag) => toStringValue(tag))
diff --git a/tests/entities/serialization.test.ts b/tests/entities/serialization.test.ts
index 07003a54..053b74a8 100644
--- a/tests/entities/serialization.test.ts
+++ b/tests/entities/serialization.test.ts
@@ -2,7 +2,7 @@ import {
   EventSerializer,
   serializeToStr
 } from '../../src/entities/serialization';
-import { convertToStringDict } from '../../src/utils/serialization';
+import { toStringRecord } from '../../src/utils/serialization';
 
 describe('EventSerializer', () => {
   let serializer: EventSerializer;
@@ -591,13 +591,13 @@ describe('EventSerializer', () => {
     });
   });
 
-  describe('convertToStringDict', () => {
+  describe('toStringRecord', () => {
     it('should use serializeToStr for object values', () => {
       const input = {
         metadata: { nested: { value: 'test' } }
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.metadata).toBe('string');
       expect(result.metadata).toContain('test');
@@ -608,9 +608,9 @@ describe('EventSerializer', () => {
       const obj: Record<string, unknown> = { key: 'value' };
       obj.circular = obj;
 
-      expect(() => convertToStringDict({ meta: obj })).not.toThrow();
+      expect(() => toStringRecord({ meta: obj })).not.toThrow();
 
-      const result = convertToStringDict({ meta: obj });
+      const result = toStringRecord({ meta: obj });
       expect(typeof result.meta).toBe('string');
       expect(result.meta).toContain('key');
     });
@@ -622,7 +622,7 @@ describe('EventSerializer', () => {
         bool: true
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(result.str).toBe('hello');
       expect(result.num).toBe('42');
@@ -635,7 +635,7 @@ describe('EventSerializer', () => {
         undefinedValue: undefined
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(result.nullValue).toBe('');
       expect(result.undefinedValue).toBe('');
@@ -646,7 +646,7 @@ describe('EventSerializer', () => {
         items: [1, 2, [3, 4]]
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.items).toBe('string');
       expect(result.items).toContain('1');
@@ -660,7 +660,7 @@ describe('EventSerializer', () => {
         empty: {}
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.empty).toBe('string');
       expect(result.empty).toBe('{}');
@@ -677,7 +677,7 @@ describe('EventSerializer', () => {
         }
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.user).toBe('string');
       expect(result.user).toContain('John');
@@ -693,13 +693,13 @@ describe('EventSerializer', () => {
       obj2.ref = obj1;
 
       expect(() =>
-        convertToStringDict({
+        toStringRecord({
           first: obj1,
           second: obj2
         })
       ).not.toThrow();
 
-      const result = convertToStringDict({
+      const result = toStringRecord({
         first: obj1,
         second: obj2
       });
@@ -715,7 +715,7 @@ describe('EventSerializer', () => {
         timestamp: new Date('2024-01-01T12:00:00Z')
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.timestamp).toBe('string');
       expect(result.timestamp).toContain('2024-01-01');
@@ -726,7 +726,7 @@ describe('EventSerializer', () => {
         error: new Error('Test error')
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.error).toBe('string');
       expect(result.error).toContain('Error');
@@ -738,7 +738,7 @@ describe('EventSerializer', () => {
         tags: new Set(['tag1', 'tag2', 'tag3'])
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.tags).toBe('string');
       expect(result.tags).toContain('tag1');
@@ -754,7 +754,7 @@ describe('EventSerializer', () => {
         ])
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(typeof result.config).toBe('string');
       expect(result.config).toContain('key1');
@@ -773,12 +773,12 @@ describe('EventSerializer', () => {
       grandchild.root = obj; // Circular reference
 
       expect(() =>
-        convertToStringDict({
+        toStringRecord({
           tree: obj
         })
       ).not.toThrow();
 
-      const result = convertToStringDict({
+      const result = toStringRecord({
         tree: obj
       });
 
@@ -804,7 +804,7 @@ describe('EventSerializer', () => {
         }
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       // All values should be strings
       Object.values(result).forEach((value) => {
@@ -821,7 +821,7 @@ describe('EventSerializer', () => {
     });
 
     it('should handle empty input object', () => {
-      const result = convertToStringDict({});
+      const result = toStringRecord({});
       expect(result).toEqual({});
     });
 
@@ -832,7 +832,7 @@ describe('EventSerializer', () => {
         key3: 'value3'
       };
 
-      const result = convertToStringDict(input);
+      const result = toStringRecord(input);
 
       expect(Object.keys(result)).toEqual(['key1', 'key2', 'key3']);
     });
diff --git a/tests/handlers/openai-agents/custom-span.test.ts b/tests/handlers/openai-agents/custom-span.test.ts
new file mode 100644
index 00000000..5ef81153
--- /dev/null
+++ b/tests/handlers/openai-agents/custom-span.test.ts
@@ -0,0 +1,188 @@
+import {
+  createGalileoCustomSpanData,
+  isGalileoCustomSpanData,
+  type GalileoCustomSpanData
+} from '../../../src/handlers/openai-agents/custom-span';
+
+describe('createGalileoCustomSpanData()', () => {
+  test('test creates span with galileoSpan only', () => {
+    const galileoSpan = { type: 'custom', data: 'test' };
+    const result = createGalileoCustomSpanData(galileoSpan);
+
+    expect(result.type).toBe('custom');
+    expect(result.__galileoCustom).toBe(true);
+    expect(result.data.galileoSpan).toBe(galileoSpan);
+    expect(result.name).toBeUndefined();
+  });
+
+  test('test creates span with name parameter', () => {
+    const galileoSpan = { type: 'tool', input: 'data' };
+    const result = createGalileoCustomSpanData(galileoSpan, 'My Custom Span');
+
+    expect(result.name).toBe('My Custom Span');
+    expect(result.data.galileoSpan).toBe(galileoSpan);
+  });
+
+  test('test creates span with extraData', () => {
+    const galileoSpan = { type: 'tool', input: 'data' };
+    const extraData = { key1: 'value1', key2: 42 };
+    const result = createGalileoCustomSpanData(
+      galileoSpan,
+      undefined,
+      extraData
+    );
+
+    expect(result.data.key1).toBe('value1');
+    expect(result.data.key2).toBe(42);
+    expect(result.data.galileoSpan).toBe(galileoSpan);
+  });
+
+  test('test creates span with all parameters', () => {
+    const galileoSpan = { type: 'custom', nested: { data: true } };
+    const extraData = { metadata: 'info', count: 5 };
+    const result = createGalileoCustomSpanData(
+      galileoSpan,
+      'Full Span',
+      extraData
+    );
+
+    expect(result.type).toBe('custom');
+    expect(result.name).toBe('Full Span');
+    expect(result.__galileoCustom).toBe(true);
+    expect(result.data.galileoSpan).toBe(galileoSpan);
+    expect(result.data.metadata).toBe('info');
+    expect(result.data.count).toBe(5);
+  });
+
+  test('test sets type field to custom', () => {
+    const result = createGalileoCustomSpanData({});
+    expect(result.type).toBe('custom');
+  });
+
+  test('test sets __galileoCustom sentinel to true', () => {
+    const result = createGalileoCustomSpanData({});
+    expect(result.__galileoCustom).toBe(true);
+  });
+
+  test('test extraData merges correctly with galileoSpan', () => {
+    const galileoSpan = { name: 'span-1' };
+    const extraData = { tag1: 'tag', tag2: 'meta' };
+    const result = createGalileoCustomSpanData(
+      galileoSpan,
+      undefined,
+      extraData
+    );
+
+    expect(result.data).toEqual({
+      tag1: 'tag',
+      tag2: 'meta',
+      galileoSpan: { name: 'span-1' }
+    });
+  });
+
+  test('test handles empty extraData', () => {
+    const galileoSpan = { type: 'tool', input: 'data' };
+    const result = createGalileoCustomSpanData(galileoSpan, undefined, {});
+
+    expect(result.data.galileoSpan).toBe(galileoSpan);
+    expect(Object.keys(result.data)).toEqual(['galileoSpan']);
+  });
+
+  test('test handles null galileoSpan', () => {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const result = createGalileoCustomSpanData(null as any);
+    expect(result.data.galileoSpan).toBe(null);
+  });
+
+  test('test handles undefined name parameter', () => {
+    const result = createGalileoCustomSpanData({}, undefined, { meta: 'data' });
+    expect(result.name).toBeUndefined();
+  });
+});
+
+describe('isGalileoCustomSpanData() type guard', () => {
+  test('test returns true for valid GalileoCustomSpanData', () => {
+    const spanData: GalileoCustomSpanData = {
+      type: 'custom',
+      data: { galileoSpan: {} },
+      __galileoCustom: true
+    };
+
+    expect(isGalileoCustomSpanData(spanData)).toBe(true);
+  });
+
+  test('test returns false for null', () => {
+    expect(isGalileoCustomSpanData(null)).toBe(false);
+  });
+
+  test('test returns false for undefined', () => {
+    expect(isGalileoCustomSpanData(undefined)).toBe(false);
+  });
+
+  test('test returns false for plain object without __galileoCustom', () => {
+    const plainObj = {
+      type: 'custom',
+      data: { galileoSpan: {} }
+    };
+
+    expect(isGalileoCustomSpanData(plainObj)).toBe(false);
+  });
+
+  test('test returns false for object with __galileoCustom false', () => {
+    const spanData = {
+      type: 'custom',
+      data: { galileoSpan: {} },
+      __galileoCustom: false
+    };
+
+    expect(isGalileoCustomSpanData(spanData)).toBe(false);
+  });
+
+  test('test returns false for non-objects', () => {
+    expect(isGalileoCustomSpanData('string')).toBe(false);
+    expect(isGalileoCustomSpanData(123)).toBe(false);
+    expect(isGalileoCustomSpanData(true)).toBe(false);
+    expect(isGalileoCustomSpanData([])).toBe(false);
+  });
+
+  test('test requires __galileoCustom to be true', () => {
+    expect(
+      isGalileoCustomSpanData({
+        type: 'custom',
+        data: { galileoSpan: {} },
+        __galileoCustom: true
+      })
+    ).toBe(true);
+
+    expect(
+      isGalileoCustomSpanData({
+        type: 'custom',
+        data: { galileoSpan: {} },
+        __galileoCustom: 1 // truthy but not true
+      })
+    ).toBe(false);
+  });
+
+  test('test type guard narrows type correctly', () => {
+    const unknownData: unknown = createGalileoCustomSpanData({});
+    // Fail loudly on rejection; a bare `if` would skip every assertion.
+    if (!isGalileoCustomSpanData(unknownData)) {
+      throw new Error('expected factory output to pass the type guard');
+    }
+    const spanData: GalileoCustomSpanData = unknownData;
+    expect(spanData.type).toBe('custom');
+    expect(spanData.__galileoCustom).toBe(true);
+  });
+
+  test('test requires all required fields', () => {
+    const partialWithoutData = {
+      type: 'custom',
+      __galileoCustom: true
+      // `data` is intentionally omitted
+    };
+
+    // Behaviour without `data` is left unspecified here: the guard only has to return a boolean without throwing.
+    const result = isGalileoCustomSpanData(partialWithoutData);
+    expect(typeof result).toBe('boolean');
+  });
+});
diff --git a/tests/handlers/openai-agents/data-extraction.test.ts b/tests/handlers/openai-agents/data-extraction.test.ts
new file mode 100644
index 00000000..3de9a542
--- /dev/null
+++ b/tests/handlers/openai-agents/data-extraction.test.ts
@@ -0,0 +1,675 @@
+import {
+  extractLlmData,
+  extractToolData,
+  extractWorkflowData,
+  extractGalileoCustomData,
+  parseUsage
+} from '../../../src/handlers/openai-agents/data-extraction';
+
+describe('parseUsage', () => {
+  test('test parse usage null returns zeros', () => {
+    const result = parseUsage(null);
+    expect(result).toEqual({
+      inputTokens: 0,
+      outputTokens: 0,
+      totalTokens: null,
+      reasoningTokens: 0,
+      cachedTokens: 0,
+      rejectedPredictionTokens: 0
+    });
+  });
+
+  test('test parse usage undefined returns zeros', () => {
+    const result = parseUsage(undefined);
+    expect(result).toEqual({
+      inputTokens: 0,
+      outputTokens: 0,
+      totalTokens: null,
+      reasoningTokens: 0,
+      cachedTokens: 0,
+      rejectedPredictionTokens: 0
+    });
+  });
+
+  test('test parse usage with input_tokens and output_tokens', () => {
+    const result = parseUsage({
+      input_tokens: 10,
+      output_tokens: 20,
+      total_tokens: 30
+    });
+    expect(result.inputTokens).toBe(10);
+    expect(result.outputTokens).toBe(20);
+    expect(result.totalTokens).toBe(30);
+  });
+
+  test('test parse usage with legacy prompt_tokens and completion_tokens', () => {
+    const result = parseUsage({ prompt_tokens: 5, completion_tokens: 15 });
+    expect(result.inputTokens).toBe(5);
+    expect(result.outputTokens).toBe(15);
+  });
+
+  test('test parse usage extracts reasoning_tokens from output_tokens_details', () => {
+    const result = parseUsage({
+      input_tokens: 10,
+      output_tokens: 5,
+      output_tokens_details: { reasoning_tokens: 3 },
+      input_tokens_details: { cached_tokens: 2 }
+    });
+    expect(result.reasoningTokens).toBe(3);
+    expect(result.cachedTokens).toBe(2);
+  });
+
+  test('test parse usage extracts reasoning_tokens from details (legacy shape)', () => {
+    const result = parseUsage({
+      input_tokens: 10,
+      output_tokens: 5,
+      details: { reasoning_tokens: 3, cached_tokens: 2 }
+    });
+    expect(result.reasoningTokens).toBe(3);
+    expect(result.cachedTokens).toBe(2);
+  });
+
+  test('test parse usage extracts reasoning_tokens at top level', () => {
+    const result = parseUsage({
+      input_tokens: 10,
+      output_tokens: 5,
+      reasoning_tokens: 4
+    });
+    expect(result.reasoningTokens).toBe(4);
+  });
+});
+
+describe('extractLlmData generation', () => {
+  test('test extract generation span data', () => {
+    const spanData = {
+      type: 'generation',
+      input: [{ role: 'user', content: 'Hello' }],
+      output: [{ role: 'assistant', content: 'Hi' }],
+      model: 'gpt-4o',
+      model_config: { temperature: 0.7, max_tokens: 100 },
+      usage: { input_tokens: 10, output_tokens: 5, total_tokens: 15 }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.model).toBe('gpt-4o');
+    expect(result.temperature).toBe(0.7);
+    expect(result.numInputTokens).toBe(10);
+    expect(result.numOutputTokens).toBe(5);
+    expect(result.totalTokens).toBe(15);
+    expect(result.input).toBe(JSON.stringify(spanData.input));
+    expect(result.output).toBe(JSON.stringify(spanData.output));
+  });
+
+  test('test extract generation span with null usage', () => {
+    const spanData = { type: 'generation', model: 'gpt-4o' };
+    const result = extractLlmData(spanData);
+    expect(result.numInputTokens).toBe(0);
+    expect(result.numOutputTokens).toBe(0);
+    expect(result.totalTokens).toBeUndefined();
+  });
+
+  test('test extract generation metadata includes gen_ai_system openai', () => {
+    const spanData = { type: 'generation' };
+    const result = extractLlmData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.gen_ai_system).toBe('openai');
+  });
+
+  test('test extract generation metadata model_config is raw dict', () => {
+    const spanData = {
+      type: 'generation',
+      model_config: { temperature: 0.5, max_tokens: 200 }
+    };
+    const result = extractLlmData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.model_config).toEqual({ temperature: 0.5, max_tokens: 200 });
+  });
+
+  test('test extract generation metadata includes token detail objects', () => {
+    const spanData = {
+      type: 'generation',
+      usage: {
+        input_tokens: 10,
+        output_tokens: 5,
+        input_tokens_details: { cached_tokens: 3 },
+        output_tokens_details: { reasoning_tokens: 2 }
+      }
+    };
+    const result = extractLlmData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.input_tokens_details).toEqual({ cached_tokens: 3 });
+    expect(meta.output_tokens_details).toEqual({ reasoning_tokens: 2 });
+  });
+
+  test('test extract generation metadata omits absent token details', () => {
+    const spanData = {
+      type: 'generation',
+      usage: { input_tokens: 10, output_tokens: 5 }
+    };
+    const result = extractLlmData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta).not.toHaveProperty('input_tokens_details');
+    expect(meta).not.toHaveProperty('output_tokens_details');
+  });
+
+  test('test extract generation span with string input and output not double encoded', () => {
+    const spanData = {
+      type: 'generation',
+      input: 'What is the weather?',
+      output: 'It is sunny.'
+    };
+    const result = extractLlmData(spanData);
+    expect(result.input).toBe('What is the weather?');
+    expect(result.output).toBe('It is sunny.');
+  });
+});
+
+describe('extractLlmData response', () => {
+  test('test extract response span data with _input and _response', () => {
+    const spanData = {
+      type: 'response',
+      _input: [{ role: 'user', content: 'Hello' }],
+      _response: {
+        model: 'gpt-4o',
+        usage: { input_tokens: 8, output_tokens: 4 },
+        temperature: 0.5,
+        output: [{ type: 'message', content: 'Hi' }]
+      }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.model).toBe('gpt-4o');
+    expect(result.temperature).toBe(0.5);
+    expect(result.numInputTokens).toBe(8);
+    expect(result.numOutputTokens).toBe(4);
+  });
+
+  test('test extract response span data with fallback input/response keys', () => {
+    const spanData = {
+      type: 'response',
+      input: 'some input',
+      response: {
+        model: 'gpt-3.5-turbo',
+        usage: { input_tokens: 2, output_tokens: 1 }
+      }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.model).toBe('gpt-3.5-turbo');
+    expect(result.numInputTokens).toBe(2);
+  });
+
+  test('test extract response span with null response returns unknown model', () => {
+    const spanData = { type: 'response' };
+    const result = extractLlmData(spanData);
+    expect(result.model).toBe('unknown');
+    expect(result.numInputTokens).toBe(0);
+  });
+
+  test('test extract response tools returned as raw array not JSON string', () => {
+    const toolsArray = [{ type: 'function', name: 'search' }];
+    const spanData = {
+      type: 'response',
+      _response: {
+        model: 'gpt-4o',
+        usage: {},
+        tools: toolsArray,
+        output: []
+      }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.tools).toEqual(toolsArray);
+    expect(typeof result.tools).not.toBe('string');
+  });
+
+  test('test extract response model_parameters from whitelist', () => {
+    const spanData = {
+      type: 'response',
+      _response: {
+        model: 'gpt-4o',
+        usage: {},
+        temperature: 0.7,
+        max_output_tokens: 512,
+        top_p: 1,
+        tool_choice: 'auto',
+        seed: 42,
+        irrelevant_field: 'ignored',
+        output: []
+      }
+    };
+    const result = extractLlmData(spanData);
+    const mp = result.modelParameters as Record<string, unknown>;
+    expect(mp.temperature).toBe(0.7);
+    expect(mp.max_output_tokens).toBe(512);
+    expect(mp.top_p).toBe(1);
+    expect(mp.tool_choice).toBe('auto');
+    expect(mp.seed).toBe(42);
+    expect(mp).not.toHaveProperty('irrelevant_field');
+  });
+
+  test('test extract response metadata includes response_metadata', () => {
+    const spanData = {
+      type: 'response',
+      _response: {
+        model: 'gpt-4o',
+        usage: {},
+        temperature: 0.5,
+        object: 'response',
+        output: []
+      }
+    };
+    const result = extractLlmData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.gen_ai_system).toBe('openai');
+    const rm = meta.response_metadata as Record<string, unknown>;
+    expect(rm.model).toBe('gpt-4o');
+    expect(rm.temperature).toBe(0.5);
+    expect(rm).not.toHaveProperty('usage');
+    expect(rm).not.toHaveProperty('output');
+  });
+
+  test('test extract response metadata includes instructions when present', () => {
+    const spanData = {
+      type: 'response',
+      _response: {
+        model: 'gpt-4o',
+        usage: {},
+        instructions: 'You are a helpful assistant.',
+        output: []
+      }
+    };
+    const result = extractLlmData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.instructions).toBe('You are a helpful assistant.');
+  });
+
+  test('test extract response metadata omits instructions when absent', () => {
+    const spanData = {
+      type: 'response',
+      _response: { model: 'gpt-4o', usage: {}, output: [] }
+    };
+    const result = extractLlmData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta).not.toHaveProperty('instructions');
+  });
+
+  test('test extract response span with string input not double encoded', () => {
+    const spanData = {
+      type: 'response',
+      _input: 'Hello',
+      _response: { model: 'gpt-4o', usage: {}, output: [] }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.input).toBe('Hello');
+  });
+
+  test('test extractLlmData response span with response.error sets statusCode and error_details', () => {
+    const error = { status_code: 429, message: 'Rate limit' };
+    const spanData = {
+      type: 'response',
+      _input: 'hello',
+      _response: {
+        model: 'gpt-4o',
+        usage: {},
+        output: [],
+        error
+      }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.statusCode).toBe(429);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.error_details).toEqual(error);
+  });
+
+  test('test extractLlmData response span with response.error missing status_code falls back to 500', () => {
+    const spanData = {
+      type: 'response',
+      _input: 'hello',
+      _response: {
+        model: 'gpt-4o',
+        usage: {},
+        output: [],
+        error: { message: 'Unknown error' }
+      }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.statusCode).toBe(500);
+  });
+
+  test('test extractLlmData response span with no response.error has no statusCode', () => {
+    const spanData = {
+      type: 'response',
+      _input: 'hello',
+      _response: {
+        model: 'gpt-4o',
+        usage: {},
+        output: []
+      }
+    };
+    const result = extractLlmData(spanData);
+    expect(result.statusCode).toBeUndefined();
+  });
+});
+
+describe('extractLlmData unknown type', () => {
+  test('test extract returns empty record for unknown type', () => {
+    const result = extractLlmData({ type: 'unknown' });
+    expect(Object.keys(result).length).toBe(0);
+  });
+});
+
+describe('extractToolData', () => {
+  test('test extract function span data string input/output', () => {
+    const spanData = {
+      type: 'function',
+      input: '{"query":"hello"}',
+      output: 'result text'
+    };
+    const result = extractToolData(spanData);
+    expect(result.input).toBe('{"query":"hello"}');
+    expect(result.output).toBe('result text');
+  });
+
+  test('test extract function span data object input serialised', () => {
+    const spanData = {
+      type: 'function',
+      input: { query: 'hello' },
+      output: { answer: 'world' }
+    };
+    const result = extractToolData(spanData);
+    expect(result.input).toBe(JSON.stringify({ query: 'hello' }));
+    expect(result.output).toBe(JSON.stringify({ answer: 'world' }));
+  });
+
+  test('test extract function span data missing output', () => {
+    const spanData = { type: 'function', input: 'test' };
+    const result = extractToolData(spanData);
+    expect(result.output).toBeUndefined();
+  });
+
+  test('test extract function span with mcp_data in metadata', () => {
+    const spanData = {
+      type: 'function',
+      input: 'test',
+      mcp_data: { server: 'my-server', tool: 'my-tool' }
+    };
+    const result = extractToolData(spanData);
+    const meta = result.metadata as Record<string, string>;
+    expect(meta.mcp_data).toBe(
+      JSON.stringify({ server: 'my-server', tool: 'my-tool' })
+    );
+  });
+
+  test('test extract guardrail span triggered', () => {
+    const spanData = { type: 'guardrail', triggered: true, name: 'PII Filter' };
+    const result = extractToolData(spanData);
+    expect(result.input).toBe('');
+    expect(result.output).toBe('{"triggered":true}');
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.triggered).toBe(true);
+    expect(meta.status).toBe('warning');
+    expect(meta).not.toHaveProperty('guardrail_name');
+  });
+
+  test('test extract guardrail span not triggered', () => {
+    const spanData = { type: 'guardrail', triggered: false, name: 'Safety' };
+    const result = extractToolData(spanData);
+    expect(result.output).toBe('{"triggered":false}');
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.triggered).toBe(false);
+    expect(meta).not.toHaveProperty('status');
+    expect(meta).not.toHaveProperty('guardrail_name');
+  });
+
+  test('test extract tool data for transcription returns empty', () => {
+    const result = extractToolData({ type: 'transcription' });
+    expect(result.input).toBe('');
+    expect(result.output).toBeUndefined();
+  });
+
+  test('test extract tool data for mcp_tools returns empty', () => {
+    const result = extractToolData({ type: 'mcp_tools' });
+    expect(result.input).toBe('');
+  });
+});
+
+describe('extractWorkflowData', () => {
+  test('test extract agent span data with tools and handoffs', () => {
+    const spanData = {
+      type: 'agent',
+      name: 'PlannerAgent',
+      tools: ['search', 'calculator'],
+      handoffs: ['ReviewAgent'],
+      output_type: 'string'
+    };
+    const result = extractWorkflowData(spanData);
+    expect(result.input).toBe('');
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.tools).toEqual(['search', 'calculator']);
+    expect(meta.handoffs).toEqual(['ReviewAgent']);
+    expect(meta.output_type).toBe('string');
+  });
+
+  test('test extract agent span data without optional fields', () => {
+    const result = extractWorkflowData({ type: 'agent' });
+    expect(result.input).toBe('');
+    expect(result.output).toBeUndefined();
+    const meta = result.metadata as Record<string, string>;
+    expect(Object.keys(meta).length).toBe(0);
+  });
+
+  test('test extract handoff span data', () => {
+    const spanData = {
+      type: 'handoff',
+      from_agent: 'AgentA',
+      to_agent: 'AgentB'
+    };
+    const result = extractWorkflowData(spanData);
+    expect(result.input).toBe('{"from_agent":"AgentA"}');
+    expect(result.output).toBe('{"to_agent":"AgentB"}');
+    const meta = result.metadata as Record<string, string>;
+    expect(meta.from_agent).toBe('AgentA');
+    expect(meta.to_agent).toBe('AgentB');
+  });
+
+  test('test extract handoff span data with missing agents', () => {
+    const result = extractWorkflowData({ type: 'handoff' });
+    expect(result.input).toBe('');
+    expect(result.output).toBeUndefined();
+  });
+
+  test('test extract custom span data with input and output', () => {
+    const spanData = {
+      type: 'custom',
+      data: {
+        input: 'custom input',
+        output: 'custom output',
+        extra_key: 'extra value'
+      }
+    };
+    const result = extractWorkflowData(spanData);
+    expect(result.input).toBe('custom input');
+    expect(result.output).toBe('custom output');
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.extra_key).toBe('extra value');
+    expect(meta.input).toBeUndefined();
+    expect(meta.output).toBeUndefined();
+  });
+
+  test('test extract custom span data with object metadata value kept as-is', () => {
+    const spanData = {
+      type: 'custom',
+      data: {
+        input: 'in',
+        output: 'out',
+        config: { retries: 3, timeout: 5000 }
+      }
+    };
+    const result = extractWorkflowData(spanData);
+    const meta = result.metadata as Record<string, unknown>;
+    expect(meta.config).toEqual({ retries: 3, timeout: 5000 });
+  });
+
+  test('test extract custom span data with object input serialised', () => {
+    const spanData = {
+      type: 'custom',
+      data: { input: { query: 'hello' }, output: { answer: 'world' } }
+    };
+    const result = extractWorkflowData(spanData);
+    expect(result.input).toBe(JSON.stringify({ query: 'hello' }));
+    expect(result.output).toBe(JSON.stringify({ answer: 'world' }));
+  });
+
+  test('test extract custom span data with no data field', () => {
+    const result = extractWorkflowData({ type: 'custom' });
+    expect(result.input).toBe('');
+    expect(result.output).toBeUndefined();
+  });
+
+  test('test extract unknown span type returns empty', () => {
+    const result = extractWorkflowData({ type: 'future_type' });
+    expect(result.input).toBe('');
+    expect(result.output).toBeUndefined();
+  });
+});
+
+describe('extractGalileoCustomData', () => {
+  test('test extracts tool type from galileoSpan', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: {
+        type: 'tool',
+        input: 'tool input',
+        output: 'tool output',
+        metadata: { key: 'val' },
+        tags: ['tag1'],
+        statusCode: 201
+      }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('tool');
+    expect(result.params.input).toBe('tool input');
+    expect(result.params.output).toBe('tool output');
+    expect(result.params.metadata).toEqual({ key: 'val' });
+    expect(result.params.tags).toEqual(['tag1']);
+    expect(result.params.statusCode).toBe(201);
+  });
+
+  test('test extracts workflow type from galileoSpan', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: {
+        type: 'workflow',
+        input: 'wf in',
+        output: 'wf out'
+      }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('workflow');
+    expect(result.params.input).toBe('wf in');
+    expect(result.params.output).toBe('wf out');
+  });
+
+  test('test extracts agent type from galileoSpan', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: {
+        type: 'agent',
+        input: 'agent in'
+      }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('agent');
+    expect(result.params.input).toBe('agent in');
+  });
+
+  test('test falls back to workflow for unrecognized galileoSpan type', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: { type: 'future_type', input: 'x' }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('workflow');
+    expect(result.params.input).toBe('x');
+  });
+
+  test('test falls back to workflow for llm type (not delegated)', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: { type: 'llm', input: 'prompt' }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('workflow');
+  });
+
+  test('test falls back to extractWorkflowData when no galileoSpan', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      data: { input: 'plain input', output: 'plain output' }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('workflow');
+    expect(result.params.input).toBe('plain input');
+    expect(result.params.output).toBe('plain output');
+  });
+
+  test('test falls back to extractWorkflowData when galileoSpan is not an object', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: 'not-an-object'
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('workflow');
+  });
+
+  test('test serializes object input/output from galileoSpan', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: {
+        type: 'tool',
+        input: { query: 'hello' },
+        output: { answer: 'world' }
+      }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.params.input).toBe(JSON.stringify({ query: 'hello' }));
+    expect(result.params.output).toBe(JSON.stringify({ answer: 'world' }));
+  });
+
+  test('test omits tags and statusCode when not provided', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: { type: 'tool', input: 'in' }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.params).not.toHaveProperty('tags');
+    expect(result.params).not.toHaveProperty('statusCode');
+  });
+
+  test('test handles missing galileoSpan gracefully', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.nodeType).toBe('workflow');
+  });
+
+  test('test defaults to empty input when galileoSpan has no input', () => {
+    const spanData = {
+      type: 'custom',
+      __galileoCustom: true,
+      _galileoSpan: { type: 'tool' }
+    };
+    const result = extractGalileoCustomData(spanData);
+    expect(result.params.input).toBe('');
+    expect(result.params.output).toBeUndefined();
+    expect(result.params.metadata).toEqual({});
+  });
+});
diff --git a/tests/handlers/openai-agents/embedded-tool.test.ts b/tests/handlers/openai-agents/embedded-tool.test.ts
new file mode 100644
index 00000000..7e4e23c1
--- /dev/null
+++ b/tests/handlers/openai-agents/embedded-tool.test.ts
@@ -0,0 +1,258 @@
+import {
+  extractEmbeddedToolCalls,
+  getToolNameFromType,
+  extractToolInput,
+  extractToolOutput
+} from '../../../src/handlers/openai-agents/embedded-tools';
+
+describe('getToolNameFromType', () => { // pins the name returned for each embedded tool-call type string
+  test('test maps code_interpreter_call to code_interpreter', () => {
+    expect(getToolNameFromType('code_interpreter_call')).toBe(
+      'code_interpreter'
+    );
+  });
+
+  test('test maps file_search_call to file_search', () => {
+    expect(getToolNameFromType('file_search_call')).toBe('file_search');
+  });
+
+  test('test maps web_search_call to web_search', () => {
+    expect(getToolNameFromType('web_search_call')).toBe('web_search');
+  });
+
+  test('test maps computer_call to computer', () => {
+    expect(getToolNameFromType('computer_call')).toBe('computer');
+  });
+
+  test('test maps custom_tool_call to custom_tool', () => {
+    expect(getToolNameFromType('custom_tool_call')).toBe('custom_tool');
+  });
+
+  test('test returns original string for unknown type', () => { // an unmapped type is expected back unchanged
+    expect(getToolNameFromType('unknown_type')).toBe('unknown_type');
+  });
+});
+
+describe('extractToolInput', () => { // pins the input value extracted from an output item for each tool type
+  test('test code_interpreter_call extracts code field', () => {
+    const result = extractToolInput(
+      { code: 'print("hello")' },
+      'code_interpreter_call'
+    );
+    expect(result).toBe('print("hello")');
+  });
+
+  test('test code_interpreter_call returns null when no code', () => {
+    expect(extractToolInput({}, 'code_interpreter_call')).toBeNull();
+  });
+
+  test('test file_search_call extracts queries', () => {
+    const result = extractToolInput(
+      { queries: ['find docs', 'search code'] },
+      'file_search_call'
+    );
+    expect(result).toBe(JSON.stringify(['find docs', 'search code'])); // queries array is expected as a JSON string
+  });
+
+  test('test web_search_call extracts action.query', () => {
+    const result = extractToolInput(
+      { action: { query: 'latest news' } },
+      'web_search_call'
+    );
+    expect(result).toBe('latest news'); // plain query string, not JSON-encoded
+  });
+
+  test('test web_search_call returns null when no action', () => {
+    expect(extractToolInput({}, 'web_search_call')).toBeNull();
+  });
+
+  test('test computer_call extracts action object', () => {
+    const action = { type: 'click', coordinate: [100, 200] };
+    const result = extractToolInput({ action }, 'computer_call');
+    expect(result).toBe(JSON.stringify(action)); // whole action object is expected as a JSON string
+  });
+
+  test('test custom_tool_call extracts input string', () => {
+    const result = extractToolInput({ input: 'my input' }, 'custom_tool_call');
+    expect(result).toBe('my input'); // string input is expected unchanged
+  });
+
+  test('test custom_tool_call serialises object input', () => {
+    const result = extractToolInput(
+      { input: { key: 'val' } },
+      'custom_tool_call'
+    );
+    expect(result).toBe(JSON.stringify({ key: 'val' })); // object input is expected as a JSON string
+  });
+});
+
+describe('extractToolOutput', () => { // pins the output value extracted from an output item for each tool type
+  test('test code_interpreter_call concatenates log outputs', () => {
+    const result = extractToolOutput(
+      { outputs: [{ logs: 'line1' }, { logs: 'line2' }] },
+      'code_interpreter_call'
+    );
+    expect(result).toBe('line1\nline2'); // log entries are expected joined by a newline
+  });
+
+  test('test code_interpreter_call extracts url output', () => {
+    const result = extractToolOutput(
+      { outputs: [{ url: 'https://example.com/file.png' }] },
+      'code_interpreter_call'
+    );
+    expect(result).toBe('https://example.com/file.png');
+  });
+
+  test('test code_interpreter_call returns null for empty outputs', () => {
+    expect(
+      extractToolOutput({ outputs: [] }, 'code_interpreter_call')
+    ).toBeNull();
+  });
+
+  test('test file_search_call extracts results', () => {
+    const results = [{ id: '1', content: 'doc' }];
+    const result = extractToolOutput({ results }, 'file_search_call');
+    expect(result).toBe(JSON.stringify(results)); // results array is expected as a JSON string
+  });
+
+  test('test file_search_call returns null when no results', () => {
+    expect(extractToolOutput({}, 'file_search_call')).toBeNull();
+  });
+
+  test('test web_search_call returns action as json', () => {
+    const action = { query: 'news', status: 'done' };
+    const result = extractToolOutput({ action }, 'web_search_call');
+    expect(result).toBe(JSON.stringify(action));
+  });
+
+  test('test computer_call returns null', () => { // null is expected even though the item carries a `result` field
+    expect(
+      extractToolOutput({ result: 'screenshot' }, 'computer_call')
+    ).toBeNull();
+  });
+
+  test('test custom_tool_call extracts output string', () => {
+    const result = extractToolOutput({ output: 'done' }, 'custom_tool_call');
+    expect(result).toBe('done');
+  });
+});
+
+describe('extractEmbeddedToolCalls', () => { // pins how embedded tool calls are pulled out of a response's `output` array
+  test('test returns empty array for null response', () => {
+    expect(extractEmbeddedToolCalls(null)).toEqual([]);
+  });
+
+  test('test returns empty array for response without output', () => {
+    expect(extractEmbeddedToolCalls({})).toEqual([]);
+  });
+
+  test('test skips non-embedded-tool output items', () => {
+    const response = {
+      output: [{ type: 'message', content: 'hello' }]
+    };
+    expect(extractEmbeddedToolCalls(response)).toEqual([]);
+  });
+
+  test('test extracts code_interpreter_call', () => { // checks every field of the extracted record
+    const response = {
+      output: [
+        {
+          type: 'code_interpreter_call',
+          id: 'ci_001',
+          code: 'x = 1',
+          outputs: [{ logs: 'output log' }],
+          status: 'completed'
+        }
+      ]
+    };
+    const result = extractEmbeddedToolCalls(response);
+    expect(result.length).toBe(1);
+    expect(result[0].type).toBe('function'); // record type is 'function', not the original item type
+    expect(result[0].function.name).toBe('code_interpreter');
+    expect(result[0].tool_call_id).toBe('ci_001');
+    expect(result[0].tool_call_input).toBe('x = 1');
+    expect(result[0].tool_call_output).toBe('output log');
+    expect(result[0].tool_call_status).toBe('completed');
+  });
+
+  test('test tool_call_id falls back to call_id when id absent', () => {
+    const response = {
+      output: [
+        {
+          type: 'web_search_call',
+          call_id: 'fallback_id_001',
+          action: { query: 'test' }
+        }
+      ]
+    };
+    const result = extractEmbeddedToolCalls(response);
+    expect(result[0].tool_call_id).toBe('fallback_id_001');
+  });
+
+  test('test extracts file_search_call', () => {
+    const response = {
+      output: [
+        {
+          type: 'file_search_call',
+          id: 'fs_001',
+          queries: ['find docs'],
+          results: [{ id: 'doc1', content: 'text' }]
+        }
+      ]
+    };
+    const result = extractEmbeddedToolCalls(response);
+    expect(result.length).toBe(1);
+    expect(result[0].function.name).toBe('file_search');
+    expect(result[0].tool_call_input).toBe(JSON.stringify(['find docs']));
+  });
+
+  test('test extracts web_search_call', () => {
+    const response = {
+      output: [
+        {
+          type: 'web_search_call',
+          id: 'ws_001',
+          action: { query: 'latest AI news' }
+        }
+      ]
+    };
+    const result = extractEmbeddedToolCalls(response);
+    expect(result.length).toBe(1);
+    expect(result[0].function.name).toBe('web_search');
+    expect(result[0].tool_call_input).toBe('latest AI news');
+  });
+
+  test('test extracts multiple embedded tool calls', () => { // the 'message' item in the middle must be skipped
+    const response = {
+      output: [
+        { type: 'code_interpreter_call', code: 'x=1', outputs: [] },
+        { type: 'message', content: 'hi' },
+        { type: 'web_search_call', action: { query: 'test' } }
+      ]
+    };
+    const result = extractEmbeddedToolCalls(response);
+    expect(result.length).toBe(2);
+    expect(result[0].type).toBe('function');
+    expect(result[1].type).toBe('function');
+  });
+
+  test('test tool_call_status defaults to completed when absent', () => {
+    const response = {
+      output: [{ type: 'web_search_call', action: { query: 'q' } }]
+    };
+    const result = extractEmbeddedToolCalls(response);
+    expect(result[0].tool_call_status).toBe('completed');
+  });
+
+  test('test handles null output items gracefully', () => { // null/undefined entries must be ignored, not thrown on
+    const response = {
+      output: [
+        null,
+        undefined,
+        { type: 'web_search_call', action: { query: 'q' } }
+      ]
+    };
+    const result = extractEmbeddedToolCalls(response);
+    expect(result.length).toBe(1);
+  });
+});
diff --git a/tests/handlers/openai-agents/integration.test.ts b/tests/handlers/openai-agents/integration.test.ts
new file mode 100644
index 00000000..a410906a
--- /dev/null
+++ b/tests/handlers/openai-agents/integration.test.ts
@@ -0,0 +1,728 @@
+import { GalileoTracingProcessor } from '../../../src/handlers/openai-agents';
+import type {
+  AgentTrace,
+  AgentSpan
+} from '../../../src/handlers/openai-agents';
+
+function createMockLogger() { // builds a jest stand-in exposing the logger methods these tests inspect
+  return {
+    startTrace: jest.fn().mockReturnValue({}),
+    addLlmSpan: jest.fn().mockReturnValue({}),
+    addToolSpan: jest.fn().mockReturnValue({}),
+    addWorkflowSpan: jest.fn().mockReturnValue({}),
+    addAgentSpan: jest.fn().mockReturnValue({}),
+    conclude: jest.fn().mockReturnValue(undefined),
+    flush: jest.fn().mockResolvedValue(undefined) // the only mock here that returns a promise
+  };
+}
+
+function makeTrace(overrides: Partial<AgentTrace> = {}): AgentTrace { // default trace fixture; fields in `overrides` win
+  return {
+    traceId: 'trace-001',
+    name: 'Multi-Agent Flow',
+    metadata: {},
+    startedAt: new Date('2024-01-01T00:00:00Z').toISOString(),
+    endedAt: new Date('2024-01-01T00:00:05Z').toISOString(), // five seconds after startedAt
+    ...overrides // spread last so caller-supplied fields replace the defaults
+  };
+}
+
+function makeSpan( // default span fixture under trace 'trace-001'; the caller must supply spanData
+  overrides: Partial<AgentSpan> & { spanData: AgentSpan['spanData'] }
+): AgentSpan {
+  return {
+    spanId: 'span-001',
+    traceId: 'trace-001',
+    parentId: 'trace-001', // default parent id equals the trace id
+    startedAt: new Date('2024-01-01T00:00:01Z').toISOString(),
+    endedAt: new Date('2024-01-01T00:00:02Z').toISOString(),
+    error: null,
+    ...overrides // spread last so caller-supplied fields replace the defaults
+  };
+}
+
+describe('Multi-agent integration flows', () => {
+  test('test multiple agents with handoff', async () => { // planner -> handoff -> executor chain, nested by parentId
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false); // `as never` lets the partial mock satisfy the logger parameter type
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // PlannerAgent: top-level agent span, parented to the trace
+    const planner = makeSpan({
+      spanId: 'agent-planner',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'PlannerAgent' }
+    });
+
+    // Handoff to ExecutorAgent, parented to the planner span
+    const handoff = makeSpan({
+      spanId: 'handoff-001',
+      parentId: 'agent-planner',
+      spanData: {
+        type: 'handoff',
+        from_agent: 'PlannerAgent',
+        to_agent: 'ExecutorAgent'
+      }
+    });
+
+    // ExecutorAgent, parented to the handoff span
+    const executor = makeSpan({
+      spanId: 'agent-executor',
+      parentId: 'handoff-001',
+      spanData: { type: 'agent', name: 'ExecutorAgent' }
+    });
+
+    await processor.onSpanStart(planner);
+    await processor.onSpanStart(handoff);
+    await processor.onSpanStart(executor);
+    await processor.onSpanEnd(executor); // spans are ended innermost-first
+    await processor.onSpanEnd(handoff);
+    await processor.onSpanEnd(planner);
+    await processor.onTraceEnd(trace);
+
+    // Verify all spans logged: 2 agents + 1 handoff all use addWorkflowSpan
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3);
+    expect(mockLogger.addAgentSpan).not.toHaveBeenCalled();
+  });
+
+  test('test agent->tool->llm->tool flow', async () => { // one agent with three sequential children: tool, llm, tool
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const agent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+
+    const tool1 = makeSpan({
+      spanId: 'tool-001',
+      parentId: 'agent-001',
+      spanData: { type: 'function', name: 'search' }
+    });
+
+    const llm = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: { type: 'generation', model: 'gpt-4' }
+    });
+
+    const tool2 = makeSpan({
+      spanId: 'tool-002',
+      parentId: 'agent-001',
+      spanData: { type: 'function', name: 'calculate' }
+    });
+
+    await processor.onSpanStart(agent);
+    await processor.onSpanStart(tool1); // each child starts and ends before the next one starts
+    await processor.onSpanEnd(tool1);
+    await processor.onSpanStart(llm);
+    await processor.onSpanEnd(llm);
+    await processor.onSpanStart(tool2);
+    await processor.onSpanEnd(tool2);
+    await processor.onSpanEnd(agent);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan)
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(2); // 2 tools
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // 1 llm
+  });
+
+  test('test guardrail triggered in flow', async () => { // a triggered guardrail span is expected on addToolSpan
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const agent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+
+    const guardrail = makeSpan({
+      spanId: 'guardrail-001',
+      parentId: 'agent-001',
+      spanData: { type: 'guardrail', name: 'PII Filter', triggered: true }
+    });
+
+    await processor.onSpanStart(agent);
+    await processor.onSpanStart(guardrail);
+    await processor.onSpanEnd(guardrail);
+    await processor.onSpanEnd(agent);
+    await processor.onTraceEnd(trace);
+
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0]; // first argument of the first addToolSpan call
+    expect(toolCall.output).toBe('{"triggered":true}'); // exact JSON string of the triggered flag
+  });
+
+  test('test embedded tool calls from OpenAI response', async () => { // embedded calls in `_response.output` must surface as `tools` on the LLM span
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const response = makeSpan({
+      spanId: 'response-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'response',
+        model: 'gpt-4o',
+        _input: [{ role: 'user', content: 'search for python' }],
+        _response: {
+          model: 'gpt-4o',
+          output: [
+            {
+              type: 'web_search_call',
+              action: { query: 'python programming' },
+              id: 'search-1'
+            },
+            {
+              type: 'code_interpreter_call',
+              code: 'print("result")',
+              outputs: [{ logs: 'result' }],
+              id: 'code-1'
+            }
+          ]
+        }
+      }
+    });
+
+    await processor.onSpanStart(response);
+    await processor.onSpanEnd(response);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1); // the 'response' span is logged through addLlmSpan
+    const llmCall = mockLogger.addLlmSpan.mock.calls[0][0];
+    expect(Array.isArray(llmCall.tools)).toBe(true);
+    expect(llmCall.tools.length).toBe(2); // one entry per embedded call in the response
+    expect(llmCall.tools[0].type).toBe('function');
+    expect(llmCall.tools[1].type).toBe('function');
+  });
+
+  test('test galileo_custom span delegates to inner galileoSpan as tool', async () => { // inner `_galileoSpan.type` picks the logger method
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const agent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+
+    const customSpan = makeSpan({
+      spanId: 'custom-001',
+      parentId: 'agent-001',
+      spanData: {
+        type: 'custom',
+        __galileoCustom: true,
+        _galileoSpan: {
+          type: 'tool',
+          input: 'custom tool input',
+          output: 'custom tool output',
+          metadata: { source: 'test' },
+          tags: ['custom-tag'],
+          statusCode: 200
+        }
+      }
+    });
+
+    const llm = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: { type: 'generation', model: 'gpt-4' }
+    });
+
+    await processor.onSpanStart(agent);
+    await processor.onSpanStart(customSpan);
+    await processor.onSpanEnd(customSpan);
+    await processor.onSpanStart(llm);
+    await processor.onSpanEnd(llm);
+    await processor.onSpanEnd(agent);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan)
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1); // the custom span with inner type 'tool'
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0];
+    expect(toolCall.input).toBe('custom tool input');
+    expect(toolCall.output).toBe('custom tool output');
+    expect(toolCall.metadata).toEqual({ source: 'test' });
+    expect(toolCall.tags).toEqual(['custom-tag']);
+  });
+
+  test('test galileo_custom span with workflow type', async () => { // inner type 'workflow' is expected on addWorkflowSpan
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const agent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+
+    const customSpan = makeSpan({
+      spanId: 'custom-001',
+      parentId: 'agent-001',
+      spanData: {
+        type: 'custom',
+        __galileoCustom: true,
+        _galileoSpan: {
+          type: 'workflow',
+          input: 'wf input',
+          output: 'wf output'
+        }
+      }
+    });
+
+    await processor.onSpanStart(agent);
+    await processor.onSpanStart(customSpan);
+    await processor.onSpanEnd(customSpan);
+    await processor.onSpanEnd(agent);
+    await processor.onTraceEnd(trace);
+
+    // addWorkflowSpan called twice: once for the agent container, once for the custom workflow span
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(2);
+    // Call index 0 is the agent container; index 1 is the custom workflow span
+    const wfCall = mockLogger.addWorkflowSpan.mock.calls[1][0];
+    expect(wfCall.input).toBe('wf input');
+    expect(wfCall.output).toBe('wf output');
+    expect(mockLogger.conclude).toHaveBeenCalled();
+  });
+
+  test('test galileo_custom span with agent type', async () => { // inner type 'agent' is still asserted on addWorkflowSpan
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const customSpan = makeSpan({
+      spanId: 'custom-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'custom',
+        __galileoCustom: true,
+        _galileoSpan: {
+          type: 'agent',
+          input: 'agent input',
+          output: 'agent output',
+          metadata: { role: 'planner' }
+        }
+      }
+    });
+
+    await processor.onSpanStart(customSpan);
+    await processor.onSpanEnd(customSpan);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(agentCall.input).toBe('agent input');
+    expect(agentCall.output).toBe('agent output');
+    expect(agentCall.metadata).toEqual({ role: 'planner' });
+  });
+
+  test('test galileo_custom span without galileoSpan falls back to workflow', async () => { // no `_galileoSpan`: input/output are expected from `data`
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const customSpan = makeSpan({
+      spanId: 'custom-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'custom',
+        __galileoCustom: true,
+        data: { input: 'fallback input', output: 'fallback output' }
+      }
+    });
+
+    await processor.onSpanStart(customSpan);
+    await processor.onSpanEnd(customSpan);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(wfCall.input).toBe('fallback input');
+    expect(wfCall.output).toBe('fallback output');
+  });
+
+  test('test galileo_custom span with unrecognized type falls back to workflow', async () => { // an unknown inner type must still log via addWorkflowSpan
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const customSpan = makeSpan({
+      spanId: 'custom-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'custom',
+        __galileoCustom: true,
+        _galileoSpan: {
+          type: 'unknown_future_type',
+          input: 'some input'
+        }
+      }
+    });
+
+    await processor.onSpanStart(customSpan);
+    await processor.onSpanEnd(customSpan);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const wfCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(wfCall.input).toBe('some input'); // the inner span's input is still carried through
+  });
+
+  test('test error in middle of flow handled', async () => { // a failing tool between two healthy ones must not drop any span
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const agent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+
+    const tool1 = makeSpan({
+      spanId: 'tool-001',
+      parentId: 'agent-001',
+      spanData: { type: 'function', name: 'search' }
+    });
+
+    const errorTool = makeSpan({
+      spanId: 'tool-002',
+      parentId: 'agent-001',
+      error: { message: 'Connection timeout' },
+      spanData: { type: 'function', name: 'fetch' }
+    });
+
+    const tool3 = makeSpan({
+      spanId: 'tool-003',
+      parentId: 'agent-001',
+      spanData: { type: 'function', name: 'parse' }
+    });
+
+    await processor.onSpanStart(agent);
+    await processor.onSpanStart(tool1);
+    await processor.onSpanEnd(tool1);
+    await processor.onSpanStart(errorTool);
+    await processor.onSpanEnd(errorTool); // Ends with error
+    await processor.onSpanStart(tool3);
+    await processor.onSpanEnd(tool3);
+    await processor.onSpanEnd(agent);
+    await processor.onTraceEnd(trace);
+
+    // Verify all tools logged first, so a missing span fails as a count mismatch rather than a TypeError
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(3);
+
+    // Verify error tool (addToolSpan call index 1) has error status
+    const errorToolCall = mockLogger.addToolSpan.mock.calls[1][0];
+    expect(errorToolCall.statusCode).toBe(500);
+    const errorMeta = errorToolCall.metadata as Record<string, string>;
+    expect(errorMeta.error_message).toBe('Connection timeout');
+  });
+
+  test('test complex nested structure with multiple agents', async () => { // root agent with two agent branches, one holding an llm and one a tool
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Root agent
+    const rootAgent = makeSpan({
+      spanId: 'root-agent',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'RootAgent' }
+    });
+
+    // First branch: planning
+    const planningAgent = makeSpan({
+      spanId: 'planning-agent',
+      parentId: 'root-agent',
+      spanData: { type: 'agent', name: 'PlanningAgent' }
+    });
+
+    const planLLM = makeSpan({
+      spanId: 'plan-llm',
+      parentId: 'planning-agent',
+      spanData: { type: 'generation', model: 'gpt-4' }
+    });
+
+    // Second branch: execution
+    const executionAgent = makeSpan({
+      spanId: 'execution-agent',
+      parentId: 'root-agent',
+      spanData: { type: 'agent', name: 'ExecutionAgent' }
+    });
+
+    const executionTool = makeSpan({
+      spanId: 'exec-tool',
+      parentId: 'execution-agent',
+      spanData: { type: 'function', name: 'execute' }
+    });
+
+    await processor.onSpanStart(rootAgent);
+    await processor.onSpanStart(planningAgent);
+    await processor.onSpanStart(planLLM);
+    await processor.onSpanEnd(planLLM);
+    await processor.onSpanEnd(planningAgent); // the planning branch ends before the execution branch starts
+    await processor.onSpanStart(executionAgent);
+    await processor.onSpanStart(executionTool);
+    await processor.onSpanEnd(executionTool);
+    await processor.onSpanEnd(executionAgent);
+    await processor.onSpanEnd(rootAgent);
+    await processor.onTraceEnd(trace);
+
+    // Verify all spans logged: 3 agents use addWorkflowSpan
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(3);
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+    // Only checks that conclude ran at least once (NOTE(review): presumably once per nested workflow/agent span; confirm)
+    expect(mockLogger.conclude).toHaveBeenCalled();
+  });
+});
+
+describe('Output tracking integration', () => { // pins which spans contribute the `output` passed to startTrace
+  test('test last output only set by workflow/agent spans, not llm spans', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    const llm1 = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: [],
+        output: 'First output'
+      }
+    });
+
+    const llm2 = makeSpan({
+      spanId: 'llm-002',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: [],
+        output: 'Final output'
+      }
+    });
+
+    await processor.onSpanStart(llm1);
+    await processor.onSpanEnd(llm1);
+    await processor.onSpanStart(llm2);
+    await processor.onSpanEnd(llm2);
+    await processor.onTraceEnd(trace);
+
+    // _lastOutput is only updated by workflow/agent spans (parity with Python).
+    // Bare LLM spans do not set _lastOutput, so trace output falls back to undefined.
+    const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; // first argument of the first startTrace call
+    expect(startTraceCall.output).toBeUndefined();
+  });
+
+  test('test last output set by workflow span conclude', async () => { // same LLM output, but nested under an agent span
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const agentSpan = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'MyAgent' }
+    });
+    const llmSpan = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: [],
+        output: 'Final output'
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(agentSpan);
+    await processor.onSpanStart(llmSpan);
+    await processor.onSpanEnd(llmSpan);
+    await processor.onSpanEnd(agentSpan);
+    await processor.onTraceEnd(trace);
+
+    // _lastOutput is set from the workflow/agent conclude output (last child's output).
+    const startTraceCall = mockLogger.startTrace.mock.calls[0][0];
+    expect(startTraceCall.output).toBe('Final output');
+  });
+});
+
+describe('Workflow span statusCode propagation', () => { // pins the statusCode handed to addWorkflowSpan and conclude
+  test('test workflow span statusCode passed to addWorkflowSpan', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Create a workflow span (handoff type maps to workflow nodeType)
+    const workflow = makeSpan({
+      spanId: 'workflow-001',
+      parentId: 'trace-001',
+      spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' }
+    });
+
+    // Create a successful child LLM span
+    const llm = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'workflow-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: [],
+        output: 'successful response'
+      },
+      error: null
+    });
+
+    await processor.onSpanStart(workflow);
+    await processor.onSpanStart(llm);
+    await processor.onSpanEnd(llm);
+    await processor.onSpanEnd(workflow);
+    await processor.onTraceEnd(trace);
+
+    // Verify addWorkflowSpan was called exactly once, for the handoff span
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const workflowSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    // Verify statusCode is passed through; 200 is expected when no span reports an error
+    expect(workflowSpanCall.statusCode).toBe(200);
+  });
+
+  test('test workflow span with direct error has statusCode 500', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Create a workflow span that itself has an error
+    const workflowWithError = makeSpan({
+      spanId: 'workflow-001',
+      parentId: 'trace-001',
+      spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' },
+      error: {
+        message: 'Workflow execution failed',
+        data: { reason: 'timeout' }
+      }
+    });
+
+    await processor.onSpanStart(workflowWithError);
+    await processor.onSpanEnd(workflowWithError);
+    await processor.onTraceEnd(trace);
+
+    // Verify addWorkflowSpan was called with statusCode 500
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const workflowSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(workflowSpanCall.statusCode).toBe(500);
+  });
+
+  test('test agent span statusCode passed to addWorkflowSpan', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Create an agent span
+    const agent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'TestAgent' }
+    });
+
+    // Create a child LLM span
+    const llm = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: [],
+        output: 'test output'
+      },
+      error: null
+    });
+
+    await processor.onSpanStart(agent);
+    await processor.onSpanStart(llm);
+    await processor.onSpanEnd(llm);
+    await processor.onSpanEnd(agent);
+    await processor.onTraceEnd(trace);
+
+    // Verify the agent span was logged once via addWorkflowSpan with statusCode 200
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const agentSpanCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(agentSpanCall.statusCode).toBe(200);
+  });
+
+  test('test conclude called with statusCode for workflow spans', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Create nested workflow spans to test conclude calls
+    const outerWorkflow = makeSpan({
+      spanId: 'workflow-001',
+      parentId: 'trace-001',
+      spanData: { type: 'handoff', from_agent: 'Agent1', to_agent: 'Agent2' }
+    });
+
+    const innerWorkflow = makeSpan({
+      spanId: 'workflow-002',
+      parentId: 'workflow-001',
+      spanData: { type: 'custom', name: 'InnerWorkflow' }
+    });
+
+    await processor.onSpanStart(outerWorkflow);
+    await processor.onSpanStart(innerWorkflow);
+    await processor.onSpanEnd(innerWorkflow);
+    await processor.onSpanEnd(outerWorkflow);
+    await processor.onTraceEnd(trace);
+
+    // Verify conclude was called for the workflow spans
+    expect(mockLogger.conclude).toHaveBeenCalled();
+    // Keep only the conclude calls whose first argument carries a statusCode
+    const concludeCalls = mockLogger.conclude.mock.calls;
+    const callsWithStatusCode = concludeCalls.filter(
+      (call) => call[0]?.statusCode !== undefined
+    );
+    expect(callsWithStatusCode.length).toBeGreaterThan(0); // at least one conclude call must pass a statusCode
+  });
+});
diff --git a/tests/handlers/openai-agents/node.test.ts b/tests/handlers/openai-agents/node.test.ts
new file mode 100644
index 00000000..4a1aad87
--- /dev/null
+++ b/tests/handlers/openai-agents/node.test.ts
@@ -0,0 +1,133 @@
+import { createNode } from '../../../src/handlers/openai-agents/node';
+
+describe('createNode()', () => { // each test passes one input object and inspects the returned node
+  test('test creates node with correct nodeType llm', () => {
+    const node = createNode({
+      nodeType: 'llm',
+      spanParams: { name: 'GPT Call' },
+      runId: 'span-001',
+      parentRunId: 'trace-001'
+    });
+
+    expect(node.nodeType).toBe('llm');
+    expect(node.spanParams).toEqual({ name: 'GPT Call' });
+    expect(node.runId).toBe('span-001');
+    expect(node.parentRunId).toBe('trace-001');
+  });
+
+  test('test creates node with correct nodeType tool', () => {
+    const node = createNode({
+      nodeType: 'tool',
+      spanParams: { name: 'Search Tool' },
+      runId: 'span-002',
+      parentRunId: 'span-001'
+    });
+
+    expect(node.nodeType).toBe('tool');
+  });
+
+  test('test creates node with correct nodeType workflow', () => {
+    const node = createNode({
+      nodeType: 'workflow',
+      spanParams: {},
+      runId: 'span-003',
+      parentRunId: null
+    });
+
+    expect(node.nodeType).toBe('workflow');
+  });
+
+  test('test creates node with correct nodeType agent', () => {
+    const node = createNode({
+      nodeType: 'agent',
+      spanParams: { name: 'Planning Agent' },
+      runId: 'span-004',
+      parentRunId: 'trace-001'
+    });
+
+    expect(node.nodeType).toBe('agent');
+  });
+
+  test('test initializes children as empty array', () => {
+    const node = createNode({
+      nodeType: 'llm',
+      spanParams: {},
+      runId: 'span-001',
+      parentRunId: null
+    });
+
+    expect(Array.isArray(node.children)).toBe(true);
+    expect(node.children.length).toBe(0);
+  });
+
+  test('test preserves all spanParams fields', () => {
+    const spanParams = {
+      name: 'Test Span',
+      input: 'test input',
+      output: 'test output',
+      model: 'gpt-4',
+      metadata: { key: 'value' }
+    };
+
+    const node = createNode({
+      nodeType: 'llm',
+      spanParams,
+      runId: 'span-001',
+      parentRunId: 'trace-001'
+    });
+
+    expect(node.spanParams).toEqual(spanParams); // deep equality, not identity
+  });
+
+  test('test children array is mutable', () => {
+    const node = createNode({
+      nodeType: 'agent',
+      spanParams: {},
+      runId: 'span-001',
+      parentRunId: null
+    });
+
+    node.children.push('child-001');
+    node.children.push('child-002');
+
+    expect(node.children).toEqual(['child-001', 'child-002']); // insertion order kept
+  });
+
+  test('test node has required Node interface properties', () => {
+    const node = createNode({
+      nodeType: 'llm',
+      spanParams: { name: 'Test' },
+      runId: 'span-001',
+      parentRunId: 'parent-001'
+    });
+
+    // Presence check only; the values themselves are asserted in other tests
+    expect('nodeType' in node).toBe(true);
+    expect('spanParams' in node).toBe(true);
+    expect('runId' in node).toBe(true);
+    expect('parentRunId' in node).toBe(true);
+    expect('children' in node).toBe(true);
+  });
+
+  test('test empty spanParams preserved correctly', () => {
+    const node = createNode({
+      nodeType: 'tool',
+      spanParams: {},
+      runId: 'span-001',
+      parentRunId: 'trace-001'
+    });
+
+    expect(Object.keys(node.spanParams).length).toBe(0);
+  });
+
+  test('test parentRunId can be null', () => {
+    const node = createNode({
+      nodeType: 'agent',
+      spanParams: {},
+      runId: 'trace-001',
+      parentRunId: null
+    });
+
+    expect(node.parentRunId).toBeNull();
+  });
+});
diff --git a/tests/handlers/openai-agents/span-mapping.test.ts b/tests/handlers/openai-agents/span-mapping.test.ts
new file mode 100644
index 00000000..a7fe11f2
--- /dev/null
+++ b/tests/handlers/openai-agents/span-mapping.test.ts
@@ -0,0 +1,194 @@
+import {
+  mapSpanType,
+  mapSpanName,
+  GALILEO_CUSTOM_TYPE
+} from '../../../src/handlers/openai-agents/span-mapping';
+import type { NodeType } from '../../../src/handlers/openai-agents/node';
+
+describe('mapSpanType', () => { // one case per input `type` string and the node type expected for it
+  test('test maps generation to llm', () => {
+    expect(mapSpanType({ type: 'generation' })).toBe('llm');
+  });
+
+  test('test maps response to llm', () => {
+    expect(mapSpanType({ type: 'response' })).toBe('llm');
+  });
+
+  test('test maps function to tool', () => {
+    expect(mapSpanType({ type: 'function' })).toBe('tool');
+  });
+
+  test('test maps guardrail to tool', () => {
+    expect(mapSpanType({ type: 'guardrail' })).toBe('tool');
+  });
+
+  test('test maps transcription to tool', () => {
+    expect(mapSpanType({ type: 'transcription' })).toBe('tool');
+  });
+
+  test('test maps speech to tool', () => {
+    expect(mapSpanType({ type: 'speech' })).toBe('tool');
+  });
+
+  test('test maps speech_group to tool', () => {
+    expect(mapSpanType({ type: 'speech_group' })).toBe('tool');
+  });
+
+  test('test maps mcp_tools to tool', () => {
+    expect(mapSpanType({ type: 'mcp_tools' })).toBe('tool');
+  });
+
+  test('test maps agent to agent', () => {
+    expect(mapSpanType({ type: 'agent' })).toBe('agent');
+  });
+
+  test('test maps handoff to workflow', () => {
+    expect(mapSpanType({ type: 'handoff' })).toBe('workflow');
+  });
+
+  test('test maps custom to workflow', () => {
+    expect(mapSpanType({ type: 'custom' })).toBe('workflow');
+  });
+
+  test('test maps galileo_custom sentinel to galileo_custom', () => { // same `type` as above, plus the flag
+    expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe(
+      GALILEO_CUSTOM_TYPE
+    );
+  });
+
+  test('test maps unknown type to workflow fallback', () => { // a type string not listed in any case above
+    expect(mapSpanType({ type: 'unknown_future_type' })).toBe('workflow');
+  });
+});
+
+describe('mapSpanName', () => { // expects spanData.name when given, else a fixed label per case
+  test('test returns spanData.name when present', () => {
+    expect(mapSpanName({ type: 'generation', name: 'MySpan' }, 'llm')).toBe(
+      'MySpan'
+    );
+  });
+
+  test('test generation fallback is Generation', () => {
+    expect(mapSpanName({ type: 'generation' }, 'llm')).toBe('Generation');
+  });
+
+  test('test response fallback is Response', () => {
+    expect(mapSpanName({ type: 'response' }, 'llm')).toBe('Response');
+  });
+
+  test('test function fallback uses spanData.name or Function', () => {
+    expect(mapSpanName({ type: 'function', name: 'my_tool' }, 'tool')).toBe(
+      'my_tool'
+    );
+    expect(mapSpanName({ type: 'function' }, 'tool')).toBe('Function');
+  });
+
+  test('test guardrail fallback uses spanData.name or Guardrail', () => {
+    expect(
+      mapSpanName({ type: 'guardrail', name: 'content_filter' }, 'tool')
+    ).toBe('content_filter');
+    expect(mapSpanName({ type: 'guardrail' }, 'tool')).toBe('Guardrail');
+  });
+
+  test('test agent fallback uses spanData.name or Agent', () => {
+    expect(mapSpanName({ type: 'agent', name: 'PlannerAgent' }, 'agent')).toBe(
+      'PlannerAgent'
+    );
+    expect(mapSpanName({ type: 'agent' }, 'agent')).toBe('Agent');
+  });
+
+  test('test handoff formats from-to arrow', () => { // label is built from from_agent and to_agent
+    expect(
+      mapSpanName(
+        { type: 'handoff', from_agent: 'AgentA', to_agent: 'AgentB' },
+        'workflow'
+      )
+    ).toBe('Handoff: AgentA → AgentB');
+  });
+
+  test('test handoff fallback when no agents', () => {
+    expect(mapSpanName({ type: 'handoff' }, 'workflow')).toBe('Handoff');
+  });
+
+  test('test custom fallback is Custom', () => {
+    expect(mapSpanName({ type: 'custom' }, 'workflow')).toBe('Custom');
+  });
+
+  test('test galileo_custom sentinel fallback is Galileo Custom', () => { // differs from the case above only in the 2nd argument
+    expect(mapSpanName({ type: 'custom' }, GALILEO_CUSTOM_TYPE)).toBe(
+      'Galileo Custom'
+    );
+  });
+
+  test('test transcription fallback is Transcription', () => {
+    expect(mapSpanName({ type: 'transcription' }, 'tool')).toBe(
+      'Transcription'
+    );
+  });
+
+  test('test speech fallback is Speech', () => {
+    expect(mapSpanName({ type: 'speech' }, 'tool')).toBe('Speech');
+  });
+
+  test('test speech_group fallback is Speech Group', () => {
+    expect(mapSpanName({ type: 'speech_group' }, 'tool')).toBe('Speech Group');
+  });
+
+  test('test mcp_tools fallback is MCP Tools', () => {
+    expect(mapSpanName({ type: 'mcp_tools' }, 'tool')).toBe('MCP Tools');
+  });
+});
+
+describe('agent span type distinction', () => { // pins 'agent' as its own result, separate from 'workflow'
+  test('test agent maps to agent not workflow', () => {
+    const result = mapSpanType({ type: 'agent' });
+    expect(result).toBe('agent');
+    expect(result).not.toBe('workflow'); // implied by the line above; kept to state the intent
+  });
+
+  test('test handoff still maps to workflow', () => {
+    expect(mapSpanType({ type: 'handoff' })).toBe('workflow');
+  });
+
+  test('test custom still maps to workflow', () => {
+    expect(mapSpanType({ type: 'custom' })).toBe('workflow');
+  });
+
+  test('test galileo_custom sentinel is unaffected', () => {
+    expect(mapSpanType({ type: 'custom', __galileoCustom: true })).toBe(
+      GALILEO_CUSTOM_TYPE
+    );
+  });
+
+  test('test mapSpanType returns NodeType or GALILEO_CUSTOM_TYPE for all known types', () => {
+    const knownTypes: Array<{ // table of input type -> expected mapping, checked in the loop below
+      type: string;
+      expected: NodeType | typeof GALILEO_CUSTOM_TYPE;
+    }> = [
+      { type: 'generation', expected: 'llm' },
+      { type: 'response', expected: 'llm' },
+      { type: 'function', expected: 'tool' },
+      { type: 'guardrail', expected: 'tool' },
+      { type: 'transcription', expected: 'tool' },
+      { type: 'speech', expected: 'tool' },
+      { type: 'speech_group', expected: 'tool' },
+      { type: 'mcp_tools', expected: 'tool' },
+      { type: 'agent', expected: 'agent' },
+      { type: 'handoff', expected: 'workflow' },
+      { type: 'custom', expected: 'workflow' }
+    ];
+    for (const { type, expected } of knownTypes) {
+      expect(mapSpanType({ type })).toBe(expected);
+    }
+  });
+
+  test('test mapSpanName returns Agent for agent type without name', () => {
+    expect(mapSpanName({ type: 'agent' }, 'agent')).toBe('Agent');
+  });
+
+  test('test mapSpanName returns spanData.name for agent type with name', () => {
+    expect(mapSpanName({ type: 'agent', name: 'RouterAgent' }, 'agent')).toBe(
+      'RouterAgent'
+    );
+  });
+});
diff --git a/tests/handlers/openai-agents/tracing-processor.test.ts b/tests/handlers/openai-agents/tracing-processor.test.ts
new file mode 100644
index 00000000..6e718531
--- /dev/null
+++ b/tests/handlers/openai-agents/tracing-processor.test.ts
@@ -0,0 +1,1702 @@
+import { GalileoTracingProcessor } from '../../../src/handlers/openai-agents';
+import type {
+  AgentTrace,
+  AgentSpan
+} from '../../../src/handlers/openai-agents';
+
+/** AgentTrace fixture; fields given in `overrides` replace the defaults. */
+function makeTrace(overrides: Partial<AgentTrace> = {}): AgentTrace {
+  const defaults = {
+    traceId: 'trace-001',
+    name: 'Test Agent Run',
+    metadata: {},
+    startedAt: new Date('2024-01-01T00:00:00Z').toISOString(),
+    endedAt: new Date('2024-01-01T00:00:10Z').toISOString()
+  };
+  return { ...defaults, ...overrides };
+}
+
+/** AgentSpan fixture; `spanData` is mandatory, every other field defaults. */
+function makeSpan(
+  overrides: Partial<AgentSpan> & { spanData: AgentSpan['spanData'] }
+): AgentSpan {
+  const defaults = {
+    spanId: 'span-001',
+    traceId: 'trace-001',
+    parentId: 'trace-001',
+    startedAt: new Date('2024-01-01T00:00:01Z').toISOString(),
+    endedAt: new Date('2024-01-01T00:00:05Z').toISOString(),
+    error: null
+  };
+  return { ...defaults, ...overrides };
+}
+
+// Builds a GalileoLogger stand-in made of jest mocks so tests can inspect the recorded calls
+function createMockLogger() {
+  return {
+    startTrace: jest.fn().mockReturnValue({}),
+    addLlmSpan: jest.fn().mockReturnValue({}),
+    addToolSpan: jest.fn().mockReturnValue({}),
+    addWorkflowSpan: jest.fn().mockReturnValue({}),
+    addAgentSpan: jest.fn().mockReturnValue({}),
+    conclude: jest.fn().mockReturnValue(undefined),
+    flush: jest.fn().mockResolvedValue(undefined) // the only stub here that returns a promise
+  };
+}
+
+describe('GalileoTracingProcessor lifecycle', () => { // drives the processor hooks against a mock logger
+  test('test onTraceStart creates root node', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+    // Only observable effect checked here: startTrace has not fired yet
+    expect(mockLogger.startTrace).not.toHaveBeenCalled();
+  });
+
+  test('test full trace lifecycle calls startTrace', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.startTrace).toHaveBeenCalledTimes(1);
+    const startTraceCall = mockLogger.startTrace.mock.calls[0][0];
+    expect(startTraceCall.name).toBe('Test Agent Run');
+  });
+
+  test('test full trace with llm span calls addLlmSpan', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+    const span = makeSpan({
+      spanId: 'span-gen-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4o',
+        input: [{ role: 'user', content: 'hello' }],
+        output: [{ role: 'assistant', content: 'hi' }],
+        usage: { input_tokens: 5, output_tokens: 3 }
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    const llmCall = mockLogger.addLlmSpan.mock.calls[0][0];
+    expect(llmCall.model).toBe('gpt-4o');
+    expect(llmCall.numInputTokens).toBe(5);
+    expect(llmCall.numOutputTokens).toBe(3);
+  });
+
+  test('test full trace with tool span calls addToolSpan', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+    const span = makeSpan({
+      spanId: 'span-func-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'function',
+        name: 'search_tool',
+        input: '{"query":"hello"}',
+        output: 'results'
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0];
+    expect(toolCall.name).toBe('search_tool');
+  });
+
+  test('test full trace with agent span calls addWorkflowSpan and conclude', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+    const span = makeSpan({
+      spanId: 'span-agent-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'agent',
+        name: 'PlannerAgent'
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(agentCall.name).toBe('PlannerAgent');
+    // conclude is called for agent spans
+    expect(mockLogger.conclude).toHaveBeenCalled();
+  });
+
+  test('test error span sets status 500 in metadata', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+    const span = makeSpan({
+      spanId: 'span-err-001',
+      parentId: 'trace-001',
+      error: { message: 'Something went wrong', data: { code: 'ERR_001' } },
+      spanData: { type: 'function', name: 'failing_tool', input: 'x' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0];
+    expect(toolCall.statusCode).toBe(500);
+    expect(toolCall.metadata.error_message).toBe('Something went wrong');
+    expect(toolCall.metadata.error_type).toBe('SpanError');
+  });
+
+  test('test flushOnTraceEnd true calls flush', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, true);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.flush).toHaveBeenCalledTimes(1);
+  });
+
+  test('test flushOnTraceEnd false does not call flush', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.flush).not.toHaveBeenCalled();
+  });
+
+  test('test shutdown calls flush', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+
+    await processor.shutdown();
+
+    expect(mockLogger.flush).toHaveBeenCalledTimes(1);
+  });
+
+  test('test forceFlush calls flush', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+
+    await processor.forceFlush();
+
+    expect(mockLogger.flush).toHaveBeenCalledTimes(1);
+  });
+
+  test('test nested agent span is logged as child', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const agentSpan = makeSpan({
+      spanId: 'span-agent-outer',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'OuterAgent' }
+    });
+
+    const llmSpan = makeSpan({
+      spanId: 'span-llm-inner',
+      parentId: 'span-agent-outer',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4o',
+        usage: { input_tokens: 2, output_tokens: 1 }
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(agentSpan);
+    await processor.onSpanStart(llmSpan);
+    await processor.onSpanEnd(llmSpan);
+    await processor.onSpanEnd(agentSpan);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    // conclude called for agent span
+    expect(mockLogger.conclude).toHaveBeenCalled();
+  });
+
+  test('test response span extracts embedded tool calls', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+    const span = makeSpan({
+      spanId: 'span-resp-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'response',
+        _input: 'test input',
+        _response: {
+          model: 'gpt-4o',
+          usage: { input_tokens: 10, output_tokens: 5 },
+          output: [
+            {
+              type: 'web_search_call',
+              id: 'ws_001',
+              action: { query: 'latest news' }
+            }
+          ]
+        }
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    const llmCall = mockLogger.addLlmSpan.mock.calls[0][0];
+    expect(Array.isArray(llmCall.tools)).toBe(true);
+    expect(llmCall.tools.length).toBe(1);
+    expect(llmCall.tools[0].type).toBe('function');
+  });
+
+  test('test metadata values are stringified', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    // `count` is a number forced past the type to exercise stringification
+    const trace = makeTrace({
+      metadata: { run_id: 'abc123', count: 5 as unknown as string }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.startTrace).toHaveBeenCalledTimes(1);
+    const startCall = mockLogger.startTrace.mock.calls[0][0];
+    // Fail loudly if metadata is dropped; a guarded loop would pass vacuously
+    expect(startCall.metadata).toBeDefined();
+    for (const v of Object.values(startCall.metadata)) {
+      expect(typeof v).toBe('string');
+    }
+  });
+
+  test('test addGalileoCustomSpan invokes callback and returns its value', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    void processor; // NOTE(review): instance is unused by the static call below; confirm it is needed
+
+    const galileoSpan = { type: 'tool' as const, input: 'query' };
+    const result = await GalileoTracingProcessor.addGalileoCustomSpan(
+      galileoSpan,
+      async () => 'callback-result',
+      { name: 'My Custom Span' }
+    );
+
+    expect(result).toBe('callback-result');
+  });
+
+  test('test addGalileoCustomSpan fallback calls callback when SDK unavailable', async () => {
+    const callbackFn = jest.fn().mockResolvedValue('fallback-result');
+    const galileoSpan = { type: 'tool' as const, input: 'query' };
+
+    // The SDK is not installed in the test environment; the fallback path runs.
+    const result = await GalileoTracingProcessor.addGalileoCustomSpan(
+      galileoSpan,
+      callbackFn,
+      { name: 'Fallback Span' }
+    );
+
+    expect(callbackFn).toHaveBeenCalledTimes(1);
+    expect(result).toBe('fallback-result');
+  });
+});
+
+describe('Span tree construction edge cases', () => { // parent/child span arrangements via parentId
+  test('test multiple children linked to single parent', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Create parent span
+    const parentSpan = makeSpan({
+      spanId: 'parent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'Parent Agent' }
+    });
+    await processor.onSpanStart(parentSpan);
+
+    // Create multiple child spans
+    const child1 = makeSpan({
+      spanId: 'child-001',
+      parentId: 'parent-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: [],
+        output: 'result 1'
+      }
+    });
+    const child2 = makeSpan({
+      spanId: 'child-002',
+      parentId: 'parent-001',
+      spanData: {
+        type: 'function',
+        name: 'search',
+        input: 'query',
+        output: 'result 2'
+      }
+    });
+
+    await processor.onSpanStart(child1);
+    await processor.onSpanStart(child2);
+    await processor.onSpanEnd(child1);
+    await processor.onSpanEnd(child2);
+    await processor.onSpanEnd(parentSpan);
+    await processor.onTraceEnd(trace);
+
+    // Verify both children were logged
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+    // conclude is called for all non-root workflow/agent spans
+    expect(mockLogger.conclude).toHaveBeenCalled();
+  });
+
+  test('test deeply nested spans (3 levels)', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Level 1: Agent
+    const agent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+
+    // Level 2: LLM under agent
+    const llm = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: { type: 'generation', model: 'gpt-4' }
+    });
+
+    // Level 3: Tool under LLM
+    const tool = makeSpan({
+      spanId: 'tool-001',
+      parentId: 'llm-001',
+      spanData: { type: 'function', name: 'calc' }
+    });
+
+    await processor.onSpanStart(agent);
+    await processor.onSpanStart(llm);
+    await processor.onSpanStart(tool);
+    await processor.onSpanEnd(tool);
+    await processor.onSpanEnd(llm);
+    await processor.onSpanEnd(agent);
+    await processor.onTraceEnd(trace);
+
+    // All should be logged
+    expect(mockLogger.startTrace).toHaveBeenCalledTimes(1);
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1); // agent (uses addWorkflowSpan)
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+  });
+
+  test('test span with no parentId defaults to trace', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Explicit undefined overrides makeSpan's default parentId of 'trace-001'
+    const span = makeSpan({
+      spanId: 'span-001',
+      parentId: undefined,
+      spanData: { type: 'function', name: 'tool' }
+    });
+
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+  });
+
+  test('test child span with existing parent agent is logged', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    await processor.onTraceStart(trace);
+
+    // Start the parent first so the child's parentId refers to a started span
+    const parentAgent = makeSpan({
+      spanId: 'parent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+    await processor.onSpanStart(parentAgent);
+
+    // Child points at the parent started above (a missing parent is NOT tested)
+    const span = makeSpan({
+      spanId: 'child-001',
+      parentId: 'parent-001',
+      spanData: {
+        type: 'function',
+        name: 'tool',
+        input: 'test',
+        output: 'result'
+      }
+    });
+
+    // Should not throw
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onSpanEnd(parentAgent);
+    await processor.onTraceEnd(trace);
+
+    // The child tool span is logged exactly once
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+  });
+});
+
+describe('Response span data merging', () => { // inspects the argument passed to addLlmSpan for response/generation spans
+  test('test response span merges embedded tools at end', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'response-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'response',
+        _input: [{ role: 'user' }],
+        _response: {
+          model: 'gpt-4o',
+          output: [
+            {
+              type: 'code_interpreter_call',
+              code: 'print("hello")',
+              outputs: [{ logs: 'hello' }],
+              id: 'call-1',
+              status: 'completed'
+            }
+          ]
+        }
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    // A 'response' span is expected to be logged through addLlmSpan
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    const llmCall = mockLogger.addLlmSpan.mock.calls[0][0];
+    expect(Array.isArray(llmCall.tools)).toBe(true);
+    expect(llmCall.tools[0].type).toBe('function');
+  });
+
+  test('test _responseObject removed from final params', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'response-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'response',
+        _response: { output: [] }
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const llmCall = mockLogger.addLlmSpan.mock.calls[0][0];
+    // NOTE(review): input key is `_response`; `_responseObject` is presumably an internal field — confirm
+    expect(llmCall._responseObject).toBeUndefined();
+  });
+
+  test('test generation span updates usage on end', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'gen-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: [],
+        output: [],
+        usage: { input_tokens: 10, output_tokens: 5 }
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const llmCall = mockLogger.addLlmSpan.mock.calls[0][0];
+    expect(llmCall.numInputTokens).toBe(10);
+    expect(llmCall.numOutputTokens).toBe(5);
+  });
+
+  test('test response span with response-level error sets statusCode and error_details in metadata', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const responseError = { status_code: 500, message: 'Server error' };
+    const span = makeSpan({
+      spanId: 'response-err-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'response',
+        _input: 'hello',
+        _response: {
+          model: 'gpt-4o',
+          usage: { input_tokens: 5, output_tokens: 0 },
+          output: [],
+          error: responseError
+        }
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+    const llmCall = mockLogger.addLlmSpan.mock.calls[0][0];
+    expect(llmCall.statusCode).toBe(500);
+    const meta = llmCall.metadata as Record<string, unknown>;
+    expect(meta.error_details).toBe(JSON.stringify(responseError)); // compared as a JSON string, not an object
+  });
+
+  test('test response span with no _responseObject handles gracefully', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'response-001',
+      parentId: 'trace-001',
+      spanData: { type: 'response' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    // Should not throw even though spanData carries only `type`
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addLlmSpan).toHaveBeenCalledTimes(1);
+  });
+});
+
+// Covers how span errors surface on emitted spans — statusCode 500, the
+// error_message / error_type / error_details metadata keys, and the serialized
+// error used as the workflow conclude output — and checks that an onSpanEnd
+// for a span that never started emits nothing.
+describe('Error handling and recovery', () => {
+  // Error with only `message`: expects statusCode 500, the message copied into
+  // metadata, and error_type defaulting to 'SpanError'.
+  test('test span error with message only', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'span-001',
+      parentId: 'trace-001',
+      error: { message: 'Test error' },
+      spanData: { type: 'function', name: 'tool' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0];
+    expect(toolCall.statusCode).toBe(500);
+    const meta = toolCall.metadata as Record<string, string>;
+    expect(meta.error_message).toBe('Test error');
+    expect(meta.error_type).toBe('SpanError');
+  });
+
+  // When the error object carries its own `type`, that value is reported as
+  // error_type instead of the default.
+  test('test span error with type field uses error.type value', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'span-001',
+      parentId: 'trace-001',
+      error: { message: 'Agent failed', type: 'AgentError' },
+      spanData: { type: 'function', name: 'tool' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0];
+    const meta = toolCall.metadata as Record<string, string>;
+    expect(meta.error_type).toBe('AgentError');
+  });
+
+  // Counterpart of the previous test: no `type` on the error means the
+  // fallback value 'SpanError' is reported.
+  test('test span error without type field falls back to SpanError', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'span-001',
+      parentId: 'trace-001',
+      error: { message: 'Something broke' },
+      spanData: { type: 'function', name: 'tool' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0];
+    const meta = toolCall.metadata as Record<string, string>;
+    expect(meta.error_type).toBe('SpanError');
+  });
+
+  // The error's `data` payload is expected in metadata as its JSON string.
+  test('test span error with message and data', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const errorData = { code: 'TOOL_ERROR', details: 'Connection failed' };
+    const span = makeSpan({
+      spanId: 'span-001',
+      parentId: 'trace-001',
+      error: { message: 'Tool failed', data: errorData },
+      spanData: {
+        type: 'function',
+        name: 'failing_tool',
+        input: '',
+        output: undefined
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+    const toolCall = mockLogger.addToolSpan.mock.calls[0][0];
+    const meta = toolCall.metadata as Record<string, string>;
+    expect(meta.error_details).toBe(JSON.stringify(errorData));
+  });
+
+  // onSpanEnd arrives for a span the processor never saw start; the call must
+  // resolve and no span of any kind may be logged.
+  test('test onSpanEnd without onSpanStart handled gracefully', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'orphan-span',
+      parentId: 'trace-001',
+      // 'function' is the span type the other tests see logged via addToolSpan,
+      // so the addToolSpan assertion below is able to fail if the orphan were
+      // emitted after all.
+      spanData: { type: 'function', name: 'tool' }
+    });
+
+    await processor.onTraceStart(trace);
+    // Skip onSpanStart
+    // Should not throw
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    // No emitter may run, whichever span kind the processor would have chosen.
+    expect(mockLogger.addToolSpan).not.toHaveBeenCalled();
+    expect(mockLogger.addWorkflowSpan).not.toHaveBeenCalled();
+    expect(mockLogger.addLlmSpan).not.toHaveBeenCalled();
+  });
+
+  // A span with both spanData.data and an error still exposes error_message.
+  // NOTE(review): only the error half of the "merge" is asserted; the
+  // pre-existing `data` entry is never checked — confirm how spanData.data is
+  // written into metadata and assert it as well.
+  test('test error metadata merged with existing metadata', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'span-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'agent',
+        data: { user_id: '123' } // Will go to metadata
+      },
+      error: { message: 'Error occurred' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    const meta = agentCall.metadata as Record<string, string>;
+    expect(meta.error_message).toBe('Error occurred');
+  });
+
+  // For an errored agent span the first conclude call carries the JSON-encoded
+  // error ({ message, type, data }) as output together with statusCode 500.
+  test('test workflow span with error uses serialized error as conclude output', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const agentSpan = makeSpan({
+      spanId: 'span-agent',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'MyAgent' },
+      error: { message: 'Agent failed', type: 'AgentError', data: { code: 42 } }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(agentSpan);
+    await processor.onSpanEnd(agentSpan);
+    await processor.onTraceEnd(trace);
+
+    const concludeCall = mockLogger.conclude.mock.calls[0][0];
+    expect(concludeCall.output).toBe(
+      JSON.stringify({
+        message: 'Agent failed',
+        type: 'AgentError',
+        data: { code: 42 }
+      })
+    );
+    expect(concludeCall.statusCode).toBe(500);
+  });
+
+  // Same orphan scenario as above, but the never-started span also carries an
+  // error; it must still be dropped without emitting anything.
+  test('test error on non-existent span ignored gracefully', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'never-started-span',
+      parentId: 'trace-001',
+      error: { message: 'This should be ignored' },
+      // Use the type that is logged via addToolSpan elsewhere in this file so
+      // the assertions below are not vacuous.
+      spanData: { type: 'function', name: 'tool' }
+    });
+
+    await processor.onTraceStart(trace);
+    // Skip onSpanStart - span doesn't exist in processor
+    // Should not throw
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addToolSpan).not.toHaveBeenCalled();
+    expect(mockLogger.addWorkflowSpan).not.toHaveBeenCalled();
+    expect(mockLogger.addLlmSpan).not.toHaveBeenCalled();
+  });
+});
+
+// Checks the durationNs value handed to startTrace for traces whose
+// startedAt / endedAt timestamps are both present or individually missing.
+describe('Date and duration handling', () => {
+  test('test valid startedAt and endedAt calculate durationNs', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+
+    // Two fixed instants exactly five seconds apart.
+    const begin = new Date('2024-01-01T00:00:00Z');
+    const finish = new Date('2024-01-01T00:00:05Z');
+    const timedTrace = makeTrace({
+      startedAt: begin.toISOString(),
+      endedAt: finish.toISOString()
+    });
+
+    await tracer.onTraceStart(timedTrace);
+    await tracer.onTraceEnd(timedTrace);
+
+    const { durationNs } = logger.startTrace.mock.calls[0][0];
+    expect(durationNs).toBeGreaterThan(0);
+    // 5 s = 5,000,000,000 ns; precision -4 allows a deviation below 5,000 ns.
+    expect(durationNs).toBeCloseTo(5_000_000_000, -4);
+  });
+
+  test('test missing startedAt sets durationNs to 0', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const traceWithoutStart = makeTrace({
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      startedAt: undefined as any, // deliberately absent
+      endedAt: new Date().toISOString()
+    });
+
+    await tracer.onTraceStart(traceWithoutStart);
+    await tracer.onTraceEnd(traceWithoutStart);
+
+    const { durationNs } = logger.startTrace.mock.calls[0][0];
+    expect(durationNs).toBe(0);
+  });
+
+  test('test missing endedAt uses current time', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const begin = new Date('2024-01-01T00:00:00Z');
+
+    const traceWithoutEnd = makeTrace({
+      startedAt: begin.toISOString(),
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      endedAt: undefined as any // deliberately absent
+    });
+
+    await tracer.onTraceStart(traceWithoutEnd);
+    await tracer.onTraceEnd(traceWithoutEnd);
+
+    // Only a non-negative duration is required when the end time is absent.
+    const { durationNs } = logger.startTrace.mock.calls[0][0];
+    expect(durationNs).toBeGreaterThanOrEqual(0);
+  });
+});
+
+// Checks the metadata delivered to the logger: trace metadata values arrive
+// as strings, non-ASCII text is unchanged, and span errors add error_* keys.
+describe('Metadata handling and serialization', () => {
+  test('test non-string metadata values stringified at trace start', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+
+    // One value of each kind: string, number, boolean and nested object.
+    const mixedTrace = makeTrace({
+      metadata: {
+        user_id: '123',
+        request_count: 5,
+        flags: true,
+        config: { nested: 'value' }
+      }
+    });
+
+    await tracer.onTraceStart(mixedTrace);
+    await tracer.onTraceEnd(mixedTrace);
+
+    const [traceArgs] = logger.startTrace.mock.calls[0];
+    const loggedMeta = traceArgs.metadata as Record<string, string>;
+    expect(loggedMeta.request_count).toBe('5');
+    expect(loggedMeta.flags).toBe('true');
+    // The nested object is expected as JSON text, so compare after parsing.
+    expect(JSON.parse(loggedMeta.config)).toEqual({ nested: 'value' });
+  });
+
+  test('test unicode characters preserved in metadata', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+
+    const unicodeTrace = makeTrace({
+      metadata: { message: 'Hello 世界 🌍' }
+    });
+
+    await tracer.onTraceStart(unicodeTrace);
+    await tracer.onTraceEnd(unicodeTrace);
+
+    const [traceArgs] = logger.startTrace.mock.calls[0];
+    const loggedMeta = traceArgs.metadata as Record<string, string>;
+    // CJK characters and the emoji must come back exactly as supplied.
+    expect(loggedMeta.message).toBe('Hello 世界 🌍');
+  });
+
+  test('test error overwrites specific metadata keys', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    // Function span with empty input, no output, and an error carrying data.
+    const failedTool = makeSpan({
+      spanId: 'span-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'function',
+        name: 'tool',
+        input: '',
+        output: undefined
+      },
+      error: { message: 'Tool error', data: { code: 'ECONNREFUSED' } }
+    });
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(failedTool);
+    await tracer.onSpanEnd(failedTool);
+    await tracer.onTraceEnd(rootTrace);
+
+    expect(logger.addToolSpan).toHaveBeenCalledTimes(1);
+    const [toolArgs] = logger.addToolSpan.mock.calls[0];
+    const loggedMeta = toolArgs.metadata as Record<string, string>;
+    expect(loggedMeta.error_message).toBe('Tool error');
+    expect(loggedMeta.error_type).toBe('SpanError');
+  });
+});
+
+// Verifies how 'agent' spans are emitted: through addWorkflowSpan (never
+// addAgentSpan), with their name, with a conclude call whose output falls back
+// to the last child's output, and with statusCode passed as a direct field.
+describe('Agent span emission', () => {
+  // An agent span is logged with addWorkflowSpan; addAgentSpan stays unused.
+  test('test agent span uses addWorkflowSpan not addAgentSpan', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'agent-span-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'TestAgent' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    expect(mockLogger.addAgentSpan).not.toHaveBeenCalled();
+  });
+
+  // spanData.name is forwarded unchanged as the workflow span's name.
+  test('test agent span passes name correctly', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'agent-span-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'RouterAgent', output: 'routed' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(agentCall.name).toBe('RouterAgent');
+  });
+
+  // Agent span with one tool child: the agent is opened, the child is logged,
+  // and only afterwards is the agent concluded.
+  test('test agent span conclude is called after children', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const agentSpan = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+
+    const toolSpan = makeSpan({
+      spanId: 'tool-001',
+      parentId: 'agent-001',
+      spanData: { type: 'function', name: 'my_tool' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(agentSpan);
+    await processor.onSpanStart(toolSpan);
+    await processor.onSpanEnd(toolSpan);
+    await processor.onSpanEnd(agentSpan);
+    await processor.onTraceEnd(trace);
+
+    expect(mockLogger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    expect(mockLogger.addToolSpan).toHaveBeenCalledTimes(1);
+    expect(mockLogger.conclude).toHaveBeenCalled();
+
+    // Assert the ordering the test name promises, using Jest's global call
+    // counter: agent opened < child tool logged < first conclude.
+    const agentOrder = mockLogger.addWorkflowSpan.mock.invocationCallOrder[0];
+    const toolOrder = mockLogger.addToolSpan.mock.invocationCallOrder[0];
+    const concludeOrder = mockLogger.conclude.mock.invocationCallOrder[0];
+    expect(agentOrder).toBeLessThan(toolOrder);
+    expect(toolOrder).toBeLessThan(concludeOrder);
+  });
+
+  // With a single generation child, the agent's conclude output is that
+  // child's output, while addWorkflowSpan itself saw no output.
+  test('test agent span conclude receives last child output as fallback', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const agentSpan = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'MyAgent' }
+    });
+
+    const llmSpan = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4o',
+        output: 'Final answer from LLM'
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(agentSpan);
+    await processor.onSpanStart(llmSpan);
+    await processor.onSpanEnd(llmSpan);
+    await processor.onSpanEnd(agentSpan);
+    await processor.onTraceEnd(trace);
+
+    // addWorkflowSpan is called before children — output is undefined at that point
+    const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(agentCall.output).toBeUndefined();
+
+    // conclude for the agent span (first conclude call) should carry the LLM child's output
+    const concludeCall = mockLogger.conclude.mock.calls[0][0];
+    expect(concludeCall.output).toBe('Final answer from LLM');
+  });
+
+  // With several children, the output of the child that ended last wins.
+  test('test agent span conclude receives last of multiple children outputs', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const agentSpan = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'MyAgent' }
+    });
+
+    const toolSpan = makeSpan({
+      spanId: 'tool-001',
+      parentId: 'agent-001',
+      spanData: { type: 'function', name: 'my_tool', output: 'Tool result' }
+    });
+
+    const llmSpan = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4o',
+        output: 'LLM final response'
+      }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(agentSpan);
+    await processor.onSpanStart(toolSpan);
+    await processor.onSpanEnd(toolSpan);
+    await processor.onSpanStart(llmSpan);
+    await processor.onSpanEnd(llmSpan);
+    await processor.onSpanEnd(agentSpan);
+    await processor.onTraceEnd(trace);
+
+    // The conclude for the agent span should use the last child (LLM), not the tool
+    const concludeCall = mockLogger.conclude.mock.calls[0][0];
+    expect(concludeCall.output).toBe('LLM final response');
+  });
+
+  // A childless agent span has nothing to fall back on: output is undefined.
+  test('test agent span conclude uses undefined when no children have output', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const agentSpan = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'EmptyAgent' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(agentSpan);
+    await processor.onSpanEnd(agentSpan);
+    await processor.onTraceEnd(trace);
+
+    const concludeCall = mockLogger.conclude.mock.calls[0][0];
+    expect(concludeCall.output).toBeUndefined();
+  });
+
+  // An errored agent span reports statusCode 500 on the call arguments
+  // themselves; no status_code key is written into metadata.
+  test('test agent span error passes statusCode 500 as direct field', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'agent-err-001',
+      parentId: 'trace-001',
+      error: { message: 'Agent failed' },
+      spanData: { type: 'agent', name: 'FailingAgent' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    // statusCode is passed as a direct field, not folded into metadata
+    expect(agentCall.statusCode).toBe(500);
+    const meta = agentCall.metadata as Record<string, string>;
+    expect(meta.error_message).toBe('Agent failed');
+    expect(meta.status_code).toBeUndefined();
+  });
+
+  // The success path mirrors the error path: statusCode 200 as a direct field
+  // and still no status_code key in metadata.
+  test('test agent span without error passes statusCode 200', async () => {
+    const mockLogger = createMockLogger();
+    const processor = new GalileoTracingProcessor(mockLogger as never, false);
+    const trace = makeTrace();
+
+    const span = makeSpan({
+      spanId: 'agent-ok-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'HappyAgent' }
+    });
+
+    await processor.onTraceStart(trace);
+    await processor.onSpanStart(span);
+    await processor.onSpanEnd(span);
+    await processor.onTraceEnd(trace);
+
+    const agentCall = mockLogger.addWorkflowSpan.mock.calls[0][0];
+    expect(agentCall.statusCode).toBe(200);
+    const meta = agentCall.metadata as Record<string, string>;
+    expect(meta.status_code).toBeUndefined();
+  });
+});
+
+// Checks the order and kind of logger calls produced for nested spans, and
+// that handoff span data is read again when the span ends.
+describe('Span hierarchy correctness', () => {
+  test('test trace with agent child maintains correct parent-child order', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const parentAgent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'RootAgent' }
+    });
+    const childLlm = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'agent-001',
+      spanData: { type: 'generation', model: 'gpt-4o' }
+    });
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(parentAgent);
+    await tracer.onSpanStart(childLlm);
+    await tracer.onSpanEnd(childLlm);
+    await tracer.onSpanEnd(parentAgent);
+    await tracer.onTraceEnd(rootTrace);
+
+    // Expected sequence of first invocations:
+    // startTrace -> addWorkflowSpan (agent) -> addLlmSpan -> conclude.
+    const [traceOrder] = logger.startTrace.mock.invocationCallOrder;
+    const [agentOrder] = logger.addWorkflowSpan.mock.invocationCallOrder;
+    const [llmOrder] = logger.addLlmSpan.mock.invocationCallOrder;
+    const [concludeOrder] = logger.conclude.mock.invocationCallOrder;
+
+    expect(traceOrder).toBeLessThan(agentOrder);
+    expect(agentOrder).toBeLessThan(llmOrder);
+    expect(llmOrder).toBeLessThan(concludeOrder);
+  });
+
+  test('test workflow span type still uses addWorkflowSpan', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const handoff = makeSpan({
+      spanId: 'handoff-001',
+      parentId: 'trace-001',
+      spanData: { type: 'handoff', from_agent: 'A', to_agent: 'B' }
+    });
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(handoff);
+    await tracer.onSpanEnd(handoff);
+    await tracer.onTraceEnd(rootTrace);
+
+    // A handoff is logged as a workflow span, never as an agent span.
+    expect(logger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    expect(logger.addAgentSpan).not.toHaveBeenCalled();
+  });
+
+  test('test agent and workflow spans both call conclude', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const parentAgent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent' }
+    });
+    const nestedHandoff = makeSpan({
+      spanId: 'handoff-001',
+      parentId: 'agent-001',
+      spanData: { type: 'handoff' }
+    });
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(parentAgent);
+    await tracer.onSpanStart(nestedHandoff);
+    await tracer.onSpanEnd(nestedHandoff);
+    await tracer.onSpanEnd(parentAgent);
+    await tracer.onTraceEnd(rootTrace);
+
+    // Three conclude calls in total: the handoff (workflow) span, the agent
+    // span, and the concludeAll issued from onTraceEnd.
+    expect(logger.conclude).toHaveBeenCalledTimes(3);
+  });
+
+  test('test handoff span refreshes to_agent at onSpanEnd (late binding)', async () => {
+    // The OpenAI Agents SDK assigns to_agent on handoffSpan.spanData only AFTER
+    // span.start() has fired (inside withHandoffSpan's fn callback), so
+    // onSpanStart observes to_agent = undefined. onSpanEnd therefore has to
+    // extract the span data again to pick up the final to_agent value.
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const triage = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'TriageAgent' }
+    });
+
+    // Mirror the SDK: to_agent is missing when the span starts and is filled
+    // in before it ends.
+    const lateBoundData: AgentSpan['spanData'] = {
+      type: 'handoff',
+      from_agent: 'TriageAgent'
+      // to_agent intentionally left out for now
+    };
+    const handoff = makeSpan({
+      spanId: 'handoff-001',
+      parentId: 'agent-001',
+      spanData: lateBoundData
+    });
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(triage);
+    await tracer.onSpanStart(handoff);
+
+    // The SDK would populate to_agent at this point, after start.
+    lateBoundData.to_agent = 'WeatherAgent';
+
+    await tracer.onSpanEnd(handoff);
+    await tracer.onSpanEnd(triage);
+    await tracer.onTraceEnd(rootTrace);
+
+    // The handoff workflow span must carry the JSON dict as output rather
+    // than an empty string.
+    const handoffCall = logger.addWorkflowSpan.mock.calls.find(
+      ([args]: [Record<string, unknown>]) =>
+        args.name === 'Handoff: TriageAgent → WeatherAgent'
+    );
+    expect(handoffCall).toBeDefined();
+    expect(handoffCall?.[0].output).toBe('{"to_agent":"WeatherAgent"}');
+
+    // Via the last-child fallback, some conclude call must carry the same
+    // JSON dict.
+    const allConcludes = logger.conclude.mock.calls as [
+      Record<string, unknown>
+    ][];
+    const matchingConclude = allConcludes.find(
+      ([args]) => args.output === '{"to_agent":"WeatherAgent"}'
+    );
+    expect(matchingConclude).toBeDefined();
+  });
+});
+
+// Checks which value ends up as the `input` passed to startTrace: the first
+// non-empty span input seen, or the trace name when there is none.
+describe('_firstInput population (trace-level input handling)', () => {
+  test('captures first input from LLM span', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    await tracer.onTraceStart(rootTrace);
+
+    // A generation span that carries an input.
+    const generation = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: 'What is the weather in NYC?',
+        output: 'It is sunny...'
+      }
+    });
+
+    await tracer.onSpanStart(generation);
+    await tracer.onSpanEnd(generation);
+    await tracer.onTraceEnd(rootTrace);
+
+    // startTrace must have been given the generation span's input.
+    const [traceArgs] = logger.startTrace.mock.calls[0];
+    expect(traceArgs.input).toBe('What is the weather in NYC?');
+  });
+
+  test('captures first input from tool span if LLM input unavailable', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    await tracer.onTraceStart(rootTrace);
+
+    // Only a function span exists in this trace, and it has an input.
+    const search = makeSpan({
+      spanId: 'tool-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'function',
+        name: 'search',
+        input: 'NYC weather forecast',
+        output: 'Sunny, 72F'
+      }
+    });
+
+    await tracer.onSpanStart(search);
+    await tracer.onSpanEnd(search);
+    await tracer.onTraceEnd(rootTrace);
+
+    const [traceArgs] = logger.startTrace.mock.calls[0];
+    expect(traceArgs.input).toBe('NYC weather forecast');
+  });
+
+  test('skips empty or null inputs, uses first meaningful one', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    await tracer.onTraceStart(rootTrace);
+
+    // The earlier generation has an empty-string input ...
+    const blankGeneration = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: '',
+        output: 'response'
+      }
+    });
+
+    // ... and the later one carries real text.
+    const realGeneration = makeSpan({
+      spanId: 'llm-002',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: 'Real question',
+        output: 'Real answer'
+      }
+    });
+
+    // Each span runs start-to-end before the next one begins.
+    for (const generation of [blankGeneration, realGeneration]) {
+      await tracer.onSpanStart(generation);
+      await tracer.onSpanEnd(generation);
+    }
+    await tracer.onTraceEnd(rootTrace);
+
+    // The empty input is passed over in favour of the second span's input.
+    const [traceArgs] = logger.startTrace.mock.calls[0];
+    expect(traceArgs.input).toBe('Real question');
+  });
+
+  test('falls back to trace name if no meaningful input captured', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const namedTrace = makeTrace({ name: 'Agent Workflow' });
+
+    // The trace starts and ends without a single span in between.
+    await tracer.onTraceStart(namedTrace);
+    await tracer.onTraceEnd(namedTrace);
+
+    // With nothing captured, the trace name serves as the input.
+    const [traceArgs] = logger.startTrace.mock.calls[0];
+    expect(traceArgs.input).toBe('Agent Workflow');
+  });
+
+  test('only captures input from first meaningful span, ignores later ones', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    await tracer.onTraceStart(rootTrace);
+
+    const earlierGeneration = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: 'First query',
+        output: 'First answer'
+      }
+    });
+
+    const laterGeneration = makeSpan({
+      spanId: 'llm-002',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'generation',
+        model: 'gpt-4',
+        input: 'Second query',
+        output: 'Second answer'
+      }
+    });
+
+    // Each span runs start-to-end before the next one begins.
+    for (const generation of [earlierGeneration, laterGeneration]) {
+      await tracer.onSpanStart(generation);
+      await tracer.onSpanEnd(generation);
+    }
+    await tracer.onTraceEnd(rootTrace);
+
+    // Once an input has been captured, later spans must not replace it.
+    const [traceArgs] = logger.startTrace.mock.calls[0];
+    expect(traceArgs.input).toBe('First query');
+  });
+});
+
+// Checks how spans of type 'custom' that carry a `_galileoSpan` payload are
+// routed to the logger, and that the payload is read when the span ends.
+describe('GalileoCustomSpan integration via onSpanStart/onSpanEnd', () => {
+  // Builds a span shaped like the one withCustomSpan produces, where the SDK
+  // spreads the options.data fields onto the top level of spanData.
+  // `overrides` is spread last, so it can replace any of the default fields.
+  function makeCustomSpan(
+    galileoSpan: Record<string, unknown>,
+    overrides: Partial<AgentSpan> = {}
+  ): AgentSpan {
+    const spanName =
+      (galileoSpan.name as string | undefined) ?? 'Galileo Custom';
+    return makeSpan({
+      spanId: 'custom-001',
+      parentId: 'trace-001',
+      spanData: {
+        type: 'custom',
+        __galileoCustom: true,
+        _galileoSpan: galileoSpan,
+        name: spanName
+      },
+      ...overrides
+    });
+  }
+
+  test('test custom tool span calls addToolSpan', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const toolPayload = {
+      type: 'tool',
+      input: 'my input',
+      output: 'my output'
+    };
+    const customSpan = makeCustomSpan(toolPayload);
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(customSpan);
+    await tracer.onSpanEnd(customSpan);
+    await tracer.onTraceEnd(rootTrace);
+
+    expect(logger.addToolSpan).toHaveBeenCalledTimes(1);
+    const [toolArgs] = logger.addToolSpan.mock.calls[0];
+    expect(toolArgs.input).toBe('my input');
+    expect(toolArgs.output).toBe('my output');
+  });
+
+  test('test custom workflow span calls addWorkflowSpan', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const workflowPayload = {
+      type: 'workflow',
+      input: 'wf in',
+      output: 'wf out'
+    };
+    const customSpan = makeCustomSpan(workflowPayload);
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(customSpan);
+    await tracer.onSpanEnd(customSpan);
+    await tracer.onTraceEnd(rootTrace);
+
+    // The trace root goes through startTrace, so the custom span accounts for
+    // the only addWorkflowSpan call.
+    expect(logger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    const [workflowArgs] = logger.addWorkflowSpan.mock.calls[0];
+    expect(workflowArgs.input).toBe('wf in');
+  });
+
+  test('test output mutation inside callback is captured at onSpanEnd', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    // The payload begins without an output, as if the user fills it in later.
+    const pendingPayload: Record<string, unknown> = {
+      type: 'tool',
+      input: 'query',
+      output: undefined
+    };
+    const customSpan = makeCustomSpan(pendingPayload);
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(customSpan);
+
+    // Stand-in for user code assigning the output before the callback returns.
+    pendingPayload.output = 'result after work';
+
+    await tracer.onSpanEnd(customSpan);
+    await tracer.onTraceEnd(rootTrace);
+
+    // The value assigned after start must be the one that gets logged.
+    const [toolArgs] = logger.addToolSpan.mock.calls[0];
+    expect(toolArgs.output).toBe('result after work');
+  });
+
+  test('test custom span with metadata and tags', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const annotatedPayload = {
+      type: 'tool',
+      input: 'in',
+      metadata: { source: 'db' },
+      tags: ['tag-a'],
+      statusCode: 201
+    };
+    const customSpan = makeCustomSpan(annotatedPayload);
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(customSpan);
+    await tracer.onSpanEnd(customSpan);
+    await tracer.onTraceEnd(rootTrace);
+
+    // metadata, tags and statusCode must all reach addToolSpan unchanged.
+    const [toolArgs] = logger.addToolSpan.mock.calls[0];
+    expect(toolArgs.metadata).toEqual({ source: 'db' });
+    expect(toolArgs.tags).toEqual(['tag-a']);
+    expect(toolArgs.statusCode).toBe(201);
+  });
+
+  test('test custom span with unknown type falls back to addWorkflowSpan', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const rootTrace = makeTrace();
+
+    const unknownPayload = { type: 'future_type', input: 'in' };
+    const customSpan = makeCustomSpan(unknownPayload);
+
+    await tracer.onTraceStart(rootTrace);
+    await tracer.onSpanStart(customSpan);
+    await tracer.onSpanEnd(customSpan);
+    await tracer.onTraceEnd(rootTrace);
+
+    // The trace root goes through startTrace, so the unrecognised custom type
+    // is the single addWorkflowSpan call; no tool span is logged.
+    expect(logger.addWorkflowSpan).toHaveBeenCalledTimes(1);
+    expect(logger.addToolSpan).not.toHaveBeenCalled();
+  });
+});
+
+describe('Trace-level statusCode propagation (_lastStatusCode)', () => {
+  /**
+   * Finds the conclude() call whose argument has `concludeAll: true`, asserts
+   * that it exists, and returns that argument so a test can check statusCode.
+   *
+   * @param logger - mock logger that was passed to the processor under test
+   * @returns the first argument of the matching conclude() call
+   */
+  const concludeAllArgs = (
+    logger: ReturnType<typeof createMockLogger>
+  ): Record<string, unknown> => {
+    const calls = logger.conclude.mock.calls as [Record<string, unknown>][];
+    const match = calls.find(([args]) => args.concludeAll === true);
+    expect(match).toBeDefined();
+    return match![0];
+  };
+
+  test('test concludeAll receives statusCode from errored agent span', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const trace = makeTrace();
+
+    // The only span in this trace is an agent span carrying an error.
+    const failedAgent = makeSpan({
+      spanId: 'agent-err-001',
+      parentId: 'trace-001',
+      error: { message: 'Agent crashed' },
+      spanData: { type: 'agent', name: 'CrashingAgent' }
+    });
+
+    await tracer.onTraceStart(trace);
+    await tracer.onSpanStart(failedAgent);
+    await tracer.onSpanEnd(failedAgent);
+    await tracer.onTraceEnd(trace);
+
+    // The errored agent is expected to surface as 500 on the concludeAll call.
+    expect(concludeAllArgs(logger).statusCode).toBe(500);
+  });
+
+  test('test concludeAll receives statusCode 200 when no errors', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const trace = makeTrace();
+
+    const healthyAgent = makeSpan({
+      spanId: 'agent-ok-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'HappyAgent' }
+    });
+
+    await tracer.onTraceStart(trace);
+    await tracer.onSpanStart(healthyAgent);
+    await tracer.onSpanEnd(healthyAgent);
+    await tracer.onTraceEnd(trace);
+
+    // With no error on the agent span, concludeAll is expected to report 200.
+    expect(concludeAllArgs(logger).statusCode).toBe(200);
+  });
+
+  test('test concludeAll uses last workflow statusCode when multiple agents', async () => {
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const trace = makeTrace();
+
+    const firstAgent = makeSpan({
+      spanId: 'agent-001',
+      parentId: 'trace-001',
+      spanData: { type: 'agent', name: 'FirstAgent' }
+    });
+    const secondAgent = makeSpan({
+      spanId: 'agent-002',
+      parentId: 'trace-001',
+      error: { message: 'Second agent failed' },
+      spanData: { type: 'agent', name: 'SecondAgent' }
+    });
+
+    // The two agents run back to back; the failing one finishes last.
+    await tracer.onTraceStart(trace);
+    await tracer.onSpanStart(firstAgent);
+    await tracer.onSpanEnd(firstAgent);
+    await tracer.onSpanStart(secondAgent);
+    await tracer.onSpanEnd(secondAgent);
+    await tracer.onTraceEnd(trace);
+
+    // concludeAll is expected to carry the 500 of the agent that ended last.
+    expect(concludeAllArgs(logger).statusCode).toBe(500);
+  });
+
+  test('test concludeAll has no statusCode when trace has only LLM spans', async () => {
+    // Premise: _lastStatusCode is fed by workflow/agent concludes only, so a
+    // trace containing just an LLM span should leave concludeAll without one.
+    const logger = createMockLogger();
+    const tracer = new GalileoTracingProcessor(logger as never, false);
+    const trace = makeTrace();
+
+    const generationSpan = makeSpan({
+      spanId: 'llm-001',
+      parentId: 'trace-001',
+      spanData: { type: 'generation', model: 'gpt-4o' }
+    });
+
+    await tracer.onTraceStart(trace);
+    await tracer.onSpanStart(generationSpan);
+    await tracer.onSpanEnd(generationSpan);
+    await tracer.onTraceEnd(trace);
+
+    // The concludeAll call must still exist, just with statusCode left unset.
+    expect(concludeAllArgs(logger).statusCode).toBeUndefined();
+  });
+});