
Commit 6ca835b

Extract LLM attributes from any OTEL convention, not just GenAI
## Summary

The span ingestion pipeline only recognized `gen_ai.*` attribute keys when extracting LLM-specific fields into promoted ClickHouse columns. Spans arriving via OpenInference, OpenLLMetry, the Vercel AI SDK, or the OpenAI Agents SDK had their LLM columns left empty despite carrying equivalent data under different keys and vocabularies.

This introduces a multi-convention extraction layer with two components: **scalar attribute resolvers** and **content payload parsers**.

### Scalar attribute resolvers (`resolvers.ts`)

Each promoted column is resolved from a priority-ordered list of convention-specific candidates. The first candidate that returns a value wins. Value translation is applied where conventions use different vocabularies.

| Column | GenAI current | GenAI deprecated / OpenLLMetry | OpenInference | Vercel AI SDK |
|---|---|---|---|---|
| `operation` | `gen_ai.operation.name` | `llm.request.type` (maps `completion`→`text_completion`, `embedding`→`embeddings`, etc.) | `openinference.span.kind` (maps `LLM`→`chat`, `EMBEDDING`→`embeddings`, `TOOL`→`execute_tool`, etc.) | `ai.operationId` (maps `ai.generateText`→`chat`, `ai.toolCall`→`execute_tool`, etc.) |
| `provider` | `gen_ai.provider.name` | `gen_ai.system` (aliases `bedrock`→`aws.bedrock`, `gemini`→`gcp.gemini`, `mistral`→`mistral_ai`, etc.) | `llm.system` (aliases `mistralai`→`mistral_ai`, `xai`→`x_ai`, `vertexai`→`gcp.vertex_ai`) | `ai.model.provider` (strips `.chat`/`.messages`/`.responses` suffixes, aliases `google.generative-ai`→`gcp.gemini`, `amazon-bedrock`→`aws.bedrock`) |
| `model` | `gen_ai.request.model` | same | `llm.model_name`, `embedding.model_name`, `reranker.model_name` | `ai.model.id` |
| `response_model` | `gen_ai.response.model` | same | `llm.model_name` (no request/response distinction) | `ai.response.model` |
| `tokens_input` | `gen_ai.usage.input_tokens` | `gen_ai.usage.prompt_tokens` | `llm.token_count.prompt` | `ai.usage.promptTokens` |
| `tokens_output` | `gen_ai.usage.output_tokens` | `gen_ai.usage.completion_tokens` | `llm.token_count.completion` | `ai.usage.completionTokens` |
| `tokens_cache_read` | `gen_ai.usage.cache_read.input_tokens` | same | `llm.token_count.prompt_details.cache_read` | — |
| `tokens_cache_create` | `gen_ai.usage.cache_creation.input_tokens` | same | `llm.token_count.prompt_details.cache_write` | — |
| `tokens_reasoning` | `gen_ai.usage.reasoning_tokens` | same | `llm.token_count.completion_details.reasoning` | — |
| `response_id` | `gen_ai.response.id` | same | — | `ai.response.id` |
| `finish_reasons` | `gen_ai.response.finish_reasons` (string[]) | same | — | `ai.response.finishReason` (singular string, wrapped to array; `tool-calls`→`tool_calls`, `content-filter`→`content_filter`) |
| `session_id` | `gen_ai.conversation.id` | same | `session.id` | — |
| `cost_*_microcents` | — | `gen_ai.usage.cost` (total only, USD float→microcents) | `llm.cost.prompt`, `llm.cost.completion`, `llm.cost.total` (USD float→microcents) | — |

OpenAI Agents SDK spans are handled implicitly: when bridged to OTEL via the official instrumentor, they emit GenAI convention attributes.

### Content payload parsers (`content/`)

LLM message payloads use fundamentally different storage structures across conventions, so each gets a dedicated parser with sentinel-based detection:

- **GenAI current** (sentinel: `gen_ai.input.messages` or `gen_ai.output.messages`): Parses structured/JSON messages already in the GenAI parts-based format. Extracts `gen_ai.system_instructions` and `gen_ai.tool.definitions` as dedicated attributes.
- **GenAI deprecated / OpenLLMetry** (sentinel: `gen_ai.prompt` or `gen_ai.completion`): Parses flat JSON strings containing `{role, content}` message arrays. Translates to GenAI format via `rosetta-ai` auto-detection. Extracts `llm.request.functions` for tool definitions.
- **OpenInference** (sentinel: `llm.input_messages.*` prefix or `openinference.span.kind`): Reassembles flattened indexed span attributes (`llm.input_messages.{i}.message.role`, `.content`, `.tool_calls.{j}.tool_call.function.name`, etc.) by scanning, grouping by index, and sorting. Reconstructs `llm.tools.{i}.tool.json_schema` for tool definitions. Translates reassembled messages via `rosetta-ai`.
- **Vercel AI SDK** (sentinel: `ai.prompt` or `ai.prompt.messages`): Handles both top-level spans (`ai.prompt` JSON with `system` + `messages` fields) and call-level spans (`ai.prompt.messages` JSON array). Reconstructs output from split `ai.response.text` + `ai.response.toolCalls`. Parses the `ai.prompt.tools` string array for tool definitions. Translates via `rosetta-ai` with explicit `Provider.VercelAI`.

All raw span attributes remain in the dynamic `attr_*` maps regardless of whether they were also extracted to promoted columns.
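The priority-ordered resolution described above can be sketched roughly as follows. The types, names, and the `resolveFirst` helper here are illustrative assumptions, not the actual `resolvers.ts` API:

```typescript
// Minimal sketch of priority-ordered candidate resolution with vocabulary
// translation. First candidate that yields a value wins.
type KeyValue = { key: string; value?: { stringValue?: string } }

type Candidate = {
  key: string
  // Optional vocabulary translation (e.g. OpenLLMetry "completion" -> "text_completion")
  translate?: (raw: string) => string | undefined
}

function resolveFirst(attrs: readonly KeyValue[], candidates: readonly Candidate[]): string | undefined {
  for (const c of candidates) {
    const raw = attrs.find((a) => a.key === c.key)?.value?.stringValue
    if (raw === undefined) continue
    const value = c.translate ? c.translate(raw) : raw
    if (value !== undefined) return value
  }
  return undefined
}

// `operation` candidates: the GenAI-current key outranks the deprecated one.
const operationCandidates: Candidate[] = [
  { key: "gen_ai.operation.name" },
  {
    key: "llm.request.type",
    translate: (v) =>
      ({ completion: "text_completion", embedding: "embeddings", chat: "chat" } as Record<string, string>)[v],
  },
]

const attrs: KeyValue[] = [{ key: "llm.request.type", value: { stringValue: "completion" } }]
console.log(resolveFirst(attrs, operationCandidates)) // "text_completion"
```

Because candidates are ordered, a span that carries both `gen_ai.operation.name` and `llm.request.type` resolves from the current convention and ignores the deprecated key.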
1 parent a3b33ce commit 6ca835b

File tree: 10 files changed, 896 additions and 79 deletions


apps/ingest/src/routes/traces.ts

Lines changed: 6 additions & 0 deletions

@@ -1,5 +1,6 @@
 import { OrganizationId, ProjectId, putInDisk } from "@domain/shared"
 import type { OtlpExportTraceServiceRequest } from "@domain/spans"
+import { validateOtlpCompliance } from "@domain/spans"
 import { Effect } from "effect"
 import type { Hono } from "hono"
 import { getSpanIngestionQueue, getStorageDisk } from "../clients.ts"
@@ -32,6 +33,11 @@ export const registerTracesRoute = ({ app }: TracesRouteContext) => {
       return c.json({})
     }
 
+    const validationError = validateOtlpCompliance(request)
+    if (validationError) {
+      return c.json({ error: `Non-compliant OTLP payload: ${validationError}` }, 400)
+    }
+
     const organizationId = c.get("organizationId")
     const projectId = c.get("projectId")
     const apiKeyId = c.get("apiKeyId")
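`validate.ts` itself is not shown in this commit view. As a hedged illustration only, a structural compliance check that returns an error string, or `undefined` when compliant (matching how the route above consumes it), might look like:

```typescript
// Guess at the shape of a compliance check; NOT the real validateOtlpCompliance.
// The only contract visible from the route is: string on failure, undefined on success.
type OtlpExportTraceServiceRequest = { resourceSpans?: unknown }

function validateOtlpCompliance(request: OtlpExportTraceServiceRequest): string | undefined {
  if (!Array.isArray(request.resourceSpans)) {
    return "resourceSpans must be an array"
  }
  return undefined
}

console.log(validateOtlpCompliance({ resourceSpans: [] })) // undefined (compliant)
console.log(validateOtlpCompliance({})) // "resourceSpans must be an array"
```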

packages/domain/spans/src/index.ts

Lines changed: 2 additions & 1 deletion

@@ -5,4 +5,5 @@ export type { TraceListOptions, TraceRepository } from "./ports/trace-repository
 
 export type { TransformContext } from "./otlp/transform.ts"
 export { transformOtlpToSpans } from "./otlp/transform.ts"
-export type { OtlpExportTraceServiceRequest } from "./otlp/types.ts"
+export type { OtlpExportTraceServiceRequest, OtlpSpan } from "./otlp/types.ts"
+export { validateOtlpCompliance } from "./otlp/validate.ts"
Lines changed: 44 additions & 0 deletions

/**
 * Content parser for OTEL GenAI semantic convention v1.37+.
 *
 * Attributes:
 *   gen_ai.input.messages      — structured object or JSON string (parts-based GenAI format)
 *   gen_ai.output.messages     — same
 *   gen_ai.system_instructions — structured array of parts or JSON string
 *   gen_ai.tool.definitions    — structured array or JSON string
 */
import type { GenAIMessage } from "rosetta-ai"
import type { OtlpKeyValue } from "../types.ts"
import type { ParsedContent } from "./index.ts"

function extractJsonAttr(attrs: readonly OtlpKeyValue[], key: string): unknown {
  const kv = attrs.find((a) => a.key === key)
  if (!kv?.value) return undefined
  if (kv.value.stringValue) {
    try {
      return JSON.parse(kv.value.stringValue)
    } catch {
      return undefined
    }
  }
  return undefined
}

function parseMessages(attrs: readonly OtlpKeyValue[], key: string): GenAIMessage[] {
  const raw = extractJsonAttr(attrs, key)
  if (!Array.isArray(raw)) return []
  return raw as GenAIMessage[]
}

export function parseGenAICurrent(attrs: readonly OtlpKeyValue[]): ParsedContent {
  const inputMessages = parseMessages(attrs, "gen_ai.input.messages")
  const outputMessages = parseMessages(attrs, "gen_ai.output.messages")

  const systemRaw = extractJsonAttr(attrs, "gen_ai.system_instructions")
  const systemInstructions = systemRaw ? JSON.stringify(systemRaw) : ""

  const toolsRaw = extractJsonAttr(attrs, "gen_ai.tool.definitions")
  const toolDefinitions = toolsRaw ? JSON.stringify(toolsRaw) : ""

  return { inputMessages, outputMessages, systemInstructions, toolDefinitions }
}
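For illustration, here is how the JSON-string path of `extractJsonAttr` behaves on a GenAI-current span attribute. The `OtlpKeyValue` type is stubbed locally, and the sample message shape is an assumption about the parts-based format:

```typescript
// Self-contained demo of the JSON-string parsing path in the parser above.
type OtlpKeyValue = { key: string; value?: { stringValue?: string } }

function extractJsonAttr(attrs: readonly OtlpKeyValue[], key: string): unknown {
  const kv = attrs.find((a) => a.key === key)
  if (!kv?.value?.stringValue) return undefined
  try {
    return JSON.parse(kv.value.stringValue)
  } catch {
    // Malformed JSON is swallowed rather than failing ingestion.
    return undefined
  }
}

const attrs: OtlpKeyValue[] = [
  {
    key: "gen_ai.input.messages",
    value: {
      stringValue: JSON.stringify([
        { role: "user", parts: [{ type: "text", content: "Hello" }] },
      ]),
    },
  },
]

const messages = extractJsonAttr(attrs, "gen_ai.input.messages")
console.log(Array.isArray(messages) ? messages.length : 0) // 1
```

Note the deliberate leniency: an unparseable string yields `undefined`, so the span still ingests with empty promoted content rather than being dropped.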
Lines changed: 59 additions & 0 deletions

/**
 * Content parser for OTEL GenAI deprecated convention (pre-v1.37) and OpenLLMetry.
 *
 * Both use the same attribute keys:
 *   gen_ai.prompt     — single JSON string with array of {role, content} messages
 *   gen_ai.completion — single JSON string with array of {role, content} messages
 *
 * The JSON content is in provider-native format (typically OpenAI-style {role, content}).
 * We use rosetta-ai's auto-detection to translate to GenAI format.
 *
 * OpenLLMetry additionally defines:
 *   llm.request.functions — JSON string array of function/tool definitions
 */
import { safeTranslate } from "rosetta-ai"
import type { GenAIMessage } from "rosetta-ai"
import type { OtlpKeyValue } from "../types.ts"
import type { ParsedContent } from "./index.ts"

function parseJsonString(attrs: readonly OtlpKeyValue[], key: string): unknown {
  const kv = attrs.find((a) => a.key === key)
  if (!kv?.value?.stringValue) return undefined
  try {
    return JSON.parse(kv.value.stringValue)
  } catch {
    return undefined
  }
}

function translateMessages(raw: unknown, direction: "input" | "output"): GenAIMessage[] {
  if (!Array.isArray(raw) || raw.length === 0) return []
  const result = safeTranslate(raw, { direction })
  if (result.error) return []
  return result.messages as GenAIMessage[]
}

export function parseGenAIDeprecated(attrs: readonly OtlpKeyValue[]): ParsedContent {
  const promptRaw = parseJsonString(attrs, "gen_ai.prompt")
  const completionRaw = parseJsonString(attrs, "gen_ai.completion")

  let inputMessages: GenAIMessage[] = []
  let systemInstructions = ""

  if (Array.isArray(promptRaw) && promptRaw.length > 0) {
    const result = safeTranslate(promptRaw, { direction: "input" })
    if (!result.error) {
      inputMessages = result.messages as GenAIMessage[]
      if (result.system) {
        systemInstructions = JSON.stringify(result.system)
      }
    }
  }

  const outputMessages = translateMessages(completionRaw, "output")

  const functionsRaw = parseJsonString(attrs, "llm.request.functions")
  const toolDefinitions = Array.isArray(functionsRaw) ? JSON.stringify(functionsRaw) : ""

  return { inputMessages, outputMessages, systemInstructions, toolDefinitions }
}
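The translation itself is delegated to `rosetta-ai`, which is external to this diff. As a toy stand-in, converting the flat `{role, content}` shape into a parts-based shape could look like the following. The `GenAIMessage` shape here is a simplified assumption, not the library's actual type:

```typescript
// Toy stand-in for rosetta-ai's translation step: flat {role, content}
// messages become parts-based messages. Shapes are illustrative only.
type FlatMessage = { role: string; content: string }
type GenAIMessage = { role: string; parts: { type: "text"; content: string }[] }

function toGenAI(messages: FlatMessage[]): GenAIMessage[] {
  return messages.map((m) => ({
    role: m.role,
    parts: [{ type: "text", content: m.content }],
  }))
}

// gen_ai.prompt arrives as a single JSON string; parse, then translate.
const prompt: FlatMessage[] = JSON.parse('[{"role":"user","content":"What is OTLP?"}]')
console.log(toGenAI(prompt)[0].parts[0].content) // "What is OTLP?"
```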
Lines changed: 66 additions & 0 deletions

import type { GenAIMessage } from "rosetta-ai"
import { stringAttr } from "../resolvers.ts"
import type { OtlpKeyValue } from "../types.ts"
import { parseGenAICurrent } from "./genai.ts"
import { parseGenAIDeprecated } from "./genai_deprecated.ts"
import { parseOpenInference } from "./openinference.ts"
import { parseVercel } from "./vercel.ts"

export interface ParsedContent {
  readonly inputMessages: readonly GenAIMessage[]
  readonly outputMessages: readonly GenAIMessage[]
  readonly systemInstructions: string
  readonly toolDefinitions: string
}

const EMPTY_CONTENT: ParsedContent = {
  inputMessages: [],
  outputMessages: [],
  systemInstructions: "",
  toolDefinitions: "",
}

interface ContentParser {
  canHandle(attrs: readonly OtlpKeyValue[]): boolean
  parse(attrs: readonly OtlpKeyValue[]): ParsedContent
}

function hasKey(attrs: readonly OtlpKeyValue[], key: string): boolean {
  return attrs.some((a) => a.key === key)
}

function hasKeyPrefix(attrs: readonly OtlpKeyValue[], prefix: string): boolean {
  return attrs.some((a) => a.key.startsWith(prefix))
}

const PARSERS: readonly ContentParser[] = [
  {
    canHandle: (attrs) => hasKey(attrs, "gen_ai.input.messages") || hasKey(attrs, "gen_ai.output.messages"),
    parse: parseGenAICurrent,
  },
  {
    canHandle: (attrs) =>
      hasKeyPrefix(attrs, "llm.input_messages.") ||
      hasKeyPrefix(attrs, "llm.output_messages.") ||
      (stringAttr(attrs, "openinference.span.kind") !== undefined && hasKeyPrefix(attrs, "llm.")),
    parse: parseOpenInference,
  },
  {
    canHandle: (attrs) => hasKey(attrs, "ai.prompt") || hasKey(attrs, "ai.prompt.messages"),
    parse: parseVercel,
  },
  // GenAI deprecated / OpenLLMetry is the broadest fallback (gen_ai.prompt is common)
  {
    canHandle: (attrs) => hasKey(attrs, "gen_ai.prompt") || hasKey(attrs, "gen_ai.completion"),
    parse: parseGenAIDeprecated,
  },
]

export function parseContent(attrs: readonly OtlpKeyValue[]): ParsedContent {
  for (const parser of PARSERS) {
    if (parser.canHandle(attrs)) {
      return parser.parse(attrs)
    }
  }
  return EMPTY_CONTENT
}
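The sentinel dispatch can be exercised with a stripped-down version. The parser names and the `Attr` type are illustrative; only the detection predicates and their ordering mirror the file above:

```typescript
// Mini version of the sentinel dispatch: first matching parser wins,
// ordered so the broad gen_ai.prompt fallback comes last.
type Attr = { key: string }
type Parser = { name: string; canHandle: (attrs: Attr[]) => boolean }

const has = (attrs: Attr[], key: string) => attrs.some((a) => a.key === key)
const hasPrefix = (attrs: Attr[], p: string) => attrs.some((a) => a.key.startsWith(p))

const parsers: Parser[] = [
  { name: "genai-current", canHandle: (a) => has(a, "gen_ai.input.messages") || has(a, "gen_ai.output.messages") },
  { name: "openinference", canHandle: (a) => hasPrefix(a, "llm.input_messages.") || hasPrefix(a, "llm.output_messages.") },
  { name: "vercel", canHandle: (a) => has(a, "ai.prompt") || has(a, "ai.prompt.messages") },
  { name: "genai-deprecated", canHandle: (a) => has(a, "gen_ai.prompt") || has(a, "gen_ai.completion") },
]

const detect = (attrs: Attr[]): string => parsers.find((p) => p.canHandle(attrs))?.name ?? "none"

console.log(detect([{ key: "llm.input_messages.0.message.role" }])) // "openinference"
console.log(detect([{ key: "gen_ai.prompt" }])) // "genai-deprecated"
console.log(detect([{ key: "http.method" }])) // "none"
```

Ordering matters: a span carrying both `gen_ai.input.messages` and `gen_ai.prompt` (e.g. an SDK emitting both conventions during migration) is parsed by the current-convention parser, not the fallback.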
Lines changed: 172 additions & 0 deletions

/**
 * Content parser for OpenInference (Arize Phoenix).
 *
 * OpenInference explodes messages into flattened indexed span attributes:
 *   llm.input_messages.{i}.message.role
 *   llm.input_messages.{i}.message.content
 *   llm.input_messages.{i}.message.tool_calls.{j}.tool_call.function.name
 *   llm.input_messages.{i}.message.tool_calls.{j}.tool_call.function.arguments
 *   llm.input_messages.{i}.message.contents.{j}.message_content.type
 *   llm.input_messages.{i}.message.contents.{j}.message_content.text
 *   llm.input_messages.{i}.message.contents.{j}.message_content.image.image.url
 *
 * Output messages follow the same pattern with llm.output_messages.{i}.
 *
 * Tool definitions use:
 *   llm.tools.{i}.tool.json_schema — JSON string of tool schema
 *
 * We reassemble these into message arrays, then translate via rosetta-ai.
 */
import { safeTranslate } from "rosetta-ai"
import type { GenAIMessage } from "rosetta-ai"
import type { OtlpKeyValue } from "../types.ts"
import type { ParsedContent } from "./index.ts"

interface ToolCallData {
  name: string
  arguments: string
}

interface ReassembledMessage {
  role: string
  content: string
  tool_calls?: { id: string; type: string; function: { name: string; arguments: string } }[]
}

const INPUT_PREFIX = "llm.input_messages."
const OUTPUT_PREFIX = "llm.output_messages."
const TOOLS_PREFIX = "llm.tools."

function reassembleMessages(attrs: readonly OtlpKeyValue[], prefix: string): ReassembledMessage[] {
  const byIndex = new Map<number, Map<string, string>>()
  const toolCalls = new Map<number, Map<number, ToolCallData>>()

  for (const attr of attrs) {
    if (!attr.key.startsWith(prefix)) continue
    const rest = attr.key.slice(prefix.length)

    const dotIdx = rest.indexOf(".")
    if (dotIdx === -1) continue

    const index = Number.parseInt(rest.slice(0, dotIdx), 10)
    if (Number.isNaN(index)) continue

    const field = rest.slice(dotIdx + 1)
    const value = attr.value?.stringValue ?? ""

    if (field.startsWith("message.tool_calls.")) {
      const tcRest = field.slice("message.tool_calls.".length)
      const tcDotIdx = tcRest.indexOf(".")
      if (tcDotIdx === -1) continue
      const tcIndex = Number.parseInt(tcRest.slice(0, tcDotIdx), 10)
      if (Number.isNaN(tcIndex)) continue
      const tcField = tcRest.slice(tcDotIdx + 1)

      let msgToolCalls = toolCalls.get(index)
      if (!msgToolCalls) {
        msgToolCalls = new Map()
        toolCalls.set(index, msgToolCalls)
      }
      let tc = msgToolCalls.get(tcIndex)
      if (!tc) {
        tc = { name: "", arguments: "" }
        msgToolCalls.set(tcIndex, tc)
      }
      if (tcField === "tool_call.function.name") tc.name = value
      else if (tcField === "tool_call.function.arguments") tc.arguments = value
    } else if (field.startsWith("message.")) {
      const msgField = field.slice("message.".length)
      let fields = byIndex.get(index)
      if (!fields) {
        fields = new Map()
        byIndex.set(index, fields)
      }
      fields.set(msgField, value)
    }
  }

  const maxIndex = Math.max(...byIndex.keys(), ...toolCalls.keys(), -1)
  if (maxIndex === -1) return []

  const messages: ReassembledMessage[] = []
  for (let i = 0; i <= maxIndex; i++) {
    const fields = byIndex.get(i)
    const role = fields?.get("role") ?? "user"
    const content = fields?.get("content") ?? ""

    const msg: ReassembledMessage = { role, content }

    const msgToolCalls = toolCalls.get(i)
    if (msgToolCalls && msgToolCalls.size > 0) {
      const sorted = [...msgToolCalls.entries()].sort(([a], [b]) => a - b)
      msg.tool_calls = sorted.map(([j, tc]) => ({
        id: `call_${i}_${j}`,
        type: "function" as const,
        function: { name: tc.name, arguments: tc.arguments },
      }))
    }

    messages.push(msg)
  }

  return messages
}

function reassembleToolDefinitions(attrs: readonly OtlpKeyValue[]): string {
  const tools = new Map<number, string>()

  for (const attr of attrs) {
    if (!attr.key.startsWith(TOOLS_PREFIX)) continue
    const rest = attr.key.slice(TOOLS_PREFIX.length)
    const dotIdx = rest.indexOf(".")
    if (dotIdx === -1) continue
    const index = Number.parseInt(rest.slice(0, dotIdx), 10)
    if (Number.isNaN(index)) continue
    const field = rest.slice(dotIdx + 1)
    if (field === "tool.json_schema" && attr.value?.stringValue) {
      tools.set(index, attr.value.stringValue)
    }
  }

  if (tools.size === 0) return ""

  const sorted = [...tools.entries()].sort(([a], [b]) => a - b)
  const parsed = sorted.map(([, json]) => {
    try {
      return JSON.parse(json)
    } catch {
      return json
    }
  })
  return JSON.stringify(parsed)
}

function translateReassembled(messages: ReassembledMessage[], direction: "input" | "output"): GenAIMessage[] {
  if (messages.length === 0) return []
  const result = safeTranslate(messages, { direction })
  if (result.error) return []
  return result.messages as GenAIMessage[]
}

export function parseOpenInference(attrs: readonly OtlpKeyValue[]): ParsedContent {
  const inputRaw = reassembleMessages(attrs, INPUT_PREFIX)
  const outputRaw = reassembleMessages(attrs, OUTPUT_PREFIX)

  let inputMessages: GenAIMessage[] = []
  let systemInstructions = ""

  if (inputRaw.length > 0) {
    const result = safeTranslate(inputRaw, { direction: "input" })
    if (!result.error) {
      inputMessages = result.messages as GenAIMessage[]
      if (result.system) {
        systemInstructions = JSON.stringify(result.system)
      }
    }
  }

  const outputMessages = translateReassembled(outputRaw, "output")
  const toolDefinitions = reassembleToolDefinitions(attrs)

  return { inputMessages, outputMessages, systemInstructions, toolDefinitions }
}
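A self-contained miniature of the grouping-by-index pass (without the tool-call handling or rosetta-ai translation; the `Attr` type is stubbed) shows how the flattened attributes round-trip back into ordered messages even when attribute order is scrambled:

```typescript
// Toy reassembly of OpenInference's flattened indexed attributes into
// {role, content} messages, mirroring the grouping-by-index pass above.
type Attr = { key: string; value: string }

function reassemble(attrs: Attr[], prefix: string): { role: string; content: string }[] {
  const byIndex = new Map<number, Map<string, string>>()
  for (const a of attrs) {
    if (!a.key.startsWith(prefix)) continue
    const rest = a.key.slice(prefix.length)
    const dot = rest.indexOf(".")
    if (dot === -1) continue
    const i = Number.parseInt(rest.slice(0, dot), 10)
    if (Number.isNaN(i)) continue
    const field = rest.slice(dot + 1)
    if (!byIndex.has(i)) byIndex.set(i, new Map())
    byIndex.get(i)!.set(field, a.value)
  }
  // Sort by message index so output order is deterministic.
  return [...byIndex.entries()]
    .sort(([a], [b]) => a - b)
    .map(([, f]) => ({
      role: f.get("message.role") ?? "user",
      content: f.get("message.content") ?? "",
    }))
}

// Attributes arrive in arbitrary order; index 1 appears before index 0 here.
const attrs: Attr[] = [
  { key: "llm.input_messages.1.message.role", value: "assistant" },
  { key: "llm.input_messages.1.message.content", value: "Hi!" },
  { key: "llm.input_messages.0.message.role", value: "user" },
  { key: "llm.input_messages.0.message.content", value: "Hello" },
]
console.log(reassemble(attrs, "llm.input_messages.")) // user message first, assistant second
```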
