Skip to content

Commit 7248b9f

Browse files
authored
Update trace normalization to ChatML content blocks (#283)
1 parent 932e1a1 commit 7248b9f

File tree

4 files changed

+85
-15
lines changed

4 files changed

+85
-15
lines changed

docs/assets/recipes/mcp_and_tooluse/pdf_qa.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,10 +391,31 @@ def _truncate(text: str, max_length: int = 100) -> str:
391391
return text[: max_length - 3] + "..."
392392

393393

394+
def _summarize_content(content: object) -> str:
395+
"""Summarize ChatML-style content blocks for display."""
396+
if isinstance(content, list):
397+
parts: list[str] = []
398+
for block in content:
399+
if isinstance(block, dict):
400+
block_type = block.get("type", "block")
401+
if block_type == "text":
402+
text = str(block.get("text", ""))
403+
if text:
404+
parts.append(text)
405+
elif block_type == "image_url":
406+
parts.append("[image]")
407+
else:
408+
parts.append(f"[{block_type}]")
409+
else:
410+
parts.append(str(block))
411+
return " ".join(parts)
412+
return str(content)
413+
414+
394415
def _format_trace_step(msg: dict[str, object]) -> str:
395416
"""Format a single trace message as a concise one-liner."""
396417
role = msg.get("role", "unknown")
397-
content = msg.get("content", "")
418+
content = _summarize_content(msg.get("content", ""))
398419
reasoning = msg.get("reasoning_content")
399420
tool_calls = msg.get("tool_calls")
400421
tool_call_id = msg.get("tool_call_id")

docs/concepts/traces.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,10 @@ Each trace is a `list[dict]` where each dict represents a message in the convers
6868

6969
| Role | Fields | Description |
7070
|------|--------|-------------|
71-
| `system` | `role`, `content` | System prompt setting model behavior |
72-
| `user` | `role`, `content` | User prompt (rendered from template) |
73-
| `assistant` | `role`, `content`, `tool_calls`, `reasoning_content` | Model response; `content` may be `None` if only requesting tools |
74-
| `tool` | `role`, `content`, `tool_call_id` | Tool execution result; `tool_call_id` links to the request |
71+
| `system` | `role`, `content` | System prompt setting model behavior. `content` is a list of blocks in ChatML format. |
72+
| `user` | `role`, `content` | User prompt (rendered from template). `content` is a list of blocks (text + multimodal). |
73+
| `assistant` | `role`, `content`, `tool_calls`, `reasoning_content` | Model response; `content` may be empty if only requesting tools. |
74+
| `tool` | `role`, `content`, `tool_call_id` | Tool execution result; `tool_call_id` links to the request. |
7575

7676
### Example Trace (Simple Generation)
7777

@@ -82,17 +82,17 @@ A basic trace without tool use:
8282
# System message (if configured)
8383
{
8484
"role": "system",
85-
"content": "You are a helpful assistant that provides clear, concise answers."
85+
"content": [{"type": "text", "text": "You are a helpful assistant that provides clear, concise answers."}]
8686
},
8787
# User message (the rendered prompt)
8888
{
8989
"role": "user",
90-
"content": "What is the capital of France?"
90+
"content": [{"type": "text", "text": "What is the capital of France?"}]
9191
},
9292
# Final assistant response
9393
{
9494
"role": "assistant",
95-
"content": "The capital of France is Paris.",
95+
"content": [{"type": "text", "text": "The capital of France is Paris."}],
9696
"reasoning_content": None # May contain reasoning if model supports it
9797
}
9898
]
@@ -107,17 +107,17 @@ When tool use is enabled, traces capture the full conversation including tool ca
107107
# System message
108108
{
109109
"role": "system",
110-
"content": "You must call tools before answering. Only use tool results."
110+
"content": [{"type": "text", "text": "You must call tools before answering. Only use tool results."}]
111111
},
112112
# User message (the rendered prompt)
113113
{
114114
"role": "user",
115-
"content": "What documents are in the knowledge base about machine learning?"
115+
"content": [{"type": "text", "text": "What documents are in the knowledge base about machine learning?"}]
116116
},
117117
# Assistant requests tool calls
118118
{
119119
"role": "assistant",
120-
"content": None,
120+
"content": [{"type": "text", "text": ""}],
121121
"tool_calls": [
122122
{
123123
"id": "call_abc123",
@@ -132,13 +132,13 @@ When tool use is enabled, traces capture the full conversation including tool ca
132132
# Tool response (linked by tool_call_id)
133133
{
134134
"role": "tool",
135-
"content": "Found 3 documents: intro_ml.pdf, neural_networks.pdf, transformers.pdf",
135+
"content": [{"type": "text", "text": "Found 3 documents: intro_ml.pdf, neural_networks.pdf, transformers.pdf"}],
136136
"tool_call_id": "call_abc123"
137137
},
138138
# Final assistant response
139139
{
140140
"role": "assistant",
141-
"content": "The knowledge base contains three documents about machine learning: ..."
141+
"content": [{"type": "text", "text": "The knowledge base contains three documents about machine learning: ..."}]
142142
}
143143
]
144144
```

packages/data-designer-engine/src/data_designer/engine/models/utils.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,14 @@ class ChatMessage:
3636
def to_dict(self) -> dict[str, Any]:
3737
"""Convert the message to a dictionary format for API calls.
3838
39+
Content is normalized to a list of ChatML-style blocks to keep a
40+
consistent schema across traces and API payloads.
41+
3942
Returns:
4043
A dictionary containing the message fields. Only includes non-empty
4144
optional fields to keep the output clean.
4245
"""
43-
result: dict[str, Any] = {"role": self.role, "content": self.content}
46+
result: dict[str, Any] = {"role": self.role, "content": _normalize_content_blocks(self.content)}
4447
if self.reasoning_content:
4548
result["reasoning_content"] = self.reasoning_content
4649
if self.tool_calls:
@@ -99,3 +102,27 @@ def prompt_to_messages(
99102
if system_prompt:
100103
return [ChatMessage.as_system(system_prompt), ChatMessage.as_user(user_content)]
101104
return [ChatMessage.as_user(user_content)]
105+
106+
107+
def _normalize_content_blocks(content: Any) -> list[dict[str, Any]]:
    """Coerce *content* into a list of ChatML-style content blocks.

    ``None`` becomes an empty list, an existing list is normalized
    element-wise, and any other value is wrapped in a single text block.
    """
    if content is None:
        return []
    if not isinstance(content, list):
        return [_text_block(content)]
    return [_normalize_content_block(item) for item in content]
113+
114+
115+
def _normalize_content_block(block: Any) -> dict[str, Any]:
    """Pass through dicts that already carry a ``type`` key; wrap anything else
    in a text block."""
    is_chatml_block = isinstance(block, dict) and "type" in block
    return block if is_chatml_block else _text_block(block)
119+
120+
121+
def _text_block(value: Any) -> dict[str, Any]:
122+
if value is None:
123+
text_value = ""
124+
elif isinstance(value, str):
125+
text_value = value
126+
else:
127+
text_value = str(value)
128+
return {"type": "text", "text": text_value}

packages/data-designer-engine/tests/engine/column_generators/generators/test_llm_completion_generators.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,35 @@ def test_generate_method() -> None:
100100
result = generator.generate(data)
101101

102102
assert result["test_column"] == {"result": "test_output"}
103-
assert result["test_column" + TRACE_COLUMN_POSTFIX] == [{"role": "user", "content": "x"}]
103+
assert result["test_column" + TRACE_COLUMN_POSTFIX] == [
104+
{"role": "user", "content": [{"type": "text", "text": "x"}]}
105+
]
104106

105107
# Test multi-modal context is None
106108
call_args = mock_model.generate.call_args
107109
assert call_args[1]["multi_modal_context"] is None
108110

109111

112+
def test_generate_method_normalizes_trace_content_blocks() -> None:
    """Multi-modal ChatML content blocks should survive trace recording untouched."""
    generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
    generator.resource_provider.run_config.debug_override_save_all_column_traces = True

    mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
    mock_response_recipe.serialize_output.return_value = {"result": "test_output"}

    content_blocks = [
        {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}},
        {"type": "text", "text": "describe the image"},
    ]
    mock_model.generate.return_value = (
        {"result": "test_output"},
        [ChatMessage.as_user(content_blocks)],
    )

    result = generator.generate({"input": "test_input"})
    trace = result["test_column" + TRACE_COLUMN_POSTFIX]

    assert trace[0]["role"] == "user"
    # Already-normalized block lists must pass through unchanged.
    assert trace[0]["content"] == content_blocks
130+
131+
110132
@patch("data_designer.engine.column_generators.generators.base.logger", autospec=True)
111133
def test_log_pre_generation(mock_logger: Mock) -> None:
112134
generator, mock_resource_provider, _, mock_model_config, mock_inference_params, _, _ = (

0 commit comments

Comments
 (0)