
Commit f3faef7

Add headers to context (#26)
* Add headers to context
* Remove traces tests
* Add RequestContext type to examples
1 parent 2c6a401 commit f3faef7

File tree

7 files changed: +148 −35 lines
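In short: `RequestContext` — the object passed as the second argument to every `@entrypoint` function — now carries the raw request headers alongside `session_id`, so agent code can read arbitrary headers forwarded by callers. A minimal sketch of the new surface; the `x-request-id` header is a hypothetical example, not something the runtime sets:

```python
from gradient_adk import entrypoint, RequestContext


@entrypoint
async def main(input: dict, context: RequestContext):
    # session_id was already available; headers is new in this commit.
    # Header keys arrive lower-cased (see the integration test below).
    request_id = context.headers.get("x-request-id")  # hypothetical header
    return {"query": input.get("query"), "request_id": request_id}
```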

README.md

Lines changed: 21 additions & 17 deletions
@@ -11,12 +11,14 @@ The Gradient™ Agent Development Kit (ADK) is a comprehensive toolkit for build
 ## Features
 
 ### 🛠️ CLI (Command Line Interface)
+
 - **Local Development**: Run and test your agents locally with hot-reload support
 - **Seamless Deployment**: Deploy agents to DigitalOcean with a single command
 - **Evaluation Framework**: Run comprehensive evaluations with custom metrics and datasets
 - **Observability**: View traces and runtime logs directly from the CLI
 
 ### 🚀 Runtime Environment
+
 - **Framework Agnostic**: Works with any Python framework for building AI agents
 - **Automatic LangGraph Integration**: Built-in trace capture for LangGraph nodes and state transitions
 - **Custom Decorators**: Capture traces from any framework using `@trace` decorators
@@ -40,6 +42,7 @@ gradient agent init
 ```
 
 This creates a new agent project with:
+
 - `main.py` - Agent entrypoint with example code
 - `agents/` - Directory for agent implementations
 - `tools/` - Directory for custom tools
@@ -77,7 +80,7 @@ gradient agent evaluate \
 LangGraph agents automatically capture traces for all nodes and state transitions:
 
 ```python
-from gradient_adk import entrypoint
+from gradient_adk import entrypoint, RequestContext
 from langgraph.graph import StateGraph
 from typing import TypedDict
@@ -92,11 +95,11 @@ async def llm_call(state: State) -> State:
     return state
 
 @entrypoint
-async def main(input: dict, context: dict):
+async def main(input: dict, context: RequestContext):
     graph = StateGraph(State)
     graph.add_node("llm_call", llm_call)
     graph.set_entry_point("llm_call")
-
+
     graph = graph.compile()
     result = await graph.ainvoke({"input": input.get("query")})
     return result["output"]
@@ -107,7 +110,7 @@ async def main(input: dict, context: dict):
 For frameworks beyond LangGraph, use trace decorators to capture custom spans:
 
 ```python
-from gradient_adk import entrypoint, trace_llm, trace_tool, trace_retriever
+from gradient_adk import entrypoint, trace_llm, trace_tool, trace_retriever, RequestContext
 
 @trace_retriever("vector_search")
 async def search_knowledge_base(query: str):
@@ -127,7 +130,7 @@ async def calculate(x: int, y: int):
     return x + y
 
 @entrypoint
-async def main(input: dict, context: dict):
+async def main(input: dict, context: RequestContext):
     docs = await search_knowledge_base(input["query"])
     result = await calculate(5, 10)
     response = await generate_response(f"Context: {docs}")
@@ -139,10 +142,10 @@ async def main(input: dict, context: dict):
 The runtime supports streaming responses with automatic trace capture:
 
 ```python
-from gradient_adk import entrypoint
+from gradient_adk import entrypoint, RequestContext
 
 @entrypoint
-async def main(input: dict, context: dict):
+async def main(input: dict, context: RequestContext):
     # Stream text chunks
     async def generate_chunks():
         async for chunk in llm.stream(input["query"]):
@@ -190,12 +193,12 @@ gradient agent evaluate \
   --success-threshold 80.0
 ```
 
-
 ## Tracing
 
 The ADK provides comprehensive tracing capabilities to capture and analyze your agent's execution. You can use **decorators** for wrapping functions or **programmatic functions** for manual span creation.
 
 ### What Gets Traced Automatically
+
 - **LangGraph Nodes**: All node executions, state transitions, and edges (including LLM calls, tool calls, and DigitalOcean Knowledge Base calls)
 - **HTTP Requests**: Request/response payloads for LLM API calls
 - **Errors**: Full exception details and stack traces
@@ -206,7 +209,7 @@ The ADK provides comprehensive tracing capabilities to capture and analyze your
 Use decorators to automatically trace function executions:
 
 ```python
-from gradient_adk import entrypoint, trace_llm, trace_tool, trace_retriever
+from gradient_adk import entrypoint, trace_llm, trace_tool, trace_retriever, RequestContext
 
 @trace_llm("model_call")
 async def call_model(prompt: str):
@@ -226,7 +229,7 @@ async def search_docs(query: str):
     return results
 
 @entrypoint
-async def main(input: dict, context: dict):
+async def main(input: dict, context: RequestContext):
     docs = await search_docs(input["query"])
     result = await calculate(5, 10)
     response = await call_model(f"Context: {docs}")
@@ -238,10 +241,10 @@ async def main(input: dict, context: dict):
 For more control over span creation, use the programmatic functions. These are useful when you can't use decorators or need to add spans for code you don't control:
 
 ```python
-from gradient_adk import entrypoint, add_llm_span, add_tool_span, add_agent_span
+from gradient_adk import entrypoint, add_llm_span, add_tool_span, add_agent_span, RequestContext
 
 @entrypoint
-async def main(input: dict, context: dict):
+async def main(input: dict, context: RequestContext):
     # Add an LLM span with detailed metadata
     response = await external_llm_call(input["query"])
     add_llm_span(
@@ -279,17 +282,18 @@ async def main(input: dict, context: dict):
 
 #### Available Span Functions
 
-| Function | Description | Key Optional Fields |
-|----------|-------------|---------------------|
-| `add_llm_span()` | Record LLM/model calls | `model`, `temperature`, `num_input_tokens`, `num_output_tokens`, `total_tokens`, `tools`, `time_to_first_token_ns` |
-| `add_tool_span()` | Record tool/function executions | `tool_call_id` |
-| `add_agent_span()` | Record agent/sub-agent executions ||
+| Function           | Description                       | Key Optional Fields                                                                                                 |
+| ------------------ | --------------------------------- | ------------------------------------------------------------------------------------------------------------------- |
+| `add_llm_span()`   | Record LLM/model calls            | `model`, `temperature`, `num_input_tokens`, `num_output_tokens`, `total_tokens`, `tools`, `time_to_first_token_ns`  |
+| `add_tool_span()`  | Record tool/function executions   | `tool_call_id`                                                                                                      |
+| `add_agent_span()` | Record agent/sub-agent executions |                                                                                                                     |
 
 **Common optional fields for all span functions:** `duration_ns`, `metadata`, `tags`, `status_code`
 
 ### Viewing Traces
 
 Traces are:
+
 - Automatically sent to DigitalOcean's Gradient Platform
 - Available in real-time through the web console
 - Accessible via `gradient agent traces` command
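The span-functions table in the hunk above names the fields without showing a call. A hedged sketch of programmatic span creation; the keyword names come from that table, while the `output` argument and all values are illustrative assumptions, not confirmed by this diff:

```python
from gradient_adk import add_llm_span

add_llm_span(
    name="external_model_call",
    input={"prompt": "Summarize the release notes"},
    output={"text": "Headers are now exposed on RequestContext."},  # assumed kwarg
    model="llama-3.3-70b-instruct",  # hypothetical model name
    temperature=0.2,
    num_input_tokens=42,
    num_output_tokens=12,
    total_tokens=54,
    tags=["release-notes"],  # one of the common optional fields
)
```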

gradient_adk/cli/templates/main.py.template

Lines changed: 3 additions & 3 deletions
@@ -6,7 +6,7 @@ import os
 from typing import Dict, TypedDict
 
 from gradient import AsyncGradient
-from gradient_adk import entrypoint
+from gradient_adk import entrypoint, RequestContext
 from langgraph.graph import StateGraph
 
 
@@ -44,7 +44,7 @@ async def llm_call(state: State) -> State:
 
 
 @entrypoint
-async def main(input: Dict, context: Dict):
+async def main(input: Dict, context: RequestContext):
     """Entrypoint"""
 
     # Setup the graph
@@ -61,4 +61,4 @@ async def main(input: Dict, context: Dict):
 
     # Invoke the app
     result = await app.ainvoke(initial_state)
-    return result["output"]
+    return result["output"]

gradient_adk/decorator.py

Lines changed: 16 additions & 6 deletions
@@ -8,7 +8,7 @@
 from __future__ import annotations
 import inspect
 import json
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Callable, Optional, Any, Dict, List
 
 
@@ -18,9 +18,18 @@ class RequestContext:
 
     Attributes:
         session_id: The session ID for the request, if provided.
+        headers: Raw request headers as a dictionary.
     """
 
     session_id: Optional[str] = None
+    headers: Dict[str, str] = field(default_factory=dict)
+
+
+def _build_request_context(req: Request) -> RequestContext:
+    return RequestContext(
+        session_id=req.headers.get("session-id"),
+        headers=dict(req.headers.items()),
+    )
 
 
 from fastapi import FastAPI, HTTPException, Request
@@ -157,9 +166,8 @@ async def run(req: Request):
 
     is_evaluation = "evaluation-id" in req.headers
 
-    # Extract session ID from headers
-    session_id = req.headers.get("session-id")
-    context = RequestContext(session_id=session_id)
+    context = _build_request_context(req)
+    session_id = context.session_id
 
     # Initialize tracker
     tr = None
@@ -230,7 +238,9 @@ async def run(req: Request):
                 await tr._submit()
             except Exception:
                 pass
-            logger.error("Error in streaming evaluation", error=str(e), exc_info=True)
+            logger.error(
+                "Error in streaming evaluation", error=str(e), exc_info=True
+            )
             raise HTTPException(status_code=500, detail="Internal server error")
@@ -301,4 +311,4 @@ async def health():
 
 def run_server(fastapi_app: FastAPI, host: str = "0.0.0.0", port: int = 8080, **kwargs):
     """Run the FastAPI server with uvicorn."""
-    uvicorn.run(fastapi_app, host=host, port=port, **kwargs)
+    uvicorn.run(fastapi_app, host=host, port=port, **kwargs)
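One behavioral detail of `_build_request_context` worth calling out: Starlette, which backs FastAPI's `Request.headers`, stores header names lower-cased, so the dict that lands in `RequestContext.headers` has lowercase keys regardless of how the client spelled them. That is why the integration test further down lower-cases keys before asserting. A standalone sketch mirroring the dataclass from this file:

```python
from dataclasses import dataclass, field
from typing import Dict, Optional

from starlette.datastructures import Headers


@dataclass
class RequestContext:  # mirrors the dataclass in gradient_adk/decorator.py
    session_id: Optional[str] = None
    headers: Dict[str, str] = field(default_factory=dict)


raw = Headers({"Session-Id": "abc-123", "X-Request-Id": "req-789"})
ctx = RequestContext(
    session_id=raw.get("session-id"),  # lookups are case-insensitive
    headers=dict(raw.items()),         # keys come back lower-cased
)
assert ctx.session_id == "abc-123"
assert ctx.headers["x-request-id"] == "req-789"
```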

gradient_adk/tracing.py

Lines changed: 25 additions & 9 deletions
@@ -4,7 +4,7 @@
 with the same kind of tracing automatically provided for some other frameworks.
 
 Example usage:
-    from gradient_adk import entrypoint, trace_llm, trace_tool, trace_retriever
+    from gradient_adk import entrypoint, trace_llm, trace_tool, trace_retriever, RequestContext
 
     @trace_retriever("fetch_data")
     async def fetch_data(query: str) -> dict:
@@ -22,7 +22,7 @@ async def calculate(x: int, y: int) -> int:
         return x + y
 
     @entrypoint
-    async def my_agent(input: dict, context: dict):
+    async def my_agent(input: dict, context: RequestContext):
         data = await fetch_data(input["query"])
         result = await calculate(5, 10)
         response = await call_model(data["prompt"])
@@ -240,7 +240,9 @@ async def async_gen_wrapper(*args, **kwargs):
                 if span_type is None and has_network_hits:
                     meta["is_llm_call"] = True
                     # Get captured request/response payloads for LLM metadata extraction
-                    captured = interceptor.get_captured_requests_since(network_token)
+                    captured = interceptor.get_captured_requests_since(
+                        network_token
+                    )
                     if captured:
                         call = captured[0]
                         if call.request_payload:
@@ -302,7 +304,9 @@ async def async_wrapper(*args, **kwargs):
                 if span_type is None and has_network_hits:
                     meta["is_llm_call"] = True
                     # Get captured request/response payloads for LLM metadata extraction
-                    captured = interceptor.get_captured_requests_since(network_token)
+                    captured = interceptor.get_captured_requests_since(
+                        network_token
+                    )
                     if captured:
                         call = captured[0]
                         if call.request_payload:
@@ -401,7 +405,9 @@ def sync_wrapper(*args, **kwargs):
                 if span_type is None and has_network_hits:
                     meta["is_llm_call"] = True
                     # Get captured request/response payloads for LLM metadata extraction
-                    captured = interceptor.get_captured_requests_since(network_token)
+                    captured = interceptor.get_captured_requests_since(
+                        network_token
+                    )
                     if captured:
                         call = captured[0]
                         if call.request_payload:
@@ -536,7 +542,9 @@ def add_llm_span(
     span = _create_span(name, _freeze(input))
     meta = _ensure_meta(span)
     meta["is_llm_call"] = True
-    meta["is_programmatic"] = True  # Mark as programmatic to skip auto-duration calculation
+    meta["is_programmatic"] = (
+        True  # Mark as programmatic to skip auto-duration calculation
+    )
 
     if model is not None:
         meta["model_name"] = model
@@ -548,7 +556,11 @@ def add_llm_span(
         meta["llm_request_payload"]["temperature"] = temperature
     if time_to_first_token_ns is not None:
         meta["time_to_first_token_ns"] = time_to_first_token_ns
-    if num_input_tokens is not None or num_output_tokens is not None or total_tokens is not None:
+    if (
+        num_input_tokens is not None
+        or num_output_tokens is not None
+        or total_tokens is not None
+    ):
         if "llm_response_payload" not in meta:
             meta["llm_response_payload"] = {}
         meta["llm_response_payload"]["usage"] = {
@@ -608,7 +620,9 @@ def add_tool_span(
     span = _create_span(name, _freeze(input))
     meta = _ensure_meta(span)
     meta["is_tool_call"] = True
-    meta["is_programmatic"] = True  # Mark as programmatic to skip auto-duration calculation
+    meta["is_programmatic"] = (
+        True  # Mark as programmatic to skip auto-duration calculation
+    )
 
     if tool_call_id is not None:
         meta["tool_call_id"] = tool_call_id
@@ -662,7 +676,9 @@ def add_agent_span(
     span = _create_span(name, _freeze(input))
     meta = _ensure_meta(span)
    meta["is_agent_call"] = True
-    meta["is_programmatic"] = True  # Mark as programmatic to skip auto-duration calculation
+    meta["is_programmatic"] = (
+        True  # Mark as programmatic to skip auto-duration calculation
+    )
 
     if tags is not None:
         meta["tags"] = tags

integration_tests/example_agents/echo_agent/main.py

Lines changed: 1 addition & 0 deletions
@@ -14,4 +14,5 @@ async def main(query, context: RequestContext):
         "echo": prompt,
         "received": query,
         "session_id": context.session_id if context else None,
+        "headers": context.headers if context else {},
     }
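The integration test below exercises exactly this path; for a request carrying `Session-Id` and `X-Custom` headers, the echo agent's response body would look roughly like this sketch (extra transport headers such as `host` and `content-type` depend on the client and server):

```python
# Hypothetical response shape; header keys are lower-cased by Starlette.
expected = {
    "echo": "...",  # whatever prompt the agent echoes back
    "received": {"prompt": "Hello headers"},
    "session_id": "session-headers-123",
    "headers": {
        "session-id": "session-headers-123",
        "x-custom": "custom-value",
        # plus transport headers like host, content-length, etc.
    },
}
```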

integration_tests/run/test_adk_agents_run.py

Lines changed: 49 additions & 0 deletions
@@ -526,6 +526,55 @@ def test_agent_run_session_id_header_passthrough(self, setup_agent_in_temp):
         finally:
             cleanup_process(process)
 
+    @pytest.mark.cli
+    def test_agent_run_headers_passthrough(self, setup_agent_in_temp):
+        """
+        Test that arbitrary headers are passed through to RequestContext.headers.
+        """
+        logger = logging.getLogger(__name__)
+        temp_dir = setup_agent_in_temp
+        port = find_free_port()
+        process = None
+
+        try:
+            logger.info(f"Starting agent on port {port} in {temp_dir}")
+
+            process = subprocess.Popen(
+                [
+                    "gradient",
+                    "agent",
+                    "run",
+                    "--port",
+                    str(port),
+                    "--no-dev",
+                ],
+                cwd=temp_dir,
+                start_new_session=True,
+            )
+
+            server_ready = wait_for_server(port, timeout=30)
+            assert server_ready, "Server did not start within timeout"
+
+            headers = {
+                "Session-Id": "session-headers-123",
+                "X-Request-Id": "req-789",
+                "X-Custom": "custom-value",
+            }
+            response = requests.post(
+                f"http://localhost:{port}/run",
+                json={"prompt": "Hello headers"},
+                headers=headers,
+                timeout=10,
+            )
+            assert response.status_code == 200
+            data = response.json()
+            lowered = {k.lower(): v for k, v in data["headers"].items()}
+            assert lowered["session-id"] == "session-headers-123"
+            assert lowered["x-request-id"] == "req-789"
+            assert lowered["x-custom"] == "custom-value"
+        finally:
+            cleanup_process(process)
+
     @pytest.mark.cli
     def test_streaming_agent_without_evaluation_id_streams_response(
         self, setup_streaming_agent_in_temp
