Commit d1dddb3

feat(platform): export OTLP traces (#870)
## Description

- Replace manual platform usage-event posting with OpenTelemetry SDK OTLP/HTTP trace export for platform completions (including MZAI), while keeping payloads prompt/response-free.
- Add `session_label` passthrough in the SDK/API and export it as trace metadata (`anyllm.user_session_label`) alongside platform-generated trace session labels.
- Add scoped forwarding and sanitization guards for active trace exports (token-scoped forwarding processor, content-attribute redaction, secure endpoint enforcement), and expand unit coverage for edge/error paths.
- Update platform docs to describe trace analytics/redaction behavior and expected session-label semantics.

## PR Type

- 🆕 New Feature
- 📚 Documentation

## Relevant issues

<!-- e.g. "Fixes #123" -->

## Checklist

- [x] I understand the code I am submitting.
- [x] I have added unit tests that prove my fix/feature works.
- [x] I have run this code locally and verified it fixes the issue.
- [x] New and existing tests pass locally.
- [x] Documentation was updated where necessary.
- [x] I have read and followed the [contribution guidelines](https://github.com/mozilla-ai/any-llm/blob/main/CONTRIBUTING.md).
- [x] **AI Usage:**
  - [ ] No AI was used.
  - [x] AI was used for drafting/refactoring.
  - [ ] This is fully AI-generated.

## AI Usage Information

- AI Model used: GPT-5
- AI Developer Tool used: Codex (OpenAI)
- Any other info you'd like to share:
- [x] I am an AI Agent filling out this form (check box if true)
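The "secure endpoint enforcement" guard mentioned above could look roughly like the following sketch. The helper name is hypothetical and this is not the PR's actual code; it only illustrates the idea of refusing to wire an OTLP exporter to a non-HTTPS collector.

```python
from urllib.parse import urlparse


def ensure_secure_otlp_endpoint(endpoint: str) -> str:
    """Allow only HTTPS OTLP endpoints before wiring an exporter.

    Hypothetical helper sketching the 'secure endpoint enforcement'
    guard described in this PR; not the actual implementation.
    """
    if urlparse(endpoint).scheme != "https":
        raise ValueError(f"insecure OTLP endpoint rejected: {endpoint!r}")
    return endpoint
```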
1 parent fcf925a commit d1dddb3

File tree: 10 files changed, +1866 additions, −463 deletions

docs/src/content/docs/platform/overview.md

Lines changed: 10 additions & 7 deletions
````diff
@@ -22,19 +22,19 @@ The managed platform solves these problems:
 - **Secure Key Vault**: Your provider API keys are encrypted client-side before storage—we never see your raw keys
 - **Single Virtual Key**: One `ANY_LLM_KEY` works across all providers
-- **Usage Analytics**: Track tokens, costs, and performance metrics without logging prompts or responses
+- **Trace Analytics**: Track tokens, costs, and performance metrics without logging prompts or responses
 - **Zero Infrastructure**: No servers to deploy, no databases to manage
 
 ## How it works
 
-The managed platform acts as a secure credential manager and usage tracker. Here's the flow:
+The managed platform acts as a secure credential manager and trace-based usage tracker. Here's the flow:
 
 1. **You add provider keys** to the platform dashboard (keys are encrypted in your browser before upload)
 2. **You get a virtual key** (`ANY_LLM_KEY`) that represents your project
 3. **Your application** uses the `PlatformProvider` with your virtual key
 4. **The SDK** authenticates with the platform, retrieves and decrypts your provider key client-side
 5. **Your request** goes directly to the LLM provider (OpenAI, Anthropic, etc.)
-6. **Usage metadata** (tokens, model, latency) is reported back—never your prompts or responses
+6. **OpenTelemetry spans produced during each platform-provider call** are reported back for analytics, with prompt/response content attributes redacted before export
 
 ```
 ┌─────────────────────────────────────────────────────────────────────────┐
@@ -52,15 +52,15 @@ The managed platform acts as a secure credential manager and usage tracker. Here
 │  2. Receive encrypted provider key                                      │
 │  3. Decrypt provider key locally (client-side)                          │
 │  4. Make request directly to provider                                   │
-│  5. Report usage metadata (tokens, latency) to platform                 │
+│  5. Report in-scope OTel spans (with content redaction) to platform     │
 └────────────────┬─────────────────────────────────────┬──────────────────┘
                  │                                     │
                  ▼                                     ▼
 ┌─────────────────────────────┐      ┌────────────────────────────────────┐
 │  any-llm Managed Platform   │      │  LLM Provider                      │
 │                             │      │  (OpenAI, Anthropic, etc.)         │
 │  • Encrypted key storage    │      │                                    │
-│  • Usage tracking           │      │  Your prompts/responses go         │
+│  • Trace tracking           │      │  Your prompts/responses go         │
 │  • Cost analytics           │      │  directly here—never through       │
 │  • Performance metrics      │      │  our platform                      │
 └─────────────────────────────┘      └────────────────────────────────────┘
@@ -77,23 +77,26 @@ Your provider API keys are encrypted in your browser using XChaCha20-Poly1305 be
 - You maintain full control over your credentials
 
-### Privacy-First Usage Tracking
+### Privacy-First Trace Tracking
 
-The platform tracks usage metadata to provide cost and performance insights:
+The platform tracks OpenTelemetry span data generated during each platform-provider request to provide cost and performance insights:
 
 **What we track for you:**
 
 - Token counts (input and output)
 - Model name and provider
 - Request timestamps
 - Performance metrics (latency, throughput)
+- Additional OpenTelemetry span attributes/events emitted in the same request scope
 
 **What we never track:**
 
 - Your prompts
 - Model responses
 - Any content from your conversations
 
+Prompt/response payload attributes are removed from traces before export.
+
 ### Project Organization
 
 Organize your usage by project, team, or environment:
````
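The content-attribute redaction this doc change describes could be sketched as below. The attribute prefixes are an assumption based on the OpenTelemetry GenAI semantic conventions; the platform's actual redaction key list may differ.

```python
# Assumed prefixes for prompt/response payload attributes (per the GenAI
# semantic conventions); the platform's real key list may differ.
CONTENT_ATTR_PREFIXES = ("gen_ai.prompt", "gen_ai.completion")


def redact_content_attributes(attributes: dict[str, object]) -> dict[str, object]:
    """Drop prompt/response content attributes while keeping usage metadata."""
    return {
        key: value
        for key, value in attributes.items()
        if not key.startswith(CONTENT_ATTR_PREFIXES)
    }
```

Token counts, model names, and session labels pass through untouched; only payload-bearing keys are stripped before export.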

pyproject.toml

Lines changed: 2 additions & 0 deletions
```diff
@@ -25,6 +25,8 @@ all = [
 
 platform = [
     "any-llm-platform-client>=0.3.0",
+    "opentelemetry-sdk>=1.40.0",
+    "opentelemetry-exporter-otlp-proto-http>=1.40.0",
 ]
 
 perplexity = []
```

src/any_llm/any_llm.py

Lines changed: 28 additions & 5 deletions
```diff
@@ -5,7 +5,7 @@
 import os
 import warnings
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar, overload
+from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar, cast, overload
 
 from openresponses_types import ResponseResource
 from pydantic import BaseModel
@@ -23,15 +23,15 @@
 from any_llm.types.messages import MessageResponse, MessagesParams, MessageStreamEvent, MessageUsage
 from any_llm.types.provider import PlatformKey, ProviderMetadata
 from any_llm.types.responses import Response, ResponseInputParam, ResponsesParams, ResponseStreamEvent
-from any_llm.utils.aio import async_iter_to_sync_iter, run_async_in_sync
+from any_llm.utils.aio import async_coro_to_sync_iter, async_iter_to_sync_iter, run_async_in_sync
 from any_llm.utils.decorators import BATCH_API_EXPERIMENTAL_MESSAGE, experimental
 from any_llm.utils.exception_handler import handle_exceptions
 from any_llm.utils.structured_output import is_structured_output_type, parse_json_content
 
 ResponseFormatT = TypeVar("ResponseFormatT", bound=BaseModel)
 
 if TYPE_CHECKING:
-    from collections.abc import AsyncIterator, Callable, Iterator, Sequence
+    from collections.abc import AsyncIterator, Callable, Coroutine, Iterator, Sequence
 
     from any_llm.types.batch import Batch
     from any_llm.types.completion import ChatCompletionChunk, CreateEmbeddingResponse
@@ -437,14 +437,26 @@ def completion(
         """
         if allow_running_loop is None:
             allow_running_loop = INSIDE_NOTEBOOK
+        if stream:
+            return async_coro_to_sync_iter(
+                self.acompletion(
+                    model=model,
+                    messages=messages,
+                    response_format=response_format,
+                    stream=stream,
+                    **kwargs,
+                ),
+                allow_running_loop=allow_running_loop,
+            )
+
         response = run_async_in_sync(
             self.acompletion(model=model, messages=messages, response_format=response_format, stream=stream, **kwargs),
             allow_running_loop=allow_running_loop,
         )
         if isinstance(response, ChatCompletion):
             return response
 
-        return async_iter_to_sync_iter(response)
+        return async_iter_to_sync_iter(response, allow_running_loop=allow_running_loop)
 
     # Overloads let type checkers narrow the return type based on response_format and stream.
     @overload
@@ -509,6 +521,7 @@ async def acompletion(
         frequency_penalty: float | None = None,
         seed: int | None = None,
         user: str | None = None,
+        session_label: str | None = None,
         parallel_tool_calls: bool | None = None,
         logprobs: bool | None = None,
         top_logprobs: int | None = None,
@@ -536,6 +549,7 @@
             frequency_penalty: Penalize new tokens based on frequency in text
             seed: Random seed for reproducible results
             user: Unique identifier for the end user
+            session_label: Optional user session label metadata for platform traces; exported as anyllm.user_session_label
             parallel_tool_calls: Whether to allow parallel tool calls
             logprobs: Include token-level log probabilities in the response
             top_logprobs: Number of alternatives to return when logprobs are requested
@@ -586,6 +600,9 @@
             reasoning_effort=reasoning_effort,
         )
 
+        if session_label is not None and self.PROVIDER_NAME == "platform":
+            kwargs["session_label"] = session_label
+
         result = await self._acompletion(params, **kwargs)
 
         if is_structured_output_type(response_format):
@@ -754,10 +771,16 @@ def responses(self, **kwargs: Any) -> ResponseResource | Response | Iterator[Res
         See [AnyLLM.aresponses][any_llm.any_llm.AnyLLM.aresponses]
         """
         allow_running_loop = kwargs.pop("allow_running_loop", INSIDE_NOTEBOOK)
+        if kwargs.get("stream"):
+            return async_coro_to_sync_iter(
+                cast("Coroutine[Any, Any, AsyncIterator[ResponseStreamEvent]]", self.aresponses(**kwargs)),
+                allow_running_loop=allow_running_loop,
+            )
+
         response = run_async_in_sync(self.aresponses(**kwargs), allow_running_loop=allow_running_loop)
         if isinstance(response, (ResponseResource, Response)):
             return response
-        return async_iter_to_sync_iter(response)
+        return async_iter_to_sync_iter(response, allow_running_loop=allow_running_loop)
 
     @handle_exceptions(wrap_streaming=True)
     async def aresponses(
```
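The new streaming path hands `async_coro_to_sync_iter` a coroutine that resolves to an async iterator and bridges it to a plain sync iterator. A minimal self-contained sketch of that pattern (ignoring the `allow_running_loop` handling and any-llm's actual utility, which may differ) might be:

```python
import asyncio
from collections.abc import AsyncIterator, Coroutine, Iterator
from typing import Any, TypeVar

T = TypeVar("T")


def async_coro_to_sync_iter(coro: Coroutine[Any, Any, AsyncIterator[T]]) -> Iterator[T]:
    """Await a coroutine that produces an async iterator, then drain it synchronously.

    Sketch only: runs everything on a fresh event loop owned by this generator.
    """
    loop = asyncio.new_event_loop()
    try:
        aiter = loop.run_until_complete(coro)
        while True:
            try:
                # Drive one async step per sync step so chunks arrive lazily
                yield loop.run_until_complete(aiter.__anext__())
            except StopAsyncIteration:
                break
    finally:
        loop.close()
```

The point of this shape is that the coroutine is not awaited eagerly inside the sync wrapper; each chunk is pulled on demand, which is what lets `completion(stream=True)` return an iterator immediately.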

src/any_llm/api.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -39,6 +39,7 @@ def completion(
     api_key: str | None = None,
     api_base: str | None = None,
     user: str | None = None,
+    session_label: str | None = None,
     parallel_tool_calls: bool | None = None,
     logprobs: bool | None = None,
     top_logprobs: int | None = None,
@@ -73,6 +74,7 @@ def completion(
         api_key: API key for the provider
         api_base: Base URL for the provider API
         user: Unique identifier for the end user
+        session_label: Optional user session label metadata for platform traces; exported as anyllm.user_session_label
         parallel_tool_calls: Whether to allow parallel tool calls
         logprobs: Include token-level log probabilities in the response
         top_logprobs: Number of alternatives to return when logprobs are requested
@@ -115,6 +117,7 @@ def completion(
         frequency_penalty=frequency_penalty,
         seed=seed,
         user=user,
+        session_label=session_label,
         parallel_tool_calls=parallel_tool_calls,
         logprobs=logprobs,
         top_logprobs=top_logprobs,
@@ -146,6 +149,7 @@ async def acompletion(
     api_key: str | None = None,
     api_base: str | None = None,
     user: str | None = None,
+    session_label: str | None = None,
     parallel_tool_calls: bool | None = None,
     logprobs: bool | None = None,
     top_logprobs: int | None = None,
@@ -180,6 +184,7 @@ async def acompletion(
         api_key: API key for the provider
         api_base: Base URL for the provider API
         user: Unique identifier for the end user
+        session_label: Optional user session label metadata for platform traces; exported as anyllm.user_session_label
         parallel_tool_calls: Whether to allow parallel tool calls
         logprobs: Include token-level log probabilities in the response
         top_logprobs: Number of alternatives to return when logprobs are requested
@@ -222,6 +227,7 @@ async def acompletion(
         frequency_penalty=frequency_penalty,
         seed=seed,
         user=user,
+        session_label=session_label,
         parallel_tool_calls=parallel_tool_calls,
         logprobs=logprobs,
         top_logprobs=top_logprobs,
```
Lines changed: 2 additions & 2 deletions
```diff
@@ -1,4 +1,4 @@
 from .platform import PlatformProvider
-from .utils import post_completion_usage_event
+from .utils import export_completion_trace, shutdown_telemetry
 
-__all__ = ["PlatformProvider", "post_completion_usage_event"]
+__all__ = ["PlatformProvider", "export_completion_trace", "shutdown_telemetry"]
```
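Exporting `shutdown_telemetry` matters because OTLP exporters typically batch spans; without an explicit shutdown or flush, spans still sitting in the buffer can be lost at process exit. A toy illustration of that buffering behavior (not the real span processor, just the failure mode it guards against):

```python
class ToyBatchingProcessor:
    """Toy stand-in for a batching span processor: ended spans accumulate
    in a buffer and are only exported on an explicit shutdown/flush."""

    def __init__(self) -> None:
        self.buffer: list[str] = []
        self.exported: list[str] = []

    def on_end(self, span_name: str) -> None:
        # Buffered, not yet sent anywhere
        self.buffer.append(span_name)

    def shutdown(self) -> None:
        # Export whatever is still buffered, mirroring why an explicit
        # shutdown_telemetry() call matters before process exit
        self.exported.extend(self.buffer)
        self.buffer.clear()
```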
