From 2379938a1ca565f57af3a5090194c1dc3f420530 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Thu, 5 Dec 2024 10:22:33 -0800 Subject: [PATCH 1/8] add telemetry docs --- docs/source/experimental/index.md | 12 ++ docs/source/experimental/telemetry/index.md | 118 ++++++++++++++++++++ docs/source/index.md | 1 + 3 files changed, 131 insertions(+) create mode 100644 docs/source/experimental/index.md create mode 100644 docs/source/experimental/telemetry/index.md diff --git a/docs/source/experimental/index.md b/docs/source/experimental/index.md new file mode 100644 index 0000000000..aba4887e9b --- /dev/null +++ b/docs/source/experimental/index.md @@ -0,0 +1,12 @@ +# Experimental Features + +This section contains experimental features that are not yet fully supported or subject to change. + +Telemetry: [Telemetry](telemetry/index) + +```{toctree} +:hidden: +:maxdepth: 3 + +telemetry/index +``` diff --git a/docs/source/experimental/telemetry/index.md b/docs/source/experimental/telemetry/index.md new file mode 100644 index 0000000000..a90ed9dad0 --- /dev/null +++ b/docs/source/experimental/telemetry/index.md @@ -0,0 +1,118 @@ +# Telemetry +```{note} +The telemetry system is currently experimental and subject to change. We welcome feedback and contributions to help improve it. +``` + + + +The Llama Stack telemetry system provides comprehensive tracing, metrics, and logging capabilities. It supports multiple sink types including OpenTelemetry, SQLite, and Console output. 
+ +## Key Concepts + +### Events +The telemetry system supports three main types of events: + +- **Unstructured Log Events**: Free-form log messages with severity levels +- **Metric Events**: Numerical measurements with units +- **Structured Log Events**: System events like span start/end + +### Spans and Traces +- **Spans**: Represent operations with timing and hierarchical relationships +- **Traces**: Collection of related spans forming a complete request flow + +### Sinks +- **OpenTelemetry**: Send events to an OpenTelemetry Collector. This is useful for visualizing traces in a service like Jaeger. +- **SQLite**: Store events in a local SQLite database. This is needed if you want to query the events later through the Llama Stack API. +- **Console**: Print events to the console. + + +## Providers + +### Meta-Reference Provider +Currently, only the meta-reference provider is implemented. It can be configured to send events to three sink types: +1) OpenTelemetry Collector +2) SQLite +3) Console + +## Configuration + +```yaml + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + sinks: ['console', 'sqlite', 'otel'] + otel_endpoint: "http://localhost:4318/v1/traces" + sqlite_db_path: "/path/to/telemetry.db" +``` + + +## Querying Examples + +Querying Traces for a agent session + +``` bash + curl -X POST 'http://localhost:5000/alpha/telemetry/query-traces' \ +-H 'Content-Type: application/json' \ +-d '{ + "attribute_filters": [ + { + "key": "session_id", + "op": "eq", + "value": "dd667b87-ca4b-4d30-9265-5a0de318fc65" }], + "limit": 100, + "offset": 0, + "order_by": ["start_time"] + + [ + { + "trace_id": "6902f54b83b4b48be18a6f422b13e16f", + "root_span_id": "5f37b85543afc15a", + "start_time": "2024-12-04T08:08:30.501587", + "end_time": "2024-12-04T08:08:36.026463" + }, + ........ 
+] +}' + +``` + +Querying spans for a specific root span id + +``` bash +curl -X POST 'http://localhost:5000/alpha/telemetry/get-span-tree' \ +-H 'Content-Type: application/json' \ +-d '{ "span_id" : "6cceb4b48a156913", "max_depth": 2 }' + +{ + "span_id": "6cceb4b48a156913", + "trace_id": "dafa796f6aaf925f511c04cd7c67fdda", + "parent_span_id": "892a66d726c7f990", + "name": "retrieve_rag_context", + "start_time": "2024-12-04T09:28:21.781995", + "end_time": "2024-12-04T09:28:21.913352", + "attributes": { + "input": [ + "{\"role\":\"system\",\"content\":\"You are a helpful assistant\"}", + "{\"role\":\"user\",\"content\":\"What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.\",\"context\":null}" + ] + }, + "children": [ + { + "span_id": "1a2df181854064a8", + "trace_id": "dafa796f6aaf925f511c04cd7c67fdda", + "parent_span_id": "6cceb4b48a156913", + "name": "MemoryRouter.query_documents", + "start_time": "2024-12-04T09:28:21.787620", + "end_time": "2024-12-04T09:28:21.906512", + "attributes": { + "input": null + }, + "children": [], + "status": "ok" + } + ], + "status": "ok" +} + +``` diff --git a/docs/source/index.md b/docs/source/index.md index abfaf51b4f..8af3fa6e50 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -84,4 +84,5 @@ building_applications/index contributing/index references/index cookbooks/index +experimental/index ``` From d734607043f903743ca20ce5cfd39397944f741c Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Thu, 5 Dec 2024 10:26:26 -0800 Subject: [PATCH 2/8] include APIs supported --- docs/source/experimental/telemetry/index.md | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/source/experimental/telemetry/index.md b/docs/source/experimental/telemetry/index.md index a90ed9dad0..0a51d52660 100644 --- a/docs/source/experimental/telemetry/index.md +++ b/docs/source/experimental/telemetry/index.md @@ -25,6 +25,34 @@ The telemetry system supports three main types 
of events: - **SQLite**: Store events in a local SQLite database. This is needed if you want to query the events later through the Llama Stack API. - **Console**: Print events to the console. +## APIs + +The telemetry system exposes the following HTTP endpoints: + +### Log Event +```http +POST /telemetry/log-event +``` +Logs a telemetry event (unstructured log, metric, or structured log) with optional TTL. + +### Query Traces +```http +POST /telemetry/query-traces +``` +Retrieves traces based on filters with pagination support. Parameters: +- `attribute_filters`: List of conditions to filter traces +- `limit`: Maximum number of traces to return (default: 100) +- `offset`: Number of traces to skip (default: 0) +- `order_by`: List of fields to sort by + +### Get Span Tree +```http +POST /telemetry/get-span-tree +``` +Retrieves a hierarchical view of spans starting from a specific span. Parameters: +- `span_id`: ID of the root span to retrieve +- `attributes_to_return`: Optional list of specific attributes to include +- `max_depth`: Optional maximum depth of the span tree to return ## Providers From 4231bbfcb163eec1295c3da8f59cd63bf97abb46 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Thu, 5 Dec 2024 10:32:41 -0800 Subject: [PATCH 3/8] include jaeger setup --- docs/source/experimental/telemetry/index.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/source/experimental/telemetry/index.md b/docs/source/experimental/telemetry/index.md index 0a51d52660..47ed1ff156 100644 --- a/docs/source/experimental/telemetry/index.md +++ b/docs/source/experimental/telemetry/index.md @@ -74,6 +74,23 @@ Currently, only the meta-reference provider is implemented. 
It can be configured sqlite_db_path: "/path/to/telemetry.db" ``` +## Jaeger to visualize traces +Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command: + +```bash +docker run -d \ + --name jaeger \ + -p 6831:6831/udp \ + -p 14268:14268 \ + -p 16686:16686 \ + -p 4317:4317 \ + -p 4318:4318 \ + -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \ + -e COLLECTOR_OTLP_ENABLED=true \ + jaegertracing/all-in-one:latest +``` + +Once the Jaeger instance is running, you can visualize traces by navigating to http://localhost:16686. ## Querying Examples From 511acda0b39dd68ecfd41c978cf74827b5bea789 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Thu, 5 Dec 2024 14:07:33 -0800 Subject: [PATCH 4/8] add newly added methods to query and save spans --- docs/source/experimental/telemetry/index.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/source/experimental/telemetry/index.md b/docs/source/experimental/telemetry/index.md index 47ed1ff156..a83f7583cf 100644 --- a/docs/source/experimental/telemetry/index.md +++ b/docs/source/experimental/telemetry/index.md @@ -54,6 +54,27 @@ Retrieves a hierarchical view of spans starting from a specific span. Parameters - `attributes_to_return`: Optional list of specific attributes to include - `max_depth`: Optional maximum depth of the span tree to return +### Query Spans +```http +POST /telemetry/query-spans +``` +Retrieves spans matching specified filters and returns selected attributes. Parameters: +- `attribute_filters`: List of conditions to filter traces +- `attributes_to_return`: List of specific attributes to include in results +- `max_depth`: Optional maximum depth of spans to traverse (default: no limit) + +Returns a flattened list of spans with requested attributes. + +### Save Spans to Dataset +```http +POST /telemetry/save-spans-to-dataset +``` +Queries spans and saves their attributes to a dataset. 
Parameters: +- `attribute_filters`: List of conditions to filter traces +- `attributes_to_save`: List of span attributes to save to the dataset +- `dataset_id`: ID of the dataset to save to +- `max_depth`: Optional maximum depth of spans to traverse (default: no limit) + ## Providers ### Meta-Reference Provider From 9497cc3e431c4d219d4d4c846765d024c7c0af59 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Thu, 5 Dec 2024 21:13:29 -0800 Subject: [PATCH 5/8] Apply suggestions from code review Co-authored-by: Adrian Cole <64215+codefromthecrypt@users.noreply.github.com> --- docs/source/experimental/telemetry/index.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/source/experimental/telemetry/index.md b/docs/source/experimental/telemetry/index.md index a83f7583cf..a95fb68599 100644 --- a/docs/source/experimental/telemetry/index.md +++ b/docs/source/experimental/telemetry/index.md @@ -85,6 +85,7 @@ Currently, only the meta-reference provider is implemented. It can be configured ## Configuration +Here's an example that sends telemetry signals to all three sink types. Your configuration might use only one. ```yaml telemetry: - provider_id: meta-reference @@ -96,6 +97,9 @@ Currently, only the meta-reference provider is implemented. It can be configured ``` ## Jaeger to visualize traces + +The `otel` sink works with any service compatible with the OpenTelemetry collector. Let's use Jaeger to visualize this data. + Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command: ```bash @@ -113,7 +117,9 @@ docker run -d \ Once the Jaeger instance is running, you can visualize traces by navigating to http://localhost:16686. -## Querying Examples +## Querying Traces Stored in SQLite + +The `sqlite` sink allows you to query traces without an external system. 
Here are some example queries: Querying Traces for a agent session From c74fb18aca34ec2161f9fcd1f328cf235e416310 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Fri, 6 Dec 2024 09:19:09 -0800 Subject: [PATCH 6/8] address feedback --- docs/source/experimental/telemetry/index.md | 75 ++++++++++++++++++--- 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/docs/source/experimental/telemetry/index.md b/docs/source/experimental/telemetry/index.md index a95fb68599..fd4446ed2e 100644 --- a/docs/source/experimental/telemetry/index.md +++ b/docs/source/experimental/telemetry/index.md @@ -13,8 +13,27 @@ The Llama Stack telemetry system provides comprehensive tracing, metrics, and lo The telemetry system supports three main types of events: - **Unstructured Log Events**: Free-form log messages with severity levels +```python +unstructured_log_event = UnstructuredLogEvent( + message="This is a log message", + severity=LogSeverity.INFO +) +``` - **Metric Events**: Numerical measurements with units -- **Structured Log Events**: System events like span start/end +```python +metric_event = MetricEvent( + metric="my_metric", + value=10, + unit="count" +) +``` +- **Structured Log Events**: System events like span start/end. Extensible to add more structured log types. +```python +structured_log_event = SpanStartPayload( + name="my_span", + parent_span_id="parent_span_id" +) +``` ### Spans and Traces - **Spans**: Represent operations with timing and hierarchical relationships @@ -27,6 +46,7 @@ The telemetry system supports three main types of events: ## APIs +The telemetry API is designed to be flexible for different user flows like debugging/visualization in UI, monitoring, and saving traces to datasets. The telemetry system exposes the following HTTP endpoints: ### Log Event @@ -66,6 +86,7 @@ Retrieves spans matching specified filters and returns selected attributes. Para Returns a flattened list of spans with requested attributes. 
### Save Spans to Dataset +This is useful for saving traces to a dataset for running evaluations. For example, you can save the input/output of each span that is part of an agent session/turn to a dataset and then run an eval task on it. See example in [Example: Save Spans to Dataset](#example-save-spans-to-dataset). ```http POST /telemetry/save-spans-to-dataset ``` @@ -103,16 +124,10 @@ The `otel` sink works with any service compatible with the OpenTelemetry collect Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command: ```bash -docker run -d \ - --name jaeger \ - -p 6831:6831/udp \ - -p 14268:14268 \ - -p 16686:16686 \ - -p 4317:4317 \ - -p 4318:4318 \ - -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \ - -e COLLECTOR_OTLP_ENABLED=true \ - jaegertracing/all-in-one:latest +$ docker run --rm \ + --name jaeger \ + -p 16686:16686 -p 4318:4318 \ + jaegertracing/jaeger:2.0.0 --set receivers.otlp.protocols.http.endpoint=0.0.0.0:4318 ``` Once the Jaeger instance is running, you can visualize traces by navigating to http://localhost:16686. @@ -122,6 +137,7 @@ Once the Jaeger instance is running, you can visualize traces by navigating to h The `sqlite` sink allows you to query traces without an external system. Here are some example queries: Querying Traces for a agent session +The client SDK is not updated to support the new telemetry API. It will be updated soon. You can manually query traces using the following curl command: ``` bash curl -X POST 'http://localhost:5000/alpha/telemetry/query-traces' \ @@ -188,3 +204,40 @@ curl -X POST 'http://localhost:5000/alpha/telemetry/get-span-tree' \ } ``` + +## Example: Save Spans to Dataset +Save all spans for a specific agent session to a dataset. 
+``` bash +curl -X POST 'http://localhost:5000/alpha/telemetry/save-spans-to-dataset' \ +-H 'Content-Type: application/json' \ +-d '{ + "attribute_filters": [ + { + "key": "session_id", + "op": "eq", + "value": "dd667b87-ca4b-4d30-9265-5a0de318fc65" + } + ], + "attributes_to_save": ["input", "output"], + "dataset_id": "my_dataset", + "max_depth": 10 +}' +``` + +Save all spans for a specific agent turn to a dataset. +```bash +curl -X POST 'http://localhost:5000/alpha/telemetry/save-spans-to-dataset' \ +-H 'Content-Type: application/json' \ +-d '{ + "attribute_filters": [ + { + "key": "turn_id", + "op": "eq", + "value": "123e4567-e89b-12d3-a456-426614174000" + } + ], + "attributes_to_save": ["input", "output"], + "dataset_id": "my_dataset", + "max_depth": 10 +}' +``` From 23f2c6e10ba769a06b9411cba86cd5e97b9280da Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Fri, 6 Dec 2024 10:14:36 -0800 Subject: [PATCH 7/8] move telemetry to build applications section --- docs/source/building_applications/index.md | 6 ++++++ .../index.md => building_applications/telemetry.md} | 0 docs/source/experimental/index.md | 12 ------------ docs/source/index.md | 1 - 4 files changed, 6 insertions(+), 13 deletions(-) rename docs/source/{experimental/telemetry/index.md => building_applications/telemetry.md} (100%) delete mode 100644 docs/source/experimental/index.md diff --git a/docs/source/building_applications/index.md b/docs/source/building_applications/index.md index 6d2f9e3ac6..f2345f25b7 100644 --- a/docs/source/building_applications/index.md +++ b/docs/source/building_applications/index.md @@ -11,5 +11,11 @@ - memory / RAG; pre-ingesting content or attaching content in a turn - how does tool calling work - can you do evaluation? 
+``` + +```{toctree} +:hidden: +:maxdepth: 3 +telemetry ``` diff --git a/docs/source/experimental/telemetry/index.md b/docs/source/building_applications/telemetry.md similarity index 100% rename from docs/source/experimental/telemetry/index.md rename to docs/source/building_applications/telemetry.md diff --git a/docs/source/experimental/index.md b/docs/source/experimental/index.md deleted file mode 100644 index aba4887e9b..0000000000 --- a/docs/source/experimental/index.md +++ /dev/null @@ -1,12 +0,0 @@ -# Experimental Features - -This section contains experimental features that are not yet fully supported or subject to change. - -Telemetry: [Telemetry](telemetry/index) - -```{toctree} -:hidden: -:maxdepth: 3 - -telemetry/index -``` diff --git a/docs/source/index.md b/docs/source/index.md index 8af3fa6e50..abfaf51b4f 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -84,5 +84,4 @@ building_applications/index contributing/index references/index cookbooks/index -experimental/index ``` From 4982c9acddc23d4f29b4bcb229debc720f1932b7 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Fri, 6 Dec 2024 10:15:54 -0800 Subject: [PATCH 8/8] tink to telemetry from building applications --- docs/source/building_applications/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/building_applications/index.md b/docs/source/building_applications/index.md index f2345f25b7..1c333c4a73 100644 --- a/docs/source/building_applications/index.md +++ b/docs/source/building_applications/index.md @@ -12,6 +12,7 @@ - how does tool calling work - can you do evaluation? ``` +For details on how to use the telemetry system to debug your applications, export traces to a dataset, and run evaluations, see the [Telemetry](telemetry) section. ```{toctree} :hidden: