robusta-dev · aantn · Sep 30, 2025 · Oct 8, 2025 · Oct 8, 2025 · coderabbitai
diff --git a/pyproject.toml b/pyproject.toml
@@ -119,7 +119,9 @@ markers = [
     "storage: Disk related, like I/O or disk space",
     "question-answer: Simple question-answer tests where Holmes answers straightforward questions about the system",
     "prometheus: Tests involving Prometheus metrics",
+    "loki: Tests involving Loki logs",
     "newrelic: New Relic toolset",
+    "embeds: Ability of Holmes to include embeds like << { promql... } >> in its answers",
     "no-cicd: Tests to skip in the GitHub action because we're missing prerequisites in the KIND cluster like a Prometheus instance",
 ]
 

diff --git a/tests/llm/fixtures/test_ask_holmes/100a_historical_logs/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/100a_historical_logs/test_case.yaml
@@ -1,3 +1,4 @@
+
 user_prompt: "The payment-api pod in namespace app-100a had issues on August 2, 2025 around 13:45 UTC. What happened?"
 
 expected_output:
@@ -11,6 +12,7 @@ tags:
   - logs
   - kubernetes
   - medium
+  - loki
 
 setup_timeout: 600  # Increased to handle Datadog init containers
 

diff --git a/tests/llm/fixtures/test_ask_holmes/100b_historical_logs_nonstandard_label/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/100b_historical_logs_nonstandard_label/test_case.yaml
@@ -11,6 +11,7 @@ tags:
   - logs
   - kubernetes
   - medium
+  - loki
 
 setup_timeout: 600  # Increased to handle Datadog init containers
 

diff --git a/tests/llm/fixtures/test_ask_holmes/101_historical_logs_pod_deleted/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/101_historical_logs_pod_deleted/test_case.yaml
@@ -15,6 +15,7 @@ tags:
   - logs
   - kubernetes
   - hard
+  - loki
 
 setup_timeout: 360  # Increased from 300 to handle pod deletion and Loki ingestion variability
 

diff --git a/tests/llm/fixtures/test_ask_holmes/102_loki_label_discovery/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/102_loki_label_discovery/test_case.yaml
@@ -24,6 +24,7 @@ tags:
   - logs
   - kubernetes
   - toolset-limitation
+  - loki
 
 port_forwards:
   - namespace: app-102

diff --git a/tests/llm/fixtures/test_ask_holmes/103_logs_transparency_default_limit/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/103_logs_transparency_default_limit/test_case.yaml
@@ -19,3 +19,4 @@ tags:
   - logs
   - transparency
   - medium
+  - loki
diff --git a/tests/llm/fixtures/test_ask_holmes/110_cpu_graph_robusta_runner/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/110_cpu_graph_robusta_runner/test_case.yaml
@@ -22,3 +22,11 @@ expected_output: |
   random_key may be different than the above example so long as its a random looking key, but all other parameters (type and tool_name) must be as described
 
   Output must NOT tell the user it doesn't have access to metrics or that they should use another tool
+
+skip: true
+skip_reason: "missing setup - need to write data to datadog"
+tags:
+- datadog
+- metrics
+- medium
+- embeds
diff --git a/tests/llm/fixtures/test_ask_holmes/117b_new_relic_block_embed/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/117b_new_relic_block_embed/test_case.yaml
@@ -9,3 +9,4 @@ tags:
   - traces
   - newrelic
   - network
+  - embeds
diff --git a/tests/llm/fixtures/test_ask_holmes/118_new_relic_logs/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/118_new_relic_logs/test_case.yaml
@@ -1,6 +1,5 @@
 description: "Test if holmes pulls logs reliably, expects env var NEW_RELIC_LICENSE_KEY"
 
-
 user_prompt:
 - fetch error logs for app payment-gateway-s1fdsa-231ds from newrelic
 
@@ -10,6 +9,7 @@ expected_output:
 tags:
   - newrelic
   - logs
+  - medium
 
 # Custom timeout for setup (needs 5 min sleep + setup time)
 setup_timeout: 360  # 6 minutes total

diff --git a/tests/llm/fixtures/test_ask_holmes/119_new_relic_metrics/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/119_new_relic_metrics/test_case.yaml
@@ -1,5 +1,4 @@
-description: "Test if holmes pulls logs reliably"
-
+description: "Test if holmes pulls metrics reliably"
 
 user_prompt:
 - give me a graph of the custom newrelic metric sales.amount and the currency
@@ -13,6 +12,7 @@ expected_output:
 tags:
   - newrelic
   - metrics
+  - medium
 
 # Custom timeout for setup (needs 2 min sleep + setup time)
 setup_timeout: 240  # 4 minutes total

diff --git a/tests/llm/fixtures/test_ask_holmes/120_new_relic_traces2/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/120_new_relic_traces2/test_case.yaml
@@ -9,6 +9,7 @@ expected_output:
 tags:
   - newrelic
   - traces
+  - medium
 
 before_test: |
   [ -n "${NEW_RELIC_ACCOUNT_ID:-}" ] && [ -n "${NEW_RELIC_API_KEY:-}" ] && [ -n "${NEW_RELIC_LICENSE_KEY:-}" ] || { for v in NEW_RELIC_ACCOUNT_ID NEW_RELIC_API_KEY NEW_RELIC_LICENSE_KEY; do [ -n "${!v:-}" ] || echo "Missing env var: $v"; done; exit 1; }

diff --git a/tests/llm/fixtures/test_ask_holmes/121_new_relic_checkout_errors_tracing/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/121_new_relic_checkout_errors_tracing/test_case.yaml
@@ -10,6 +10,7 @@ tags:
   - hard
   - chain-of-causation
   - traces
+  - newrelic
 
 setup_timeout: 600  # 10 minutes for pod scheduling
 

diff --git a/...lm/fixtures/test_ask_holmes/122_new_relic_checkout_latency_tracing_rebuild/test_case.yaml b/...lm/fixtures/test_ask_holmes/122_new_relic_checkout_latency_tracing_rebuild/test_case.yaml
@@ -10,6 +10,7 @@ tags:
   - hard
   - chain-of-causation
   - traces
+  - newrelic
 
 setup_timeout: 600  # 10 minutes for pod scheduling
 

diff --git a/tests/llm/fixtures/test_ask_holmes/123_new_relic_checkout_errors_tracing/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/123_new_relic_checkout_errors_tracing/test_case.yaml
@@ -10,6 +10,7 @@ tags:
   - hard
   - chain-of-causation
   - traces
+  - newrelic
 
 setup_timeout: 600  # 10 minutes for pod scheduling
 

diff --git a/tests/llm/fixtures/test_ask_holmes/159_prometheus_high_cardinality_cpu/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/159_prometheus_high_cardinality_cpu/test_case.yaml
@@ -14,6 +14,8 @@ tags:
   - context_window
   - medium
   - prometheus
+  - embeds
+  - metrics
 
 # Custom timeout for setup (needs time for metrics + setup)
 setup_timeout: 480  # 8 minutes total (120s setup + 300s metrics + buffer)

diff --git a/tests/llm/fixtures/test_ask_holmes/160a_cpu_per_namespace_graph/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/160a_cpu_per_namespace_graph/test_case.yaml
@@ -9,6 +9,9 @@ expected_output:
 tags:
   - prometheus
   - kubernetes
+  - embeds
+  - metrics
+  - medium
 
 # Port forward configuration for Prometheus
 port_forwards:

diff --git a/...fixtures/test_ask_holmes/160b_cpu_per_namespace_graph_with_prom_truncation/test_case.yaml b/...fixtures/test_ask_holmes/160b_cpu_per_namespace_graph_with_prom_truncation/test_case.yaml
@@ -12,6 +12,9 @@ expected_output:
 tags:
   - prometheus
   - kubernetes
+  - embeds
+  - metrics
+  - medium
 
 # Port forward configuration for Prometheus
 port_forwards:

diff --git a/...xtures/test_ask_holmes/160c_cpu_per_namespace_graph_with_global_truncation/test_case.yaml b/...xtures/test_ask_holmes/160c_cpu_per_namespace_graph_with_global_truncation/test_case.yaml
@@ -14,6 +14,9 @@ expected_output:
 tags:
   - prometheus
   - kubernetes
+  - embeds
+  - metrics
+  - medium
 
 # Port forward configuration for Prometheus
 port_forwards:

diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/test_case.yaml
@@ -1,10 +1,14 @@
 user_prompt: "Show me a graph (with prometheus) of the memory usage for the kubernetes cluster over the last 30 minutes"
+
 expected_output:
   - 'The answer should include something similar to << { "type": "promql", "tool_name": "execute_prometheus_range_query", "random_key": "AtEJ" } >>. Both tool_name and random_key MUST absolutely be present, but tool_name must be execute_prometheus_range_query while random_key can have any value.'
+
 tags:
   - prometheus
-evaluation:
-  correctness: 1
+  - embeds
+  - metrics
+  - medium
+
 port_forwards:
   - namespace: default
     service: robusta-kube-prometheus-st-prometheus

diff --git a/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/test_case.yaml
@@ -1,8 +1,14 @@
 user_prompt: "Show me a graph of the CPU usage for the pod robusta-holmes over the last 30 minutes"
+
 expected_output:
   - 'The answer should include something similar to << { "type": "promql", "tool_name": "execute_prometheus_range_query", "random_key": "HBGf" } >>. Both tool_name and random_key MUST absolutely be present. random_key can have any value, tool_name must be execute_prometheus_range_query.'
+
 tags:
   - prometheus
+  - embeds
+  - metrics
+  - medium
+
 port_forwards:
   - namespace: default
     service: robusta-kube-prometheus-st-prometheus

diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_metrics_discovery/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/33_cpu_metrics_discovery/test_case.yaml
@@ -7,6 +7,7 @@ expected_output:
 tags:
   - prometheus
   - easy
+  - metrics
   - no-cicd
 
 include_tool_calls: true

diff --git a/tests/llm/fixtures/test_ask_holmes/34_memory_graph/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/34_memory_graph/test_case.yaml
@@ -1,10 +1,13 @@
 user_prompt: "Give me a graph of `container_memory_working_set_bytes`"
 expected_output:
   - 'The answer should include something similar to << { "type": "promql", "tool_name": "execute_prometheus_range_query", "random_key": "<alphanumeric_characters>" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.'
+
 tags:
   - prometheus
-evaluation:
-  correctness: 1
+  - embeds
+  - metrics
+  - medium
+
 port_forwards:
   - namespace: default
     service: robusta-kube-prometheus-st-prometheus
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,6 +11,7 @@ tags: @@
       - logs
       - kubernetes
       - medium
+      - loki
     setup_timeout: 600  # Increased to handle Datadog init containers
@@ Expand Down @@