Skip to content

Commit 559da40

Browse files
authored
[llm-d] Keep working (#912)
<!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Chores** * Removed InferenceService resource capture from the state capture process. InferenceService resources will no longer be included in state snapshots. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
2 parents c64063b + 974c4ca commit 559da40

File tree

3 files changed

+32
-10
lines changed

3 files changed

+32
-10
lines changed

projects/llm-d/toolbox/llmd_capture_isvc_state/tasks/main.yml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,6 @@
4040
-oyaml > "{{ artifact_extra_logs_dir }}/artifacts/llminferenceservice.pods.yaml"
4141
ignore_errors: true
4242

43-
- name: Capture regular InferenceServices that may be created
44-
shell:
45-
oc get inferenceservice \
46-
-n "{{ target_namespace }}" \
47-
-oyaml > "{{ artifact_extra_logs_dir }}/artifacts/inferenceservices.yaml"
48-
ignore_errors: true
49-
5043
- name: Capture deployments related to the LLMInferenceService
5144
shell:
5245
oc get deployments \

projects/llm-d/visualizations/llmd_inference/plotting/error_report.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,35 @@ def do_plot(self, *args):
103103
if hasattr(results, 'test_name'):
104104
header.append(html.P(f"Test name: {results.test_name}"))
105105

106+
# Show test entry labels
107+
labels_list = []
108+
109+
for key, value in entry.settings.__dict__.items():
110+
labels_list.append(html.Span(f"{key}={value}", style={
111+
"background-color": "#f0f0f0",
112+
"padding": "2px 6px",
113+
"margin": "2px",
114+
"border-radius": "3px",
115+
"font-family": "monospace",
116+
"font-size": "0.9em"
117+
}))
118+
labels_list.append(" ")
119+
120+
header.append(html.P([
121+
"Settings: ",
122+
html.Span(labels_list)
123+
]))
124+
125+
header.append(html.P([
126+
"Directory: ",
127+
html.Code(str(entry.location).strip("./"), style={
128+
"background-color": "#f8f8f8",
129+
"padding": "2px 4px",
130+
"border-radius": "3px",
131+
"font-size": "0.9em"
132+
})
133+
]))
134+
106135
if hasattr(results, 'test_failure_reason') and results.test_failure_reason:
107136
header.append(html.P([
108137
"Failure reason: ",

projects/llm-d/visualizations/llmd_inference/store/parsers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -343,9 +343,9 @@ def get_metric_value(metric_name, stat_type='median', default=0.0):
343343
tpot_p95 = get_metric_value('time_per_output_token_ms', 'p95') / 1000.0
344344

345345
# Extract throughput metrics
346-
request_rate = get_metric_value('requests_per_second', 'median')
347-
input_tokens_per_second = get_metric_value('input_tokens_per_second', 'median')
348-
output_tokens_per_second = get_metric_value('output_tokens_per_second', 'median')
346+
request_rate = get_metric_value('requests_per_second', 'mean')
347+
input_tokens_per_second = get_metric_value('input_tokens_per_second', 'mean')
348+
output_tokens_per_second = get_metric_value('output_tokens_per_second', 'mean')
349349
total_tokens_per_second = input_tokens_per_second + output_tokens_per_second
350350

351351
# Calculate requests completed and tokens per request

0 commit comments

Comments (0)