@@ -10,6 +10,7 @@
 import os
 import sys
 import time
+import unittest
 import uuid
 
 import requests
@@ -45,7 +46,7 @@ def setUp(self):
         # Check Envoy
         try:
             payload = {
-                "model": "gemma3:27b",
+                "model": "Model-A",
                 "messages": [{"role": "user", "content": "test"}],
             }
 
@@ -89,7 +90,7 @@ def setUp(self):
         # Check if cache is enabled in metrics
         response = requests.get(ROUTER_METRICS_URL)
         metrics_text = response.text
-        if "llm_router_cache" not in metrics_text:
+        if "llm_cache" not in metrics_text:
             self.skipTest("Cache metrics not found. Semantic cache may be disabled.")
 
     def test_cache_hit_with_identical_query(self):
@@ -105,13 +106,11 @@ def test_cache_hit_with_identical_query(self):
         # Get baseline cache metrics
         response = requests.get(ROUTER_METRICS_URL)
         baseline_metrics = response.text
-        baseline_hits = (
-            extract_metric(baseline_metrics, "llm_router_cache_hits_total") or 0
-        )
+        baseline_hits = extract_metric(baseline_metrics, "llm_cache_hits_total") or 0
 
         self.print_request_info(
             payload={
-                "model": "gemma3:27b",
+                "model": "Model-A",
                 "messages": [
                     {"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": query},
@@ -123,7 +122,7 @@ def test_cache_hit_with_identical_query(self):
 
         # First request should be a cache miss
         payload = {
-            "model": "gemma3:27b",
+            "model": "Model-A",
             "messages": [
                 {"role": "system", "content": "You are a helpful assistant."},
                 {"role": "user", "content": query},
@@ -136,7 +135,7 @@ def test_cache_hit_with_identical_query(self):
         # First request
         self.print_subtest_header("First Request (Expected Cache Miss)")
         response1 = requests.post(
-            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload, timeout=10
+            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload, timeout=120
         )
 
         response1_json = response1.json()
@@ -157,7 +156,7 @@ def test_cache_hit_with_identical_query(self):
         # Second identical request
         self.print_subtest_header("Second Request (Expected Cache Hit)")
         response2 = requests.post(
-            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload, timeout=10
+            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload, timeout=120
         )
 
         response2_json = response2.json()
@@ -178,9 +177,7 @@ def test_cache_hit_with_identical_query(self):
         # Check if cache hits increased
         response = requests.get(ROUTER_METRICS_URL)
         updated_metrics = response.text
-        updated_hits = (
-            extract_metric(updated_metrics, "llm_router_cache_hits_total") or 0
-        )
+        updated_hits = extract_metric(updated_metrics, "llm_cache_hits_total") or 0
 
         passed = (model1 == model2) and (updated_hits > baseline_hits)
         self.print_test_result(
@@ -215,14 +212,12 @@ def test_cache_hit_with_similar_query(self):
         # Get baseline cache metrics
         response = requests.get(ROUTER_METRICS_URL)
         baseline_metrics = response.text
-        baseline_hits = (
-            extract_metric(baseline_metrics, "llm_router_cache_hits_total") or 0
-        )
+        baseline_hits = extract_metric(baseline_metrics, "llm_cache_hits_total") or 0
 
         # First request with original query
         self.print_subtest_header("Original Query")
         payload1 = {
-            "model": "gemma3:27b",
+            "model": "Model-A",
             "messages": [
                 {"role": "system", "content": "You are a helpful assistant."},
                 {"role": "user", "content": original_query},
@@ -237,7 +232,7 @@ def test_cache_hit_with_similar_query(self):
         headers = {"Content-Type": "application/json", "X-Session-ID": session_id}
 
         response1 = requests.post(
-            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload1, timeout=10
+            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload1, timeout=120
         )
 
         response1_json = response1.json()
@@ -259,7 +254,7 @@ def test_cache_hit_with_similar_query(self):
         # Second request with similar query
         self.print_subtest_header("Similar Query")
         payload2 = {
-            "model": "gemma3:27b",
+            "model": "Model-A",
             "messages": [
                 {"role": "system", "content": "You are a helpful assistant."},
                 {"role": "user", "content": similar_query},
@@ -273,7 +268,7 @@ def test_cache_hit_with_similar_query(self):
         )
 
         response2 = requests.post(
-            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload2, timeout=10
+            f"{ENVOY_URL}{OPENAI_ENDPOINT}", headers=headers, json=payload2, timeout=120
         )
 
         response2_json = response2.json()
@@ -295,9 +290,7 @@ def test_cache_hit_with_similar_query(self):
         # Check cache metrics
         response = requests.get(ROUTER_METRICS_URL)
         updated_metrics = response.text
-        updated_hits = (
-            extract_metric(updated_metrics, "llm_router_cache_hits_total") or 0
-        )
+        updated_hits = extract_metric(updated_metrics, "llm_cache_hits_total") or 0
 
         passed = (model1 == model2) and (updated_hits > baseline_hits)
         self.print_test_result(
@@ -328,10 +321,10 @@ def test_cache_metrics(self):
 
         # Look for specific cache metrics
         cache_metrics = [
-            "llm_router_cache_hits_total",
-            "llm_router_cache_misses_total",
-            "llm_router_cache_size",
-            "llm_router_cache_max_size",
+            "llm_cache_hits_total",
+            "llm_cache_misses_total",
+            "llm_cache_size",
+            "llm_cache_max_size",
        ]
 
         metrics_found = {}
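
For reference, the tests in this diff lean on an `extract_metric` helper (defined elsewhere in the file) to pull counter values such as `llm_cache_hits_total` out of the router's Prometheus-style `/metrics` output. Its body is not shown here; the following is a minimal sketch of what such a helper might look like, assuming the standard text exposition format (`name{labels} value`), not the file's actual implementation:

```python
import re
from typing import Optional


def extract_metric(metrics_text: str, metric_name: str) -> Optional[float]:
    """Sum every sample of `metric_name` in Prometheus text output.

    Hypothetical sketch: assumes lines of the form `name{labels} value`
    or `name value`, and returns None when the metric is absent.
    """
    pattern = re.compile(rf"^{re.escape(metric_name)}(?:\{{[^}}]*\}})?\s+(\S+)")
    total = None
    for line in metrics_text.splitlines():
        if line.startswith("#"):  # skip HELP/TYPE comment lines
            continue
        match = pattern.match(line)
        if match:
            total = (total or 0.0) + float(match.group(1))
    return total
```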