2828# Expected metric families that should be present
2929EXPECTED_METRIC_FAMILIES = [
3030 # Core routing metrics
31- "llm_router_requests_total " ,
32- "llm_router_routing_decision " ,
33- "llm_router_model_selection_count " ,
34-
31+ "llm_model_requests_total " ,
32+ "llm_model_routing_latency_seconds " ,
33+ "llm_routing_reason_codes_total " ,
34+
3535 # Classification metrics
36- "llm_router_classification_duration_seconds" ,
37- "llm_router_category_classification_total" ,
38-
39- # Security metrics
40- "llm_router_jailbreak" ,
41- "llm_router_pii" ,
42-
36+ "llm_classifier_latency_seconds" ,
37+
4338 # Cache metrics (if enabled)
44- "llm_router_cache" ,
45-
39+ "llm_cache_hits_total" ,
40+ "llm_cache_misses_total" ,
41+ "llm_cache_operations_total" ,
42+
4643 # Performance metrics
47- "llm_router_request_duration_seconds " ,
48- "llm_router_response_size_bytes " ,
44+ "llm_model_completion_latency_seconds " ,
45+ "llm_model_tokens_total " ,
4946
5047 # System metrics
5148 "go_" , # Go runtime metrics
@@ -118,7 +115,7 @@ def setUp(self):
118115 f"{ ENVOY_URL } { OPENAI_ENDPOINT } " ,
119116 headers = {"Content-Type" : "application/json" },
120117 json = payload ,
121- timeout = 60 ,
118+ timeout = ( 10 , 60 ), # (connect timeout, read timeout)
122119 )
123120
124121 if response .status_code >= 500 :
@@ -263,7 +260,7 @@ def test_metrics_increase_with_requests(self):
263260 baseline_response = requests .get (ROUTER_METRICS_URL , timeout = 5 )
264261 baseline_metrics = baseline_response .text
265262
266- baseline_requests = extract_metric_value (baseline_metrics , "llm_router_requests_total " ) or 0
263+ baseline_requests = extract_metric_value (baseline_metrics , "llm_model_requests_total " ) or 0
267264
268265 self .print_subtest_header ("Baseline Metrics" )
269266 print (f"Baseline requests total: { baseline_requests } " )
@@ -291,7 +288,7 @@ def test_metrics_increase_with_requests(self):
291288 f"{ ENVOY_URL } { OPENAI_ENDPOINT } " ,
292289 headers = headers ,
293290 json = payload ,
294- timeout = 10 ,
291+ timeout = ( 10 , 60 ), # (connect timeout, read timeout)
295292 )
296293
297294 self .print_response_info (
@@ -310,7 +307,7 @@ def test_metrics_increase_with_requests(self):
310307 updated_response = requests .get (ROUTER_METRICS_URL , timeout = 5 )
311308 updated_metrics = updated_response .text
312309
313- updated_requests = extract_metric_value (updated_metrics , "llm_router_requests_total " ) or 0
310+ updated_requests = extract_metric_value (updated_metrics , "llm_model_requests_total " ) or 0
314311
315312 print (f"\n Updated requests total: { updated_requests } " )
316313 requests_increase = updated_requests - baseline_requests
@@ -337,9 +334,9 @@ def test_performance_metrics_present(self):
337334 metrics_text = response .text
338335
339336 performance_metrics = [
340- "llm_router_request_duration_seconds " ,
341- "llm_router_classification_duration_seconds " ,
342- "llm_router_routing_latency_ms " ,
337+ "llm_model_completion_latency_seconds " ,
338+ "llm_classifier_latency_seconds " ,
339+ "llm_model_routing_latency_seconds " ,
343340 ]
344341
345342 found_metrics = {}
0 commit comments