@@ -56,6 +56,14 @@ func sanitizeK8sName(name string) string {
5656
5757var lowLoad = numPrompts <= 2000 && requestRate <= 8
5858
// truncateString returns s unchanged when it is at most n bytes long.
// Otherwise it returns a prefix of s of at most n bytes followed by "...".
//
// The cut is moved back to the start of the rune that contains byte offset n,
// so a multi-byte UTF-8 sequence is never split and the result stays valid
// UTF-8 whenever s is. For pure-ASCII input the prefix is exactly n bytes.
// A negative n is treated as 0 instead of panicking on the slice expression.
func truncateString(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}

	// Ranging over a string yields the byte offset at which each rune starts
	// (invalid bytes count as one-byte runes). Keep the largest offset <= n.
	cut := 0
	for i := range s {
		if i > n {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}
66+
5967// Load generation configuration constants
6068// These values were tuned empirically to achieve ~2-3 replica scale-up without excessive scaling.
6169// Original values (baseLoadWorkers=10, batchSize=50, batchSleepDuration=0.1) caused cascade
@@ -156,7 +164,8 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
156164 vaName string
157165 scaledReplicas int32
158166 scaledOptimized int32
159- scaledLoadWorkers int // Load workers scaled to initial replicas
167+ scaledLoadWorkers int // Load workers scaled to initial replicas
168+ hpaMetricSelector string // Label selector matching the HPA's external metric query
160169 jobCompletionTimeout = 10 * time .Minute
161170 )
162171
@@ -232,6 +241,17 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
232241 Expect (hpa .Spec .Metrics [0 ].Type ).To (Equal (autoscalingv2 .ExternalMetricSourceType ), "HPA should use external metrics" )
233242 Expect (hpa .Spec .Metrics [0 ].External .Metric .Name ).To (Equal (constants .WVADesiredReplicas ), "HPA should use wva_desired_replicas metric" )
234243
244+ // Extract the HPA's metric label selector for diagnostic external metrics queries
245+ // This allows us to query the external metrics API with the exact same labels the HPA uses
246+ if hpa .Spec .Metrics [0 ].External .Metric .Selector != nil {
247+ var selectorParts []string
248+ for k , v := range hpa .Spec .Metrics [0 ].External .Metric .Selector .MatchLabels {
249+ selectorParts = append (selectorParts , fmt .Sprintf ("%s=%s" , k , v ))
250+ }
251+ hpaMetricSelector = strings .Join (selectorParts , "," )
252+ _ , _ = fmt .Fprintf (GinkgoWriter , "HPA metric selector: %s\n " , hpaMetricSelector )
253+ }
254+
235255 By ("verifying gateway service exists for load routing" )
236256 // Traffic goes through the Istio gateway to be properly routed via InferencePool/EPP
237257 // The gateway service is created by the llm-d-infra chart
@@ -309,15 +329,23 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
309329 })
310330
311331 It ("should verify external metrics API is accessible" , func () {
312- By ("querying external metrics API for wva_desired_replicas" )
332+ By ("querying external metrics API for wva_desired_replicas with exact HPA label selectors " )
313333 Eventually (func (g Gomega ) {
314- result , err := k8sClient .RESTClient ().
334+ // Query with the exact label selectors the HPA uses (including controller_instance
335+ // if set). This catches label propagation issues that a bare query would miss.
336+ req := k8sClient .RESTClient ().
315337 Get ().
316- AbsPath ("/apis/external.metrics.k8s.io/v1beta1/namespaces/" + model .namespace + "/" + constants .WVADesiredReplicas ).
317- DoRaw (ctx )
338+ AbsPath ("/apis/external.metrics.k8s.io/v1beta1/namespaces/" + model .namespace + "/" + constants .WVADesiredReplicas )
339+ if hpaMetricSelector != "" {
340+ req = req .Param ("labelSelector" , hpaMetricSelector )
341+ }
342+ result , err := req .DoRaw (ctx )
318343 g .Expect (err ).NotTo (HaveOccurred (), "Should be able to query external metrics API" )
319- g .Expect (string (result )).To (ContainSubstring (constants .WVADesiredReplicas ), "Metric should be available" )
320- g .Expect (string (result )).To (ContainSubstring (vaName ), "Metric should be for the correct variant" )
344+ resultStr := string (result )
345+ g .Expect (resultStr ).To (ContainSubstring (constants .WVADesiredReplicas ), "Metric should be available" )
346+ g .Expect (resultStr ).To (ContainSubstring (vaName ), "Metric should be for the correct variant" )
347+ _ , _ = fmt .Fprintf (GinkgoWriter , "External metrics API response (selector: %s): %s\n " ,
348+ hpaMetricSelector , truncateString (resultStr , 500 ))
321349 }, 5 * time .Minute , 5 * time .Second ).Should (Succeed ())
322350 })
323351
@@ -488,12 +516,33 @@ exit 1`,
488516 _ , _ = fmt .Fprintf (GinkgoWriter , "HPA desiredReplicas: %d, currentReplicas: %d\n " ,
489517 hpa .Status .DesiredReplicas , hpa .Status .CurrentReplicas )
490518
519+ // Log HPA conditions for diagnostic insight (e.g., ScalingActive, AbleToScale)
520+ for _ , cond := range hpa .Status .Conditions {
521+ if cond .Status != "True" || cond .Type == autoscalingv2 .ScalingActive {
522+ _ , _ = fmt .Fprintf (GinkgoWriter , " HPA condition %s=%s: %s\n " ,
523+ cond .Type , cond .Status , cond .Message )
524+ }
525+ }
526+
527+ // Diagnostic: query external metrics API with exact HPA labels to see what the adapter returns
528+ if hpaMetricSelector != "" {
529+ if result , qErr := k8sClient .RESTClient ().
530+ Get ().
531+ AbsPath ("/apis/external.metrics.k8s.io/v1beta1/namespaces/" + model .namespace + "/" + constants .WVADesiredReplicas ).
532+ Param ("labelSelector" , hpaMetricSelector ).
533+ DoRaw (ctx ); qErr == nil {
534+ _ , _ = fmt .Fprintf (GinkgoWriter , " External metric (HPA labels): %s\n " , truncateString (string (result ), 300 ))
535+ } else {
536+ _ , _ = fmt .Fprintf (GinkgoWriter , " External metric query error: %v\n " , qErr )
537+ }
538+ }
539+
491540 if ! lowLoad {
492541 // HPA should also desire more replicas than initial
493542 g .Expect (hpa .Status .DesiredReplicas ).To (BeNumerically (">" , initialOptimized ),
494543 fmt .Sprintf ("HPA should desire more replicas than initial (desired: %d, initial: %d)" , hpa .Status .DesiredReplicas , initialOptimized ))
495544 }
496- }, 5 * time .Minute , 10 * time .Second ).Should (Succeed ())
545+ }, 8 * time .Minute , 10 * time .Second ).Should (Succeed ())
497546
498547 _ , _ = fmt .Fprintf (GinkgoWriter , "WVA detected load and recommended %d replicas (up from %d)\n " , scaledOptimized , initialOptimized )
499548 })
0 commit comments