@@ -145,6 +145,38 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
145145 initialReplicas = deploy .Status .ReadyReplicas
146146 _ , _ = fmt .Fprintf (GinkgoWriter , "Initial ready replicas: %d\n " , initialReplicas )
147147
148+ // Get HPA first to know minReplicas for VA stabilization check
149+ By ("verifying HPA exists and getting minReplicas" )
150+ hpaList , err := k8sClient .AutoscalingV2 ().HorizontalPodAutoscalers (model .namespace ).List (ctx , metav1.ListOptions {
151+ LabelSelector : "app.kubernetes.io/name=workload-variant-autoscaler" ,
152+ })
153+ Expect (err ).NotTo (HaveOccurred (), "Should be able to list HPAs" )
154+ Expect (hpaList .Items ).NotTo (BeEmpty (), "At least one WVA HPA should exist" )
155+
156+ // Select the HPA that targets the expected deployment
157+ var hpa * autoscalingv2.HorizontalPodAutoscaler
158+ for i := range hpaList .Items {
159+ if hpaList .Items [i ].Spec .ScaleTargetRef .Name == model .deployment {
160+ hpa = & hpaList .Items [i ]
161+ break
162+ }
163+ }
164+ Expect (hpa ).NotTo (BeNil (), "An HPA targeting deployment %s should exist" , model .deployment )
165+ hpaName = hpa .Name
166+ hpaMinReplicas = * hpa .Spec .MinReplicas
167+ _ , _ = fmt .Fprintf (GinkgoWriter , "Found HPA: %s (targets %s, minReplicas=%d)\n " , hpaName , model .deployment , hpaMinReplicas )
168+
169+ Expect (hpa .Spec .Metrics ).To (HaveLen (1 ), "HPA should have one metric" )
170+ Expect (hpa .Spec .Metrics [0 ].Type ).To (Equal (autoscalingv2 .ExternalMetricSourceType ), "HPA should use external metrics" )
171+ Expect (hpa .Spec .Metrics [0 ].External .Metric .Name ).To (Equal (constants .InfernoDesiredReplicas ), "HPA should use inferno_desired_replicas metric" )
172+
173+ By ("verifying gateway service exists for load routing" )
174+ // Traffic goes through the Istio gateway to be properly routed via InferencePool/EPP
175+ // The gateway service is created by the llm-d-infra chart
176+ gatewaySvc , err := k8sClient .CoreV1 ().Services (model .namespace ).Get (ctx , model .gatewayService , metav1.GetOptions {})
177+ Expect (err ).NotTo (HaveOccurred (), "Gateway service %s should exist in namespace %s" , model .gatewayService , model .namespace )
178+ _ , _ = fmt .Fprintf (GinkgoWriter , "Found gateway service: %s (ClusterIP: %s)\n " , gatewaySvc .Name , gatewaySvc .Spec .ClusterIP )
179+
148180 By ("recording initial VariantAutoscaling state" )
149181 vaList := & v1alpha1.VariantAutoscalingList {}
150182 err = crClient .List (ctx , vaList , client .InNamespace (model .namespace ), client.MatchingLabels {
@@ -166,7 +198,7 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
166198 _ , _ = fmt .Fprintf (GinkgoWriter , "Found VariantAutoscaling: %s (targets %s)\n " , vaName , model .deployment )
167199
168200 // Wait for VA to stabilize at minReplicas before recording initial state
169- // This ensures we're measuring scale-up from load, not initial startup
201+ // This ensures we're measuring scale-up from load, not residual scale from prior activity
170202 By ("waiting for VA to stabilize at minReplicas" )
171203 Eventually (func (g Gomega ) {
172204 currentVA := & v1alpha1.VariantAutoscaling {}
@@ -176,9 +208,9 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
176208 }, currentVA )
177209 g .Expect (err ).NotTo (HaveOccurred ())
178210 optimized := int32 (currentVA .Status .DesiredOptimizedAlloc .NumReplicas )
179- _ , _ = fmt .Fprintf (GinkgoWriter , "Waiting for VA to stabilize: optimized=%d, minReplicas=%d\n " , optimized , hpaMinReplicas )
180- g .Expect (optimized ).To (BeNumerically ( ">=" , 1 ), "VA should have at least 1 optimized replica " )
181- }, 2 * time .Minute , 5 * time .Second ).Should (Succeed ())
211+ _ , _ = fmt .Fprintf (GinkgoWriter , "Waiting for VA to stabilize: optimized=%d, target minReplicas=%d\n " , optimized , hpaMinReplicas )
212+ g .Expect (optimized ).To (Equal ( hpaMinReplicas ), "VA should stabilize at minReplicas before load test " )
213+ }, 5 * time .Minute , 10 * time .Second ).Should (Succeed ())
182214
183215 // Re-read VA to get stabilized state
184216 err = crClient .Get (ctx , client.ObjectKey {
@@ -188,39 +220,6 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
188220 Expect (err ).NotTo (HaveOccurred ())
189221 initialOptimized = int32 (va .Status .DesiredOptimizedAlloc .NumReplicas )
190222 _ , _ = fmt .Fprintf (GinkgoWriter , "Initial optimized replicas (after stabilization): %d\n " , initialOptimized )
191-
192- By ("verifying HPA exists and is configured correctly" )
193- hpaList , err := k8sClient .AutoscalingV2 ().HorizontalPodAutoscalers (model .namespace ).List (ctx , metav1.ListOptions {
194- LabelSelector : "app.kubernetes.io/name=workload-variant-autoscaler" ,
195- })
196- Expect (err ).NotTo (HaveOccurred (), "Should be able to list HPAs" )
197- Expect (hpaList .Items ).NotTo (BeEmpty (), "At least one WVA HPA should exist" )
198-
199- // Select the HPA that targets the expected deployment
200- var hpa * autoscalingv2.HorizontalPodAutoscaler
201- for i := range hpaList .Items {
202- if hpaList .Items [i ].Spec .ScaleTargetRef .Name == model .deployment {
203- hpa = & hpaList .Items [i ]
204- break
205- }
206- }
207- Expect (hpa ).NotTo (BeNil (), "An HPA targeting deployment %s should exist" , model .deployment )
208- hpaName = hpa .Name
209- _ , _ = fmt .Fprintf (GinkgoWriter , "Found HPA: %s (targets %s)\n " , hpaName , model .deployment )
210-
211- By ("verifying gateway service exists for load routing" )
212- // Traffic goes through the Istio gateway to be properly routed via InferencePool/EPP
213- // The gateway service is created by the llm-d-infra chart
214- gatewaySvc , err := k8sClient .CoreV1 ().Services (model .namespace ).Get (ctx , model .gatewayService , metav1.GetOptions {})
215- Expect (err ).NotTo (HaveOccurred (), "Gateway service %s should exist in namespace %s" , model .gatewayService , model .namespace )
216- _ , _ = fmt .Fprintf (GinkgoWriter , "Found gateway service: %s (ClusterIP: %s)\n " , gatewaySvc .Name , gatewaySvc .Spec .ClusterIP )
217-
218- Expect (hpa .Spec .Metrics ).To (HaveLen (1 ), "HPA should have one metric" )
219- Expect (hpa .Spec .Metrics [0 ].Type ).To (Equal (autoscalingv2 .ExternalMetricSourceType ), "HPA should use external metrics" )
220- Expect (hpa .Spec .Metrics [0 ].External .Metric .Name ).To (Equal (constants .InfernoDesiredReplicas ), "HPA should use inferno_desired_replicas metric" )
221-
222- hpaMinReplicas = * hpa .Spec .MinReplicas
223- _ , _ = fmt .Fprintf (GinkgoWriter , "HPA minReplicas: %d\n " , hpaMinReplicas )
224223 })
225224
226225 It ("should verify external metrics API is accessible" , func () {
0 commit comments