Skip to content

Commit 97bd05a

Browse files
clubanderson and claude committed
Fix VA stabilization to wait for exact minReplicas before load test
- Move HPA retrieval before the VA stabilization check so that minReplicas is known
- Change the stabilization check from >= 1 to == hpaMinReplicas
- Increase the stabilization timeout to 5 minutes with 10s polling intervals
- This ensures the initial state is captured at baseline, not at residual scale
- Disable cleanup for testing (SKIP_CLEANUP now defaults to 'true')

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 52e3813 commit 97bd05a

File tree

2 files changed

+37
-38
lines changed

2 files changed

+37
-38
lines changed

.github/workflows/ci-e2e-openshift.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ jobs:
131131
NUM_PROMPTS: ${{ github.event.inputs.num_prompts || '3000' }}
132132
MAX_NUM_SEQS: ${{ github.event.inputs.max_num_seqs || '1' }}
133133
HPA_STABILIZATION_SECONDS: ${{ github.event.inputs.hpa_stabilization_seconds || '30' }}
134-
SKIP_CLEANUP: ${{ github.event.inputs.skip_cleanup || 'false' }}
134+
SKIP_CLEANUP: ${{ github.event.inputs.skip_cleanup || 'true' }}
135135
# PR-specific namespaces for isolation between concurrent PR tests
136136
# Primary llm-d namespace (Model A1 + A2)
137137
LLMD_NAMESPACE: llm-d-inference-scheduler-pr-${{ github.event.pull_request.number || github.run_id }}

test/e2e-openshift/sharegpt_scaleup_test.go

Lines changed: 36 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -145,6 +145,38 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
145145
initialReplicas = deploy.Status.ReadyReplicas
146146
_, _ = fmt.Fprintf(GinkgoWriter, "Initial ready replicas: %d\n", initialReplicas)
147147

148+
// Get HPA first to know minReplicas for VA stabilization check
149+
By("verifying HPA exists and getting minReplicas")
150+
hpaList, err := k8sClient.AutoscalingV2().HorizontalPodAutoscalers(model.namespace).List(ctx, metav1.ListOptions{
151+
LabelSelector: "app.kubernetes.io/name=workload-variant-autoscaler",
152+
})
153+
Expect(err).NotTo(HaveOccurred(), "Should be able to list HPAs")
154+
Expect(hpaList.Items).NotTo(BeEmpty(), "At least one WVA HPA should exist")
155+
156+
// Select the HPA that targets the expected deployment
157+
var hpa *autoscalingv2.HorizontalPodAutoscaler
158+
for i := range hpaList.Items {
159+
if hpaList.Items[i].Spec.ScaleTargetRef.Name == model.deployment {
160+
hpa = &hpaList.Items[i]
161+
break
162+
}
163+
}
164+
Expect(hpa).NotTo(BeNil(), "An HPA targeting deployment %s should exist", model.deployment)
165+
hpaName = hpa.Name
166+
hpaMinReplicas = *hpa.Spec.MinReplicas
167+
_, _ = fmt.Fprintf(GinkgoWriter, "Found HPA: %s (targets %s, minReplicas=%d)\n", hpaName, model.deployment, hpaMinReplicas)
168+
169+
Expect(hpa.Spec.Metrics).To(HaveLen(1), "HPA should have one metric")
170+
Expect(hpa.Spec.Metrics[0].Type).To(Equal(autoscalingv2.ExternalMetricSourceType), "HPA should use external metrics")
171+
Expect(hpa.Spec.Metrics[0].External.Metric.Name).To(Equal(constants.InfernoDesiredReplicas), "HPA should use inferno_desired_replicas metric")
172+
173+
By("verifying gateway service exists for load routing")
174+
// Traffic goes through the Istio gateway to be properly routed via InferencePool/EPP
175+
// The gateway service is created by the llm-d-infra chart
176+
gatewaySvc, err := k8sClient.CoreV1().Services(model.namespace).Get(ctx, model.gatewayService, metav1.GetOptions{})
177+
Expect(err).NotTo(HaveOccurred(), "Gateway service %s should exist in namespace %s", model.gatewayService, model.namespace)
178+
_, _ = fmt.Fprintf(GinkgoWriter, "Found gateway service: %s (ClusterIP: %s)\n", gatewaySvc.Name, gatewaySvc.Spec.ClusterIP)
179+
148180
By("recording initial VariantAutoscaling state")
149181
vaList := &v1alpha1.VariantAutoscalingList{}
150182
err = crClient.List(ctx, vaList, client.InNamespace(model.namespace), client.MatchingLabels{
@@ -166,7 +198,7 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
166198
_, _ = fmt.Fprintf(GinkgoWriter, "Found VariantAutoscaling: %s (targets %s)\n", vaName, model.deployment)
167199

168200
// Wait for VA to stabilize at minReplicas before recording initial state
169-
// This ensures we're measuring scale-up from load, not initial startup
201+
// This ensures we're measuring scale-up from load, not residual scale from prior activity
170202
By("waiting for VA to stabilize at minReplicas")
171203
Eventually(func(g Gomega) {
172204
currentVA := &v1alpha1.VariantAutoscaling{}
@@ -176,9 +208,9 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
176208
}, currentVA)
177209
g.Expect(err).NotTo(HaveOccurred())
178210
optimized := int32(currentVA.Status.DesiredOptimizedAlloc.NumReplicas)
179-
_, _ = fmt.Fprintf(GinkgoWriter, "Waiting for VA to stabilize: optimized=%d, minReplicas=%d\n", optimized, hpaMinReplicas)
180-
g.Expect(optimized).To(BeNumerically(">=", 1), "VA should have at least 1 optimized replica")
181-
}, 2*time.Minute, 5*time.Second).Should(Succeed())
211+
_, _ = fmt.Fprintf(GinkgoWriter, "Waiting for VA to stabilize: optimized=%d, target minReplicas=%d\n", optimized, hpaMinReplicas)
212+
g.Expect(optimized).To(Equal(hpaMinReplicas), "VA should stabilize at minReplicas before load test")
213+
}, 5*time.Minute, 10*time.Second).Should(Succeed())
182214

183215
// Re-read VA to get stabilized state
184216
err = crClient.Get(ctx, client.ObjectKey{
@@ -188,39 +220,6 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
188220
Expect(err).NotTo(HaveOccurred())
189221
initialOptimized = int32(va.Status.DesiredOptimizedAlloc.NumReplicas)
190222
_, _ = fmt.Fprintf(GinkgoWriter, "Initial optimized replicas (after stabilization): %d\n", initialOptimized)
191-
192-
By("verifying HPA exists and is configured correctly")
193-
hpaList, err := k8sClient.AutoscalingV2().HorizontalPodAutoscalers(model.namespace).List(ctx, metav1.ListOptions{
194-
LabelSelector: "app.kubernetes.io/name=workload-variant-autoscaler",
195-
})
196-
Expect(err).NotTo(HaveOccurred(), "Should be able to list HPAs")
197-
Expect(hpaList.Items).NotTo(BeEmpty(), "At least one WVA HPA should exist")
198-
199-
// Select the HPA that targets the expected deployment
200-
var hpa *autoscalingv2.HorizontalPodAutoscaler
201-
for i := range hpaList.Items {
202-
if hpaList.Items[i].Spec.ScaleTargetRef.Name == model.deployment {
203-
hpa = &hpaList.Items[i]
204-
break
205-
}
206-
}
207-
Expect(hpa).NotTo(BeNil(), "An HPA targeting deployment %s should exist", model.deployment)
208-
hpaName = hpa.Name
209-
_, _ = fmt.Fprintf(GinkgoWriter, "Found HPA: %s (targets %s)\n", hpaName, model.deployment)
210-
211-
By("verifying gateway service exists for load routing")
212-
// Traffic goes through the Istio gateway to be properly routed via InferencePool/EPP
213-
// The gateway service is created by the llm-d-infra chart
214-
gatewaySvc, err := k8sClient.CoreV1().Services(model.namespace).Get(ctx, model.gatewayService, metav1.GetOptions{})
215-
Expect(err).NotTo(HaveOccurred(), "Gateway service %s should exist in namespace %s", model.gatewayService, model.namespace)
216-
_, _ = fmt.Fprintf(GinkgoWriter, "Found gateway service: %s (ClusterIP: %s)\n", gatewaySvc.Name, gatewaySvc.Spec.ClusterIP)
217-
218-
Expect(hpa.Spec.Metrics).To(HaveLen(1), "HPA should have one metric")
219-
Expect(hpa.Spec.Metrics[0].Type).To(Equal(autoscalingv2.ExternalMetricSourceType), "HPA should use external metrics")
220-
Expect(hpa.Spec.Metrics[0].External.Metric.Name).To(Equal(constants.InfernoDesiredReplicas), "HPA should use inferno_desired_replicas metric")
221-
222-
hpaMinReplicas = *hpa.Spec.MinReplicas
223-
_, _ = fmt.Fprintf(GinkgoWriter, "HPA minReplicas: %d\n", hpaMinReplicas)
224223
})
225224

226225
It("should verify external metrics API is accessible", func() {

0 commit comments

Comments
 (0)