Skip to content

Commit 1678834

Browse files
clubanderson and claude committed
🐛 Improve e2e scale-up test diagnostics for debugging failures
- Parse external metrics JSON to extract actual metric value instead of truncating at 300 chars (which hid the value behind labels)
- Show deployment spec/total/ready/unavailable replica counts (not just ready) to distinguish "HPA never scaled" from "pod stuck Pending"
- List individual pod names and phases (including ContainerCreating, Unschedulable reasons) during deployment scale-up monitoring

Signed-off-by: Andy Anderson <andy@clubanderson.com>
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: Andrew Anderson <andy@clubanderson.com>
1 parent 6d0cad6 commit 1678834

File tree

1 file changed

+60
-5
lines changed

1 file changed

+60
-5
lines changed

test/e2e-openshift/sharegpt_scaleup_test.go

Lines changed: 60 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ package e2eopenshift
1818

1919
import (
2020
"context"
21+
"encoding/json"
2122
"fmt"
2223
"math"
2324
"os"
@@ -344,8 +345,20 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
344345
resultStr := string(result)
345346
g.Expect(resultStr).To(ContainSubstring(constants.WVADesiredReplicas), "Metric should be available")
346347
g.Expect(resultStr).To(ContainSubstring(vaName), "Metric should be for the correct variant")
347-
_, _ = fmt.Fprintf(GinkgoWriter, "External metrics API response (selector: %s): %s\n",
348-
hpaMetricSelector, truncateString(resultStr, 500))
348+
// Parse to extract the actual metric value for clear diagnostics
349+
var metricValueList struct {
350+
Items []struct {
351+
MetricName string `json:"metricName"`
352+
Value string `json:"value"`
353+
} `json:"items"`
354+
}
355+
if jErr := json.Unmarshal(result, &metricValueList); jErr == nil && len(metricValueList.Items) > 0 {
356+
_, _ = fmt.Fprintf(GinkgoWriter, "External metrics API: value=%s, items=%d (selector: %s)\n",
357+
metricValueList.Items[0].Value, len(metricValueList.Items), hpaMetricSelector)
358+
} else {
359+
_, _ = fmt.Fprintf(GinkgoWriter, "External metrics API response (selector: %s): %s\n",
360+
hpaMetricSelector, truncateString(resultStr, 500))
361+
}
349362
}, 5*time.Minute, 5*time.Second).Should(Succeed())
350363
})
351364

@@ -531,7 +544,19 @@ exit 1`,
531544
AbsPath("/apis/external.metrics.k8s.io/v1beta1/namespaces/" + model.namespace + "/" + constants.WVADesiredReplicas).
532545
Param("labelSelector", hpaMetricSelector).
533546
DoRaw(ctx); qErr == nil {
534-
_, _ = fmt.Fprintf(GinkgoWriter, " External metric (HPA labels): %s\n", truncateString(string(result), 300))
547+
// Parse the metric value from the JSON response rather than truncating the raw JSON
548+
var metricList struct {
549+
Items []struct {
550+
MetricName string `json:"metricName"`
551+
Value string `json:"value"`
552+
} `json:"items"`
553+
}
554+
if jErr := json.Unmarshal(result, &metricList); jErr == nil && len(metricList.Items) > 0 {
555+
_, _ = fmt.Fprintf(GinkgoWriter, " External metric value: %s (metric: %s, items: %d)\n",
556+
metricList.Items[0].Value, metricList.Items[0].MetricName, len(metricList.Items))
557+
} else {
558+
_, _ = fmt.Fprintf(GinkgoWriter, " External metric (HPA labels): %s\n", truncateString(string(result), 500))
559+
}
535560
} else {
536561
_, _ = fmt.Fprintf(GinkgoWriter, " External metric query error: %v\n", qErr)
537562
}
@@ -554,8 +579,38 @@ exit 1`,
554579
g.Expect(err).NotTo(HaveOccurred(), "Should be able to get deployment")
555580

556581
scaledReplicas = deploy.Status.ReadyReplicas
557-
_, _ = fmt.Fprintf(GinkgoWriter, "Current ready replicas: %d (initial: %d, desired: %d)\n",
558-
scaledReplicas, initialReplicas, scaledOptimized)
582+
specReplicas := int32(0)
583+
if deploy.Spec.Replicas != nil {
584+
specReplicas = *deploy.Spec.Replicas
585+
}
586+
_, _ = fmt.Fprintf(GinkgoWriter, "Deployment: spec=%d, total=%d, ready=%d, unavailable=%d (initial: %d, target: %d)\n",
587+
specReplicas, deploy.Status.Replicas, scaledReplicas,
588+
deploy.Status.UnavailableReplicas, initialReplicas, scaledOptimized)
589+
590+
// List pod phases to show Pending/ContainerCreating pods that aren't ready yet
591+
if deploy.Spec.Selector != nil {
592+
var selectorParts []string
593+
for k, v := range deploy.Spec.Selector.MatchLabels {
594+
selectorParts = append(selectorParts, fmt.Sprintf("%s=%s", k, v))
595+
}
596+
if pods, pErr := k8sClient.CoreV1().Pods(model.namespace).List(ctx, metav1.ListOptions{
597+
LabelSelector: strings.Join(selectorParts, ","),
598+
}); pErr == nil {
599+
for _, pod := range pods.Items {
600+
phase := string(pod.Status.Phase)
601+
reason := ""
602+
for _, cs := range pod.Status.ContainerStatuses {
603+
if cs.State.Waiting != nil {
604+
reason = cs.State.Waiting.Reason
605+
}
606+
}
607+
if reason != "" {
608+
phase = fmt.Sprintf("%s (%s)", phase, reason)
609+
}
610+
_, _ = fmt.Fprintf(GinkgoWriter, " Pod %s: %s\n", pod.Name, phase)
611+
}
612+
}
613+
}
559614

560615
if !lowLoad {
561616
g.Expect(deploy.Status.Replicas).To(BeNumerically(">", hpaMinReplicas),

0 commit comments

Comments
 (0)