Skip to content

Commit 36d2237

Browse files
authored
Merge pull request kubernetes#3020 from jbartosik/deflake-oom-test
Enable and deflake oom test
2 parents ae6c831 + a444c15 commit 36d2237

File tree

7 files changed

+201
-31
lines changed

7 files changed

+201
-31
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
# Stress image used by the VPA OOM e2e test: the stress binary allocates
# memory in 8000-byte chunks up to ~10 GB, so a container running it will
# reliably exceed any reasonable memory limit and be OOM-killed repeatedly.
FROM gcr.io/google-containers/stress:v1
ENTRYPOINT ["/stress", "--mem-total", "10000000000", "--logtostderr", "--mem-alloc-size", "8000"]

vertical-pod-autoscaler/e2e/v1/autoscaling_utils.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ const (
6060
customMetricName = "QPS"
6161
serviceInitializationTimeout = 2 * time.Minute
6262
serviceInitializationInterval = 15 * time.Second
63+
// TODO(jbartosik): put the image in a VPA project
64+
stressImage = "gcr.io/jbartosik-gke-dev/stress:0.10"
6365
)
6466

6567
var (
@@ -363,7 +365,7 @@ func runServiceAndWorkloadForResourceConsumer(c clientset.Interface, ns, name st
363365
Timeout: timeoutRC,
364366
Replicas: replicas,
365367
CpuRequest: cpuRequestMillis,
366-
MemRequest: memRequestMb * 1024 * 1024, // MemLimit is in bytes
368+
MemRequest: memRequestMb * 1024 * 1024, // Mem Request is in bytes
367369
Annotations: podAnnotations,
368370
}
369371

@@ -427,3 +429,27 @@ func runServiceAndWorkloadForResourceConsumer(c clientset.Interface, ns, name st
427429
framework.ExpectNoError(framework.WaitForServiceEndpointsNum(
428430
c, ns, controllerName, 1, startServiceInterval, startServiceTimeout))
429431
}
432+
433+
func runOomingReplicationController(c clientset.Interface, ns, name string, replicas int) {
434+
ginkgo.By(fmt.Sprintf("Running OOMing RC %s with %v replicas", name, replicas))
435+
436+
rcConfig := testutils.RCConfig{
437+
Client: c,
438+
Image: stressImage,
439+
Name: name,
440+
Namespace: ns,
441+
Timeout: timeoutRC,
442+
Replicas: replicas,
443+
Annotations: make(map[string]string),
444+
MemRequest: 1024 * 1024 * 1024,
445+
MemLimit: 1024 * 1024 * 1024,
446+
}
447+
448+
dpConfig := testutils.DeploymentConfig{
449+
RCConfig: rcConfig,
450+
}
451+
ginkgo.By(fmt.Sprintf("Creating deployment %s in namespace %s", dpConfig.Name, dpConfig.Namespace))
452+
dpConfig.NodeDumpFunc = framework.DumpNodeDebugInfo
453+
dpConfig.ContainerDumpFunc = framework.LogFailedContainers
454+
framework.ExpectNoError(testutils.RunDeployment(dpConfig))
455+
}

vertical-pod-autoscaler/e2e/v1/common.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,17 @@ func NewHamsterDeploymentWithResourcesAndLimits(f *framework.Framework, cpuQuant
192192
return d
193193
}
194194

195+
func getPodSelectorExcludingDonePodsOrDie() string {
196+
stringSelector := "status.phase!=" + string(apiv1.PodSucceeded) +
197+
",status.phase!=" + string(apiv1.PodFailed)
198+
selector := fields.ParseSelectorOrDie(stringSelector)
199+
return selector.String()
200+
}
201+
195202
// GetHamsterPods returns running hamster pods (matched by hamsterLabels)
196203
func GetHamsterPods(f *framework.Framework) (*apiv1.PodList, error) {
197204
label := labels.SelectorFromSet(labels.Set(hamsterLabels))
198-
selector := fields.ParseSelectorOrDie("status.phase!=" + string(apiv1.PodSucceeded) +
199-
",status.phase!=" + string(apiv1.PodFailed))
200-
options := metav1.ListOptions{LabelSelector: label.String(), FieldSelector: selector.String()}
205+
options := metav1.ListOptions{LabelSelector: label.String(), FieldSelector: getPodSelectorExcludingDonePodsOrDie()}
201206
return f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(options)
202207
}
203208

vertical-pod-autoscaler/e2e/v1/full_vpa.go

Lines changed: 65 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package autoscaling
1818

1919
import (
2020
"fmt"
21+
"time"
2122

2223
autoscaling "k8s.io/api/autoscaling/v1"
2324
apiv1 "k8s.io/api/core/v1"
@@ -38,8 +39,9 @@ const (
3839
minimalMemoryLowerBound = "20Mi"
3940
minimalMemoryUpperBound = "300Mi"
4041
// the initial values should be outside minimal bounds
41-
initialCPU = int64(10) // mCPU
42-
initialMemory = int64(10) // MB
42+
initialCPU = int64(10) // mCPU
43+
initialMemory = int64(10) // MB
44+
oomTestTimeout = 8 * time.Minute
4345
)
4446

4547
var _ = FullVpaE2eDescribe("Pods under VPA", func() {
@@ -91,37 +93,83 @@ var _ = FullVpaE2eDescribe("Pods under VPA", func() {
9193
ginkgo.It("have cpu requests growing with usage", func() {
9294
// initial CPU usage is low so a minimal recommendation is expected
9395
err := waitForResourceRequestInRangeInPods(
94-
f, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU,
96+
f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU,
9597
ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound))
9698
gomega.Expect(err).NotTo(gomega.HaveOccurred())
9799

98100
// consume more CPU to get a higher recommendation
99101
rc.ConsumeCPU(600 * replicas)
100102
err = waitForResourceRequestInRangeInPods(
101-
f, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU,
103+
f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU,
102104
ParseQuantityOrDie("500m"), ParseQuantityOrDie("900m"))
103105
gomega.Expect(err).NotTo(gomega.HaveOccurred())
104106
})
105107

106108
ginkgo.It("have memory requests growing with usage", func() {
107109
// initial memory usage is low so a minimal recommendation is expected
108110
err := waitForResourceRequestInRangeInPods(
109-
f, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory,
111+
f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory,
110112
ParseQuantityOrDie(minimalMemoryLowerBound), ParseQuantityOrDie(minimalMemoryUpperBound))
111113
gomega.Expect(err).NotTo(gomega.HaveOccurred())
112114

113115
// consume more memory to get a higher recommendation
114116
// NOTE: large range given due to unpredictability of actual memory usage
115117
rc.ConsumeMem(1024 * replicas)
116118
err = waitForResourceRequestInRangeInPods(
117-
f, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory,
119+
f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceMemory,
118120
ParseQuantityOrDie("900Mi"), ParseQuantityOrDie("4000Mi"))
119121
gomega.Expect(err).NotTo(gomega.HaveOccurred())
120122
})
121123
})
122124

123-
func waitForPodsMatch(f *framework.Framework, listOptions metav1.ListOptions, matcher func(pod apiv1.Pod) bool) error {
124-
return wait.PollImmediate(pollInterval, pollTimeout, func() (bool, error) {
125+
var _ = FullVpaE2eDescribe("OOMing pods under VPA", func() {
126+
var (
127+
vpaClientSet *vpa_clientset.Clientset
128+
vpaCRD *vpa_types.VerticalPodAutoscaler
129+
)
130+
const replicas = 3
131+
132+
f := framework.NewDefaultFramework("vertical-pod-autoscaling")
133+
134+
ginkgo.BeforeEach(func() {
135+
ns := f.Namespace.Name
136+
ginkgo.By("Setting up a hamster deployment")
137+
138+
runOomingReplicationController(
139+
f.ClientSet,
140+
ns,
141+
"hamster",
142+
replicas)
143+
ginkgo.By("Setting up a VPA CRD")
144+
config, err := framework.LoadConfig()
145+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
146+
147+
vpaCRD = NewVPA(f, "hamster-vpa", &autoscaling.CrossVersionObjectReference{
148+
APIVersion: "v1",
149+
Kind: "Deployment",
150+
Name: "hamster",
151+
})
152+
153+
vpaClientSet = vpa_clientset.NewForConfigOrDie(config)
154+
vpaClient := vpaClientSet.AutoscalingV1()
155+
_, err = vpaClient.VerticalPodAutoscalers(ns).Create(vpaCRD)
156+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
157+
})
158+
159+
ginkgo.It("have memory requests growing with OOMs", func() {
160+
listOptions := metav1.ListOptions{
161+
LabelSelector: "name=hamster",
162+
FieldSelector: getPodSelectorExcludingDonePodsOrDie(),
163+
}
164+
err := waitForResourceRequestInRangeInPods(
165+
f, oomTestTimeout, listOptions, apiv1.ResourceMemory,
166+
ParseQuantityOrDie("1400Mi"), ParseQuantityOrDie("10000Mi"))
167+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
168+
})
169+
})
170+
171+
func waitForPodsMatch(f *framework.Framework, timeout time.Duration, listOptions metav1.ListOptions, matcher func(pod apiv1.Pod) bool) error {
172+
return wait.PollImmediate(pollInterval, timeout, func() (bool, error) {
125173

126174
ns := f.Namespace.Name
127175
c := f.ClientSet
@@ -135,18 +183,23 @@ func waitForPodsMatch(f *framework.Framework, listOptions metav1.ListOptions, ma
135183
return false, nil
136184
}
137185

186+
// Run matcher on all pods, even if we find pod that doesn't match early.
187+
// This allows the matcher to write logs for all pods. This in turns makes
188+
// it easier to spot some problems (for example unexpected pods in the list
189+
// results).
190+
result := true
138191
for _, pod := range podList.Items {
139192
if !matcher(pod) {
140-
return false, nil
193+
result = false
141194
}
142195
}
143-
return true, nil
196+
return result, nil
144197

145198
})
146199
}
147200

148-
func waitForResourceRequestInRangeInPods(f *framework.Framework, listOptions metav1.ListOptions, resourceName apiv1.ResourceName, lowerBound, upperBound resource.Quantity) error {
149-
err := waitForPodsMatch(f, listOptions,
201+
func waitForResourceRequestInRangeInPods(f *framework.Framework, timeout time.Duration, listOptions metav1.ListOptions, resourceName apiv1.ResourceName, lowerBound, upperBound resource.Quantity) error {
202+
err := waitForPodsMatch(f, timeout, listOptions,
150203
func(pod apiv1.Pod) bool {
151204
resourceRequest, found := pod.Spec.Containers[0].Resources.Requests[resourceName]
152205
framework.Logf("Comparing %v request %v against range of (%v, %v)", resourceName, resourceRequest, lowerBound, upperBound)

vertical-pod-autoscaler/e2e/v1beta2/autoscaling_utils.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ const (
6060
customMetricName = "QPS"
6161
serviceInitializationTimeout = 2 * time.Minute
6262
serviceInitializationInterval = 15 * time.Second
63+
// TODO(jbartosik): put the image in a VPA project
64+
stressImage = "gcr.io/jbartosik-gke-dev/stress:0.10"
6365
)
6466

6567
var (
@@ -427,3 +429,27 @@ func runServiceAndWorkloadForResourceConsumer(c clientset.Interface, ns, name st
427429
framework.ExpectNoError(framework.WaitForServiceEndpointsNum(
428430
c, ns, controllerName, 1, startServiceInterval, startServiceTimeout))
429431
}
432+
433+
func runOomingReplicationController(c clientset.Interface, ns, name string, replicas int) {
434+
ginkgo.By(fmt.Sprintf("Running OOMing RC %s with %v replicas", name, replicas))
435+
436+
rcConfig := testutils.RCConfig{
437+
Client: c,
438+
Image: stressImage,
439+
Name: name,
440+
Namespace: ns,
441+
Timeout: timeoutRC,
442+
Replicas: replicas,
443+
Annotations: make(map[string]string),
444+
MemRequest: 1024 * 1024 * 1024,
445+
MemLimit: 1024 * 1024 * 1024,
446+
}
447+
448+
dpConfig := testutils.DeploymentConfig{
449+
RCConfig: rcConfig,
450+
}
451+
ginkgo.By(fmt.Sprintf("Creating deployment %s in namespace %s", dpConfig.Name, dpConfig.Namespace))
452+
dpConfig.NodeDumpFunc = framework.DumpNodeDebugInfo
453+
dpConfig.ContainerDumpFunc = framework.LogFailedContainers
454+
framework.ExpectNoError(testutils.RunDeployment(dpConfig))
455+
}

vertical-pod-autoscaler/e2e/v1beta2/common.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,17 @@ func NewHamsterDeploymentWithResourcesAndLimits(f *framework.Framework, cpuQuant
192192
return d
193193
}
194194

195+
func getPodSelectorExcludingDonePodsOrDie() string {
196+
stringSelector := "status.phase!=" + string(apiv1.PodSucceeded) +
197+
",status.phase!=" + string(apiv1.PodFailed)
198+
selector := fields.ParseSelectorOrDie(stringSelector)
199+
return selector.String()
200+
}
201+
195202
// GetHamsterPods returns running hamster pods (matched by hamsterLabels)
196203
func GetHamsterPods(f *framework.Framework) (*apiv1.PodList, error) {
197204
label := labels.SelectorFromSet(labels.Set(hamsterLabels))
198-
selector := fields.ParseSelectorOrDie("status.phase!=" + string(apiv1.PodSucceeded) +
199-
",status.phase!=" + string(apiv1.PodFailed))
200-
options := metav1.ListOptions{LabelSelector: label.String(), FieldSelector: selector.String()}
205+
options := metav1.ListOptions{LabelSelector: label.String(), FieldSelector: getPodSelectorExcludingDonePodsOrDie()}
201206
return f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(options)
202207
}
203208

0 commit comments

Comments
 (0)