Skip to content

Commit 6a92f19

Browse files
authored
Merge pull request kubernetes#87730 from marosset/windows-kubelet-stats-timeout-updates
Windows kubelet stats timeout updates
2 parents 6858c25 + 999fdfa commit 6a92f19

File tree

4 files changed

+177
-6
lines changed

4 files changed

+177
-6
lines changed

pkg/kubelet/dockershim/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ go_library(
7878
"@io_bazel_rules_go//go/platform:windows": [
7979
"//pkg/kubelet/apis:go_default_library",
8080
"//pkg/kubelet/winstats:go_default_library",
81+
"//vendor/github.com/Microsoft/hcsshim:go_default_library",
8182
"//vendor/golang.org/x/sys/windows/registry:go_default_library",
8283
],
8384
"//conditions:default": [],

pkg/kubelet/dockershim/docker_stats_windows.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ import (
2222
"context"
2323
"time"
2424

25+
"github.com/Microsoft/hcsshim"
2526
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
27+
"k8s.io/klog"
2628
)
2729

2830
func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
@@ -31,7 +33,18 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
3133
return nil, err
3234
}
3335

34-
statsJSON, err := ds.client.GetContainerStats(containerID)
36+
hcsshim_container, err := hcsshim.OpenContainer(containerID)
37+
if err != nil {
38+
return nil, err
39+
}
40+
defer func() {
41+
closeErr := hcsshim_container.Close()
42+
if closeErr != nil {
43+
klog.Errorf("Error closing container '%s': %v", containerID, closeErr)
44+
}
45+
}()
46+
47+
stats, err := hcsshim_container.Statistics()
3548
if err != nil {
3649
return nil, err
3750
}
@@ -47,7 +60,6 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
4760
}
4861
status := statusResp.GetStatus()
4962

50-
dockerStats := statsJSON.Stats
5163
timestamp := time.Now().UnixNano()
5264
containerStats := &runtimeapi.ContainerStats{
5365
Attributes: &runtimeapi.ContainerAttributes{
@@ -58,13 +70,12 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
5870
},
5971
Cpu: &runtimeapi.CpuUsage{
6072
Timestamp: timestamp,
61-
// have to multiply cpu usage by 100 since docker stats units is in 100's of nano seconds for Windows
62-
// see https://github.com/moby/moby/blob/v1.13.1/api/types/stats.go#L22
63-
UsageCoreNanoSeconds: &runtimeapi.UInt64Value{Value: dockerStats.CPUStats.CPUUsage.TotalUsage * 100},
73+
// have to multiply cpu usage by 100 since stats units is in 100's of nano seconds for Windows
74+
UsageCoreNanoSeconds: &runtimeapi.UInt64Value{Value: stats.Processor.TotalRuntime100ns * 100},
6475
},
6576
Memory: &runtimeapi.MemoryUsage{
6677
Timestamp: timestamp,
67-
WorkingSetBytes: &runtimeapi.UInt64Value{Value: dockerStats.MemoryStats.PrivateWorkingSet},
78+
WorkingSetBytes: &runtimeapi.UInt64Value{Value: stats.Memory.UsagePrivateWorkingSetBytes},
6879
},
6980
WritableLayer: &runtimeapi.FilesystemUsage{
7081
Timestamp: timestamp,

test/e2e/windows/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ go_library(
1111
"gmsa_full.go",
1212
"gmsa_kubelet.go",
1313
"hybrid_network.go",
14+
"kubelet_stats.go",
1415
"memory_limits.go",
1516
"security_context.go",
1617
"service.go",
@@ -35,6 +36,7 @@ go_library(
3536
"//staging/src/k8s.io/kubelet/config/v1beta1:go_default_library",
3637
"//test/e2e/framework:go_default_library",
3738
"//test/e2e/framework/kubectl:go_default_library",
39+
"//test/e2e/framework/kubelet:go_default_library",
3840
"//test/e2e/framework/metrics:go_default_library",
3941
"//test/e2e/framework/node:go_default_library",
4042
"//test/e2e/framework/pod:go_default_library",

test/e2e/windows/kubelet_stats.go

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package windows
18+
19+
import (
20+
"time"
21+
22+
v1 "k8s.io/api/core/v1"
23+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+
"k8s.io/apimachinery/pkg/labels"
25+
"k8s.io/apimachinery/pkg/util/uuid"
26+
"k8s.io/kubernetes/test/e2e/framework"
27+
e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
28+
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
29+
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
30+
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
31+
imageutils "k8s.io/kubernetes/test/utils/image"
32+
33+
"github.com/onsi/ginkgo"
34+
)
35+
36+
var _ = SIGDescribe("[Feature:Windows] Kubelet-Stats", func() {
37+
f := framework.NewDefaultFramework("kubelet-stats-test-windows")
38+
39+
ginkgo.Describe("Kubelet stats collection for Windows nodes", func() {
40+
ginkgo.Context("when running 10 pods", func() {
41+
// 10 seconds is the default scrape timeout for metrics-server and kube-prometheus
42+
ginkgo.It("should return within 10 seconds", func() {
43+
44+
ginkgo.By("Selecting a Windows node")
45+
targetNode, err := findWindowsNode(f)
46+
framework.ExpectNoError(err, "Error finding Windows node")
47+
framework.Logf("Using node: %v", targetNode.Name)
48+
49+
ginkgo.By("Scheduling 10 pods")
50+
powershellImage := imageutils.GetConfig(imageutils.BusyBox)
51+
pods := newKubeletStatsTestPods(10, powershellImage, targetNode.Name)
52+
f.PodClient().CreateBatch(pods)
53+
54+
ginkgo.By("Waiting up to 3 minutes for pods to be running")
55+
timeout := 3 * time.Minute
56+
e2epod.WaitForPodsRunningReady(f.ClientSet, f.Namespace.Name, 10, 0, timeout, make(map[string]string))
57+
58+
ginkgo.By("Getting kubelet stats 5 times and checking average duration")
59+
iterations := 5
60+
var totalDurationMs int64
61+
62+
for i := 0; i < iterations; i++ {
63+
start := time.Now()
64+
nodeStats, err := e2ekubelet.GetStatsSummary(f.ClientSet, targetNode.Name)
65+
duration := time.Since(start)
66+
totalDurationMs += duration.Milliseconds()
67+
68+
framework.ExpectNoError(err, "Error getting kubelet stats")
69+
70+
// Perform some basic sanity checks on retrieved stats for pods in this test's namespace
71+
statsChecked := 0
72+
for _, podStats := range nodeStats.Pods {
73+
if podStats.PodRef.Namespace != f.Namespace.Name {
74+
continue
75+
}
76+
statsChecked = statsChecked + 1
77+
78+
framework.ExpectEqual(*podStats.CPU.UsageCoreNanoSeconds > 0, true, "Pod stats should not report 0 cpu usage")
79+
framework.ExpectEqual(*podStats.Memory.WorkingSetBytes > 0, true, "Pod stats should not report 0 bytes for memory working set ")
80+
}
81+
framework.ExpectEqual(statsChecked, 10, "Should find stats for 10 pods in kubelet stats")
82+
83+
time.Sleep(5 * time.Second)
84+
}
85+
86+
avgDurationMs := totalDurationMs / int64(iterations)
87+
88+
durationMatch := avgDurationMs <= time.Duration(10*time.Second).Milliseconds()
89+
framework.Logf("Getting kubelet stats for node %v took an average of %v milliseconds over %v iterations", targetNode.Name, avgDurationMs, iterations)
90+
framework.ExpectEqual(durationMatch, true, "Collecting kubelet stats should not take longer than 10 seconds")
91+
})
92+
})
93+
})
94+
})
95+
96+
// findWindowsNode finds a Windows node that is Ready and Schedulable
97+
func findWindowsNode(f *framework.Framework) (v1.Node, error) {
98+
selector := labels.Set{"kubernetes.io/os": "windows"}.AsSelector()
99+
nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{LabelSelector: selector.String()})
100+
101+
if err != nil {
102+
return v1.Node{}, err
103+
}
104+
105+
var targetNode v1.Node
106+
foundNode := false
107+
for _, n := range nodeList.Items {
108+
if e2enode.IsNodeReady(&n) && e2enode.IsNodeSchedulable(&n) {
109+
targetNode = n
110+
foundNode = true
111+
break
112+
}
113+
}
114+
115+
if foundNode == false {
116+
e2eskipper.Skipf("Could not find and ready and schedulable Windows nodes")
117+
}
118+
119+
return targetNode, nil
120+
}
121+
122+
// newKubeletStatsTestPods creates a list of pods (specification) for test.
123+
func newKubeletStatsTestPods(numPods int, image imageutils.Config, nodeName string) []*v1.Pod {
124+
var pods []*v1.Pod
125+
126+
for i := 0; i < numPods; i++ {
127+
podName := "statscollectiontest-" + string(uuid.NewUUID())
128+
pod := v1.Pod{
129+
ObjectMeta: metav1.ObjectMeta{
130+
Name: podName,
131+
Labels: map[string]string{
132+
"name": podName,
133+
"testapp": "stats-collection",
134+
},
135+
},
136+
Spec: v1.PodSpec{
137+
Containers: []v1.Container{
138+
{
139+
Image: image.GetE2EImage(),
140+
Name: podName,
141+
Command: []string{
142+
"powershell.exe",
143+
"-Command",
144+
"sleep -Seconds 600",
145+
},
146+
},
147+
},
148+
149+
NodeName: nodeName,
150+
},
151+
}
152+
153+
pods = append(pods, &pod)
154+
}
155+
156+
return pods
157+
}

0 commit comments

Comments
 (0)