Commit 16a7650

Merge pull request kubernetes#86101 from PatrickLang/fix-cpumaximum
Fix cpu resource limit on Windows
2 parents b6b494b + 886214f commit 16a7650

8 files changed, +182 -79 lines

pkg/kubelet/dockershim/BUILD

Lines changed: 1 addition & 0 deletions

@@ -78,6 +78,7 @@ go_library(
             "//pkg/kubelet/apis:go_default_library",
             "//pkg/kubelet/winstats:go_default_library",
             "//vendor/github.com/Microsoft/hcsshim:go_default_library",
+            "//vendor/github.com/docker/docker/pkg/sysinfo:go_default_library",
             "//vendor/golang.org/x/sys/windows/registry:go_default_library",
         ],
         "//conditions:default": [],

pkg/kubelet/dockershim/helpers_windows.go

Lines changed: 6 additions & 4 deletions

@@ -25,6 +25,7 @@ import (
 	dockertypes "github.com/docker/docker/api/types"
 	dockercontainer "github.com/docker/docker/api/types/container"
 	dockerfilters "github.com/docker/docker/api/types/filters"
+	"github.com/docker/docker/pkg/sysinfo"
 	"k8s.io/klog"
 
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -69,11 +70,12 @@ func (ds *dockerService) updateCreateConfig(
 	if wc := config.GetWindows(); wc != nil {
 		rOpts := wc.GetResources()
 		if rOpts != nil {
+			// Precedence and units for these are described at length in kuberuntime_container_windows.go - generateWindowsContainerConfig()
 			createConfig.HostConfig.Resources = dockercontainer.Resources{
-				Memory:     rOpts.MemoryLimitInBytes,
-				CPUShares:  rOpts.CpuShares,
-				CPUCount:   rOpts.CpuCount,
-				CPUPercent: rOpts.CpuMaximum,
+				Memory:    rOpts.MemoryLimitInBytes,
+				CPUShares: rOpts.CpuShares,
+				CPUCount:  rOpts.CpuCount,
+				NanoCPUs:  rOpts.CpuMaximum * int64(sysinfo.NumCPU()) * (1e9 / 10000),
 			}
 		}
 
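
To make the new NanoCPUs arithmetic above concrete, here is a minimal standalone sketch, not part of the commit; the helper name and the fixed CPU count are illustrative assumptions. It shows how a CRI CpuMaximum value on the 1-10000 scale becomes Docker's nano-CPU quota:

package main

import "fmt"

// criMaximumToNanoCPUs mirrors the arithmetic in updateCreateConfig:
// CpuMaximum is "percent of total capacity * 100" (range 1-10000), so
// dividing by 10000 gives a fraction of the machine, multiplying by the
// CPU count gives whole CPUs, and multiplying by 1e9 gives Docker's
// nano-CPU quota.
func criMaximumToNanoCPUs(cpuMaximum, numCPU int64) int64 {
	return cpuMaximum * numCPU * (1e9 / 10000)
}

func main() {
	// On a hypothetical 8-CPU node, a CpuMaximum of 625 (6.25% of total
	// capacity) is half a CPU, i.e. 500000000 nano-CPUs.
	fmt.Println(criMaximumToNanoCPUs(625, 8)) // 500000000
}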

pkg/kubelet/kuberuntime/BUILD

Lines changed: 0 additions & 1 deletion

@@ -14,7 +14,6 @@ go_library(
         "helpers.go",
         "helpers_linux.go",
         "helpers_unsupported.go",
-        "helpers_windows.go",
         "instrumented_services.go",
         "kuberuntime_container.go",
         "kuberuntime_container_linux.go",

pkg/kubelet/kuberuntime/helpers_unsupported.go

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-// +build !linux,!windows
+// +build !linux
 
 /*
 Copyright 2018 The Kubernetes Authors.

pkg/kubelet/kuberuntime/helpers_windows.go

Lines changed: 0 additions & 55 deletions
This file was deleted.

pkg/kubelet/kuberuntime/kuberuntime_container_windows.go

Lines changed: 29 additions & 18 deletions

@@ -53,15 +53,42 @@ func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1
 		SecurityContext: &runtimeapi.WindowsContainerSecurityContext{},
 	}
 
-	cpuRequest := container.Resources.Requests.Cpu()
 	cpuLimit := container.Resources.Limits.Cpu()
 	isolatedByHyperv := kubeletapis.ShouldIsolatedByHyperV(pod.Annotations)
 	if !cpuLimit.IsZero() {
 		// Note that sysinfo.NumCPU() is limited to 64 CPUs on Windows due to Processor Groups,
 		// as only 64 processors are available for execution by a given process. This causes
 		// some oddities on systems with more than 64 processors.
 		// Refer https://msdn.microsoft.com/en-us/library/windows/desktop/dd405503(v=vs.85).aspx.
+
+		// Since Kubernetes doesn't have any notion of weight in the Pod/Container API, only limits/reserves, applying CpuMaximum only
+		// better follows the intent of the user. At one point CpuWeights were set, but this prevented limits from having any effect.
+
+		// There are 3 parts to how this works:
+		// Part one - Windows kernel
+		//   cpuMaximum is documented at https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/resource-controls
+		//   and the range, plus how it relates to the number of CPUs, is at https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_cpu_rate_control_information
+		//   For process isolation, these are applied to the job object setting JOB_OBJECT_CPU_RATE_CONTROL_ENABLE, which can be set to either
+		//   JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED or JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP. This is why the settings are mutually exclusive.
+		// Part two - Docker (doc: https://docs.docker.com/engine/api/v1.30)
+		//   If both CpuWeight and CpuMaximum are passed to Docker, then it sets
+		//   JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED, ignoring CpuMaximum.
+		//   Option a: Set HostConfig.CpuPercent. The units are whole percent of the total CPU capacity of the system, meaning the resolution
+		//     is different based on the number of cores.
+		//   Option b: Set HostConfig.NanoCpus integer <int64> - CPU quota in units of 10^-9 CPUs. Moby scales this to the Windows job object
+		//     resolution of 1-10000, so it's higher resolution than option a.
+		//     src: https://github.com/moby/moby/blob/10866714412aea1bb587d1ad14b2ce1ba4cf4308/daemon/oci_windows.go#L426
+		// Part three - CRI & ContainerD's implementation
+		//   The kubelet sets these directly on cgroups in Linux, but needs to pass them across CRI on Windows.
+		//   There is an existing cpu_maximum field, with a range of percent * 100, so 1-10000. This is different from Docker, but consistent with OCI:
+		//   https://github.com/kubernetes/kubernetes/blob/56d1c3b96d0a544130a82caad33dd57629b8a7f8/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L681-L682
+		//   https://github.com/opencontainers/runtime-spec/blob/ad53dcdc39f1f7f7472b10aa0a45648fe4865496/config-windows.md#cpu
+		//   If both CpuWeight and CpuMaximum are set, ContainerD catches this invalid case and returns an error instead.
+
 		cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(sysinfo.NumCPU()) / 1000
+
+		// TODO: This should be reviewed or removed once Hyper-V support is implemented with CRI-ContainerD
+		// in a future release. cpuCount may or may not be required if cpuMaximum is set.
 		if isolatedByHyperv {
 			cpuCount := int64(cpuLimit.MilliValue()+999) / 1000
 			wc.Resources.CpuCount = cpuCount
@@ -80,31 +107,15 @@
 		wc.Resources.CpuMaximum = cpuMaximum
 	}
 
-	cpuShares := milliCPUToShares(cpuLimit.MilliValue(), isolatedByHyperv)
-	if cpuShares == 0 {
-		cpuShares = milliCPUToShares(cpuRequest.MilliValue(), isolatedByHyperv)
-	}
-	wc.Resources.CpuShares = cpuShares
-
 	if !isolatedByHyperv {
 		// The processor resource controls are mutually exclusive on
 		// Windows Server Containers, the order of precedence is
-		// CPUCount first, then CPUShares, and CPUMaximum last.
+		// CPUCount first, then CPUMaximum.
 		if wc.Resources.CpuCount > 0 {
-			if wc.Resources.CpuShares > 0 {
-				wc.Resources.CpuShares = 0
-				klog.Warningf("Mutually exclusive options: CPUCount priority > CPUShares priority on Windows Server Containers. CPUShares should be ignored")
-			}
 			if wc.Resources.CpuMaximum > 0 {
 				wc.Resources.CpuMaximum = 0
 				klog.Warningf("Mutually exclusive options: CPUCount priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
 			}
-		} else if wc.Resources.CpuShares > 0 {
-			if wc.Resources.CpuMaximum > 0 {
-				wc.Resources.CpuMaximum = 0
-				klog.Warningf("Mutually exclusive options: CPUShares priority > CPUMaximum priority on Windows Server Containers. CPUMaximum should be ignored")
-			}
-
 		}
 	}
 
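
As a sanity check on the formulas in this hunk and in the dockershim change earlier in the commit, the sketch below is illustrative only: the helper names and the 8-CPU node are assumptions, and integer division means cpuMaximum rounds down on core counts that don't divide evenly. It shows that the kubelet-side conversion to cpuMaximum and the dockershim-side conversion to NanoCPUs compose back to the original limit expressed in nano-CPUs:

package main

import "fmt"

// milliCPUToCPUMaximum mirrors generateWindowsContainerConfig: a container
// limit in milli-CPUs becomes a share of the node's total capacity,
// expressed as percent * 100 (range 1-10000).
func milliCPUToCPUMaximum(milliCPU, numCPU int64) int64 {
	return 10000 * milliCPU / numCPU / 1000
}

// cpuMaximumToNanoCPUs mirrors the dockershim conversion to Docker's
// NanoCpus quota (units of 1e-9 CPUs).
func cpuMaximumToNanoCPUs(cpuMaximum, numCPU int64) int64 {
	return cpuMaximum * numCPU * (1e9 / 10000)
}

func main() {
	const numCPU = 8         // hypothetical node size
	limitMilli := int64(500) // a "500m" container CPU limit

	cpuMaximum := milliCPUToCPUMaximum(limitMilli, numCPU) // 625, i.e. 6.25% of the node
	nanoCPUs := cpuMaximumToNanoCPUs(cpuMaximum, numCPU)   // 500000000, i.e. 0.5 CPUs

	fmt.Println(cpuMaximum, nanoCPUs)
}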

test/e2e/windows/BUILD

Lines changed: 1 addition & 0 deletions

@@ -5,6 +5,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
 go_library(
     name = "go_default_library",
     srcs = [
+        "cpu_limits.go",
         "density.go",
         "dns.go",
         "framework.go",

test/e2e/windows/cpu_limits.go

Lines changed: 144 additions & 0 deletions

This file was added.

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package windows

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
	imageutils "k8s.io/kubernetes/test/utils/image"

	"github.com/onsi/ginkgo"
)

var _ = SIGDescribe("[Feature:Windows] Cpu Resources", func() {
	f := framework.NewDefaultFramework("cpu-resources-test-windows")

	// The Windows 'BusyBox' image is PowerShell plus a collection of scripts and utilities to mimic common busybox commands
	powershellImage := imageutils.GetConfig(imageutils.BusyBox)

	ginkgo.Context("Container limits", func() {
		ginkgo.It("should not be exceeded after waiting 2 minutes", func() {
			ginkgo.By("Creating one pod with limit set to '0.5'")
			podsDecimal := newCPUBurnPods(1, powershellImage, "0.5", "1Gi")
			f.PodClient().CreateBatch(podsDecimal)
			ginkgo.By("Creating one pod with limit set to '500m'")
			podsMilli := newCPUBurnPods(1, powershellImage, "500m", "1Gi")
			f.PodClient().CreateBatch(podsMilli)
			ginkgo.By("Waiting 2 minutes")
			time.Sleep(2 * time.Minute)
			ginkgo.By("Ensuring pods are still running")
			var allPods []*v1.Pod
			for _, p := range podsDecimal {
				pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
					context.TODO(),
					p.Name,
					metav1.GetOptions{})
				framework.ExpectNoError(err, "Error retrieving pod")
				framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
				allPods = append(allPods, pod)
			}
			for _, p := range podsMilli {
				pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(
					context.TODO(),
					p.Name,
					metav1.GetOptions{})
				framework.ExpectNoError(err, "Error retrieving pod")
				framework.ExpectEqual(pod.Status.Phase, v1.PodRunning)
				allPods = append(allPods, pod)
			}
			ginkgo.By("Ensuring cpu doesn't exceed limit by >5%")
			for _, p := range allPods {
				ginkgo.By("Gathering node summary stats")
				nodeStats, err := e2ekubelet.GetStatsSummary(f.ClientSet, p.Spec.NodeName)
				framework.ExpectNoError(err, "Error grabbing node summary stats")
				found := false
				cpuUsage := float64(0)
				for _, pod := range nodeStats.Pods {
					if pod.PodRef.Name != p.Name || pod.PodRef.Namespace != p.Namespace {
						continue
					}
					cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9
					found = true
					break
				}
				framework.ExpectEqual(found, true, "Found pod in stats summary")
				framework.Logf("Pod %s usage: %v", p.Name, cpuUsage)
				framework.ExpectEqual(cpuUsage > 0, true, "Pods reported usage should be > 0")
				framework.ExpectEqual((.5*1.05) > cpuUsage, true, "Pods reported usage should not exceed limit by >5%")
			}
		})
	})
})

// newCPUBurnPods creates a list of pod specifications with a workload that will consume all available CPU resources up to the container limit
func newCPUBurnPods(numPods int, image imageutils.Config, cpuLimit string, memoryLimit string) []*v1.Pod {
	var pods []*v1.Pod

	memLimitQuantity, err := resource.ParseQuantity(memoryLimit)
	framework.ExpectNoError(err)

	cpuLimitQuantity, err := resource.ParseQuantity(cpuLimit)
	framework.ExpectNoError(err)

	for i := 0; i < numPods; i++ {
		podName := "cpulimittest-" + string(uuid.NewUUID())
		pod := v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: podName,
				Labels: map[string]string{
					"name":    podName,
					"testapp": "cpuburn",
				},
			},
			Spec: v1.PodSpec{
				// Restart policy is always (default).
				Containers: []v1.Container{
					{
						Image: image.GetE2EImage(),
						Name:  podName,
						Resources: v1.ResourceRequirements{
							Limits: v1.ResourceList{
								v1.ResourceMemory: memLimitQuantity,
								v1.ResourceCPU:    cpuLimitQuantity,
							},
						},
						Command: []string{
							"powershell.exe",
							"-Command",
							"foreach ($loopnumber in 1..8) { Start-Job -ScriptBlock { $result = 1; foreach($mm in 1..2147483647){$res1=1;foreach($num in 1..2147483647){$res1=$mm*$num*1340371};$res1} } } ; get-job | wait-job",
						},
					},
				},
				NodeSelector: map[string]string{
					"beta.kubernetes.io/os": "windows",
				},
			},
		}

		pods = append(pods, &pod)
	}

	return pods
}
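
The assertion at the end of the new test reduces to simple unit arithmetic. This small sketch, illustrative only and not part of the commit, shows how the kubelet's reported UsageNanoCores is converted to cores and compared against the 0.5-CPU limit with the 5% tolerance:

package main

import "fmt"

// withinLimit converts a kubelet summary stat (nanocores) to cores and
// checks it against the container CPU limit plus a 5% tolerance, matching
// the check in cpu_limits.go.
func withinLimit(usageNanoCores uint64, limitCores float64) bool {
	usageCores := float64(usageNanoCores) * 1e-9
	return usageCores > 0 && usageCores < limitCores*1.05
}

func main() {
	// A pod limited to 500m that reports 510000000 nanocores (0.51 CPUs)
	// is still inside the 0.525-CPU tolerance.
	fmt.Println(withinLimit(510000000, 0.5)) // true
}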
