Skip to content

Commit f6c574b

Browse files
authored
Merge pull request #219 from daimaxiaxie/fix-kubelet-reserved
fix kube reserved
2 parents cb536af + 50056ca commit f6c574b

File tree

6 files changed

+158
-55
lines changed

6 files changed

+158
-55
lines changed

pkg/providers/cluster/ackmanaged.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"encoding/json"
2424
"errors"
2525
"fmt"
26+
"math"
2627
"net/http"
2728
"regexp"
2829
"sort"
@@ -34,7 +35,10 @@ import (
3435
"github.com/patrickmn/go-cache"
3536
"github.com/samber/lo"
3637
corev1 "k8s.io/api/core/v1"
38+
"k8s.io/apimachinery/pkg/api/resource"
3739
"sigs.k8s.io/controller-runtime/pkg/log"
40+
"sigs.k8s.io/karpenter/pkg/cloudprovider"
41+
"sigs.k8s.io/karpenter/pkg/utils/resources"
3842

3943
"github.com/cloudpilot-ai/karpenter-provider-alibabacloud/pkg/apis/v1alpha1"
4044
)
@@ -294,6 +298,57 @@ func (a *ACKManaged) formatTaints(taints []corev1.Taint) string {
294298
}), ",")
295299
}
296300

301+
func (a *ACKManaged) DefaultOverhead(capacity corev1.ResourceList) cloudprovider.InstanceTypeOverhead {
302+
// referring to: https://help.aliyun.com/zh/ack/ack-managed-and-ack-dedicated/user-guide/resource-reservation-policy#0f5ffe176df7q
303+
// CPU overhead calculation
304+
cpuOverHead := calculateCPUOverhead(capacity.Cpu().MilliValue())
305+
306+
// TODO: In a real environment, the formula does not produce accurate results,
307+
// consistently yielding values that are 200MiB larger than expected.
308+
// Memory overhead: min(11*pods + 255, memoryMi*0.25)
309+
memoryOverHead := int64(math.Min(float64(11*capacity.Pods().Value()+255), float64(capacity.Memory().Value()/1024*1024)*0.25)) + 200
310+
311+
return cloudprovider.InstanceTypeOverhead{
312+
KubeReserved: corev1.ResourceList{
313+
corev1.ResourceCPU: *resource.NewMilliQuantity(cpuOverHead/2, resource.DecimalSI),
314+
corev1.ResourceMemory: *resources.Quantity(fmt.Sprintf("%dMi", memoryOverHead/2)),
315+
},
316+
SystemReserved: corev1.ResourceList{
317+
corev1.ResourceCPU: *resource.NewMilliQuantity(cpuOverHead/2, resource.DecimalSI),
318+
corev1.ResourceMemory: *resources.Quantity(fmt.Sprintf("%dMi", memoryOverHead/2)),
319+
},
320+
}
321+
}
322+
323+
// cpuOverheadSteps lists cumulative CPU thresholds (in millicores) and, for
// each threshold reached, the fraction of one core to add to the reservation.
var cpuOverheadSteps = [...]struct {
	milliCPU int64
	fraction float64
}{
	{1000, 0.06},
	{2000, 0.01},
	{3000, 0.005},
	{4000, 0.005},
}

// calculateCPUOverhead returns the CPU reservation in millicores for a node
// with cpuM millicores of capacity: a stepped amount for the first four
// cores, plus 0.25% of any capacity beyond four cores.
func calculateCPUOverhead(cpuM int64) int64 {
	var reserved int64

	// The steps are sorted ascending, so stop at the first threshold that
	// the capacity does not reach.
	for _, step := range cpuOverheadSteps {
		if cpuM < step.milliCPU {
			break
		}
		reserved += int64(1000 * step.fraction)
	}

	// Additional overhead for CPU > 4 cores (0.25%)
	if cpuM > 4000 {
		reserved += int64(float64(cpuM-4000) * 0.0025)
	}

	return reserved
}
351+
297352
type NodeConfig struct {
298353
KubeletConfig *ACKKubeletConfig `json:"kubelet_config,omitempty"`
299354
}

pkg/providers/cluster/ackmanaged_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@ limitations under the License.
1717
package cluster
1818

1919
import (
20+
"fmt"
2021
"testing"
2122

2223
"github.com/alibabacloud-go/tea/tea"
2324
"github.com/stretchr/testify/assert"
25+
corev1 "k8s.io/api/core/v1"
26+
"k8s.io/apimachinery/pkg/api/resource"
2427

2528
"github.com/cloudpilot-ai/karpenter-provider-alibabacloud/pkg/apis/v1alpha1"
2629
)
@@ -32,3 +35,47 @@ func Test_convertNodeClassKubeletConfigToACKNodeConfig(t *testing.T) {
3235
d := convertNodeClassKubeletConfigToACKNodeConfig(kubeletCfg)
3336
assert.Equal(t, "eyJrdWJlbGV0X2NvbmZpZyI6eyJtYXhQb2RzIjoxMTB9fQ==", d)
3437
}
38+
39+
// referring to: https://help.aliyun.com/zh/ack/ack-managed-and-ack-dedicated/user-guide/resource-reservation-policy#0f5ffe176df7q
40+
func TestDefaultOverhead(t *testing.T) {
41+
provider := NewACKManaged("test-cluster", "cn-hangzhou", nil, nil)
42+
43+
// ECS c7 / 1.28+
44+
cases := []struct {
45+
name string
46+
cpuCores int64
47+
memoryGi int64
48+
maxPods int64
49+
wantCPUMilli int64
50+
wantMemMi int64
51+
}{
52+
{"2C4Gi-15pods", 2, 4, 15, 70, 420 + 200},
53+
{"4C8Gi-48pods", 4, 8, 48, 80, 982},
54+
{"8C16Gi-48pods", 8, 16, 48, 90, 982},
55+
{"16C32Gi-213pods", 16, 32, 213, 110, 2598 + 200},
56+
{"32C64Gi-213pods", 32, 64, 213, 150, 2598 + 200},
57+
{"64C128Gi-213pods", 64, 128, 213, 230, 2598 + 200},
58+
{"128C256Gi-423pods", 128, 256, 423, 390, 4908 + 200},
59+
}
60+
61+
for _, tt := range cases {
62+
t.Run(tt.name, func(t *testing.T) {
63+
capacity := corev1.ResourceList{
64+
corev1.ResourceCPU: *resource.NewQuantity(tt.cpuCores, resource.DecimalSI),
65+
corev1.ResourceMemory: resource.MustParse(fmt.Sprintf("%dGi", tt.memoryGi)),
66+
corev1.ResourcePods: *resource.NewQuantity(tt.maxPods, resource.DecimalSI),
67+
}
68+
overhead := provider.DefaultOverhead(capacity)
69+
70+
kubeCPU := overhead.KubeReserved.Cpu().MilliValue()
71+
sysCPU := overhead.SystemReserved.Cpu().MilliValue()
72+
assert.Equal(t, tt.wantCPUMilli, kubeCPU+sysCPU, "total CPU reserved")
73+
assert.Equal(t, kubeCPU, sysCPU, "CPU split 50/50")
74+
75+
kubeMemMi := overhead.KubeReserved.Memory().Value() / (1024 * 1024)
76+
sysMemMi := overhead.SystemReserved.Memory().Value() / (1024 * 1024)
77+
assert.Equal(t, tt.wantMemMi, kubeMemMi+sysMemMi, "total memory reserved (Mi)")
78+
assert.Equal(t, kubeMemMi, sysMemMi, "memory split 50/50")
79+
})
80+
}
81+
}

pkg/providers/cluster/custom.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@ package cluster
1919
import (
2020
"context"
2121
"encoding/base64"
22+
"net/http"
23+
2224
"github.com/cloudpilot-ai/karpenter-provider-alibabacloud/pkg/apis/v1alpha1"
2325
"github.com/samber/lo"
2426
corev1 "k8s.io/api/core/v1"
25-
"net/http"
27+
"sigs.k8s.io/karpenter/pkg/cloudprovider"
2628
)
2729

2830
const customClusterType = "Custom"
@@ -60,3 +62,7 @@ func (c *Custom) FeatureFlags() FeatureFlags {
6062
SupportsENILimitedPodDensity: false,
6163
}
6264
}
65+
66+
// DefaultOverhead returns an empty InstanceTypeOverhead: Custom clusters
// carry no provider-supplied default kube/system reservations, so callers
// fall back to their own defaults and any kubelet-config overrides.
func (c *Custom) DefaultOverhead(_ corev1.ResourceList) cloudprovider.InstanceTypeOverhead {
	return cloudprovider.InstanceTypeOverhead{}
}

pkg/providers/cluster/types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
ackclient "github.com/alibabacloud-go/cs-20151215/v5/client"
2424
"github.com/patrickmn/go-cache"
2525
corev1 "k8s.io/api/core/v1"
26+
"sigs.k8s.io/karpenter/pkg/cloudprovider"
2627

2728
"github.com/cloudpilot-ai/karpenter-provider-alibabacloud/pkg/apis/v1alpha1"
2829
alicache "github.com/cloudpilot-ai/karpenter-provider-alibabacloud/pkg/cache"
@@ -58,6 +59,7 @@ type Provider interface {
5859
LivenessProbe(*http.Request) error
5960
GetSupportedImages(string) ([]Image, error)
6061
FeatureFlags() FeatureFlags
62+
DefaultOverhead(corev1.ResourceList) cloudprovider.InstanceTypeOverhead
6163
}
6264

6365
func NewClusterProvider(ctx context.Context, ackClient *ackclient.Client, region string) Provider {

pkg/providers/instancetype/instancetype.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1alpha1.KubeletConfigur
195195
// so that Karpenter is able to cache the set of InstanceTypes based on values that alter the set of instance types
196196
// !!! Important !!!
197197
offers := p.createOfferings(ctx, *i.InstanceTypeId, zoneData)
198-
return NewInstanceType(ctx, i, kc, p.region, nodeClass.Spec.SystemDisk, offers, clusterCNI)
198+
return NewInstanceType(ctx, i, kc, p.region, nodeClass.Spec.SystemDisk, offers, clusterCNI, p.clusterProvider)
199199
})
200200

201201
// Filter out nil values

pkg/providers/instancetype/types.go

Lines changed: 46 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ import (
3939
"github.com/cloudpilot-ai/karpenter-provider-alibabacloud/pkg/providers/imagefamily"
4040
)
4141

42+
const (
43+
MemoryAvailable = "memory.available"
44+
NodeFSAvailable = "nodefs.available"
45+
)
46+
4247
var (
4348
instanceTypeScheme = regexp.MustCompile(`^ecs\.([a-z]+)(\-[0-9]+tb)?([0-9]+).*`)
4449
)
@@ -59,55 +64,10 @@ type ZoneData struct {
5964
SpotAvailable bool
6065
}
6166

62-
func calculateResourceOverhead(pods, cpuM, memoryMi int64) corev1.ResourceList {
63-
// referring to: https://help.aliyun.com/zh/ack/ack-managed-and-ack-dedicated/user-guide/resource-reservation-policy#0f5ffe176df7q
64-
// CPU overhead calculation
65-
cpuOverHead := calculateCPUOverhead(cpuM)
66-
67-
// TODO: In a real environment, the formula does not produce accurate results,
68-
// consistently yielding values that are 200MiB larger than expected.
69-
// Memory overhead: min(11*pods + 255, memoryMi*0.25)
70-
memoryOverHead := int64(math.Min(float64(11*pods+255), float64(memoryMi)*0.25)) + 200
71-
72-
return corev1.ResourceList{
73-
corev1.ResourceCPU: *resource.NewMilliQuantity(cpuOverHead, resource.DecimalSI),
74-
corev1.ResourceMemory: *resources.Quantity(fmt.Sprintf("%dMi", memoryOverHead)),
75-
}
76-
}
77-
78-
// thresholds defines CPU overhead thresholds and their corresponding percentages
79-
var thresholds = [...]struct {
80-
cores int64
81-
overhead float64
82-
}{
83-
{1000, 0.06},
84-
{2000, 0.01},
85-
{3000, 0.005},
86-
{4000, 0.005},
87-
}
88-
89-
func calculateCPUOverhead(cpuM int64) int64 {
90-
var cpuOverHead int64
91-
92-
// Calculate overhead for each threshold
93-
for _, t := range thresholds {
94-
if cpuM >= t.cores {
95-
cpuOverHead += int64(1000 * t.overhead)
96-
}
97-
}
98-
99-
// Additional overhead for CPU > 4 cores (0.25%)
100-
if cpuM > 4000 {
101-
cpuOverHead += int64(float64(cpuM-4000) * 0.0025)
102-
}
103-
104-
return cpuOverHead
105-
}
106-
10767
func NewInstanceType(ctx context.Context,
10868
info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType,
10969
kc *v1alpha1.KubeletConfiguration, region string, systemDisk *v1alpha1.SystemDisk,
110-
offerings cloudprovider.Offerings, clusterCNI string) *cloudprovider.InstanceType {
70+
offerings cloudprovider.Offerings, clusterCNI string, cluster cluster.Provider) *cloudprovider.InstanceType {
11171
if offerings == nil {
11272
return nil
11373
}
@@ -117,16 +77,10 @@ func NewInstanceType(ctx context.Context,
11777
Requirements: computeRequirements(info, offerings, region),
11878
Offerings: offerings,
11979
Capacity: computeCapacity(ctx, info, kc.MaxPods, kc.PodsPerCore, systemDisk, clusterCNI),
120-
Overhead: &cloudprovider.InstanceTypeOverhead{
121-
KubeReserved: corev1.ResourceList{},
122-
SystemReserved: corev1.ResourceList{},
123-
EvictionThreshold: corev1.ResourceList{},
124-
},
12580
}
12681

12782
// Follow KubeReserved/SystemReserved/EvictionThreshold will be merged, so we can set only one overhead totally
128-
it.Overhead.KubeReserved = calculateResourceOverhead(it.Capacity.Pods().Value(),
129-
it.Capacity.Cpu().MilliValue(), extractMemory(info).Value()/MiBByteRatio)
83+
it.Overhead = computeOverhead(cluster, it.Capacity, kc)
13084
if it.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Windows)))) == nil {
13185
it.Capacity[v1alpha1.ResourcePrivateIPv4Address] = *privateIPv4Address(info)
13286
}
@@ -353,3 +307,42 @@ func privateIPv4Address(info *ecsclient.DescribeInstanceTypesResponseBodyInstanc
353307
func getInstanceBandwidth(info *ecsclient.DescribeInstanceTypesResponseBodyInstanceTypesInstanceType) int32 {
354308
return max(lo.FromPtr(info.InstanceBandwidthRx), lo.FromPtr(info.InstanceBandwidthTx))
355309
}
310+
311+
func computeOverhead(cluster cluster.Provider, capacity corev1.ResourceList, kubeletConfig *v1alpha1.KubeletConfiguration) *cloudprovider.InstanceTypeOverhead {
312+
overhead := &cloudprovider.InstanceTypeOverhead{
313+
KubeReserved: corev1.ResourceList{},
314+
SystemReserved: corev1.ResourceList{},
315+
// ref: https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/eviction/defaults_linux.go
316+
EvictionThreshold: corev1.ResourceList{
317+
corev1.ResourceMemory: resource.MustParse("100Mi"),
318+
corev1.ResourceEphemeralStorage: computeEvictionSignal(*capacity.StorageEphemeral(), "10%"),
319+
},
320+
}
321+
322+
defaultOverhead := cluster.DefaultOverhead(capacity)
323+
if defaultOverhead.KubeReserved != nil {
324+
overhead.KubeReserved = lo.Assign(overhead.KubeReserved, defaultOverhead.KubeReserved)
325+
}
326+
if defaultOverhead.SystemReserved != nil {
327+
overhead.SystemReserved = lo.Assign(overhead.SystemReserved, defaultOverhead.SystemReserved)
328+
}
329+
if defaultOverhead.EvictionThreshold != nil {
330+
overhead.EvictionThreshold = lo.Assign(overhead.EvictionThreshold, defaultOverhead.EvictionThreshold)
331+
}
332+
333+
overhead.KubeReserved = lo.Assign(overhead.KubeReserved, lo.MapEntries(kubeletConfig.KubeReserved, func(k string, v string) (corev1.ResourceName, resource.Quantity) {
334+
return corev1.ResourceName(k), resource.MustParse(v)
335+
}))
336+
overhead.SystemReserved = lo.Assign(overhead.SystemReserved, lo.MapEntries(kubeletConfig.SystemReserved, func(k string, v string) (corev1.ResourceName, resource.Quantity) {
337+
return corev1.ResourceName(k), resource.MustParse(v)
338+
}))
339+
if kubeletConfig.EvictionHard != nil {
340+
if v, ok := kubeletConfig.EvictionHard[MemoryAvailable]; ok {
341+
overhead.EvictionThreshold[corev1.ResourceMemory] = computeEvictionSignal(*capacity.Memory(), v)
342+
}
343+
if v, ok := kubeletConfig.EvictionHard[NodeFSAvailable]; ok {
344+
overhead.EvictionThreshold[corev1.ResourceEphemeralStorage] = computeEvictionSignal(*capacity.StorageEphemeral(), v)
345+
}
346+
}
347+
return overhead
348+
}

0 commit comments

Comments
 (0)