Skip to content

Commit 725a8c9

Browse files
committed
Merge branch 'master' into codegen_v2
2 parents b8a6aab + 806cb16 commit 725a8c9

File tree

18 files changed

+291
-39
lines changed

18 files changed

+291
-39
lines changed

.github/dependabot.yml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,74 @@ updates:
6464
- "area/dependency"
6565
- "release-note-none"
6666
- "ok-to-test"
67+
# Go - Cluster Autoscaler
68+
- directory: "/cluster-autoscaler"
69+
package-ecosystem: "gomod"
70+
open-pull-requests-limit: 5
71+
schedule:
72+
interval: "daily"
73+
time: "15:00"
74+
# Use America/New_York Standard Time (UTC -05:00)
75+
timezone: "America/New_York"
76+
commit-message:
77+
prefix: "dependabot"
78+
include: scope
79+
ignore:
80+
# Update providers manually.
81+
- dependency-name: "k8s.io/cloud-provider-aws/*"
82+
- dependency-name: "github.com/aws/*"
83+
- dependency-name: "k8s.io/cloud-provider-gcp/*"
84+
- dependency-name: "cloud.google.com/go/compute/*"
85+
- dependency-name: "sigs.k8s.io/cloud-provider-azure/*"
86+
- dependency-name: "github.com/Azure/*"
87+
- dependency-name: "github.com/Azure/go-autorest/autorest/*"
88+
- dependency-name: "github.com/digitalocean/*"
89+
# Update dependencies exclusively used by providers manually
90+
- dependency-name: "github.com/gofrs/uuid"
91+
- dependency-name: "github.com/google/go-querystring"
92+
- dependency-name: "github.com/jmattheis/goverter"
93+
- dependency-name: "github.com/jmespath/go-jmespath"
94+
- dependency-name: "github.com/vburenin/ifacemaker"
95+
- dependency-name: "golang.org/x/oauth2"
96+
- dependency-name: "golang.org/x/sys"
97+
- dependency-name: "google.golang.org/api"
98+
- dependency-name: "gopkg.in/gcfg.v1"
99+
- dependency-name: "sigs.k8s.io/yaml"
100+
# Maintain k8s version skew compatibility manually.
101+
- dependency-name: "k8s.io/kubernetes"
102+
# Maintain k8s.io staging dependencies manually.
103+
- dependency-name: "k8s.io/api"
104+
- dependency-name: "k8s.io/apiextensions-apiserver"
105+
- dependency-name: "k8s.io/apimachinery"
106+
- dependency-name: "k8s.io/apiserver"
107+
- dependency-name: "k8s.io/cli-runtime"
108+
- dependency-name: "k8s.io/client-go"
109+
- dependency-name: "k8s.io/cloud-provider"
110+
- dependency-name: "k8s.io/cluster-bootstrap"
111+
- dependency-name: "k8s.io/code-generator"
112+
- dependency-name: "k8s.io/component-base"
113+
- dependency-name: "k8s.io/component-helpers"
114+
- dependency-name: "k8s.io/controller-manager"
115+
- dependency-name: "k8s.io/cri-api"
116+
- dependency-name: "k8s.io/cri-client"
117+
- dependency-name: "k8s.io/csi-translation-lib"
118+
- dependency-name: "k8s.io/dynamic-resource-allocation"
119+
- dependency-name: "k8s.io/endpointslice"
120+
- dependency-name: "k8s.io/externaljwt"
121+
- dependency-name: "k8s.io/kms"
122+
- dependency-name: "k8s.io/kube-aggregator"
123+
- dependency-name: "k8s.io/kube-controller-manager"
124+
- dependency-name: "k8s.io/kube-proxy"
125+
- dependency-name: "k8s.io/kube-scheduler"
126+
- dependency-name: "k8s.io/kubectl"
127+
- dependency-name: "k8s.io/kubelet"
128+
- dependency-name: "k8s.io/metrics"
129+
- dependency-name: "k8s.io/mount-utils"
130+
- dependency-name: "k8s.io/pod-security-admission"
131+
- dependency-name: "k8s.io/sample-apiserver"
132+
- dependency-name: "k8s.io/sample-cli-plugin"
133+
- dependency-name: "k8s.io/sample-controller"
134+
labels:
135+
- "ok-to-test"
136+
- "area/cluster-autoscaler"
137+
- "area/dependency"

.github/workflows/ci.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,6 @@ jobs:
4343
env:
4444
GO111MODULE: auto
4545

46-
- name: golangci-lint - vertical-pod-autoscaler
47-
uses: golangci/golangci-lint-action@v8
48-
with:
49-
args: --timeout=30m
50-
working-directory: ${{ env.GOPATH }}/src/k8s.io/autoscaler/vertical-pod-autoscaler
51-
5246
- name: Test
5347
working-directory: ${{ env.GOPATH }}/src/k8s.io/autoscaler
5448
run: hack/for-go-proj.sh test
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
name: Lint
2+
3+
on:
4+
push:
5+
paths:
6+
- 'vertical-pod-autoscaler/**'
7+
pull_request:
8+
paths:
9+
- 'vertical-pod-autoscaler/**'
10+
11+
env:
12+
GOPATH: ${{ github.workspace }}/go
13+
14+
permissions:
15+
contents: read
16+
17+
jobs:
18+
golangci:
19+
name: golangci-lint - VPA
20+
runs-on: ubuntu-latest
21+
steps:
22+
- uses: actions/checkout@v5
23+
with:
24+
path: ${{ env.GOPATH }}/src/k8s.io/autoscaler
25+
- uses: actions/setup-go@v6
26+
with:
27+
go-version: '1.25.0'
28+
cache-dependency-path: |
29+
${{ env.GOPATH}}/src/k8s.io/autoscaler/vertical-pod-autoscaler/go.sum
30+
${{ env.GOPATH}}/src/k8s.io/autoscaler/vertical-pod-autoscaler/e2e/go.sum
31+
32+
- name: golangci-lint - vertical-pod-autoscaler
33+
uses: golangci/golangci-lint-action@v8
34+
with:
35+
args: --timeout=30m
36+
working-directory: ${{ env.GOPATH }}/src/k8s.io/autoscaler/vertical-pod-autoscaler

cluster-autoscaler/README.md

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ Starting from Kubernetes 1.12, versioning scheme was changed to match Kubernetes
5151

5252
| Kubernetes Version | CA Version | Chart Version |
5353
|--------------------|--------------------------|---------------|
54+
| 1.34.x | 1.34.x |9.51.0+|
5455
| 1.33.x | 1.33.x |9.47.0+|
5556
| 1.32.x | 1.32.x |9.45.0+|
5657
| 1.31.x | 1.31.x |9.38.0+|
@@ -86,24 +87,40 @@ Starting from Kubernetes 1.12, versioning scheme was changed to match Kubernetes
8687

8788
## Schedule
8889

89-
Cluster Autoscaler releases new minor versions shortly after OSS Kubernetes release
90-
and patches for versions corresponding to currently
91-
supported [Kubernetes versions](https://kubernetes.io/releases/) on a roughly 2
92-
month cadence. Currently planned schedule is below. Please note that target
93-
dates listed below are approximate and we expect up to a week difference between
94-
target ETA and the actual releases.
95-
96-
| Date | Maintainer Preparing Release | Backup Maintainer | Type |
97-
|------------|------------------------------|-------------------|-------|
98-
| 2025-06-11 | jackfrancis | gjtempleton | 1.33 |
99-
| 2025-07-16 | gjtempleton | towca | patch |
100-
| 2025-08-20 | towca | BigDarkClown | patch |
101-
| 2025-09-17 | BigDarkClown | x13n | 1.34 |
102-
| 2025-10-22 | x13n | jackfrancis | patch |
103-
| 2025-11-19 | jackfrancis | gjtempleton | patch |
104-
105-
Additional patch releases may happen outside of the schedule in case of critical
106-
bugs or vulnerabilities.
90+
Cluster Autoscaler synchronizes its releases with the [Kubernetes release schedule](https://kubernetes.io/releases/).
91+
92+
For Cluster Autoscaler releases of new minor versions, expect a release date of up to
93+
one month after the corresponding Kubernetes release. This is due to the fact that upstream
94+
integrations of Kubernetes into Cluster Autoscaler can't be finalized until the Kubernetes
95+
release is official, and the time required to test and validate those integrations.
96+
97+
Cluster Autoscaler will also release patch versions in accordance with Kubernetes patch
98+
releases to ensure rapid integration of upstream Kubernetes fixes. The overhead to integrate
99+
and validate Kubernetes patch releases is less costly, and thus the Cluster Autoscaler
100+
release date should follow the corresponding Kubernetes release by no more than 1-2 weeks.
101+
102+
Bug fixes and Cloud Provider features to Cluster Autoscaler itself will be continually
103+
backported into the supported release branches (n - 3, where n is the latest release).
104+
Backporting into older release branches can be requested as an exception by filing an issue
105+
and bringing the request [to the official SIG Autoscaling Community](https://github.com/kubernetes/community/blob/master/sig-autoscaling/README.md).
106+
107+
Finally, additional Cluster Autoscaler patch releases may happen outside of the above schedule
108+
in case of critical bugs or vulnerabilities.
109+
110+
In summary, users should not be guided by a strict patch version equivalency between Kubernetes
111+
and Cluster Autoscaler (for example, there is no strict requirement to use Cluster Autoscaler v1.34.1 with a Kubernetes v1.34.1 cluster). Rather, we recommend that users always use the _latest_
112+
Cluster Autoscaler release that corresponds to the minor version of Kubernetes that their cluster
113+
is running.
114+
115+
For example, if the latest (hypothetical) Cluster Autoscaler releases are
116+
`v1.100.1`, `v1.99.5`, `v1.98.10`, and `v1.97.16`, any of the below scenarios follows the recommended guidance:
117+
118+
| Kubernetes Version | CA Version |
119+
|---------------------|--------------------------|
120+
| 1.100.0 | 1.100.1 |
121+
| 1.99.4 | 1.99.5 |
122+
| 1.98.4 | 1.98.10 |
123+
| 1.97.16 | 1.97.16 |
107124

108125
# Notable changes
109126

cluster-autoscaler/cloudprovider/gce/gce_price_info.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ var (
8383
"n2": 0.031611,
8484
"n2d": 0.027502,
8585
"n4": 0.030821,
86+
"n4d": 0.032578,
8687
"t2d": 0.027502,
8788
"z3": 0.0496531,
8889
}
@@ -101,6 +102,7 @@ var (
101102
"n2": 0.004237,
102103
"n2d": 0.003686,
103104
"n4": 0.004131,
105+
"n4d": 0.3,
104106
"t2d": 0.003686,
105107
"z3": 0.0066553,
106108
}
@@ -119,6 +121,7 @@ var (
119121
"n2": 0.007650 / 0.031611,
120122
"n2d": 0.002773 / 0.027502,
121123
"n4": 0.007976 / 0.030821,
124+
"n4d": 0.0130312 / 0.032578,
122125
"t2d": 0.006655 / 0.027502,
123126
"z3": 0.0165 / 0.0496531,
124127
}
@@ -127,18 +130,21 @@ var (
127130
"n1": 0.033174,
128131
"n2": 0.033174,
129132
"n2d": 0.028877,
133+
"n4d": 0.0342069,
130134
}
131135
customMemoryPricePerHourPerGb = map[string]float64{
132136
"e2": 0.003067,
133137
"n1": 0.004446,
134138
"n2": 0.004446,
135139
"n2d": 0.003870,
140+
"n4d": 0.0038871,
136141
}
137142
customPreemptibleDiscount = map[string]float64{
138143
"e2": 0.006867 / 0.022890,
139144
"n1": 0.00698 / 0.033174,
140145
"n2": 0.00802 / 0.033174,
141146
"n2d": 0.002908 / 0.028877,
147+
"n4d": 0.0130312 / 0.0342069,
142148
}
143149

144150
// e2-micro and e2-small have allocatable set too high resulting in

cluster-autoscaler/utils/gpu/gpu.go

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import (
2626
)
2727

2828
const (
29+
// ResourceAMDGPU is the name of the AMD GPU resource.
30+
ResourceAMDGPU = "amd.com/gpu"
2931
// ResourceNvidiaGPU is the name of the Nvidia GPU resource.
3032
ResourceNvidiaGPU = "nvidia.com/gpu"
3133
// ResourceDirectX is the name of the DirectX resource on windows.
@@ -35,6 +37,14 @@ const (
3537
DefaultGPUType = "nvidia-tesla-k80"
3638
)
3739

40+
// GPUVendorResourceNames is the centralized list of all known GPU vendor extended resource names.
41+
// Extend this slice if new vendor resource names are added.
42+
var GPUVendorResourceNames = []apiv1.ResourceName{
43+
ResourceNvidiaGPU,
44+
ResourceAMDGPU,
45+
ResourceDirectX,
46+
}
47+
3848
const (
3949
// MetricsGenericGPU - for when there is no information about GPU type
4050
MetricsGenericGPU = "generic"
@@ -109,23 +119,53 @@ func validateGpuType(availableGPUTypes map[string]struct{}, gpu string) string {
109119
// if the drivers are installed and GPU is ready to use.
110120
func NodeHasGpu(GPULabel string, node *apiv1.Node) bool {
111121
_, hasGpuLabel := node.Labels[GPULabel]
112-
gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable[ResourceNvidiaGPU]
113-
return hasGpuLabel || (hasGpuAllocatable && !gpuAllocatable.IsZero())
122+
if hasGpuLabel {
123+
return true
124+
}
125+
// Check for extended resources as well
126+
for _, gpuVendorResourceName := range GPUVendorResourceNames {
127+
gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable[gpuVendorResourceName]
128+
if hasGpuAllocatable && !gpuAllocatable.IsZero() {
129+
return true
130+
}
131+
}
132+
return false
114133
}
115134

116135
// PodRequestsGpu returns true if a given pod has GPU request.
117136
func PodRequestsGpu(pod *apiv1.Pod) bool {
118137
podRequests := podutils.PodRequests(pod)
119-
_, gpuFound := podRequests[ResourceNvidiaGPU]
120-
return gpuFound
138+
for _, gpuVendorResourceName := range GPUVendorResourceNames {
139+
if _, found := podRequests[gpuVendorResourceName]; found {
140+
return true
141+
}
142+
}
143+
return false
144+
}
145+
146+
// DetectNodeGPUResourceName inspects the node's allocatable resources and returns the first
147+
// known GPU extended resource name that has non-zero allocatable. Falls back to Nvidia for
148+
// backward compatibility whenever none are found; node labels are not inspected here.
149+
func DetectNodeGPUResourceName(node *apiv1.Node) apiv1.ResourceName {
150+
for _, rn := range GPUVendorResourceNames {
151+
if qty, ok := node.Status.Allocatable[rn]; ok && !qty.IsZero() {
152+
return rn
153+
}
154+
}
155+
// Fallback: preserve previous behavior (defaulting to Nvidia) when no known GPU resource is allocatable
156+
return ResourceNvidiaGPU
121157
}
122158

123159
// GetNodeGPUFromCloudProvider returns the GPU the node has. Returned GPU has the GPU label of the
124160
// passed in cloud provider. If the node doesn't have a GPU, returns nil.
125161
func GetNodeGPUFromCloudProvider(provider cloudprovider.CloudProvider, node *apiv1.Node) *cloudprovider.GpuConfig {
126162
gpuLabel := provider.GPULabel()
127163
if NodeHasGpu(gpuLabel, node) {
128-
return &cloudprovider.GpuConfig{Label: gpuLabel, Type: node.Labels[gpuLabel], ExtendedResourceName: ResourceNvidiaGPU}
164+
return &cloudprovider.GpuConfig{
165+
Label: gpuLabel,
166+
Type: node.Labels[gpuLabel],
167+
ExtendedResourceName: DetectNodeGPUResourceName(node),
168+
}
129169
}
130170
return nil
131171
}

cluster-autoscaler/utils/gpu/gpu_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,70 @@ func TestGetGpuInfoForMetrics(t *testing.T) {
236236
})
237237
}
238238
}
239+
240+
func TestDetectNodeGPUResourceName(t *testing.T) {
241+
testCases := []struct {
242+
name string
243+
node *apiv1.Node
244+
expectedResourceName apiv1.ResourceName
245+
}{
246+
{
247+
name: "nvidia gpu",
248+
node: &apiv1.Node{
249+
ObjectMeta: metav1.ObjectMeta{
250+
Name: "node-with-nvidia-gpu",
251+
Labels: map[string]string{},
252+
},
253+
Status: apiv1.NodeStatus{
254+
Capacity: apiv1.ResourceList{
255+
gpu.ResourceNvidiaGPU: *resource.NewQuantity(1, resource.DecimalSI),
256+
},
257+
Allocatable: apiv1.ResourceList{
258+
gpu.ResourceNvidiaGPU: *resource.NewQuantity(1, resource.DecimalSI),
259+
},
260+
},
261+
},
262+
expectedResourceName: gpu.ResourceNvidiaGPU,
263+
},
264+
{
265+
name: "amd gpu",
266+
node: &apiv1.Node{
267+
ObjectMeta: metav1.ObjectMeta{
268+
Name: "node-with-amd-gpu",
269+
Labels: map[string]string{},
270+
},
271+
Status: apiv1.NodeStatus{
272+
Capacity: apiv1.ResourceList{
273+
gpu.ResourceAMDGPU: *resource.NewQuantity(8, resource.DecimalSI),
274+
},
275+
Allocatable: apiv1.ResourceList{
276+
gpu.ResourceAMDGPU: *resource.NewQuantity(8, resource.DecimalSI),
277+
},
278+
},
279+
},
280+
expectedResourceName: gpu.ResourceAMDGPU,
281+
},
282+
{
283+
name: "test default gpu resource name",
284+
node: &apiv1.Node{
285+
ObjectMeta: metav1.ObjectMeta{
286+
Name: "node-without-gpu",
287+
Labels: map[string]string{},
288+
},
289+
Status: apiv1.NodeStatus{
290+
Capacity: apiv1.ResourceList{},
291+
Allocatable: apiv1.ResourceList{},
292+
},
293+
},
294+
expectedResourceName: gpu.ResourceNvidiaGPU,
295+
},
296+
}
297+
for _, tc := range testCases {
298+
t.Run(tc.name, func(t *testing.T) {
299+
resourceName := gpu.DetectNodeGPUResourceName(tc.node)
300+
if resourceName != tc.expectedResourceName {
301+
t.Errorf("expected resource name %s but got %s", tc.expectedResourceName, resourceName)
302+
}
303+
})
304+
}
305+
}

0 commit comments

Comments
 (0)