Skip to content

Commit 84af8f5

Browse files
Add namespace level vGPU resource quotas (#225)
* Add namespace level vGPU resource quotas
* Add namespace level vGPU resource quotas
* add webhook and fix type conversion issue
* fix make lint issue
* fix webhook validate
* rollback webhook validate
* remove duplicated webhook
* #207 in memory check gpuquota and allocate/deallocate/init
* quota design
* refactor code
* rollback webhook suite test
* rollback test suite
---------
Co-authored-by: 0x5457 <[email protected]>
1 parent 9644f57 commit 84af8f5

File tree

12 files changed

+3807
-9
lines changed

12 files changed

+3807
-9
lines changed

api/v1/gpuresourcequota_types.go

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1
18+
19+
import (
20+
"k8s.io/apimachinery/pkg/api/resource"
21+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+
)
23+
24+
// GPUResourceQuotaSpec defines the desired state of GPUResourceQuota.
// It carries two groups of limits: Total, which applies to the namespace as a
// whole, and Single, which applies to each individual workload.
25+
type GPUResourceQuotaSpec struct {
26+
// Total holds the namespace-wide limits (similar to ResourceQuotas).
27+
Total GPUResourceQuotaTotal `json:"total,omitempty"`
28+
29+
// Single holds the per-workload limits (similar to LimitRanges).
30+
Single GPUResourceQuotaSingle `json:"single,omitempty"`
31+
}
32+
33+
// GPUResourceQuotaTotal defines the total limits for a namespace.
// Every field is a pointer and is omitted from the JSON form when nil.
34+
type GPUResourceQuotaTotal struct {
35+
// RequestsTFlops is the total TFlops requests limit for the namespace.
36+
// +optional
37+
RequestsTFlops *resource.Quantity `json:"requests.tflops,omitempty"`
38+
// RequestsVRAM is the total VRAM requests limit for the namespace.
// +optional
39+
RequestsVRAM *resource.Quantity `json:"requests.vram,omitempty"`
40+
41+
// LimitsTFlops is the total TFlops limits cap for the namespace.
42+
// +optional
43+
LimitsTFlops *resource.Quantity `json:"limits.tflops,omitempty"`
44+
// LimitsVRAM is the total VRAM limits cap for the namespace.
// +optional
45+
LimitsVRAM *resource.Quantity `json:"limits.vram,omitempty"`
46+
47+
// Workers is the maximum number of workers in the namespace.
48+
// +optional
49+
Workers *int32 `json:"workers,omitempty"`
50+
51+
// AlertThresholdPercent is the alert threshold percentage (0-100).
52+
// When usage exceeds this percentage, an alert event will be triggered.
53+
// +kubebuilder:validation:Minimum=0
54+
// +kubebuilder:validation:Maximum=100
55+
// +kubebuilder:default=95
56+
AlertThresholdPercent *int32 `json:"alertThresholdPercent,omitempty"`
57+
}
58+
59+
// GPUResourceQuotaSingle defines per-workload limits: upper and lower bounds
// plus the default limits and requests for a single workload.
// Every field is an optional pointer and is omitted from JSON when nil.
60+
type GPUResourceQuotaSingle struct {
61+
// Max is the maximum resources allowed per workload.
62+
// +optional
63+
Max *GPUResourceLimits `json:"max,omitempty"`
64+
65+
// Min is the minimum resources required per workload.
66+
// +optional
67+
Min *GPUResourceLimits `json:"min,omitempty"`
68+
69+
// Default is the set of limits applied to workloads without explicit limits.
70+
// +optional
71+
Default *GPUResourceDefaults `json:"default,omitempty"`
72+
73+
// DefaultRequest is the set of requests applied to workloads without
// explicit requests.
74+
// +optional
75+
DefaultRequest *GPUResourceDefaults `json:"defaultRequest,omitempty"`
76+
}
77+
78+
// GPUResourceLimits defines a set of resource bounds (TFlops, VRAM, workers).
// It is the type of both GPUResourceQuotaSingle.Max and
// GPUResourceQuotaSingle.Min, so whether a value is an upper or a lower bound
// depends on which of those fields holds it.
79+
type GPUResourceLimits struct {
80+
// TFlops is the TFlops limit.
81+
// +optional
82+
TFlops *resource.Quantity `json:"tflops,omitempty"`
83+
84+
// VRAM is the VRAM limit.
85+
// +optional
86+
VRAM *resource.Quantity `json:"vram,omitempty"`
87+
88+
// Workers is the maximum number of workers.
// NOTE(review): this type is also used for the Min bound, where "maximum"
// presumably does not apply as written — confirm the intended meaning there.
89+
// +optional
90+
Workers *int32 `json:"workers,omitempty"`
91+
}
92+
93+
// GPUResourceDefaults defines default resource values. It is the type of the
// Default and DefaultRequest fields of GPUResourceQuotaSingle.
94+
type GPUResourceDefaults struct {
95+
// TFlops is the default TFlops value.
96+
// +optional
97+
TFlops *resource.Quantity `json:"tflops,omitempty"`
98+
99+
// VRAM is the default VRAM value.
100+
// +optional
101+
VRAM *resource.Quantity `json:"vram,omitempty"`
102+
}
103+
104+
// GPUResourceQuotaStatus defines the observed state of GPUResourceQuota:
// current usage, remaining headroom as percentages, conditions, and the time
// of the last status update.
105+
type GPUResourceQuotaStatus struct {
106+
// Used is the current resource usage in the namespace.
107+
Used GPUResourceUsage `json:"used,omitempty"`
108+
109+
// AvailablePercent is the available percentage for each resource type.
110+
AvailablePercent GPUResourceAvailablePercent `json:"availablePercent,omitempty"`
111+
112+
// Conditions represent the latest available observations of the quota's state.
// NOTE(review): presumably the Type values are the
// GPUResourceQuotaConditionType constants declared in this file — confirm
// against the controller that writes them.
113+
// +optional
114+
Conditions []metav1.Condition `json:"conditions,omitempty"`
115+
116+
// LastUpdateTime is the last time the status was updated.
117+
// +optional
118+
LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"`
119+
}
120+
121+
// GPUResourceUsage defines current resource usage. Its fields and JSON keys
// mirror the resource fields of GPUResourceQuotaTotal (everything except
// AlertThresholdPercent).
122+
type GPUResourceUsage struct {
123+
// RequestsTFlops is the current TFlops requests usage.
124+
// +optional
125+
RequestsTFlops *resource.Quantity `json:"requests.tflops,omitempty"`
126+
// RequestsVRAM is the current VRAM requests usage.
// +optional
127+
RequestsVRAM *resource.Quantity `json:"requests.vram,omitempty"`
128+
129+
// LimitsTFlops is the current TFlops limits usage.
130+
// +optional
131+
LimitsTFlops *resource.Quantity `json:"limits.tflops,omitempty"`
132+
// LimitsVRAM is the current VRAM limits usage.
// +optional
133+
LimitsVRAM *resource.Quantity `json:"limits.vram,omitempty"`
134+
135+
// Workers is the current number of workers.
136+
// +optional
137+
Workers *int32 `json:"workers,omitempty"`
138+
}
139+
140+
// GPUResourceAvailablePercent defines the available percentage for each
// resource. Each field is an optional *int64 that is omitted from JSON when
// nil; the documented value range is 0-100.
141+
type GPUResourceAvailablePercent struct {
142+
// RequestsTFlops is the available percentage for requests.tflops (0-100).
143+
// +optional
144+
RequestsTFlops *int64 `json:"requests.tflops,omitempty"`
145+
146+
// RequestsVRAM is the available percentage for requests.vram (0-100).
147+
// +optional
148+
RequestsVRAM *int64 `json:"requests.vram,omitempty"`
149+
150+
// LimitsTFlops is the available percentage for limits.tflops (0-100).
151+
// +optional
152+
LimitsTFlops *int64 `json:"limits.tflops,omitempty"`
153+
154+
// LimitsVRAM is the available percentage for limits.vram (0-100).
155+
// +optional
156+
LimitsVRAM *int64 `json:"limits.vram,omitempty"`
157+
158+
// Workers is the available percentage for workers (0-100).
159+
// +optional
160+
Workers *int64 `json:"workers,omitempty"`
161+
}
162+
163+
// GPUResourceQuotaConditionType defines the condition types for GPUResourceQuota.
// It is a string type; the known values are the constants declared below.
164+
type GPUResourceQuotaConditionType string
165+
166+
const (
167+
// GPUResourceQuotaConditionReady indicates the quota is ready and functioning.
168+
GPUResourceQuotaConditionReady GPUResourceQuotaConditionType = "Ready"
169+
// GPUResourceQuotaConditionExceeded indicates the quota has been exceeded.
170+
GPUResourceQuotaConditionExceeded GPUResourceQuotaConditionType = "Exceeded"
171+
// GPUResourceQuotaConditionAlertThresholdReached indicates the alert
// threshold has been reached.
172+
GPUResourceQuotaConditionAlertThresholdReached GPUResourceQuotaConditionType = "AlertThresholdReached"
173+
)
174+
175+
// +kubebuilder:object:root=true
176+
// +kubebuilder:subresource:status
177+
// +kubebuilder:resource:scope=Namespaced
178+
// +kubebuilder:printcolumn:name="Requests TFlops Used",type=string,JSONPath=`.status.used["requests.tflops"]`
179+
// +kubebuilder:printcolumn:name="Requests VRAM Used",type=string,JSONPath=`.status.used["requests.vram"]`
180+
// +kubebuilder:printcolumn:name="Workers Used",type=integer,JSONPath=`.status.used.workers`
181+
// +kubebuilder:printcolumn:name="Alert Threshold",type=integer,JSONPath=`.spec.total.alertThresholdPercent`
182+
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
183+
184+
// GPUResourceQuota is the Schema for the gpuresourcequotas API.
// The kubebuilder markers above declare it as a namespaced root object with a
// status subresource, and define printer columns for used requests (TFlops and
// VRAM), used workers, the alert threshold, and the object's age.
185+
type GPUResourceQuota struct {
186+
metav1.TypeMeta `json:",inline"`
187+
metav1.ObjectMeta `json:"metadata,omitempty"`
188+
189+
// Spec is the desired state of the quota.
Spec GPUResourceQuotaSpec `json:"spec,omitempty"`
190+
// Status is the observed state of the quota.
Status GPUResourceQuotaStatus `json:"status,omitempty"`
191+
}
192+
193+
// +kubebuilder:object:root=true
194+
195+
// GPUResourceQuotaList contains a list of GPUResourceQuota.
196+
type GPUResourceQuotaList struct {
197+
metav1.TypeMeta `json:",inline"`
198+
metav1.ListMeta `json:"metadata,omitempty"`
199+
// Items is the list of GPUResourceQuota objects; the key is always emitted
// in JSON because the tag has no omitempty.
Items []GPUResourceQuota `json:"items"`
200+
}
201+
202+
// init registers GPUResourceQuota and GPUResourceQuotaList with SchemeBuilder,
// which is declared elsewhere in this package (not visible in this file).
func init() {
203+
SchemeBuilder.Register(&GPUResourceQuota{}, &GPUResourceQuotaList{})
204+
}

0 commit comments

Comments
 (0)