@@ -14,14 +14,18 @@ See the License for the specific language governing permissions and
14
14
limitations under the License.
15
15
*/
16
16
17
- package gpu
17
+ package gpu_test
18
18
19
19
import (
20
20
"testing"
21
21
22
22
apiv1 "k8s.io/api/core/v1"
23
23
"k8s.io/apimachinery/pkg/api/resource"
24
24
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25
+ "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
26
+ testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
27
+ "k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
28
+ "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
25
29
"k8s.io/autoscaler/cluster-autoscaler/utils/test"
26
30
27
31
"github.com/stretchr/testify/assert"
@@ -45,9 +49,9 @@ func TestNodeHasGpu(t *testing.T) {
45
49
Allocatable : apiv1.ResourceList {},
46
50
},
47
51
}
48
- nodeGpuReady .Status .Allocatable [ResourceNvidiaGPU ] = * resource .NewQuantity (1 , resource .DecimalSI )
49
- nodeGpuReady .Status .Capacity [ResourceNvidiaGPU ] = * resource .NewQuantity (1 , resource .DecimalSI )
50
- assert .True (t , NodeHasGpu (GPULabel , nodeGpuReady ))
52
+ nodeGpuReady .Status .Allocatable [gpu . ResourceNvidiaGPU ] = * resource .NewQuantity (1 , resource .DecimalSI )
53
+ nodeGpuReady .Status .Capacity [gpu . ResourceNvidiaGPU ] = * resource .NewQuantity (1 , resource .DecimalSI )
54
+ assert .True (t , gpu . NodeHasGpu (GPULabel , nodeGpuReady ))
51
55
52
56
nodeGpuUnready := & apiv1.Node {
53
57
ObjectMeta : metav1.ObjectMeta {
@@ -59,7 +63,7 @@ func TestNodeHasGpu(t *testing.T) {
59
63
Allocatable : apiv1.ResourceList {},
60
64
},
61
65
}
62
- assert .True (t , NodeHasGpu (GPULabel , nodeGpuUnready ))
66
+ assert .True (t , gpu . NodeHasGpu (GPULabel , nodeGpuUnready ))
63
67
64
68
nodeNoGpu := & apiv1.Node {
65
69
ObjectMeta : metav1.ObjectMeta {
@@ -71,14 +75,164 @@ func TestNodeHasGpu(t *testing.T) {
71
75
Allocatable : apiv1.ResourceList {},
72
76
},
73
77
}
74
- assert .False (t , NodeHasGpu (GPULabel , nodeNoGpu ))
78
+ assert .False (t , gpu . NodeHasGpu (GPULabel , nodeNoGpu ))
75
79
}
76
80
77
81
func TestPodRequestsGpu (t * testing.T ) {
78
82
podNoGpu := test .BuildTestPod ("podNoGpu" , 0 , 1000 )
79
83
podWithGpu := test .BuildTestPod ("pod1AnyGpu" , 0 , 1000 )
80
- podWithGpu .Spec .Containers [0 ].Resources .Requests [ResourceNvidiaGPU ] = * resource .NewQuantity (1 , resource .DecimalSI )
84
+ podWithGpu .Spec .Containers [0 ].Resources .Requests [gpu . ResourceNvidiaGPU ] = * resource .NewQuantity (1 , resource .DecimalSI )
81
85
82
- assert .False (t , PodRequestsGpu (podNoGpu ))
83
- assert .True (t , PodRequestsGpu (podWithGpu ))
86
+ assert .False (t , gpu .PodRequestsGpu (podNoGpu ))
87
+ assert .True (t , gpu .PodRequestsGpu (podWithGpu ))
88
+ }
89
+
90
+ func TestGetGpuInfoForMetrics (t * testing.T ) {
91
+ knownGpu := "nvidia-tesla-k80"
92
+ unknownGpu := "unknown-gpu"
93
+ availableGPUTypes := map [string ]struct {}{
94
+ knownGpu : {},
95
+ }
96
+ resourceName := apiv1 .ResourceName (gpu .ResourceNvidiaGPU )
97
+
98
+ // Basic node
99
+ node := test .BuildTestNode ("node" , 1000 , 1000 )
100
+
101
+ // Node with GPU capacity
102
+ nodeWithGpu := test .BuildTestNode ("node-with-gpu" , 1000 , 1000 )
103
+ nodeWithGpu .Status .Capacity [resourceName ] = * resource .NewQuantity (1 , resource .DecimalSI )
104
+
105
+ // Node without GPU capacity
106
+ nodeWithoutGpu := test .BuildTestNode ("node-without-gpu" , 1000 , 1000 )
107
+
108
+ // Node group with GPU in template
109
+ provider := testprovider.TestCloudProvider {}
110
+ templateWithGpu := test .BuildTestNode ("template-with-gpu" , 1000 , 1000 )
111
+ templateWithGpu .Status .Capacity [resourceName ] = * resource .NewQuantity (1 , resource .DecimalSI )
112
+ nodeGroupWithGpu := provider .BuildNodeGroup ("ng-with-gpu" , 1 , 10 , 1 , false , false , "n1-standard-1" , nil )
113
+
114
+ // Node group without GPU in template
115
+ templateWithoutGpu := test .BuildTestNode ("template-without-gpu" , 1000 , 1000 )
116
+ nodeGroupWithoutGpu := provider .BuildNodeGroup ("ng-without-gpu" , 1 , 10 , 1 , false , false , "n1-standard-1" , nil )
117
+
118
+ templates := map [string ]* framework.NodeInfo {
119
+ nodeGroupWithoutGpu .Id (): framework .NewNodeInfo (templateWithoutGpu , nil ),
120
+ nodeGroupWithGpu .Id (): framework .NewNodeInfo (templateWithGpu , nil ),
121
+ }
122
+
123
+ provider .SetMachineTemplates (templates )
124
+
125
+ testCases := []struct {
126
+ name string
127
+ gpuConfig * cloudprovider.GpuConfig
128
+ node * apiv1.Node
129
+ nodeGroup cloudprovider.NodeGroup
130
+ expectedGpuResource string
131
+ expectedGpuType string
132
+ }{
133
+ {
134
+ name : "no gpu config" ,
135
+ gpuConfig : nil ,
136
+ node : node ,
137
+ nodeGroup : nil ,
138
+ expectedGpuResource : "" ,
139
+ expectedGpuType : gpu .MetricsNoGPU ,
140
+ },
141
+ {
142
+ name : "generic gpu" ,
143
+ gpuConfig : & cloudprovider.GpuConfig {
144
+ Type : "" ,
145
+ ExtendedResourceName : resourceName ,
146
+ },
147
+ node : nodeWithGpu ,
148
+ nodeGroup : nil ,
149
+ expectedGpuResource : gpu .ResourceNvidiaGPU ,
150
+ expectedGpuType : gpu .MetricsGenericGPU ,
151
+ },
152
+ {
153
+ name : "dra gpu, known type" ,
154
+ gpuConfig : & cloudprovider.GpuConfig {
155
+ Type : knownGpu ,
156
+ DraDriverName : "test-driver" ,
157
+ },
158
+ node : nodeWithoutGpu ,
159
+ nodeGroup : nil ,
160
+ expectedGpuResource : "dra_test-driver" ,
161
+ expectedGpuType : knownGpu ,
162
+ },
163
+ {
164
+ name : "dra gpu, unknown type" ,
165
+ gpuConfig : & cloudprovider.GpuConfig {
166
+ Type : unknownGpu ,
167
+ DraDriverName : "test-driver" ,
168
+ },
169
+ node : nodeWithoutGpu ,
170
+ nodeGroup : nil ,
171
+ expectedGpuResource : "dra_test-driver" ,
172
+ expectedGpuType : gpu .MetricsUnknownGPU ,
173
+ },
174
+ {
175
+ name : "capacity present, known type" ,
176
+ gpuConfig : & cloudprovider.GpuConfig {
177
+ Type : knownGpu ,
178
+ ExtendedResourceName : resourceName ,
179
+ },
180
+ node : nodeWithGpu ,
181
+ nodeGroup : nil ,
182
+ expectedGpuResource : gpu .ResourceNvidiaGPU ,
183
+ expectedGpuType : knownGpu ,
184
+ },
185
+ {
186
+ name : "capacity present, unknown type" ,
187
+ gpuConfig : & cloudprovider.GpuConfig {
188
+ Type : unknownGpu ,
189
+ ExtendedResourceName : resourceName ,
190
+ },
191
+ node : nodeWithGpu ,
192
+ nodeGroup : nil ,
193
+ expectedGpuResource : gpu .ResourceNvidiaGPU ,
194
+ expectedGpuType : gpu .MetricsUnknownGPU ,
195
+ },
196
+ {
197
+ name : "no capacity, template has gpu" ,
198
+ gpuConfig : & cloudprovider.GpuConfig {
199
+ Type : knownGpu ,
200
+ ExtendedResourceName : resourceName ,
201
+ },
202
+ node : nodeWithoutGpu ,
203
+ nodeGroup : nodeGroupWithGpu ,
204
+ expectedGpuResource : gpu .ResourceNvidiaGPU ,
205
+ expectedGpuType : gpu .MetricsMissingGPU ,
206
+ },
207
+ {
208
+ name : "no capacity, template has no gpu" ,
209
+ gpuConfig : & cloudprovider.GpuConfig {
210
+ Type : knownGpu ,
211
+ ExtendedResourceName : resourceName ,
212
+ },
213
+ node : nodeWithoutGpu ,
214
+ nodeGroup : nodeGroupWithoutGpu ,
215
+ expectedGpuResource : gpu .ResourceNvidiaGPU ,
216
+ expectedGpuType : gpu .MetricsUnexpectedLabelGPU ,
217
+ },
218
+ {
219
+ name : "no capacity, no node group" ,
220
+ gpuConfig : & cloudprovider.GpuConfig {
221
+ Type : knownGpu ,
222
+ ExtendedResourceName : resourceName ,
223
+ },
224
+ node : nodeWithoutGpu ,
225
+ nodeGroup : nil ,
226
+ expectedGpuResource : gpu .ResourceNvidiaGPU ,
227
+ expectedGpuType : gpu .MetricsUnexpectedLabelGPU ,
228
+ },
229
+ }
230
+
231
+ for _ , tc := range testCases {
232
+ t .Run (tc .name , func (t * testing.T ) {
233
+ gpuResource , gpuType := gpu .GetGpuInfoForMetrics (tc .gpuConfig , availableGPUTypes , tc .node , tc .nodeGroup )
234
+ assert .Equal (t , tc .expectedGpuResource , gpuResource )
235
+ assert .Equal (t , tc .expectedGpuType , gpuType )
236
+ })
237
+ }
84
238
}
0 commit comments