@@ -6,8 +6,8 @@ const k8srp = require('kubernetes-resource-parser')
// Per-node allocatable capacity used to sanity-check container resource
// requests: a single node in this cluster tops out at these values.
const nodeResources = {
  'nvidia.com/gpu': 8, // GPUs per node
  'nvidia.com/roce_gdr': 2, // roce_gdr (GDR-capable RoCE) interfaces per node
  // Alias read by checkContainerResources as `nodeResources.gdrPerNode`.
  // Without this key that lookup yields undefined, `gdr > undefined` is
  // always false, and the roce_gdr over-request warning can never fire.
  gdrPerNode: 2,
  cpu: 80, // CPUs per node
  memory: '800G' // memory per node (Kubernetes quantity string)
}
1212
1313class Client {
@@ -163,32 +163,32 @@ function checkContainerResources (namespace, workload, workloadReplicas, contain
163163
164164 const gpus = parseInt ( resources [ 'nvidia.com/gpu' ] ?? '0' )
165165 const gdr = parseInt ( resources [ 'nvidia.com/roce_gdr' ] ?? '0' )
166- const cpus = k8srp . cpuParser ( resources [ ' cpu' ] ?? '0' )
167- const mem = k8srp . memoryParser ( resources [ ' memory' ] ?? '0' )
166+ const cpus = k8srp . cpuParser ( resources . cpu ?? '0' )
167+ const mem = k8srp . memoryParser ( resources . memory ?? '0' )
168168
169169 // warn if the resource requests cannot be satisfied by a Node
170170 if ( gpus > nodeResources [ 'nvidia.com/gpu' ] ) {
171171 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting "${ gpus } GPUs"` )
172172 }
173- if ( gdr > nodeResources [ ' gdrPerNode' ] ) {
173+ if ( gdr > nodeResources . gdrPerNode ) {
174174 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting ${ gdr } roce_gdr interfaces"` )
175175 }
176- if ( cpus > nodeResources [ ' cpu' ] ) {
176+ if ( cpus > nodeResources . cpu ) {
177177 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting "${ cpus } CPUs"` )
178178 }
179- if ( mem > k8srp . memoryParser ( nodeResources [ ' memory' ] ) ) {
180- console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting ${ resources [ ' memory' ] } memory` )
179+ if ( mem > k8srp . memoryParser ( nodeResources . memory ) ) {
180+ console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting ${ resources . memory } memory` )
181181 }
182182
183183 // warn if the resource:GPU ratio is not proportional to Node resources
184- if ( gdr > 0 && ( ( gpus == 0 ) || ( gpus / gdr < nodeResources [ 'nvidia.com/gpu' ] / nodeResources [ 'nvidia.com/roce_gdr' ] ) ) ) {
184+ if ( gdr > 0 && ( ( gpus === 0 ) || ( gpus / gdr < nodeResources [ 'nvidia.com/gpu' ] / nodeResources [ 'nvidia.com/roce_gdr' ] ) ) ) {
185185 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting ${ gdr } roce_gdr but only ${ gpus } GPUs` )
186186 }
187- if ( gpus > 0 && ( cpus > 0 ) && ( cpus / gpus > nodeResources [ ' cpu' ] / nodeResources [ 'nvidia.com/gpu' ] ) ) {
187+ if ( gpus > 0 && ( cpus > 0 ) && ( cpus / gpus > nodeResources . cpu / nodeResources [ 'nvidia.com/gpu' ] ) ) {
188188 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting ${ cpus } cpus but only ${ gpus } GPUs` )
189189 }
190- if ( gpus > 0 && ( mem > 0 ) && ( mem / gpus > k8srp . memoryParser ( nodeResources [ ' memory' ] ) / nodeResources [ 'nvidia.com/gpu' ] ) ) {
191- console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting ${ resources [ ' memory' ] } memory but only ${ gpus } GPUs` )
190+ if ( gpus > 0 && ( mem > 0 ) && ( mem / gpus > k8srp . memoryParser ( nodeResources . memory ) / nodeResources [ 'nvidia.com/gpu' ] ) ) {
191+ console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has a container requesting ${ resources . memory } memory but only ${ gpus } GPUs` )
192192 }
193193
194194 // warn if other resource constraints are violated
@@ -203,7 +203,7 @@ async function checkUserNamespace (client, namespace, queues) {
203203
204204 for ( const workload of workloads ) {
205205 // report invalid queue names
206- let queueName = workload . spec . queueName
206+ const queueName = workload . spec . queueName
207207 if ( queueName ) {
208208 if ( ! queues . find ( queue => queue . metadata . name === queueName ) ) {
209209 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " refers to a non-existent local queue "${ queueName } "` )
@@ -222,10 +222,10 @@ async function checkUserNamespace (client, namespace, queues) {
222222 for ( const condition of workload . status ?. conditions ?? [ ] ) {
223223 conditions [ condition . type ] = condition . status
224224 }
225- if ( conditions [ ' Admitted' ] === 'True' && conditions [ ' PodsReady' ] === 'False' ) {
225+ if ( conditions . Admitted === 'True' && conditions . PodsReady === 'False' ) {
226226 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has conditions Admitted=True and PodsReady=False` )
227227 }
228- if ( conditions [ ' Evicted' ] === 'True' ) {
228+ if ( conditions . Evicted === 'True' ) {
229229 console . log ( `WARNING: workload "${ namespace . metadata . name } /${ workload . metadata . name } " has condition Evicted=True` )
230230 }
231231
@@ -265,13 +265,13 @@ async function main () {
265265 // initialize kubernetes client
266266 const client = new Client ( )
267267
268- let clusterGPUs = 0 // cluster capacity
269- let noScheduleGPUs = 0 // no-schedule GPUs
270- let noExecuteGPUs = 0 // no-execute GPUs
271- let usedGPUs = 0 // GPU usage by admitted workloads
272- let borrowedGPUs = 0 // GPU borrowed from the cohort
273- let quotaGPUs = 0 // nominal GPU quota (excluding slack queue)
274- let slackGPUs = 0 // lending limit on slack queue
268+ let clusterGPUs = 0 // cluster capacity
269+ const noScheduleGPUs = 0 // no-schedule GPUs
270+ const noExecuteGPUs = 0 // no-execute GPUs
271+ let usedGPUs = 0 // GPU usage by admitted workloads
272+ let borrowedGPUs = 0 // GPU borrowed from the cohort
273+ let quotaGPUs = 0 // nominal GPU quota (excluding slack queue)
274+ let slackGPUs = 0 // lending limit on slack queue
275275
276276 const config = await client . readOperatorConfig ( )
277277 const taints = config . autopilot ?. resourceTaints ?. [ 'nvidia.com/gpu' ]
@@ -314,7 +314,10 @@ async function main () {
314314 const queues = { }
315315 for ( const clusterQueue of clusterQueues ) {
316316 const queue = {
317- quota : 0 , usage : 0 , borrowed : 0 , lendingLimit : 0 ,
317+ quota : 0 ,
318+ usage : 0 ,
319+ borrowed : 0 ,
320+ lendingLimit : 0 ,
318321 admitted : clusterQueue . status ?. admittedWorkloads ?? 0 ,
319322 pending : clusterQueue . status ?. pendingWorkloads ?? 0
320323 }
0 commit comments