@@ -4,9 +4,30 @@ import React from 'react';
44import { timeStringToSeconds } from '~/pages/MCADashboard/Metrics/metrics-utils' ;
55
/**
 * Runs one metrics query through the dashboard backend and returns its scalar result.
 *
 * The query is POSTed to `/api/metrics-data`; the result is the element at index 1 of
 * the first returned sample's `value` tuple.
 *
 * The "utilized GPU" query ends in `or vector(0)`, so it yields 0 even when the cluster
 * has no GPU at all. For that query a second probe query (GPU memory-copy utilisation)
 * is issued, and the utilized count is only reported when the probe returns a sample.
 *
 * @param query - Query string forwarded to the backend as `{ query }`.
 * @returns The numeric value of the first sample; the string `'No GPU In Cluster'` when
 *   the relevant response carries no sample; `0` when a request fails.
 */
export const getMetricData = async (query: string): Promise<number | string> => {
  type MetricResponse = { data?: { value?: [string, number] }[] };

  const noGpu = 'No GPU In Cluster';
  const utilizedGPUQuery =
    'count(count by (UUID,GPU_I_ID) (DCGM_FI_PROF_GR_ENGINE_ACTIVE{exported_pod=~".+"})) or vector(0)';
  const utilizedGPUMemoryQuery = 'count(count by (UUID,GPU_I_ID) (DCGM_FI_DEV_MEM_COPY_UTIL))';

  // Issues one query and extracts the first sample's value, or `undefined` when the
  // response has no usable sample (missing/empty `data`, missing `value`, missing index).
  const fetchFirstValue = async (metricQuery: string): Promise<number | undefined> => {
    const res: MetricResponse = await axios.post('/api/metrics-data', { query: metricQuery });
    return res.data?.[0]?.value?.[1] ?? undefined;
  };

  try {
    if (query !== utilizedGPUQuery) {
      return (await fetchFirstValue(query)) ?? noGpu;
    }

    // The two requests are independent, so send them together. The array literal still
    // invokes them in the original order (primary query first, probe second).
    const [utilized, gpuProbe] = await Promise.all([
      fetchFirstValue(query),
      fetchFirstValue(utilizedGPUMemoryQuery),
    ]);

    if (gpuProbe === undefined) {
      // The probe has no `or vector(0)` fallback: no sample means no GPU is present.
      return noGpu;
    }

    // A GPU exists; a missing sample for the utilized query means none is in use.
    return utilized ?? 0;
  } catch (error) {
    // Best-effort: callers always receive a value, so a failed request degrades to 0,
    // but the failure is logged instead of being discarded.
    console.error(`getMetricData: request failed for query "${query}"`, error);
    return 0;
  }
};
1132
1233export const getMetricTableData = async ( query : string ) => {
0 commit comments