@@ -119,6 +119,38 @@ public class PrometheusMetricsCollector {
119119 .labelNames ("env" , "cuebot_host" , "render_node" , "job_name" , "frame_name" , "frame_id" )
120120 .register ();
121121
122+ private static final Counter frameCompletedCounter = Counter .build ()
123+ .name ("cue_frames_completed_total" ).help ("Total number of frames completed" )
124+ .labelNames ("env" , "cuebot_host" , "state" , "show" , "shot" ).register ();
125+
126+ private static final Counter jobCompletedCounter =
127+ Counter .build ().name ("cue_jobs_completed_total" ).help ("Total number of jobs completed" )
128+ .labelNames ("env" , "cuebot_host" , "state" , "show" , "shot" ).register ();
129+
130+ private static final Histogram jobCoreSecondsHistogram = Histogram .build ()
131+ .name ("cue_job_core_seconds" ).help ("Histogram of total core seconds per job" )
132+ .labelNames ("env" , "cuebot_host" , "show" , "shot" )
133+ .buckets (3600 , 36000 , 360000 , 3600000 , 36000000 ).register ();
134+
135+ private static final Histogram layerMaxRuntimeHistogram =
136+ Histogram .build ().name ("cue_layer_max_runtime_seconds" )
137+ .help ("Histogram of max frame runtime per layer in seconds" )
138+ .labelNames ("env" , "cuebot_host" , "show" , "shot" , "layer_type" )
139+ .buckets (60 , 300 , 600 , 1800 , 3600 , 7200 , 14400 , 28800 , 86400 ).register ();
140+
141+ private static final Histogram layerMaxMemoryHistogram =
142+ Histogram .build ().name ("cue_layer_max_memory_bytes" )
143+ .help ("Histogram of max frame memory usage per layer in bytes" )
144+ .labelNames ("env" , "cuebot_host" , "show" , "shot" , "layer_type" )
145+ .buckets (256L * 1024 * 1024 , 512L * 1024 * 1024 , 1024L * 1024 * 1024 ,
146+ 2048L * 1024 * 1024 , 4096L * 1024 * 1024 , 8192L * 1024 * 1024 ,
147+ 16384L * 1024 * 1024 , 32768L * 1024 * 1024 )
148+ .register ();
149+
150+ private static final Counter hostReportsReceivedCounter = Counter .build ()
151+ .name ("cue_host_reports_received_total" ).help ("Total number of host reports received" )
152+ .labelNames ("env" , "cuebot_host" , "facility" ).register ();
153+
122154 private String deployment_environment ;
123155 private String cuebot_host ;
124156
@@ -269,6 +301,82 @@ public void incrementFrameKillFailureCounter(String hostname, String jobName, St
269301 jobName , frameName , frameId ).inc ();
270302 }
271303
304+ /**
305+ * Record a frame completion
306+ *
307+ * @param state final state of the frame
308+ * @param show show name
309+ * @param shot shot name
310+ */
311+ public void recordFrameCompleted (String state , String show , String shot ) {
312+ frameCompletedCounter
313+ .labels (this .deployment_environment , this .cuebot_host , state , show , shot ).inc ();
314+ }
315+
316+ /**
317+ * Record a job completion
318+ *
319+ * @param state final state of the job
320+ * @param show show name
321+ * @param shot shot name
322+ */
323+ public void recordJobCompleted (String state , String show , String shot ) {
324+ jobCompletedCounter .labels (this .deployment_environment , this .cuebot_host , state , show , shot )
325+ .inc ();
326+ }
327+
328+ /**
329+ * Record job total core seconds for histogramming
330+ *
331+ * @param coreSeconds total core seconds consumed by the job
332+ * @param show show name
333+ * @param shot shot name
334+ */
335+ public void recordJobCoreSeconds (double coreSeconds , String show , String shot ) {
336+ jobCoreSecondsHistogram .labels (this .deployment_environment , this .cuebot_host , show , shot )
337+ .observe (coreSeconds );
338+ }
339+
340+ /**
341+ * Record layer max runtime for histogramming
342+ *
343+ * @param runtimeSeconds max runtime in seconds for the layer
344+ * @param show show name
345+ * @param shot shot name
346+ * @param layerType layer type
347+ */
348+ public void recordLayerMaxRuntime (double runtimeSeconds , String show , String shot ,
349+ String layerType ) {
350+ layerMaxRuntimeHistogram
351+ .labels (this .deployment_environment , this .cuebot_host , show , shot , layerType )
352+ .observe (runtimeSeconds );
353+ }
354+
355+ /**
356+ * Record layer max memory usage for histogramming
357+ *
358+ * @param memoryBytes max memory in bytes for the layer
359+ * @param show show name
360+ * @param shot shot name
361+ * @param layerType layer type
362+ */
363+ public void recordLayerMaxMemory (double memoryBytes , String show , String shot ,
364+ String layerType ) {
365+ layerMaxMemoryHistogram
366+ .labels (this .deployment_environment , this .cuebot_host , show , shot , layerType )
367+ .observe (memoryBytes );
368+ }
369+
370+ /**
371+ * Record a host report received
372+ *
373+ * @param facility facility name
374+ */
375+ public void recordHostReport (String facility ) {
376+ hostReportsReceivedCounter .labels (this .deployment_environment , this .cuebot_host , facility )
377+ .inc ();
378+ }
379+
272380 // Setters used for dependency injection
273381 public void setBookingQueue (BookingQueue bookingQueue ) {
274382 this .bookingQueue = bookingQueue ;
0 commit comments