app = Flask(__name__)


# Gauges: point-in-time values, one time series per container name.
container_status = Gauge(
    'cxp_container_status',
    'Docker container status (1 = running, 0 = not running)',
    ['container_name'],
)
container_cpu_percentage = Gauge(
    'cxp_cpu_percentage',
    'Docker container cpu usage',
    ['container_name'],
)
container_memory_percentage = Gauge(
    'cxp_memory_percentage',
    'Docker container memory usage in percent',
    ['container_name'],
)
container_memory_bytes_total = Gauge(
    'cxp_memory_bytes_total',
    'Docker container memory usage in bytes',
    ['container_name'],
)

# Counters for disk I/O, labelled by container name.
disk_io_read_counter = Counter(
    "cxp_disk_io_read_bytes_total",
    "Total number of bytes read from disk",
    ['container_name'],
)
disk_io_write_counter = Counter(
    "cxp_disk_io_write_bytes_total",
    "Total number of bytes written to disk",
    ['container_name'],
)

# Counters for network I/O, labelled by container name.
network_rx_counter = Counter(
    "cxp_network_rx_bytes_total",
    "Total number of bytes received over the network",
    ['container_name'],
)
network_tx_counter = Counter(
    "cxp_network_tx_bytes_total",
    "Total number of bytes transmitted over the network",
    ['container_name'],
)
2924
3025
def get_containers(all=False):
    """Return the containers reported by the Docker client.

    Args:
        all: Forwarded to ``client.containers.list``. When false (the
            default) only running containers are listed; when true, every
            container is listed regardless of state.

    Returns:
        Whatever ``client.containers.list`` returns for the given flag.
    """
    # NOTE: ``all`` shadows the builtin, but callers pass it by keyword, so
    # the parameter name is part of the public interface and must stay.
    docker_client = docker_env()
    return docker_client.containers.list(all=all)


# Names of the containers that were running when this module was imported.
# update_container_status() extends this list as new containers appear.
init_containers_names = [running.name for running in get_containers()]
4332
def update_container_status(containers):
    """Refresh the ``cxp_container_status`` gauge from a container listing.

    Args:
        containers: Iterable of container objects exposing ``name`` and
            ``status`` attributes.

    Side effects:
        * Sets the gauge to 1/0 for every already-tracked container
          according to whether its status is ``"running"``.
        * Starts tracking (appends to ``init_containers_names``) any
          untracked container that is currently running.
        * Sets the gauge to 0 for every tracked name that is absent from
          ``containers`` (i.e. the container has been removed).
    """
    # Collected during the single pass below, so the argument is iterated
    # exactly once and the "removed" check is a set lookup instead of
    # rebuilding a name list for every tracked container.
    present_names = set()

    for container in containers:
        present_names.add(container.name)
        is_running = container.status == "running"
        if container.name in init_containers_names:
            container_status.labels(container_name=container.name).set(1 if is_running else 0)
        elif is_running:
            # First sighting of a running container: report and track it.
            container_status.labels(container_name=container.name).set(1)
            init_containers_names.append(container.name)

    # Tracked containers that no longer appear in the listing are reported
    # as not running.
    for container_name in init_containers_names:
        if container_name not in present_names:
            container_status.labels(container_name=container_name).set(0)
45+
5746
async def container_stats():
    """Fetch stats for all listed containers concurrently and publish them.

    Opens a ``Docker`` client, awaits ``stat.get_container_stats`` for each
    container returned by ``docker.containers.list()``, then updates the
    CPU/memory gauges and increments the disk/network counters, labelled by
    container name. The client is closed in all cases, including failure.
    """
    docker = Docker()
    try:
        containers = await docker.containers.list()
        tasks = [stat.get_container_stats(container) for container in containers]
        all_stats = await gather(*tasks)
        for stats in all_stats:
            # Only the first element of each result is used.
            sample = stats[0]
            # The first character of the reported name is dropped for the
            # label (presumably the leading "/" Docker puts on names —
            # TODO confirm).
            name = sample['name'][1:]

            container_cpu_percentage.labels(container_name=name).set(stat.calculate_cpu_percentage(sample))
            container_memory_percentage.labels(container_name=name).set(stat.calculate_memory_percentage(sample))
            container_memory_bytes_total.labels(container_name=name).set(stat.calculate_memory_bytes(sample))

            # Each I/O helper is evaluated once per sample; index 0 feeds the
            # read/rx counter and index 1 the write/tx counter.
            # NOTE(review): counters are incremented by the helper's return
            # value on every scrape. If the helpers return cumulative totals
            # rather than deltas, the exported counters will over-count —
            # verify against the stat module.
            disk_io = stat.calculate_disk_io(sample)
            disk_io_read_counter.labels(container_name=name).inc(disk_io[0])
            disk_io_write_counter.labels(container_name=name).inc(disk_io[1])

            network_io = stat.calculate_network_io(sample)
            network_rx_counter.labels(container_name=name).inc(network_io[0])
            network_tx_counter.labels(container_name=name).inc(network_io[1])
    finally:
        # Always release the client's connection, even if collection failed.
        await docker.close()
7363
# Per-container stat collectors (everything except container_status); these
# are the metrics whose label sets are cleared by flush_metric_labels().
metrics_names = [
    container_cpu_percentage,
    container_memory_percentage,
    container_memory_bytes_total,
    disk_io_read_counter,
    disk_io_write_counter,
    network_rx_counter,
    network_tx_counter,
]
7565
76- def flush_metric_labels ():
77- all_containers = get_all_container ()
78- for container in all_containers :
66+ def flush_metric_labels (c ):
67+ for container in c :
7968 if container .status != "running" :
8069 for m in metrics_names :
8170 m .clear ()
@@ -87,15 +76,15 @@ def index():
@app.route('/metrics')
def metrics():
    """Serve the Prometheus scrape endpoint.

    On each request: lists all containers (running or not), refreshes the
    status gauge, flushes stat labels, runs ``container_stats()`` to
    completion on a fresh event loop, and returns the latest exposition text.

    Returns:
        A ``text/plain`` ``Response`` with the generated metrics, or a plain
        error string if any step raised.
    """
    loop = None
    try:
        all_containers = get_containers(all=True)
        update_container_status(all_containers)
        flush_metric_labels(all_containers)
        loop = new_event_loop()
        stats_task = loop.create_task(container_stats())
        # NOTE(review): wait() does not re-raise an exception raised inside
        # the task, so a failure in container_stats() is not reported by the
        # except branch below — confirm this best-effort behaviour is wanted.
        loop.run_until_complete(wait([stats_task]))
    except Exception as e:
        return f"Error running script: {str(e)}"
    finally:
        # A new loop is created per request; close it so its selector and
        # self-pipe file descriptors are not leaked on every scrape.
        if loop is not None:
            loop.close()

    return Response(generate_latest(), mimetype='text/plain')
10089
10190def create_app ():
0 commit comments