@@ -135,6 +135,25 @@ def analyse_partition_idrquota(partition_name, partition_flag, alert_bytes_thres
135135 alerts .append (response )
136136 alerts .append ("" )
137137
def analyse_shared_disk(partition_name, alert_bytes_threshold):
    """Alert when the shared filesystem behind a partition is low on free space.

    Runs `df`, picks the row whose filesystem name starts with the device mapped to
    `partition_name`, and appends a message (followed by an empty separator entry) to
    `alerts` when the available space is below `alert_bytes_threshold`. An alert is also
    appended when the filesystem cannot be found in the `df` output at all, so that a
    broken check does not go unnoticed.

    `alerts` and `run_cmd` are not defined here; they are taken from the enclosing scope.
    NOTE(review): `run_cmd` is assumed to return the command's stdout as a `str` - confirm.

    Args:
        partition_name: one of "SCRATCH", "WORK" or "STORE".
        alert_bytes_threshold: free space, in bytes, below which an alert is appended.

    Raises:
        KeyError: if `partition_name` has no filesystem mapping, or if the `df` header
            has no "Available" column.
    """
    partition_name_2_disk = {
        "SCRATCH": "gpfsssd",
        "WORK": "gpfsdswork",
        "STORE": "gpfsdsstore",
    }
    # Resolve the device once, up front, so an unknown partition fails before `df` is run.
    disk_name = partition_name_2_disk[partition_name]

    # `-k` pins the unit to 1024-byte blocks whatever the environment says, and `-P`
    # (POSIX format) keeps every filesystem on exactly one line even when its name is
    # long, so the whitespace-based column parsing below stays aligned with the header.
    response = run_cmd(["df", "-kP"])
    rows = [line.split() for line in response.splitlines() if line.strip()]
    column_names = rows[0] if rows else []

    disk_row = next((row for row in rows[1:] if row[0].startswith(disk_name)), None)
    if disk_row is None:
        # Failing to find the filesystem is itself worth reporting: raise an alert rather
        # than crashing with a bare IndexError.
        alerts.append(f"Shared {partition_name}: filesystem {disk_name} not found in `df` output")
        alerts.append("")
        return

    # "Mounted on" splits into two header tokens; `zip` truncates to the shorter sequence,
    # which is harmless because only "Available" is read.
    disk_meta = dict(zip(column_names, disk_row))

    # `df -k` reports 1024-byte blocks, and `1024 == 2 ** 10`
    available_disk_left = int(disk_meta["Available"]) * 2 ** 10
    if available_disk_left < alert_bytes_threshold:
        alerts.append(f"Shared {partition_name} has {available_disk_left / 2 ** 40:.2f} TB left")
        alerts.append("")
156+
138157 # WORK and STORE partitions stats can be accessed much faster through `idrquota`, and it already
139158 # includes the quota info
140159 analyse_partition_idrquota (partition_name = "WORK" , partition_flag = "-w" , alert_bytes_threshold = 0.85 , alert_inodes_threshold = 0.85 )
@@ -143,6 +162,9 @@ def analyse_partition_idrquota(partition_name, partition_flag, alert_bytes_thres
143162 # SCRATCH - check only bytes w/ a hard quota of 400TB - alert on lower threshold than other
144163 # partitions due to it filling up at a faster rate (dumping huge checkpoints)
145164 analyse_partition_bytes (partition_name = "SCRATCH" , partition_path = "/gpfsssd/scratch/rech/six/" , hard_limit_bytes = 400 * 2 ** 40 , alert_bytes_threshold = 0.75 )
165+ # SCRATCH is actually shared with everyone, so we should also monitor the output of `df -h | grep gpfsssd`
166+ # Alert when less than 100TB is left on the shared filesystem
167+ analyse_shared_disk ("SCRATCH" , 100 * 2 ** 40 )
146168
147169 # WORKFS - check both bytes and inodes w/ hard quotas of 2TB / 3M
148170 analyse_partition_bytes (partition_name = "WORKFS" , partition_path = "/gpfsssd/worksf/projects/rech/six/" , hard_limit_bytes = 2 * 2 ** 40 , alert_bytes_threshold = 0.85 )
0 commit comments