88from pydantic import BaseModel , Field
99from kubernetes .client .rest import ApiException
1010
11+
1112class InputSchema (BaseModel ):
12- namespace : str = Field (..., description = 'The namespace in which the service resides.' , title = 'Namespace' )
13- core_services : list = Field (
14- ...,
15- description = 'List of services for which the used PVC size needs to be checked.' ,
16- title = 'K8s Sservice name' ,
17- )
18- threshold : Optional [int ] = Field (
19- 80 ,
20- description = 'Percentage threshold for utilized PVC disk size.E.g., a 80% threshold checks if the utilized space exceeds 80% of the total PVC capacity.' ,
21- title = 'Threshold (in %)' ,
22- )
13+ namespace : str = Field (
14+ ...,
15+ description = "The namespace in which the service resides." ,
16+ title = "Namespace" ,
17+ )
18+ core_services : list = Field (
19+ ...,
20+ description = "List of services for which the used PVC size needs to be checked." ,
21+ title = "K8s Service name" ,
22+ )
23+ threshold : Optional [int ] = Field (
24+ 80 ,
25+ description = "Percentage threshold for utilized PVC disk size.E.g., a 80% threshold checks if the utilized space exceeds 80% of the total PVC capacity." ,
26+ title = "Threshold (in %)" ,
27+ )
2328
2429
2530def k8s_check_service_pvc_utilization_printer (output ):
@@ -28,13 +33,18 @@ def k8s_check_service_pvc_utilization_printer(output):
2833 if status :
2934 print ("Disk sizes for all checked services are within the threshold." )
3035 else :
31- print ("ALERT: One or more PVC disk sizes are below the threshold:" )
36+ print ("ALERT: One or more PVC disk sizes are above threshold:" )
3237 print ("-" * 40 )
3338 for pvc in pvc_info :
34- print (f"PVC: { pvc ['pvc_name' ]} - Utilized: { pvc ['used' ]} of { pvc ['capacity' ]} " )
39+ print (
40+ f"PVC: { pvc ['pvc_name' ]} - Utilized: { pvc ['used' ]} of { pvc ['capacity' ]} "
41+ )
3542 print ("-" * 40 )
3643
37- def k8s_check_service_pvc_utilization (handle , core_services : list , namespace :str , threshold : int = 80 ) -> Tuple :
44+
45+ def k8s_check_service_pvc_utilization (
46+ handle , core_services : list , namespace : str , threshold : int = 80
47+ ) -> Tuple :
3848 """
3949 k8s_check_service_pvc_utilization checks the utilized disk size of a service's PVC against a given threshold.
4050
@@ -55,7 +65,7 @@ def k8s_check_service_pvc_utilization(handle, core_services: list, namespace:str
5565 :type namespace: str
5666 :param namespace: The namespace in which the service resides.
5767
58- :return: Status and dictionary with PVC name and its size information if the PVC's disk size is below the threshold.
68+ :return: Status and dictionary with PVC name and its size information if the PVC's disk size exceeds threshold.
5969 """
6070
6171 alert_pvcs_all_services = []
@@ -68,122 +78,166 @@ def k8s_check_service_pvc_utilization(handle, core_services: list, namespace:str
6878 if not response .stdout .strip ():
6979 # No labels found for a particular service. Skipping...
7080 continue
71- labels_dict = json .loads (response .stdout .replace ("'" , " \" " ))
81+ labels_dict = json .loads (response .stdout .replace ("'" , '"' ))
7282 label_selector = "," .join ([f"{ k } ={ v } " for k , v in labels_dict .items ()])
7383
7484 # Fetch the pod attached to this service.
7585 # The safer option is to try with the * option. Having a specific index like 0 or 1
76- # will lead to ApiException.
86+ # will lead to ApiException.
7787 get_pod_command = f"kubectl get pods -n { namespace } -l { label_selector } -o=jsonpath='{{.items[*].metadata.name}}'"
7888 response = handle .run_native_cmd (get_pod_command )
7989 if not response or response .stderr :
80- raise ApiException (f"Error while executing command ({ get_pod_command } ): { response .stderr if response else 'empty response' } " )
90+ raise ApiException (
91+ f"Error while executing command ({ get_pod_command } ): { response .stderr if response else 'empty response' } "
92+ )
8193
8294 # pod_names stores the output from the above kubectl command, which is a list of pod_names separated by space
8395 pod_names = response .stdout .strip ()
8496 if not pod_names :
8597 # No pods found for service {svc} in namespace {namespace} with labels {label_selector}
8698 continue
87-
99+
88100 # Fetch PVCs attached to the pod
89- # The Above kubectl command would return a string that is space separated name(s) of the pod.
101+ # The above kubectl command returns a string of space-separated pod name(s).
90102 # Given such a string, let's find out if we have one or more than one pod name in the string.
91103 # If there is more than one pod name in the output, we need to iterate over the whole items[] array.
92- # Else we can directly access the persistentVolumeClaim name
104+ # Else we can directly access the persistentVolumeClaim name
93105 # Let's also associate the pod_name with the claim name (PVC name) in the format of
94106 # pod_name:pv_claim_name
95-
107+
96108 if len (pod_names .split ()) > 1 :
97- json_path_cmd = " {range .items[*]}{.metadata.name}:{range .spec.volumes[*].persistentVolumeClaim}{.claimName} {end}{\ "\\ n\ " }{end}"
109+ json_path_cmd = ' {range .items[*]}{.metadata.name}:{range .spec.volumes[*].persistentVolumeClaim}{.claimName} {end}{"\\ n"}{end}'
98110 else :
99111 json_path_cmd = "{.metadata.name}:{range .spec.volumes[*].persistentVolumeClaim}{.claimName}{end}"
100112
101- get_pvc_names_command = f"kubectl get pod { pod_names } -n { namespace } -o=jsonpath='{ json_path_cmd } '"
102-
113+ get_pvc_names_command = (
114+ f"kubectl get pod { pod_names } -n { namespace } -o=jsonpath='{ json_path_cmd } '"
115+ )
103116
104117 response = handle .run_native_cmd (get_pvc_names_command )
105118 if not response or response .stderr :
106- raise ApiException (f"Error while executing command ({ get_pvc_names_command } ): { response .stderr if response else 'empty response' } " )
119+ raise ApiException (
120+ f"Error while executing command ({ get_pvc_names_command } ): { response .stderr if response else 'empty response' } "
121+ )
107122 # Example: ['lightbeam-elasticsearch-master-0:data-lightbeam-elasticsearch-master-0']
108123 pod_and_pvc_names = response .stdout .strip ().split ()
109124
110-
111- # The pod_and_pvc_names
125+ # An empty pod_and_pvc_names list means the command returned no pod:claim entries; record the service and skip it.
112126 if not pod_and_pvc_names :
113127 services_without_pvcs .append (svc )
114128 continue
115129
116130 pvc_mounts = []
117131 alert_pvcs = []
118132 all_pvcs = []
119-
133+
120134 for element in pod_and_pvc_names :
121- pod_name , claim_name = element .split (':' )
135+ pod_name , claim_name = element .split (":" )
122136 if not claim_name :
123137 # Skip if Volume Claim name is empty.
124- continue
138+ continue
125139
126- # Fetch the Pod JSON
140+ # Fetch the Pod JSON
127141 # We need to get the container name (if any) from the Pod's JSON. This is needed
128142 # if we want to exec into the POD that is within a container. The JSON data that
129143 # we obtain is used to fill the pvc_mounts list, which is a list of dictionaries.
130144 # We use this pvc_mounts to find out the used_space percentage. We compare that with
131- # the threshold to flag if the utilization is above threshold.
145+ # the threshold to flag if the utilization is above threshold.
132146 # df -kh is the command used to get the disk utilization. This is accurate as we get
133147 # the disk utilization from the POD directly, rather than checking the resource limit
134- # and resource request from the deployment / stateful YAML file.
148+ # and resource request from the deployment / stateful YAML file.
135149 get_pod_json_command = f"kubectl get pod { pod_name } -n { namespace } -o json"
136150 pod_json_output = handle .run_native_cmd (get_pod_json_command )
137151 if not pod_json_output or pod_json_output .stderr :
138- raise ApiException (f"Error fetching pod json for { pod_name } : { pod_json_output .stderr if pod_json_output else 'empty response' } " )
152+ raise ApiException (
153+ f"Error fetching pod json for { pod_name } : { pod_json_output .stderr if pod_json_output else 'empty response' } "
154+ )
139155 pod_data = json .loads (pod_json_output .stdout )
140-
156+
141157 # Using dict.get() with a default value avoids a KeyError when a key is missing.
142- for container in pod_data .get ('spec' , {}).get ('containers' , {}):
143- for mount in container .get ('volumeMounts' , {}):
144- for volume in pod_data .get ('spec' , {}).get ('volumes' , {}):
145- if 'persistentVolumeClaim' in volume and volume .get ('name' ) == mount .get ('name' ):
158+ for container in pod_data .get ("spec" , {}).get ("containers" , {}):
159+ for mount in container .get ("volumeMounts" , {}):
160+ for volume in pod_data .get ("spec" , {}).get ("volumes" , {}):
161+ if "persistentVolumeClaim" in volume and volume .get (
162+ "name"
163+ ) == mount .get ("name" ):
146164 try :
147- claim_name = volume ['persistentVolumeClaim' ]['claimName' ]
148- pvc_mounts .append ({
149- "container_name" : container ['name' ],
150- "mount_path" : mount ['mountPath' ],
151- "pvc_name" : claim_name if claim_name else None
152- })
165+ claim_name = volume ["persistentVolumeClaim" ][
166+ "claimName"
167+ ]
168+ pvc_mounts .append (
169+ {
170+ "container_name" : container ["name" ],
171+ "mount_path" : mount ["mountPath" ],
172+ "pvc_name" : claim_name if claim_name else None ,
173+ }
174+ )
153175 except KeyError as e :
154176 # Handle the KeyError (e.g., log the error, skip this iteration, etc.)
155177 print (f"KeyError: { e } . Skipping this entry." )
156178 except IndexError as e :
157179 # Handle the IndexError (e.g., log the error, skip this iteration, etc.)
158180 print (f"IndexError: { e } . Skipping this entry." )
159181
160-
161- all_mounts = [mount .get ('mount_path' ) for mount in pvc_mounts ]
182+ all_mounts = [mount .get ("mount_path" ) for mount in pvc_mounts ]
162183 all_mounts = " " .join (all_mounts ).strip ()
163184 for mount in pvc_mounts :
164- container_name = mount ['container_name' ]
165- mount_path = mount ['mount_path' ]
166- pvc_name = mount ['pvc_name' ]
167- all_pvcs .append ({"pvc_name" : pvc_name , "mount_path" : mount_path , "used" : None , "capacity" : None })
185+ container_name = mount ["container_name" ]
186+ mount_path = mount ["mount_path" ]
187+ pvc_name = mount ["pvc_name" ]
188+ all_pvcs .append (
189+ {
190+ "pvc_name" : pvc_name ,
191+ "mount_path" : mount_path ,
192+ "used" : None ,
193+ "capacity" : None ,
194+ }
195+ )
168196
169197 du_command = f"kubectl exec -n { namespace } { pod_name } -c { container_name } -- df -kh { all_mounts } | grep -v Filesystem"
170198 du_output = handle .run_native_cmd (du_command )
171-
199+
172200 if du_output and not du_output .stderr :
173- used_space = du_output .stdout .strip ()
174- for idx , space in enumerate ([used_space ]):
175- space = space .split ()
176- used_percentage = int (space [- 2 ].replace ('%' , '' ))
177- total_capacity_str = space [1 ].replace ('%' , '' )
178- all_pvcs [idx ]["used" ] = used_percentage
179- all_pvcs [idx ]["capacity" ] = total_capacity_str
201+ # Process each line of df output separately
202+ df_lines = du_output .stdout .strip ().split ("\n " )
203+
204+ for df_line in df_lines :
205+ if not df_line .strip ():
206+ continue
207+
208+ # Split line into columns
209+ columns = re .split (r"\s+" , df_line .strip ())
210+
211+ # Find the percentage column (contains '%')
212+ percent_col = None
213+ for i , col in enumerate (columns ):
214+ if "%" in col :
215+ percent_col = i
216+ break
217+
218+ if percent_col is None or len (columns ) < 2 :
219+ print (f"Warning: Unexpected df output format: { df_line } " )
220+ continue
221+
222+ # Extract percentage and capacity
223+ used_percentage = int (columns [percent_col ].replace ("%" , "" ))
224+ total_capacity = columns [1 ] if len (columns ) > 1 else "Unknown"
225+ pvc_info = {
226+ "pvc_name" : pvc_name ,
227+ "mount_path" : mount_path ,
228+ "used" : used_percentage ,
229+ "capacity" : total_capacity ,
230+ }
231+
232+ # Check if usage exceeds threshold
180233 if used_percentage > threshold :
181- alert_pvcs .append (all_pvcs [ idx ] )
234+ alert_pvcs .append (pvc_info )
182235
183236 alert_pvcs_all_services .extend (alert_pvcs )
237+
184238 if services_without_pvcs :
185239 print ("Following services do not have any PVCs attached:" )
186240 for service in services_without_pvcs :
187241 print (f"- { service } " )
188242
189- return (not bool (alert_pvcs_all_services ), alert_pvcs_all_services )
243+ return (not bool (alert_pvcs_all_services ), alert_pvcs_all_services )
0 commit comments