1515
1616import argparse
1717import getpass
18+ import logging
1819import os
20+ import sys
1921import time
2022import typing
21- import logging
2223
2324import openstack
2425
3132WAIT_TIMEOUT = 60
3233
3334
34- class ConformanceTestException (Exception ):
35- pass
36-
37-
38- def ensure (condition : bool , error_message : str ):
39- """
40- Custom replacement for the `assert` statement that is not removed by the
41- -O optimization parameter.
42- If the condition does not evaluate to `True`, a ConformanceTestException
43- will be raised containing the specified error_message string.
44- """
45- if not condition :
46- raise ConformanceTestException (error_message )
47-
48-
49- def connect (cloud_name : str , password : typing .Optional [str ] = None
50- ) -> openstack .connection .Connection :
51- """Create a connection to an OpenStack cloud
52-
53- :param string cloud_name:
54- The name of the configuration to load from clouds.yaml.
55-
56- :param string password:
57- Optional password override for the connection.
58-
59- :returns: openstack.connnection.Connection
60- """
61-
62- if password :
63- return openstack .connect (
64- cloud = cloud_name ,
65- password = password
66- )
67- else :
68- return openstack .connect (
69- cloud = cloud_name ,
70- )
def wait_for_resource(
    get_func: typing.Callable[[str], typing.Optional[openstack.resource.Resource]],
    resource_id: str,
    expected_status=("available", ),
    timeout=WAIT_TIMEOUT,
) -> None:
    """Poll a resource once per second until it reaches an expected status.

    :param get_func:
        Callable that looks the resource up by the given identifier. A
        return value of `None` is treated as "not there yet" and polling
        continues. Exceptions raised by `get_func` are not caught here.

    :param string resource_id:
        Identifier passed unchanged to `get_func` on every poll.

    :param expected_status:
        Collection of status strings; the wait ends as soon as the
        resource's status is one of them.

    :param timeout:
        Maximum number of one-second polling rounds before giving up.

    :raises RuntimeError:
        If none of the expected statuses was reached within the timeout.
    """
    # The getter's name (e.g. "get_volume") is the only hint about the kind
    # of resource available in this function, so it labels the error message.
    getter_name = getattr(get_func, "__name__", repr(get_func))
    seconds_waited = 0
    resource = get_func(resource_id)
    while resource is None or resource.status not in expected_status:
        time.sleep(1.0)
        seconds_waited += 1
        if seconds_waited >= timeout:
            raise RuntimeError(
                f"Timed out after {seconds_waited} s: waiting for resource "
                f"{resource_id} (via {getter_name}) to be in status "
                f"{expected_status} (current: {resource and resource.status})"
            )
        resource = get_func(resource_id)
7152
7253
7354def test_backup (conn : openstack .connection .Connection ,
@@ -83,102 +64,50 @@ def test_backup(conn: openstack.connection.Connection,
8364 # CREATE VOLUME
8465 volume_name = f"{ prefix } volume"
8566 logging .info (f"Creating volume '{ volume_name } ' ..." )
86- volume = conn .block_storage .create_volume (
87- name = volume_name ,
88- size = 1
89- )
90- ensure (
91- volume is not None ,
92- f"Creation of initial volume '{ volume_name } ' failed"
93- )
67+ volume = conn .block_storage .create_volume (name = volume_name , size = 1 )
68+ if volume is None :
69+ raise RuntimeError (f"Creation of initial volume '{ volume_name } ' failed" )
9470 volume_id = volume .id
95- ensure (
96- conn .block_storage .get_volume (volume_id ) is not None ,
97- f"Retrieving initial volume by ID '{ volume_id } ' failed"
98- )
71+ if conn .block_storage .get_volume (volume_id ) is None :
72+ raise RuntimeError (f"Retrieving initial volume by ID '{ volume_id } ' failed" )
9973
10074 logging .info (
10175 f"↳ waiting for volume with ID '{ volume_id } ' to reach status "
10276 f"'available' ..."
10377 )
104- seconds_waited = 0
105- while conn .block_storage .get_volume (volume_id ).status != "available" :
106- time .sleep (1.0 )
107- seconds_waited += 1
108- ensure (
109- seconds_waited < timeout ,
110- f"Timeout reached while waiting for volume to reach status "
111- f"'available' (volume id: { volume_id } ) after { seconds_waited } "
112- f"seconds"
113- )
78+ wait_for_resource (conn .block_storage .get_volume , volume_id , timeout = timeout )
11479 logging .info ("Create empty volume: PASS" )
11580
11681 # CREATE BACKUP
11782 logging .info ("Creating backup from volume ..." )
118- backup = conn .block_storage .create_backup (
119- name = f"{ prefix } volume-backup" ,
120- volume_id = volume_id
121- )
122- ensure (
123- backup is not None ,
124- "Backup creation failed"
125- )
83+ backup = conn .block_storage .create_backup (name = f"{ prefix } volume-backup" , volume_id = volume_id )
84+ if backup is None :
85+ raise RuntimeError ("Backup creation failed" )
12686 backup_id = backup .id
127- ensure (
128- conn .block_storage .get_backup (backup_id ) is not None ,
129- "Retrieving backup by ID failed"
130- )
87+ if conn .block_storage .get_backup (backup_id ) is None :
88+ raise RuntimeError ("Retrieving backup by ID failed" )
13189
13290 logging .info (f"↳ waiting for backup '{ backup_id } ' to become available ..." )
133- seconds_waited = 0
134- while conn .block_storage .get_backup (backup_id ).status != "available" :
135- time .sleep (1.0 )
136- seconds_waited += 1
137- ensure (
138- seconds_waited < timeout ,
139- f"Timeout reached while waiting for backup to reach status "
140- f"'available' (backup id: { backup_id } ) after { seconds_waited } "
141- f"seconds"
142- )
91+ wait_for_resource (conn .block_storage .get_backup , backup_id , timeout = timeout )
14392 logging .info ("Create backup from volume: PASS" )
14493
14594 # RESTORE BACKUP
14695 restored_volume_name = f"{ prefix } restored-backup"
14796 logging .info (f"Restoring backup to volume '{ restored_volume_name } ' ..." )
148- conn .block_storage .restore_backup (
149- backup_id ,
150- name = restored_volume_name
151- )
97+ conn .block_storage .restore_backup (backup_id , name = restored_volume_name )
15298
15399 logging .info (
154100 f"↳ waiting for restoration target volume '{ restored_volume_name } ' "
155101 f"to be created ..."
156102 )
157- seconds_waited = 0
158- while conn .block_storage .find_volume (restored_volume_name ) is None :
159- time .sleep (1.0 )
160- seconds_waited += 1
161- ensure (
162- seconds_waited < timeout ,
163- f"Timeout reached while waiting for restored volume to be created "
164- f"(volume name: { restored_volume_name } ) after { seconds_waited } "
165- f"seconds"
166- )
103+ wait_for_resource (conn .block_storage .find_volume , restored_volume_name , timeout = timeout )
167104 # wait for the volume restoration to finish
168105 logging .info (
169106 f"↳ waiting for restoration target volume '{ restored_volume_name } ' "
170107 f"to reach 'available' status ..."
171108 )
172109 volume_id = conn .block_storage .find_volume (restored_volume_name ).id
173- while conn .block_storage .get_volume (volume_id ).status != "available" :
174- time .sleep (1.0 )
175- seconds_waited += 1
176- ensure (
177- seconds_waited < timeout ,
178- f"Timeout reached while waiting for restored volume reach status "
179- f"'available' (volume id: { volume_id } ) after { seconds_waited } "
180- f"seconds"
181- )
110+ wait_for_resource (conn .block_storage .get_volume , volume_id , timeout = timeout )
182111 logging .info ("Restore volume from backup: PASS" )
183112
184113
@@ -191,54 +120,32 @@ def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX,
191120 resources behind. Otherwise returns True to indicate cleanup success.
192121 """
193122
194- def wait_for_resource (resource_type : str , resource_id : str ,
195- expected_status = ("available" , )) -> None :
196- seconds_waited = 0
197- get_func = getattr (conn .block_storage , f"get_{ resource_type } " )
198- while get_func (resource_id ).status not in expected_status :
199- time .sleep (1.0 )
200- seconds_waited += 1
201- ensure (
202- seconds_waited < timeout ,
203- f"Timeout reached while waiting for { resource_type } during "
204- f"cleanup to be in status { expected_status } "
205- f"({ resource_type } id: { resource_id } ) after { seconds_waited } "
206- f"seconds"
207- )
208-
209- logging .info (f"Performing cleanup for resources with the "
210- f"'{ prefix } ' prefix ..." )
123+ logging .info (f"Performing cleanup for resources with the '{ prefix } ' prefix ..." )
211124
212- cleanup_was_successful = True
125+ cleanup_issues = 0 # count failed cleanup operations
213126 backups = conn .block_storage .backups ()
214127 for backup in backups :
215- if backup .name .startswith (prefix ):
216- try :
217- wait_for_resource (
218- "backup" , backup .id ,
219- expected_status = ("available" , "error" )
220- )
221- except openstack .exceptions .ResourceNotFound :
222- # if the resource has vanished on
223- # its own in the meantime ignore it
224- continue
225- except ConformanceTestException as e :
226- # This exception happens if the backup state does not reach any
227- # of the desired ones specified above. We do not need to set
228- # cleanup_was_successful to False here since any remaining ones
229- # will be caught in the next loop down below anyway.
230- logging .warning (str (e ))
231- else :
232- logging .info (f"↳ deleting volume backup '{ backup .id } ' ..." )
233- # Setting ignore_missing to False here will make an exception
234- # bubble up in case the cinder-backup service is not present.
235- # Since we already catch ResourceNotFound for the backup above,
236- # the absence of the cinder-backup service is the only
237- # NotFoundException that is left to be thrown here.
238- # We treat this as a fatal due to the cinder-backup service
239- # being mandatory.
240- conn .block_storage .delete_backup (
241- backup .id , ignore_missing = False )
128+ if not backup .name .startswith (prefix ):
129+ continue
130+ try :
131+ # we can only delete if status is available or error, so try and wait
132+ wait_for_resource (
133+ conn .block_storage .get_backup ,
134+ backup .id ,
135+ expected_status = ("available" , "error" ),
136+ timeout = timeout ,
137+ )
138+ logging .info (f"↳ deleting volume backup '{ backup .id } ' ..." )
139+ conn .block_storage .delete_backup (backup .id )
140+ except openstack .exceptions .ResourceNotFound :
141+ # if the resource has vanished on its own in the meantime ignore it
142+ continue
143+ except Exception as e :
144+ # Most common exception would be a timeout in wait_for_resource.
145+ # We do not need to increment cleanup_issues here since
146+ # any remaining ones will be caught in the next loop down below anyway.
147+ logging .debug ("traceback" , exc_info = True )
148+ logging .warning (str (e ))
242149
243150 # wait for all backups to be cleaned up before attempting to remove volumes
244151 seconds_waited = 0
@@ -249,7 +156,7 @@ def wait_for_resource(resource_type: str, resource_id: str,
249156 time .sleep (1.0 )
250157 seconds_waited += 1
251158 if seconds_waited >= timeout :
252- cleanup_was_successful = False
159+ cleanup_issues += 1
253160 logging .warning (
254161 f"Timeout reached while waiting for all backups with prefix "
255162 f"'{ prefix } ' to finish deletion during cleanup after "
@@ -259,21 +166,31 @@ def wait_for_resource(resource_type: str, resource_id: str,
259166
260167 volumes = conn .block_storage .volumes ()
261168 for volume in volumes :
262- if volume .name .startswith (prefix ):
263- try :
264- wait_for_resource ("volume" , volume .id , expected_status = ("available" , "error" ))
265- except openstack .exceptions .ResourceNotFound :
266- # if the resource has vanished on
267- # its own in the meantime ignore it
268- continue
269- except ConformanceTestException as e :
270- logging .warning (str (e ))
271- cleanup_was_successful = False
272- else :
273- logging .info (f"↳ deleting volume '{ volume .id } ' ..." )
274- conn .block_storage .delete_volume (volume .id )
169+ if not volume .name .startswith (prefix ):
170+ continue
171+ try :
172+ wait_for_resource (
173+ conn .block_storage .get_volume ,
174+ volume .id ,
175+ expected_status = ("available" , "error" ),
176+ timeout = timeout ,
177+ )
178+ logging .info (f"↳ deleting volume '{ volume .id } ' ..." )
179+ conn .block_storage .delete_volume (volume .id )
180+ except openstack .exceptions .ResourceNotFound :
181+ # if the resource has vanished on its own in the meantime ignore it
182+ continue
183+ except Exception as e :
184+ logging .debug ("traceback" , exc_info = True )
185+ logging .warning (str (e ))
186+ cleanup_issues += 1
187+
188+ if cleanup_issues :
189+ logging .info (
190+ f"Some resources with the '{ prefix } ' prefix were not cleaned up!"
191+ )
275192
276- return cleanup_was_successful
193+ return not cleanup_issues
277194
278195
279196def main ():
@@ -321,35 +238,37 @@ def main():
321238 )
322239
323240 # parse cloud name for lookup in clouds.yaml
324- cloud = os .environ .get ("OS_CLOUD" , None )
325- if args .os_cloud :
326- cloud = args .os_cloud
241+ cloud = args .os_cloud or os .environ .get ("OS_CLOUD" , None )
327242 if not cloud :
328243 raise Exception (
329244 "You need to have the OS_CLOUD environment variable set to your "
330245 "cloud name or pass it via --os-cloud"
331246 )
332- conn = connect (
333- cloud ,
334- password = getpass .getpass ("Enter password: " ) if args .ask else None
335- )
247+ password = getpass .getpass ("Enter password: " ) if args .ask else None
336248
337- if not cleanup (conn , prefix = args .prefix , timeout = args .timeout ):
338- raise Exception (
339- f"Cleanup was not successful, there may be leftover resources "
340- f"with the '{ args .prefix } ' prefix"
341- )
342- if args .cleanup_only :
343- return
344- try :
345- test_backup (conn , prefix = args .prefix , timeout = args .timeout )
346- finally :
249+ with openstack .connect (cloud , password = password ) as conn :
347250 if not cleanup (conn , prefix = args .prefix , timeout = args .timeout ):
348- logging .info (
349- f"There may be leftover resources with the "
350- f"'{ args .prefix } ' prefix that could not be cleaned up!"
351- )
251+ raise RuntimeError (f"Initial cleanup failed" )
252+ if args .cleanup_only :
253+ logging .info ("Cleanup-only run finished." )
254+ return
255+ try :
256+ test_backup (conn , prefix = args .prefix , timeout = args .timeout )
257+ except BaseException :
258+ print ('volume-backup-check: FAIL' )
259+ raise
260+ else :
261+ print ('volume-backup-check: PASS' )
262+ finally :
263+ cleanup (conn , prefix = args .prefix , timeout = args .timeout )
352264
353265
if __name__ == "__main__":
    # Script entry point: turn any failure into a non-zero exit status while
    # keeping the full traceback available at debug log level.
    try:
        exit_status = main()
        sys.exit(exit_status)
    except SystemExit:
        # Explicit exit requests pass through untouched.
        raise
    except BaseException as failure:
        logging.debug("traceback", exc_info=True)
        logging.critical(str(failure))
        sys.exit(1)
0 commit comments