1414"""
1515
1616import argparse
17+ from functools import partial
1718import getpass
1819import logging
1920import os
2728# used by the cleanup routine to identify resources that can be safely deleted
2829DEFAULT_PREFIX = "scs-test-"
2930
30- # timeout in seconds for resource availability checks
31- # (e.g. a volume becoming available)
32- WAIT_TIMEOUT = 60
3331
def check_resources(
    get_func: typing.Callable[
        [], typing.Iterable["openstack.resource.Resource"]
    ],
    prefix: str,
) -> None:
    """Ensure no resource whose name starts with ``prefix`` is left over.

    :param get_func: zero-argument callable returning the resources to
        inspect (each one is only accessed through its ``name`` attribute)
    :param prefix: name prefix identifying the resources of interest
    :raises RuntimeError: if at least one returned resource matches
    """
    # A resource's name may be None (NOTE(review): presumably the case for
    # unnamed volumes/backups); treat that as "does not match" rather than
    # failing with AttributeError on ``None.startswith``.
    remaining = [
        res for res in get_func()
        if res.name is not None and res.name.startswith(prefix)
    ]
    if remaining:
        raise RuntimeError(f"unexpected resources: {remaining}")
3439
def check_resource(
    get_func: typing.Callable[
        [str], typing.Optional["openstack.resource.Resource"]
    ],
    resource_id: str,
    expected_status=("available", ),
) -> None:
    """Check once that a resource exists and is in an expected status.

    This performs a single lookup and does not wait or retry.

    :param get_func: callable that takes ``resource_id`` and returns the
        resource, or ``None`` if there is no such resource
    :param resource_id: identifier handed to ``get_func``
    :param expected_status: collection of acceptable ``status`` values
    :raises RuntimeError: if the resource is missing or its status is not
        one of ``expected_status``
    """
    resource = get_func(resource_id)
    if resource is None:
        raise RuntimeError(f"resource {resource_id} not found")
    if resource.status not in expected_status:
        raise RuntimeError(
            f"Expect resource {resource_id} "
            f"to be in status {expected_status} (current: {resource.status})"
        )
54+
55+
class TimeoutError(Exception):
    """Raised when an operation still fails after all retry delays are used.

    NOTE: this deliberately keeps the name ``TimeoutError`` and therefore
    shadows the builtin of the same name inside this module. Unlike the
    builtin, it derives directly from ``Exception`` and is not an ``OSError``.
    """
58+
59+
def retry(
    func: typing.Callable[[], object],
    timeouts: typing.Iterable[float] = (2, 3, 5, 10, 15, 25, 50),
) -> None:
    """Call ``func`` repeatedly until it succeeds or the delays run out.

    ``func`` is considered successful when it returns without raising; its
    return value is discarded. After each failure the next delay from
    ``timeouts`` is slept before trying again, so ``func`` is attempted at
    most ``len(timeouts) + 1`` times.

    :param func: zero-argument callable to execute
    :param timeouts: delays in seconds to sleep between attempts, in order
    :raises TimeoutError: if ``func`` still raises once all delays have been
        consumed; the message carries the accumulated wait time and the last
        error, which is also attached as ``__cause__``
    """
    seconds_waited = 0
    delays = iter(timeouts)
    while True:
        try:
            func()
        except Exception as e:
            wait_delay = next(delays, None)
            if wait_delay is None:
                # no delay left: give up and report the most recent failure
                raise TimeoutError(
                    f"Timed out after {seconds_waited}s: {e!s}"
                ) from e
            time.sleep(wait_delay)
            seconds_waited += wait_delay
        else:
            return
77+
78+
def wait_for_resource(
    get_func: typing.Callable[
        [str], typing.Optional["openstack.resource.Resource"]
    ],
    resource_id: str,
    expected_status=("available", ),
) -> None:
    """Wait until a resource exists and has reached an expected status.

    Repeats ``check_resource`` through ``retry`` using that function's
    default delays.

    :param get_func: callable that takes ``resource_id`` and returns the
        resource, or ``None`` if it does not exist (yet)
    :param resource_id: identifier handed to ``get_func``
    :param expected_status: collection of acceptable ``status`` values
    :raises TimeoutError: if the check keeps failing until ``retry`` gives up
    """
    retry(partial(check_resource, get_func, resource_id, expected_status))
5285
5386
54- def test_backup (conn : openstack .connection .Connection ,
55- prefix = DEFAULT_PREFIX , timeout = WAIT_TIMEOUT ) -> None :
def wait_for_resources(
    get_func: typing.Callable[
        [], typing.Iterable["openstack.resource.Resource"]
    ],
    prefix: str,
) -> None:
    """Wait until no resource with a name starting with ``prefix`` is left.

    Repeats ``check_resources`` through ``retry`` using that function's
    default delays.

    :param get_func: zero-argument callable returning the current resources
    :param prefix: name prefix identifying the resources to wait for
    :raises TimeoutError: if matching resources are still present when
        ``retry`` gives up
    """
    retry(partial(check_resources, get_func, prefix))
92+
93+
94+ def test_backup (conn : openstack .connection .Connection , prefix = DEFAULT_PREFIX ) -> None :
5695 """Execute volume backup tests on the connection
5796
5897 This will create an empty volume, a backup of that empty volume and then
@@ -75,7 +114,7 @@ def test_backup(conn: openstack.connection.Connection,
75114 f"↳ waiting for volume with ID '{ volume_id } ' to reach status "
76115 f"'available' ..."
77116 )
78- wait_for_resource (conn .block_storage .get_volume , volume_id , timeout = timeout )
117+ wait_for_resource (conn .block_storage .get_volume , volume_id )
79118 logging .info ("Create empty volume: PASS" )
80119
81120 # CREATE BACKUP
@@ -88,7 +127,7 @@ def test_backup(conn: openstack.connection.Connection,
88127 raise RuntimeError ("Retrieving backup by ID failed" )
89128
90129 logging .info (f"↳ waiting for backup '{ backup_id } ' to become available ..." )
91- wait_for_resource (conn .block_storage .get_backup , backup_id , timeout = timeout )
130+ wait_for_resource (conn .block_storage .get_backup , backup_id )
92131 logging .info ("Create backup from volume: PASS" )
93132
94133 # RESTORE BACKUP
@@ -100,19 +139,18 @@ def test_backup(conn: openstack.connection.Connection,
100139 f"↳ waiting for restoration target volume '{ restored_volume_name } ' "
101140 f"to be created ..."
102141 )
103- wait_for_resource (conn .block_storage .find_volume , restored_volume_name , timeout = timeout )
142+ wait_for_resource (conn .block_storage .find_volume , restored_volume_name )
104143 # wait for the volume restoration to finish
105144 logging .info (
106145 f"↳ waiting for restoration target volume '{ restored_volume_name } ' "
107146 f"to reach 'available' status ..."
108147 )
109148 volume_id = conn .block_storage .find_volume (restored_volume_name ).id
110- wait_for_resource (conn .block_storage .get_volume , volume_id , timeout = timeout )
149+ wait_for_resource (conn .block_storage .get_volume , volume_id )
111150 logging .info ("Restore volume from backup: PASS" )
112151
113152
114- def cleanup (conn : openstack .connection .Connection , prefix = DEFAULT_PREFIX ,
115- timeout = WAIT_TIMEOUT ) -> bool :
153+ def cleanup (conn : openstack .connection .Connection , prefix = DEFAULT_PREFIX ) -> bool :
116154 """
117155 Looks up volume and volume backup resources matching the given prefix and
118156 deletes them.
@@ -133,36 +171,27 @@ def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX,
133171 conn .block_storage .get_backup ,
134172 backup .id ,
135173 expected_status = ("available" , "error" ),
136- timeout = timeout ,
137174 )
138175 logging .info (f"↳ deleting volume backup '{ backup .id } ' ..." )
139- conn .block_storage .delete_backup (backup .id )
140- except openstack .exceptions .ResourceNotFound :
141- # if the resource has vanished on its own in the meantime ignore it
142- continue
176+ conn .block_storage .delete_backup (backup .id , ignore_missing = False )
143177 except Exception as e :
178+ if isinstance (e , openstack .exceptions .ResourceNotFound ):
179+ # if the resource has vanished on its own in the meantime ignore it
180+ # however, ResourceNotFound will also be thrown if the service 'cinder-backup' is missing
181+ if 'cinder-backup' in str (e ):
182+ raise
183+ continue
144184 # Most common exception would be a timeout in wait_for_resource.
145185 # We do not need to increment cleanup_issues here since
146186 # any remaining ones will be caught in the next loop down below anyway.
147- logging .debug ("traceback" , exc_info = True )
148187 logging .warning (str (e ))
149188
150189 # wait for all backups to be cleaned up before attempting to remove volumes
151- seconds_waited = 0
152- while len (
153- # list of all backups whose name starts with the prefix
154- [b for b in conn .block_storage .backups () if b .name .startswith (prefix )]
155- ) > 0 :
156- time .sleep (1.0 )
157- seconds_waited += 1
158- if seconds_waited >= timeout :
159- cleanup_issues += 1
160- logging .warning (
161- f"Timeout reached while waiting for all backups with prefix "
162- f"'{ prefix } ' to finish deletion during cleanup after "
163- f"{ seconds_waited } seconds"
164- )
165- break
190+ try :
191+ wait_for_resources (conn .block_storage .backups , prefix )
192+ except TimeoutError as e :
193+ cleanup_issues += 1
194+ logging .warning (str (e ))
166195
167196 volumes = conn .block_storage .volumes ()
168197 for volume in volumes :
@@ -173,7 +202,6 @@ def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX,
173202 conn .block_storage .get_volume ,
174203 volume .id ,
175204 expected_status = ("available" , "error" ),
176- timeout = timeout ,
177205 )
178206 logging .info (f"↳ deleting volume '{ volume .id } ' ..." )
179207 conn .block_storage .delete_volume (volume .id )
@@ -218,20 +246,13 @@ def main():
218246 f"and/or cleaned up by this script within the configured domains "
219247 f"(default: '{ DEFAULT_PREFIX } ')"
220248 )
221- parser .add_argument (
222- "--timeout" , type = int ,
223- default = WAIT_TIMEOUT ,
224- help = f"Timeout in seconds for operations waiting for resources to "
225- f"become available such as creating volumes and volume backups "
226- f"(default: '{ WAIT_TIMEOUT } ')"
227- )
228249 parser .add_argument (
229250 "--cleanup-only" , action = "store_true" ,
230251 help = "Instead of executing tests, cleanup all resources "
231252 "with the prefix specified via '--prefix' (or its default)"
232253 )
233254 args = parser .parse_args ()
234- openstack .enable_logging (debug = args . debug )
255+ openstack .enable_logging (debug = False )
235256 logging .basicConfig (
236257 format = "%(levelname)s: %(message)s" ,
237258 level = logging .DEBUG if args .debug else logging .INFO ,
@@ -247,20 +268,20 @@ def main():
247268 password = getpass .getpass ("Enter password: " ) if args .ask else None
248269
249270 with openstack .connect (cloud , password = password ) as conn :
250- if not cleanup (conn , prefix = args .prefix , timeout = args . timeout ):
271+ if not cleanup (conn , prefix = args .prefix ):
251272 raise RuntimeError ("Initial cleanup failed" )
252273 if args .cleanup_only :
253274 logging .info ("Cleanup-only run finished." )
254275 return
255276 try :
256- test_backup (conn , prefix = args .prefix , timeout = args . timeout )
277+ test_backup (conn , prefix = args .prefix )
257278 except BaseException :
258279 print ('volume-backup-check: FAIL' )
259280 raise
260281 else :
261282 print ('volume-backup-check: PASS' )
262283 finally :
263- cleanup (conn , prefix = args .prefix , timeout = args . timeout )
284+ cleanup (conn , prefix = args .prefix )
264285
265286
266287if __name__ == "__main__" :
0 commit comments