Skip to content

Commit 2e182fb

Browse files
garloff and mbuechse authored
Volume backup tester: backoff poll frequency. (#902)
Signed-off-by: Kurt Garloff <[email protected]> Signed-off-by: Matthias Büchse <[email protected]> Co-authored-by: Matthias Büchse <[email protected]>
1 parent 9dd9848 commit 2e182fb

File tree

1 file changed

+77
-56
lines changed

1 file changed

+77
-56
lines changed

Tests/iaas/volume-backup/volume-backup-tester.py

Lines changed: 77 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""
1515

1616
import argparse
17+
from functools import partial
1718
import getpass
1819
import logging
1920
import os
@@ -27,32 +28,70 @@
2728
# used by the cleanup routine to identify resources that can be safely deleted
2829
DEFAULT_PREFIX = "scs-test-"
2930

30-
# timeout in seconds for resource availability checks
31-
# (e.g. a volume becoming available)
32-
WAIT_TIMEOUT = 60
3331

32+
def check_resources(
    get_func: "typing.Callable[[], typing.Iterable[openstack.resource.Resource]]",
    prefix: str,
) -> None:
    """Raise if any listed resource has a name starting with ``prefix``.

    Args:
        get_func: Zero-argument callable returning the resources to inspect;
            every item must expose a ``name`` attribute.
        prefix: Name prefix identifying the resources of interest.

    Raises:
        RuntimeError: If at least one resource name starts with ``prefix``;
            the message lists the matching resources.
    """
    # A resource without a name (name is None) can never match the prefix;
    # guard against it so that it does not raise an AttributeError here.
    remaining = [b for b in get_func() if (b.name or "").startswith(prefix)]
    if remaining:
        raise RuntimeError(f"unexpected resources: {remaining}")
3439

35-
def wait_for_resource(
40+
41+
def check_resource(
    get_func: "typing.Callable[[str], typing.Optional[openstack.resource.Resource]]",
    resource_id: str,
    expected_status=("available", ),
) -> None:
    """Check once that a resource exists and has an expected status.

    Args:
        get_func: Callable that takes the resource identifier and returns the
            resource, or ``None`` if there is no such resource.
        resource_id: Identifier passed to ``get_func``.
        expected_status: Collection of acceptable ``status`` values.

    Raises:
        RuntimeError: If ``get_func`` returns ``None`` or the resource's
            status is not contained in ``expected_status``.
    """
    resource = get_func(resource_id)
    if resource is None:
        raise RuntimeError(f"resource {resource_id} not found")
    if resource.status not in expected_status:
        raise RuntimeError(
            f"Expect resource {resource_id} "
            f"to be in status {expected_status} (current: {resource.status})"
        )
54+
55+
56+
class TimeoutError(Exception):
    """Raised when a retried check still fails after the last retry delay.

    Note that this class shadows the builtin ``TimeoutError`` inside this
    module; it derives directly from ``Exception``.
    """
58+
59+
60+
def retry(
    func: typing.Callable[[], object],
    timeouts=(2, 3, 5, 10, 15, 25, 50),
) -> None:
    """Call ``func`` until it succeeds, sleeping with increasing delays.

    ``func`` is called once immediately and then once more after each delay
    in ``timeouts``, so it is attempted at most ``len(timeouts) + 1`` times.
    Any return value of ``func`` is discarded.

    Args:
        func: Zero-argument callable; raising an ``Exception`` counts as a
            failed attempt, returning normally counts as success.
        timeouts: Delays in seconds to sleep between consecutive attempts.

    Raises:
        TimeoutError: If ``func`` still fails once all delays are used up.
            The last failure is attached as ``__cause__``.
    """
    seconds_waited = 0
    timeout_iter = iter(timeouts)
    while True:
        try:
            func()
        except Exception as e:
            wait_delay = next(timeout_iter, None)
            if wait_delay is None:
                # No delay left: give up and keep the final failure as the
                # explicit cause so the traceback shows what went wrong.
                raise TimeoutError(f"Timed out after {seconds_waited} s: {e!s}") from e
            time.sleep(wait_delay)
            seconds_waited += wait_delay
        else:
            return
77+
78+
79+
def wait_for_resource(
    get_func: typing.Callable[[str], openstack.resource.Resource],
    resource_id: str,
    expected_status=("available", ),
) -> None:
    """Wait until a resource exists and has one of the expected statuses.

    Repeatedly runs ``check_resource`` through ``retry`` (with its default
    delays) and returns as soon as a check passes; errors raised by ``retry``
    propagate to the caller.
    """
    def probe() -> None:
        # single status check; raises while the resource is not ready yet
        check_resource(get_func, resource_id, expected_status)

    retry(probe)
5285

5386

54-
def test_backup(conn: openstack.connection.Connection,
55-
prefix=DEFAULT_PREFIX, timeout=WAIT_TIMEOUT) -> None:
87+
def wait_for_resources(
    get_func: typing.Callable[[], typing.Iterable[openstack.resource.Resource]],
    prefix: str,
) -> None:
    """Wait until no listed resource has a name starting with ``prefix``.

    Repeatedly runs ``check_resources`` through ``retry`` (with its default
    delays) and returns as soon as a check passes; errors raised by ``retry``
    propagate to the caller.

    Args:
        get_func: Zero-argument callable returning the resources to inspect.
        prefix: Name prefix identifying the resources to wait for.
    """
    retry(partial(check_resources, get_func, prefix))
92+
93+
94+
def test_backup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX) -> None:
5695
"""Execute volume backup tests on the connection
5796
5897
This will create an empty volume, a backup of that empty volume and then
@@ -75,7 +114,7 @@ def test_backup(conn: openstack.connection.Connection,
75114
f"↳ waiting for volume with ID '{volume_id}' to reach status "
76115
f"'available' ..."
77116
)
78-
wait_for_resource(conn.block_storage.get_volume, volume_id, timeout=timeout)
117+
wait_for_resource(conn.block_storage.get_volume, volume_id)
79118
logging.info("Create empty volume: PASS")
80119

81120
# CREATE BACKUP
@@ -88,7 +127,7 @@ def test_backup(conn: openstack.connection.Connection,
88127
raise RuntimeError("Retrieving backup by ID failed")
89128

90129
logging.info(f"↳ waiting for backup '{backup_id}' to become available ...")
91-
wait_for_resource(conn.block_storage.get_backup, backup_id, timeout=timeout)
130+
wait_for_resource(conn.block_storage.get_backup, backup_id)
92131
logging.info("Create backup from volume: PASS")
93132

94133
# RESTORE BACKUP
@@ -100,19 +139,18 @@ def test_backup(conn: openstack.connection.Connection,
100139
f"↳ waiting for restoration target volume '{restored_volume_name}' "
101140
f"to be created ..."
102141
)
103-
wait_for_resource(conn.block_storage.find_volume, restored_volume_name, timeout=timeout)
142+
wait_for_resource(conn.block_storage.find_volume, restored_volume_name)
104143
# wait for the volume restoration to finish
105144
logging.info(
106145
f"↳ waiting for restoration target volume '{restored_volume_name}' "
107146
f"to reach 'available' status ..."
108147
)
109148
volume_id = conn.block_storage.find_volume(restored_volume_name).id
110-
wait_for_resource(conn.block_storage.get_volume, volume_id, timeout=timeout)
149+
wait_for_resource(conn.block_storage.get_volume, volume_id)
111150
logging.info("Restore volume from backup: PASS")
112151

113152

114-
def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX,
115-
timeout=WAIT_TIMEOUT) -> bool:
153+
def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX) -> bool:
116154
"""
117155
Looks up volume and volume backup resources matching the given prefix and
118156
deletes them.
@@ -133,36 +171,27 @@ def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX,
133171
conn.block_storage.get_backup,
134172
backup.id,
135173
expected_status=("available", "error"),
136-
timeout=timeout,
137174
)
138175
logging.info(f"↳ deleting volume backup '{backup.id}' ...")
139-
conn.block_storage.delete_backup(backup.id)
140-
except openstack.exceptions.ResourceNotFound:
141-
# if the resource has vanished on its own in the meantime ignore it
142-
continue
176+
conn.block_storage.delete_backup(backup.id, ignore_missing=False)
143177
except Exception as e:
178+
if isinstance(e, openstack.exceptions.ResourceNotFound):
179+
# if the resource has vanished on its own in the meantime ignore it
180+
# however, ResourceNotFound will also be thrown if the service 'cinder-backup' is missing
181+
if 'cinder-backup' in str(e):
182+
raise
183+
continue
144184
# Most common exception would be a timeout in wait_for_resource.
145185
# We do not need to increment cleanup_issues here since
146186
# any remaining ones will be caught in the next loop down below anyway.
147-
logging.debug("traceback", exc_info=True)
148187
logging.warning(str(e))
149188

150189
# wait for all backups to be cleaned up before attempting to remove volumes
151-
seconds_waited = 0
152-
while len(
153-
# list of all backups whose name starts with the prefix
154-
[b for b in conn.block_storage.backups() if b.name.startswith(prefix)]
155-
) > 0:
156-
time.sleep(1.0)
157-
seconds_waited += 1
158-
if seconds_waited >= timeout:
159-
cleanup_issues += 1
160-
logging.warning(
161-
f"Timeout reached while waiting for all backups with prefix "
162-
f"'{prefix}' to finish deletion during cleanup after "
163-
f"{seconds_waited} seconds"
164-
)
165-
break
190+
try:
191+
wait_for_resources(conn.block_storage.backups, prefix)
192+
except TimeoutError as e:
193+
cleanup_issues += 1
194+
logging.warning(str(e))
166195

167196
volumes = conn.block_storage.volumes()
168197
for volume in volumes:
@@ -173,7 +202,6 @@ def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX,
173202
conn.block_storage.get_volume,
174203
volume.id,
175204
expected_status=("available", "error"),
176-
timeout=timeout,
177205
)
178206
logging.info(f"↳ deleting volume '{volume.id}' ...")
179207
conn.block_storage.delete_volume(volume.id)
@@ -218,20 +246,13 @@ def main():
218246
f"and/or cleaned up by this script within the configured domains "
219247
f"(default: '{DEFAULT_PREFIX}')"
220248
)
221-
parser.add_argument(
222-
"--timeout", type=int,
223-
default=WAIT_TIMEOUT,
224-
help=f"Timeout in seconds for operations waiting for resources to "
225-
f"become available such as creating volumes and volume backups "
226-
f"(default: '{WAIT_TIMEOUT}')"
227-
)
228249
parser.add_argument(
229250
"--cleanup-only", action="store_true",
230251
help="Instead of executing tests, cleanup all resources "
231252
"with the prefix specified via '--prefix' (or its default)"
232253
)
233254
args = parser.parse_args()
234-
openstack.enable_logging(debug=args.debug)
255+
openstack.enable_logging(debug=False)
235256
logging.basicConfig(
236257
format="%(levelname)s: %(message)s",
237258
level=logging.DEBUG if args.debug else logging.INFO,
@@ -247,20 +268,20 @@ def main():
247268
password = getpass.getpass("Enter password: ") if args.ask else None
248269

249270
with openstack.connect(cloud, password=password) as conn:
250-
if not cleanup(conn, prefix=args.prefix, timeout=args.timeout):
271+
if not cleanup(conn, prefix=args.prefix):
251272
raise RuntimeError("Initial cleanup failed")
252273
if args.cleanup_only:
253274
logging.info("Cleanup-only run finished.")
254275
return
255276
try:
256-
test_backup(conn, prefix=args.prefix, timeout=args.timeout)
277+
test_backup(conn, prefix=args.prefix)
257278
except BaseException:
258279
print('volume-backup-check: FAIL')
259280
raise
260281
else:
261282
print('volume-backup-check: PASS')
262283
finally:
263-
cleanup(conn, prefix=args.prefix, timeout=args.timeout)
284+
cleanup(conn, prefix=args.prefix)
264285

265286

266287
if __name__ == "__main__":

0 commit comments

Comments
 (0)