Skip to content

Commit 82118e1

Browse files
committed
qa/tasks/nvmeof.py: Improve thrasher and rbd image creation
Create all rbd images with a single command, chaining the "rbd create" invocations with ";", instead of running "cephadm shell -- rbd create" separately for each image. Improve how the daemons to be thrashed are selected: use randint() and sample() instead of the previous weight/skip approach. Signed-off-by: Vallari Agrawal <[email protected]>
1 parent e1983c5 commit 82118e1

File tree

2 files changed

+9
-17
lines changed

2 files changed

+9
-17
lines changed

qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ overrides:
1313
tasks:
1414
- nvmeof.thrash:
1515
checker_host: 'client.0'
16+
randomize: False

qa/tasks/nvmeof.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,11 @@ def deploy_nvmeof(self):
128128

129129
total_images = int(self.namespaces_count) * int(self.subsystems_count)
130130
log.info(f'[nvmeof]: creating {total_images} images')
131+
rbd_create_cmd = []
131132
for i in range(1, total_images + 1):
132133
imagename = self.image_name_prefix + str(i)
133-
log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}')
134-
_shell(self.ctx, self.cluster_name, self.remote, [
135-
'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}'
136-
])
134+
rbd_create_cmd += ['rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}', run.Raw(';')]
135+
_shell(self.ctx, self.cluster_name, self.remote, rbd_create_cmd)
137136

138137
for role, i in daemons.items():
139138
remote, id_ = i
@@ -311,7 +310,7 @@ def __init__(self, ctx, config, daemons) -> None:
311310

312311
self.min_thrash_delay = int(self.config.get('min_thrash_delay', 60))
313312
self.max_thrash_delay = int(self.config.get('max_thrash_delay', self.min_thrash_delay + 30))
314-
self.min_revive_delay = int(self.config.get('min_revive_delay', 100))
313+
self.min_revive_delay = int(self.config.get('min_revive_delay', 60))
315314
self.max_revive_delay = int(self.config.get('max_revive_delay', self.min_revive_delay + 30))
316315

317316
def _get_devices(self, remote):
@@ -422,13 +421,11 @@ def do_thrash(self):
422421
while not self.stopping.is_set():
423422
killed_daemons = defaultdict(list)
424423

425-
weight = 1.0 / len(self.daemons)
426-
count = 0
424+
thrash_daemon_num = self.rng.randint(1, self.max_thrash_daemons)
425+
selected_daemons = self.rng.sample(self.daemons, thrash_daemon_num)
427426
for daemon in self.daemons:
428-
skip = self.rng.uniform(0.0, 1.0)
429-
if weight <= skip:
430-
self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
431-
label=daemon.id_, skip=skip, weight=weight))
427+
if daemon not in selected_daemons:
428+
self.log(f'skipping daemon {daemon.id_} ...')
432429
continue
433430

434431
# For now, nvmeof daemons can only be thrashed 3 times in the last 30 minutes.
@@ -446,17 +443,11 @@ def do_thrash(self):
446443
continue
447444

448445
self.log('kill {label}'.format(label=daemon.id_))
449-
# daemon.stop()
450446
kill_method = self.kill_daemon(daemon)
451447

452448
killed_daemons[kill_method].append(daemon)
453449
daemons_thrash_history[daemon.id_] += [datetime.now()]
454450

455-
# only thrash max_thrash_daemons amount of daemons
456-
count += 1
457-
if count >= self.max_thrash_daemons:
458-
break
459-
460451
if killed_daemons:
461452
iteration_summary = "thrashed- "
462453
for kill_method in killed_daemons:

0 commit comments

Comments
 (0)