Skip to content

Commit 58d8be9

Browse files
committed
qa: Expand nvmeof thrasher and add nvmeof_namespaces.yaml job
1. qa/tasks/nvmeof.py: add more ways to stop nvmeof daemons. 2. Add qa/workunits/rbd/nvmeof_namespace_test.sh, which adds and then deletes new namespaces. It runs in the nvmeof_namespaces.yaml job while fio runs against the other namespaces in the background. Signed-off-by: Vallari Agrawal <[email protected]>
1 parent 02fe44a commit 58d8be9

File tree

3 files changed

+155
-7
lines changed

3 files changed

+155
-7
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
tasks:
2+
- nvmeof:
3+
client: client.0
4+
gw_image: quay.io/ceph/nvmeof:1.2 # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest"
5+
rbd:
6+
pool_name: mypool
7+
image_name_prefix: myimage
8+
gateway_config:
9+
subsystems_count: 3
10+
namespaces_count: 20
11+
cli_image: quay.io/ceph/nvmeof-cli:1.2
12+
13+
- cephadm.wait_for_service:
14+
service: nvmeof.mypool
15+
16+
- workunit:
17+
no_coverage_and_limits: true
18+
clients:
19+
client.2:
20+
- rbd/nvmeof_setup_subsystem.sh
21+
env:
22+
RBD_POOL: mypool
23+
RBD_IMAGE_PREFIX: myimage
24+
25+
- workunit:
26+
no_coverage_and_limits: true
27+
timeout: 30m
28+
clients:
29+
client.2:
30+
- rbd/nvmeof_basic_tests.sh
31+
- rbd/nvmeof_fio_test.sh --rbd_iostat
32+
client.3:
33+
- rbd/nvmeof_basic_tests.sh
34+
- rbd/nvmeof_namespace_test.sh
35+
env:
36+
RBD_POOL: mypool
37+
IOSTAT_INTERVAL: '10'
38+
RUNTIME: '600'
39+
NEW_NAMESPACES_COUNT: '5'
40+

qa/tasks/nvmeof.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,37 @@ def switch_task(self):
343343
self.log('switch_task: done waiting for the other thrasher')
344344
other_thrasher.switch_thrasher.clear()
345345

346+
def kill_daemon(self, daemon):
    """
    Take ``daemon`` down using one randomly picked method.

    The method name is drawn with ``self.rng`` from "ceph_daemon_stop",
    "systemctl_stop" and "daemon_remove".

    :param daemon: daemon handle exposing ``type_``, ``id_``, ``remote``
                   and ``stop()``
    :returns: the name of the method that was used, so the caller can
              hand it to revive_daemon()
    """
    method = self.rng.choice([
        "ceph_daemon_stop", "systemctl_stop",
        "daemon_remove",
    ])
    target = '%s.%s' % (daemon.type_, daemon.id_)

    if method == "systemctl_stop":
        daemon.stop()
        return method

    # The two remaining methods differ only in the "ceph orch daemon" verb.
    orch_verbs = {"ceph_daemon_stop": "stop", "daemon_remove": "rm"}
    verb = orch_verbs.get(method)
    if verb is not None:
        # check_status=False: a failing orchestrator command is not raised
        daemon.remote.run(
            args=["ceph", "orch", "daemon", verb, target],
            check_status=False,
        )
    return method
366+
367+
def revive_daemon(self, daemon, killed_method):
    """
    Bring back a daemon that kill_daemon() took down.

    :param daemon: the daemon handle that was given to kill_daemon()
    :param killed_method: the method name kill_daemon() returned for it
    """
    if killed_method == "ceph_daemon_stop":
        name = '%s.%s' % (daemon.type_, daemon.id_)
        daemon.remote.run(args=[
            "ceph", "orch", "daemon", "restart",
            name
        ])
    elif killed_method == "systemctl_stop":
        daemon.restart()
    elif killed_method == "daemon_remove":
        # NOTE(review): no revive command is issued for a removed daemon;
        # presumably the orchestrator redeploys it on its own -- confirm.
        # Logged so the thrash log shows this was a deliberate no-op.
        self.log('no explicit revive for {label} (killed by {method})'.format(
            label=daemon.id_, method=killed_method))
    else:
        # Unknown methods used to be ignored silently; keep that
        # best-effort behaviour but leave a trace in the log.
        self.log('unknown kill method {method} for {label}, not reviving'.format(
            method=killed_method, label=daemon.id_))
376+
346377
def do_thrash(self):
347378
self.log('start thrashing')
348379
self.log(f'seed: {self.random_seed}, , '\
@@ -354,7 +385,7 @@ def do_thrash(self):
354385
summary = []
355386

356387
while not self.stopping.is_set():
357-
killed_daemons = []
388+
killed_daemons = defaultdict(list)
358389

359390
weight = 1.0 / len(self.daemons)
360391
count = 0
@@ -380,9 +411,10 @@ def do_thrash(self):
380411
continue
381412

382413
self.log('kill {label}'.format(label=daemon.id_))
383-
daemon.stop()
414+
# daemon.stop()
415+
kill_method = self.kill_daemon(daemon)
384416

385-
killed_daemons.append(daemon)
417+
killed_daemons[kill_method].append(daemon)
386418
daemons_thrash_history[daemon.id_] += [datetime.now()]
387419

388420
# only thrash max_thrash_daemons amount of daemons
@@ -391,7 +423,10 @@ def do_thrash(self):
391423
break
392424

393425
if killed_daemons:
394-
summary += ["killed: " + ", ".join([d.id_ for d in killed_daemons])]
426+
iteration_summary = "thrashed- "
427+
for kill_method in killed_daemons:
428+
iteration_summary += (", ".join([d.id_ for d in killed_daemons[kill_method]]) + f" (by {kill_method}); ")
429+
summary += [iteration_summary]
395430
# delay before reviving
396431
revive_delay = self.min_revive_delay
397432
if self.randomize:
@@ -405,9 +440,11 @@ def do_thrash(self):
405440
self.switch_task()
406441

407442
# revive after thrashing
408-
for daemon in killed_daemons:
409-
self.log('reviving {label}'.format(label=daemon.id_))
410-
daemon.restart()
443+
for kill_method in killed_daemons:
444+
for daemon in killed_daemons[kill_method]:
445+
self.log('reviving {label}'.format(label=daemon.id_))
446+
# daemon.restart()
447+
self.revive_daemon(daemon, kill_method)
411448

412449
# delay before thrashing
413450
thrash_delay = self.min_thrash_delay
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/bin/bash -xe

# Adds NEW_NAMESPACES_COUNT namespaces to every subsystem and then deletes
# those new namespaces again. The test assumes that all subsystems hold the
# same number of namespaces (NVMEOF_NAMESPACES_COUNT per subsystem).

source /etc/ceph/nvmeof.env

RBD_POOL="${RBD_POOL:-mypool}"
NEW_IMAGE_SIZE="${RBD_IMAGE_SIZE:-8192}" # 1024*8
NEW_NAMESPACES_COUNT="${NEW_NAMESPACES_COUNT:-3}"

# Number of gateways = number of commas in the address list, plus one.
gateway_separators="${NVMEOF_GATEWAY_IP_ADDRESSES//[^,]/}"
gateways_count=$(( ${#gateway_separators} + 1 ))
# One new rbd image per new namespace, across all subsystems.
new_images_count=$(( $NVMEOF_SUBSYSTEMS_COUNT * $NEW_NAMESPACES_COUNT))
16+
17+
18+
assert_namespaces_count() {
    # Usage: assert_namespaces_count <expected_count_per_subsys>
    #
    # Counts the "namespace_count" lines in the JSON subsystem list whose
    # value is <expected_count_per_subsys> and requires that count to equal
    # NVMEOF_SUBSYSTEMS_COUNT. On mismatch, prints the subsystem list and a
    # message, then returns 1.
    local expected_count_per_subsys=$1
    local actual_count

    # grep -w matches the count as a whole word, so an expected value of 2 is
    # not satisfied by 20 or 12. The pipeline ends in `wc -l` (not `grep -c`)
    # so that zero matches does not abort the script under `set -e` before
    # the diagnostics below are printed.
    actual_count=$(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list |
        grep namespace_count | grep -w -- "$expected_count_per_subsys" | wc -l)
    if [ "$actual_count" -ne "$NVMEOF_SUBSYSTEMS_COUNT" ]; then
        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list
        echo "Expected count of namepaces not found, expected (per subsystem): $expected_count_per_subsys"
        return 1
    fi
}
28+
29+
30+
# Shared prefix of every nvmeof-cli call made against the default gateway.
nvmeof_cli=(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT)

# create the backing rbd images: test1 .. test<new_images_count>
for image_no in $(seq 1 $new_images_count); do
    rbd create $RBD_POOL/"test${image_no}" --size $NEW_IMAGE_SIZE
done

# add NEW_NAMESPACES_COUNT namespaces to each subsystem; image_index keeps
# counting across subsystems so that each image is used exactly once
image_index=1
for subsys_no in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${subsys_no}"
    for ns in $(seq 1 $NEW_NAMESPACES_COUNT); do
        # cycle through load-balancing groups 1..gateways_count
        lb_group=$(( $image_index % $gateways_count + 1 ))
        "${nvmeof_cli[@]}" namespace add --subsystem $subsystem_nqn --rbd-pool $RBD_POOL --rbd-image "test${image_index}" --load-balancing-group $lb_group
        image_index=$(( $image_index + 1 ))
    done
done

# print the namespaces of every subsystem
for subsys_no in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${subsys_no}"
    "${nvmeof_cli[@]}" --format plain namespace list --subsystem $subsystem_nqn
done

# each subsystem must now hold its original plus the new namespaces
expected_count_per_subsys=$(( $NEW_NAMESPACES_COUNT + $NVMEOF_NAMESPACES_COUNT ))
assert_namespaces_count $expected_count_per_subsys

# delete every namespace whose rbd image name starts with "test"
for subsys_no in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${subsys_no}"
    NSIDs=$("${nvmeof_cli[@]}" --format json namespace list --subsystem $subsystem_nqn |
            jq -r '.namespaces[] | select(.rbd_image_name | startswith("test")) | .nsid')

    for nsid in $NSIDs; do
        "${nvmeof_cli[@]}" namespace del --subsystem $subsystem_nqn --nsid $nsid
    done
done

# each subsystem must be back at its original namespace count
expected_count_per_subsys=$NVMEOF_NAMESPACES_COUNT
assert_namespaces_count $expected_count_per_subsys
71+

0 commit comments

Comments
 (0)