Skip to content

Commit 32fe731

Browse files
committed
DAOS-16627 test: Change fabric provider
Test the feature that changes the fabric provider without reformatting storage. The test runs IOR, changes the fabric provider, checks the IOR data on the storage system, changes back to the original provider, and checks the IOR data again. Skip-func-hw-test-medium: false Skip-func-hw-test-medium-md-on-ssd: false Skip-unit-test: true Skip-fault-injection-test: true Test-tag: test_changing_fabric_provider Signed-off-by: James A. Nunez <james.nunez@hpe.com>
1 parent bb2329c commit 32fe731

File tree

2 files changed

+213
-0
lines changed

2 files changed

+213
-0
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
"""
2+
(C) Copyright 2026 Hewlett Packard Enterprise Development LP
3+
4+
SPDX-License-Identifier: BSD-2-Clause-Patent
5+
"""
6+
7+
import os
8+
9+
from ior_test_base import IorTestBase
10+
from util.network_utils import get_common_provider, SUPPORTED_PROVIDERS
11+
12+
13+
class ChangingFabricProvider(IorTestBase):
    """Test changing the fabric provider without reformatting the storage.

    Data is written with IOR using the original provider, the servers and agents are restarted
    with a different provider and the data is read back. The original provider is then restored
    and the data is read back once more.

    :avocado: recursive
    """

    def _get_server_provider(self):
        """Get the provider set in the first server manager's configuration yaml object.

        Returns:
            the 'provider' value held by the server configuration yaml object
        """
        return self.server_managers[0].manager.job.yaml.get_value('provider')

    def _update_server_provider(self, provider, description):
        """Set the provider in the server configuration and write a new server yaml file.

        Args:
            provider (str): fabric provider to place in the server configuration
            description (str): word describing the provider in messages, e.g. "new"
        """
        # Any error while regenerating the configuration is reported as a test failure that
        # names the step, instead of surfacing as an unexplained test error.
        try:
            job = self.server_managers[0].manager.job
            job.yaml.provider.value = provider
            job.create_yaml_file(yaml_data=job.yaml.get_yaml_data())
            self.log.info(
                "Successfully updated server config with %s provider: %s", description, provider)
        except Exception as error:
            self.fail(
                f"Failed to update server configuration with {description} provider: {error}")

    def _restart_servers(self, description):
        """Restart the servers and query the system.

        Args:
            description (str): word describing the provider in the failure message, e.g. "new"
        """
        try:
            self.restart_servers()
            self.server_managers[0].dmg.system_query()
            self.log.info("Server restart completed successfully")
        except Exception as error:
            self.fail(f"Failed to restart servers with {description} provider: {error}")

    def _restart_agents(self):
        """Restart the agents and dump the agent attach info."""
        self.log_step("Restarting DAOS agents")
        try:
            self.start_agent_managers()
            self.agent_managers[0].dump_attachinfo()
            self.log.info("Agent restart completed successfully")
        except Exception as error:
            self.fail(f"Failed to restart agents: {error}")

    def _read_ior_data(self, intercept, read_flags, description, provider):
        """Run IOR with the read flags against the existing pool and container.

        Args:
            intercept (str): path to the interception library passed to IOR
            read_flags (str): IOR flags to use for the read pass
            description (str): word describing the provider in messages, e.g. "new"
            provider (str): fabric provider in use, reported if the read fails
        """
        self.log_step(f"Running IOR read test with {description} provider")
        try:
            self.ior_cmd.flags.update(read_flags)
            # Reuse the pool and container created by the initial IOR run.
            self.run_ior_with_pool(intercept=intercept, create_pool=False, create_cont=False)
            self.log.info("IOR read test with %s provider completed successfully", description)
        except Exception as error:
            self.fail(f"IOR read test failed with {description} provider {provider}: {error}")

    def test_changing_fabric_provider(self):
        """Test changing the fabric provider without reformatting the storage.

        Test Description:
            Purpose of this test is to test the fabric provider can
            be changed without reformatting the storage.

        Test steps:
            1. Find a supported provider, common to all hosts, that differs from the one in use.
            2. Write data with IOR using the original provider.
            3. Stop the engines and agents, write a server config with the new provider,
               restart the servers and agents and read the IOR data back.
            4. Repeat step 3 with the original provider.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium
        :avocado: tags=control
        :avocado: tags=ChangingFabricProvider,test_changing_fabric_provider
        """
        ior_read_flags = self.params.get("read_flags", "/run/ior/*")
        intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')

        # Get all providers supported by the interface in use
        self.log_step("Find common providers")
        common_providers = get_common_provider(
            self.log, self.host_info.all_hosts, self.test_env.interface)
        self.log.info("common providers: %s", common_providers)

        # Get a different provider than what is being used. The substring test also skips any
        # candidate whose name merely contains the current provider name.
        original_provider = self._get_server_provider()
        new_provider = next(
            (provider for provider in common_providers
             if original_provider not in provider and provider in SUPPORTED_PROVIDERS),
            None)
        if new_provider is None:
            self.fail(
                f"No alternative provider found. Available: {common_providers}, "
                f"Current: {original_provider}")

        self.log.info("Original provider: %s, New provider: %s", original_provider, new_provider)

        # Run IOR with the original provider
        try:
            self.run_ior_with_pool(intercept=intercept, fail_on_warning=False)
            self.log.info("Initial IOR write completed successfully")
        except Exception as error:
            self.fail(
                f"Initial IOR write failed with original provider {original_provider}: {error}")

        # Stop all DAOS engines and agent processes
        self.log_step("Stop all DAOS engines and agents")
        self.agent_managers[0].dump_attachinfo()
        self.server_managers[0].dmg.system_stop(False)
        self.stop_agents()

        # Update the provider and write a new server YAML file.
        self.log_step(
            f"Generate config at {self.test_env.server_config} "
            f"and update provider to {new_provider}")
        self._update_server_provider(new_provider, "new")

        # Get the daos server yaml data again and check values
        self.log.info(
            'self.server_managers[0].manager.job.yaml.get_yaml_data() = %s',
            self.server_managers[0].manager.job.yaml.get_yaml_data())

        # Restart server with the new config.
        self.log_step(f"Restarting server with the new provider {self._get_server_provider()}")
        self._restart_servers("new")

        # Restart the daos_agent and dump agent info
        self._restart_agents()

        # Verify the provider was actually changed
        # NOTE(review): this reads back the value this test wrote to the server yaml object, so
        # it confirms the test's configuration state only - TODO confirm the running engines
        # use the provider, e.g. via the RAS event in daos_control.log.
        current_provider = self._get_server_provider()
        self.log.info("Current provider after restart: %s", current_provider)
        if current_provider != new_provider:
            self.fail(
                f"Provider change failed. Expected: {new_provider}, Actual: {current_provider}")

        # IOR read file to verify system works with new provider
        self._read_ior_data(intercept, ior_read_flags, "new", new_provider)

        # Change the provider back to the original and verify the switch back works
        self.log_step(f"Restoring original provider: {original_provider}")

        # Stop engines and agents again
        self.server_managers[0].dmg.system_stop(False)
        self.stop_agents()

        # Restore original provider
        self._update_server_provider(original_provider, "original")

        # Restart servers with original provider
        self.log_step("Restarting DAOS servers")
        self._restart_servers("original")

        # Restart the daos_agent and dump agent info
        self._restart_agents()

        # Verify restoration of original provider (same read-back caveat as above)
        restored_provider = self._get_server_provider()
        if restored_provider != original_provider:
            self.fail(
                f"Provider restoration failed. Expected: {original_provider}, "
                f"Actual: {restored_provider}")

        # IOR read file to verify system works with original provider
        self._read_ior_data(intercept, ior_read_flags, "original", original_provider)

        self.log.info(
            "Test completed successfully - fabric provider was changed and restored without "
            "storage reformatting")
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Test parameters added in the same commit as the ChangingFabricProvider test
# (presumably its avocado yaml file - TODO confirm the file name).
# NOTE(review): indentation was lost in this copy, so key nesting is not shown here.
hosts:
2+
test_servers: 3
3+
test_clients: 1
4+
setup:
5+
start_agents_once: false
6+
start_servers_once: false
7+
timeout: 600
8+
server_config:
9+
name: daos_server
10+
engines_per_host: 1
11+
engines:
12+
0:
13+
log_mask: INFO
14+
storage: auto
15+
system_ram_reserved: 2
16+
pool:
17+
size: 90%
18+
svcn: 1
19+
container:
20+
type: POSIX
21+
control_method: daos
22+
ior:
23+
env_vars:
24+
- D_LOG_MASK=INFO
25+
- DD_MASK=all
26+
- DD_SUBSYS=all
27+
api: POSIX
28+
client_processes:
29+
np_16:
30+
np: 16
31+
test_file: testFile
32+
repetitions: 1
33+
# Flags for the IOR read passes; the test fetches this value with
# self.params.get("read_flags", "/run/ior/*") and applies it before each read.
read_flags: "-v -C -k -e -r -R -G 27"
34+
iorflags:
35+
flags: "-v -k -e -w -r -R -G 27"
36+
transfer_size: '1M'
37+
block_size: '100M'
38+
write_x: 2
39+
read_x: 1
40+
objectclass:
41+
oclass_SX:
42+
dfs_oclass: "SX"
43+
config_generate_params:
44+
scm_only: True
45+
net_provider: "ofi+sockets"
46+
dfuse:
47+
disable_caching: true

0 commit comments

Comments
 (0)