From 7231f00c4d16c6790c48742066ae060e1d4622b6 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Mon, 24 Nov 2025 19:56:37 -0500 Subject: [PATCH 01/32] DAOS-17916 test: Verify page eviction on MD on SSD Adding a test to verify page eviction on a MD on SSD phase 2 pool. Skip-unit-tests: true Skip-fault-injection-test: true Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 73 +++++++ src/tests/ftest/pool/eviction_metrics.yaml | 39 ++++ src/tests/ftest/util/mdtest_utils.py | 225 ++++++++++++++++++++- src/tests/ftest/util/telemetry_utils.py | 6 + src/vos/vos_internal.h | 2 +- 5 files changed, 341 insertions(+), 4 deletions(-) create mode 100644 src/tests/ftest/pool/eviction_metrics.py create mode 100644 src/tests/ftest/pool/eviction_metrics.yaml diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py new file mode 100644 index 00000000000..155460b8e45 --- /dev/null +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -0,0 +1,73 @@ +""" + (C) Copyright 2025 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +from mdtest_utils import get_mdtest, get_mdtest_container +from telemetry_test_base import TestWithTelemetry + + +class EvictionMetrics(TestWithTelemetry): + """ + Tests DAOS client eviction from a pool that the client is using. + + :avocado: recursive + """ + + def test_eviction_metrics(self): + """Verify page eviction on the pool + + 1. Create a pool with a mem ratio of 100% (for pmeme or phase 1) or 25% (for phase 2) + 2. Collect a baseline for the pool eviction metrics + 3. Run mdtest -a DFS to generate many small files larger than mem size + 4. Collect new page eviction metrics + 5. 
Verify page eviction + + :avocado: tags=all,daily_regression + :avocado: tags=hw,medium + :avocado: tags=pool + :avocado: tags=EvictionMetrics,test_eviction_metrics + """ + evict_metrics = list(self.ENGINE_POOL_VOS_CACHE_METRICS) + + self.log_step('Creating a pool (dmg pool create)') + pool = self.get_pool(connect=False) + + self.log_step( + 'Collect pool eviction metrics after creating a pool (dmg telemetry metrics query)') + expected_ranges = self.telemetry.collect_data(evict_metrics) + for metric in sorted(expected_ranges): + for label in expected_ranges[metric]: + if self.server_managers[0].manager.job.using_control_metadata: + expected_ranges[metric][label] = [1, 1] + else: + expected_ranges[metric][label] = [0, 0] + + self.log_step( + 'Verify pool eviction metrics after pool creation (dmg telemetry metrics query)') + if not self.telemetry.verify_data(expected_ranges): + self.fail('Pool eviction metrics verification failed after pool creation') + + self.log_step('Writing data to the pool (mdtest -a DFS)') + mdtest = get_mdtest(self, self.hostlist_clients) + container = get_mdtest_container(self, mdtest, pool) + result = mdtest.run(pool, container, processes=16) + if not result.passed: + self.fail('Mdtest command failed') + + self.log_step( + 'Collect pool eviction metrics after writing data (dmg telemetry metrics query)') + expected_ranges = self.telemetry.collect_data(evict_metrics) + for metric in sorted(expected_ranges): + for label in expected_ranges[metric]: + if self.server_managers[0].manager.job.using_control_metadata: + expected_ranges[metric][label] = [1, 1] + else: + expected_ranges[metric][label] = [0, 0] + + self.log_step( + 'Verify pool eviction metrics after writing data (dmg telemetry metrics query)') + if not self.telemetry.verify_data(expected_ranges): + self.fail('Pool eviction metrics verification failed after writing data') + + self.log_step('Test passed') diff --git a/src/tests/ftest/pool/eviction_metrics.yaml 
b/src/tests/ftest/pool/eviction_metrics.yaml new file mode 100644 index 00000000000..1630c5f19c7 --- /dev/null +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -0,0 +1,39 @@ +launch: + !filter-only : /run/pool/default # yamllint disable-line rule:colons + +hosts: + test_servers: 2 + test_clients: 2 + +timeout: 300 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: auto + +pool: !mux + default: + size: 10% + md_on_ssd_p2: + size: 10% + mem_ratio: 50 + +mdtest: + ppn: 32 + dfs_oclass: S1 + dfs_dir_oclass: SX + manager: "MPICH" + # [api, write, read, branching_factor, num_of_dir_files, depth, flags, IL] + test_dir: "/" + api: DFS + branching_factor: 1 + depth: 20 + flags: '' + num_of_files_dirs: 100 + read_bytes: 4096 + write_bytes: 4096 diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index 97e5d75d088..e996db636b8 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2019-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -8,20 +9,103 @@ import re from command_utils import ExecutableCommand -from command_utils_base import FormattedParameter, LogParameter +from command_utils_base import BasicParameter, FormattedParameter, LogParameter +from exception_utils import CommandFailure from general_utils import get_log_file +from job_manager_utils import get_job_manager + +MDTEST_NAMESPACE = "/run/mdtest/*" + + +def get_mdtest(test, hosts, manager=None, path=None, slots=None, namespace=MDTEST_NAMESPACE, + mdtest_params=None): + """Get a Mdtest object. + + Args: + test (Test): avocado Test object + hosts (NodeSet): hosts on which to run the mdtest command + manager (JobManager, optional): command to manage the multi-host execution of mdtest. + Defaults to None, which will get a default job manager. 
+ path (str, optional): hostfile path. Defaults to None. + slots (int, optional): hostfile number of slots per host. Defaults to None. + namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. + mdtest_params (dict, optional): parameters to update the mdtest command. Defaults to None. + + Returns: + Mdtest: the Mdtest object requested + """ + mdtest = Mdtest(test, hosts, manager, path, slots, namespace) + if mdtest_params: + for name, value in mdtest_params.items(): + mdtest.update(name, value) + return mdtest + + +def get_mdtest_container(test, mdtest, pool): + """Create a container to use with mdtest. + + Args: + test (Test): avocado Test object + mdtest (MdtestCommand): mdtest command object + pool (TestPool): pool to create container in + + Returns: + TestContainer: the new container + """ + params = {} + if mdtest.dfs_oclass.value: + params['oclass'] = mdtest.dfs_oclass.value + if mdtest.dfs_dir_oclass.value: + params['dir_oclass'] = mdtest.dfs_dir_oclass.value + return test.get_container(pool, **params) + + +def run_mdtest(test, log, hosts, path, slots, pool, container, processes, ppn=None, manager=None, + display_space=True, namespace=MDTEST_NAMESPACE, mdtest_params=None): + # pylint: disable=too-many-arguments + """Run Mdtest on multiple hosts. + + Args: + test (Test): avocado Test object + log (str): log file. + hosts (NodeSet): hosts on which to run the mdtest command + path (str): hostfile path. + slots (int): hostfile number of slots per host. + pool (TestPool): DAOS test pool object + container (TestContainer): DAOS test container object. + processes (int): number of processes to run + ppn (int, optional): number of processes per node to run. If specified it will override + the processes input. Defaults to None. + manager (JobManager, optional): command to manage the multi-host execution of mdtest. + Defaults to None, which will get a default job manager. + display_space (bool, optional): Whether to display the pool space. 
Defaults to True. + namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. + mdtest_params (dict, optional): dictionary of MdtestCommand attributes to override from + get_params(). Defaults to None. + + Raises: + CommandFailure: if there is an error running the mdtest command + + Returns: + CmdResult: result of the ior command + + """ + mdtest = get_mdtest(test, hosts, manager, path, slots, namespace, mdtest_params) + mdtest.update_log_file(log) + return mdtest.run(pool, container, processes, ppn, display_space, False) class MdtestCommand(ExecutableCommand): """Defines a object representing a mdtest command.""" - def __init__(self, log_dir): + def __init__(self, log_dir, namespace="/run/mdtest/*"): """Create an MdtestCommand object. Args: log_dir (str): directory in which to put log files + namespace (str, optional): path to yaml parameters. Defaults to "/run/mdtest/*". """ - super().__init__("/run/mdtest/*", "mdtest") + super().__init__(namespace, "mdtest") self._log_dir = log_dir @@ -137,6 +221,141 @@ def get_default_env(self, manager_cmd, log_file=None): return env +class Mdtest: + """Defines a class that runs the mdtest command through a job manager, e.g. mpirun.""" + + def __init__(self, test, hosts, manager=None, path=None, slots=None, + namespace=MDTEST_NAMESPACE): + """Initialize an Mdtest object. + + Args: + test (Test): avocado Test object + hosts (NodeSet): hosts on which to run the mdtest command + manager (JobManager, optional): command to manage the multi-host execution of mdtest. + Defaults to None, which will get a default job manager. + path (str, optional): hostfile path. Defaults to None. + slots (int, optional): hostfile number of slots per host. Defaults to None. + namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. 
+ """ + if manager is None: + manager = get_job_manager(test, subprocess=False, timeout=60) + self.manager = manager + self.manager.assign_hosts(hosts, path, slots) + self.manager.job = MdtestCommand(test.test_env.log_dir, namespace) + self.manager.job.get_params(test) + self.manager.output_check = "both" + self.timeout = test.params.get("timeout", namespace, None) + self.label_generator = test.label_generator + self.test_id = test.test_id + self.env = self.command.get_default_env(str(self.manager)) + + @property + def command(self): + """Get the MdtestCommand object. + + Returns: + MdtestCommand: the MdtestCommand object managed by the JobManager + + """ + return self.manager.job + + def update(self, name, value): + """Update a MdtestCommand BasicParameter with a new value. + + Args: + name (str): name of the MdtestCommand BasicParameter to update + value (str): value to assign to the MdtestCommand BasicParameter + """ + param = getattr(self.command, name, None) + if param: + if isinstance(param, BasicParameter): + param.update(value, ".".join([self.command.command, name])) + + def update_log_file(self, log_file): + """Update the log file for the mdtest command. + + Args: + log_file (str): new mdtest log file + """ + self.command.env["D_LOG_FILE"] = get_log_file( + log_file or f"{self.command.command}_daos.log") + + def get_unique_log(self, container): + """Get a unique mdtest log file name. + + Args: + container (TestContainer): container involved with the command + + Returns: + str: a log file name + """ + label = self.label_generator.get_label("mdtest") + parts = [self.test_id, container.pool.identifier, container.identifier, label] + return '.'.join(['_'.join(parts), 'log']) + + def run(self, pool, container, processes, ppn=None, intercept=None, display_space=True, + unique_log=True): + # pylint: disable=too-many-arguments + """Run mdtest. + + Args: + pool (TestPool): DAOS test pool object + container (TestContainer): DAOS test container object. 
+ processes (int): number of processes to run + ppn (int, optional): number of processes per node to run. If specified it will override + the processes input. Defaults to None. + intercept (str, optional): path to interception library. Defaults to None. + display_space (bool, optional): Whether to display the pool space. Defaults to True. + unique_log (bool, optional): whether or not to update the log file with a new unique log + file name. Defaults to True. + + Raises: + CommandFailure: if there is an error running the mdtest command + + Returns: + CmdResult: result of the mdtest command + """ + result = None + error_message = None + + self.command.update_params(dfs_pool=pool.identifier, dfs_cont=container.identifier) + + if intercept: + self.env["LD_PRELOAD"] = intercept + if "D_LOG_MASK" not in self.env: + self.env["D_LOG_MASK"] = "INFO" + # if "D_IL_REPORT" not in self.env and il_report is not None: + # self.env["D_IL_REPORT"] = str(il_report) + + # Pass only processes or ppn to be compatible with previous behavior + if ppn is not None: + self.manager.assign_processes(ppn=ppn) + else: + self.manager.assign_processes(processes=processes) + + self.manager.assign_environment(self.env) + + if unique_log: + self.update_log_file(self.get_unique_log(container)) + + try: + if display_space: + pool.display_space() + result = self.manager.run() + + except CommandFailure as error: + error_message = "Mdtest Failed:\n {}".format("\n ".join(str(error).split("\n"))) + + finally: + if not self.manager.run_as_subprocess and display_space: + pool.display_space() + + if error_message: + raise CommandFailure(error_message) + + return result + + class MdtestMetrics(): # pylint: disable=too-few-public-methods """Represents metrics from mdtest output. 
diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 8937db87788..6f2ffa642f3 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -159,6 +159,12 @@ class TelemetryUtils(): "engine_pool_vos_wal_replay_size", "engine_pool_vos_wal_replay_time", "engine_pool_vos_wal_replay_transactions"] + ENGINE_POOL_VOS_CACHE_METRICS = [ + "engine_pool_vos_wal_page_evict", + "engine_pool_vos_wal_page_flush", + "engine_pool_vos_wal_page_hit", + "engine_pool_vos_wal_page_miss", + "engine_pool_vos_wal_page_ne"] ENGINE_POOL_SVC_METRICS = [ "engine_pool_svc_degraded_ranks", "engine_pool_svc_disabled_targets", diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 18e6438ce6e..fe10f8cb360 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -263,7 +263,7 @@ struct vos_cache_metrics { struct d_tm_node_t *vcm_obj_hit; }; -void vos_cache_metrics_init(struct vos_cache_metrics *vc_metrcis, const char *path, int tgt_id); +void vos_cache_metrics_init(struct vos_cache_metrics *vc_metrics, const char *path, int tgt_id); struct vos_pool_metrics { void *vp_vea_metrics; From d126fe1fef0bdf3bda705282b50793681ce0de61 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Mon, 24 Nov 2025 20:10:41 -0500 Subject: [PATCH 02/32] Updates. Skip-unit-tests: true Skip-fault-injection-test: true Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 155460b8e45..05b9fd71506 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -17,7 +17,7 @@ class EvictionMetrics(TestWithTelemetry): def test_eviction_metrics(self): """Verify page eviction on the pool - 1. Create a pool with a mem ratio of 100% (for pmeme or phase 1) or 25% (for phase 2) + 1. 
Create a pool with a mem ratio of 100% (for pmem or phase 1) or 25% (for phase 2) 2. Collect a baseline for the pool eviction metrics 3. Run mdtest -a DFS to generate many small files larger than mem size 4. Collect new page eviction metrics @@ -28,7 +28,7 @@ def test_eviction_metrics(self): :avocado: tags=pool :avocado: tags=EvictionMetrics,test_eviction_metrics """ - evict_metrics = list(self.ENGINE_POOL_VOS_CACHE_METRICS) + evict_metrics = list(self.telemetry.ENGINE_POOL_VOS_CACHE_METRICS) self.log_step('Creating a pool (dmg pool create)') pool = self.get_pool(connect=False) From b3854033b55ad97b751a49146a9dffe196c21f2b Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 3 Dec 2025 13:13:44 -0500 Subject: [PATCH 03/32] Update metrics. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 2 ++ src/tests/ftest/util/telemetry_utils.py | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 05b9fd71506..4251da8cd2a 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -33,6 +33,8 @@ def test_eviction_metrics(self): self.log_step('Creating a pool (dmg pool create)') pool = self.get_pool(connect=False) + self.telemetry.list_metrics() + self.log_step( 'Collect pool eviction metrics after creating a pool (dmg telemetry metrics query)') expected_ranges = self.telemetry.collect_data(evict_metrics) diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 6f2ffa642f3..5230fba5a46 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -160,11 +160,11 @@ class TelemetryUtils(): "engine_pool_vos_wal_replay_time", "engine_pool_vos_wal_replay_transactions"] ENGINE_POOL_VOS_CACHE_METRICS = [ - 
"engine_pool_vos_wal_page_evict", - "engine_pool_vos_wal_page_flush", - "engine_pool_vos_wal_page_hit", - "engine_pool_vos_wal_page_miss", - "engine_pool_vos_wal_page_ne"] + "engine_pool_vos_cache_page_evict", + "engine_pool_vos_cache_page_flush", + "engine_pool_vos_cache_page_hit", + "engine_pool_vos_cache_page_miss", + "engine_pool_vos_cache_page_ne"] ENGINE_POOL_SVC_METRICS = [ "engine_pool_svc_degraded_ranks", "engine_pool_svc_disabled_targets", @@ -185,6 +185,7 @@ class TelemetryUtils(): ENGINE_POOL_VOS_SPACE_METRICS + \ ENGINE_POOL_VOS_WAL_METRICS + \ ENGINE_POOL_VOS_WAL_REPLAY_METRICS +\ + ENGINE_POOL_VOS_CACHE_METRICS +\ ENGINE_POOL_SVC_METRICS ENGINE_EVENT_METRICS = [ "engine_events_dead_ranks", From fe66b750a0ab67da488c2586576ac6ff94ec00b8 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 3 Dec 2025 16:37:38 -0500 Subject: [PATCH 04/32] Fix expected metric values after pool create. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 4251da8cd2a..fa36e8df663 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -33,17 +33,12 @@ def test_eviction_metrics(self): self.log_step('Creating a pool (dmg pool create)') pool = self.get_pool(connect=False) - self.telemetry.list_metrics() - self.log_step( 'Collect pool eviction metrics after creating a pool (dmg telemetry metrics query)') expected_ranges = self.telemetry.collect_data(evict_metrics) for metric in sorted(expected_ranges): for label in expected_ranges[metric]: - if self.server_managers[0].manager.job.using_control_metadata: - expected_ranges[metric][label] = [1, 1] - else: - expected_ranges[metric][label] = [0, 0] + expected_ranges[metric][label] = [0, 0] self.log_step( 'Verify pool 
eviction metrics after pool creation (dmg telemetry metrics query)') From 254b1e82175be8d4476c4a4e96551cdabad71959 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 4 Dec 2025 08:36:56 -0500 Subject: [PATCH 05/32] Updates for mdtest_utils.py Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 9 ++--- src/tests/ftest/pool/eviction_metrics.yaml | 6 ++- src/tests/ftest/util/mdtest_utils.py | 47 ++++++++++++---------- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index fa36e8df663..876a9bb8a9a 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -3,7 +3,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ -from mdtest_utils import get_mdtest, get_mdtest_container +from mdtest_utils import write_mdtest_data from telemetry_test_base import TestWithTelemetry @@ -32,6 +32,7 @@ def test_eviction_metrics(self): self.log_step('Creating a pool (dmg pool create)') pool = self.get_pool(connect=False) + container = self.get_container(pool) self.log_step( 'Collect pool eviction metrics after creating a pool (dmg telemetry metrics query)') @@ -46,11 +47,7 @@ def test_eviction_metrics(self): self.fail('Pool eviction metrics verification failed after pool creation') self.log_step('Writing data to the pool (mdtest -a DFS)') - mdtest = get_mdtest(self, self.hostlist_clients) - container = get_mdtest_container(self, mdtest, pool) - result = mdtest.run(pool, container, processes=16) - if not result.passed: - self.fail('Mdtest command failed') + write_mdtest_data(self, container) self.log_step( 'Collect pool eviction metrics after writing data (dmg telemetry metrics query)') diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 1630c5f19c7..3d2d505f969 100644 --- 
a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -23,12 +23,16 @@ pool: !mux size: 10% mem_ratio: 50 +container: + oclass: S1 + dir_oclass: SX + mdtest: ppn: 32 dfs_oclass: S1 dfs_dir_oclass: SX manager: "MPICH" - # [api, write, read, branching_factor, num_of_dir_files, depth, flags, IL] + processes: 16 test_dir: "/" api: DFS branching_factor: 1 diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index e996db636b8..7475744cd0a 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -41,25 +41,6 @@ def get_mdtest(test, hosts, manager=None, path=None, slots=None, namespace=MDTES return mdtest -def get_mdtest_container(test, mdtest, pool): - """Create a container to use with mdtest. - - Args: - test (Test): avocado Test object - mdtest (MdtestCommand): mdtest command object - pool (TestPool): pool to create container in - - Returns: - TestContainer: the new container - """ - params = {} - if mdtest.dfs_oclass.value: - params['oclass'] = mdtest.dfs_oclass.value - if mdtest.dfs_dir_oclass.value: - params['dir_oclass'] = mdtest.dfs_dir_oclass.value - return test.get_container(pool, **params) - - def run_mdtest(test, log, hosts, path, slots, pool, container, processes, ppn=None, manager=None, display_space=True, namespace=MDTEST_NAMESPACE, mdtest_params=None): # pylint: disable=too-many-arguments @@ -95,6 +76,32 @@ def run_mdtest(test, log, hosts, path, slots, pool, container, processes, ppn=No return mdtest.run(pool, container, processes, ppn, display_space, False) +def write_mdtest_data(test, container, namespace=MDTEST_NAMESPACE, **mdtest_run_params): + """Write data to the container/dfuse using mdtest. + + Simple method for test classes to use to write data with mdtest. While not required, this is + setup by default to pull in mdtest parameters from the test yaml. 
+ + Args: + test (Test): avocado Test object + container (TestContainer): the container to populate + namespace (str, optional): path to mdtest yaml parameters. Defaults to MDTEST_NAMESPACE. + mdtest_run_params (dict): optional params for the Mdtest.run() command. + + Returns: + Mdtest: the Mdtest object used to populate the container + """ + mdtest = get_mdtest(test, test.hostlist_clients, None, test.workdir, None, namespace) + + if 'processes' not in mdtest_run_params: + mdtest_run_params['processes'] = test.params.get('processes', namespace, None) + elif 'ppn' not in mdtest_run_params: + mdtest_run_params['ppn'] = test.params.get('ppn', namespace, None) + + mdtest.run(container.pool, container, **mdtest_run_params) + return mdtest + + class MdtestCommand(ExecutableCommand): """Defines a object representing a mdtest command.""" @@ -324,8 +331,6 @@ def run(self, pool, container, processes, ppn=None, intercept=None, display_spac self.env["LD_PRELOAD"] = intercept if "D_LOG_MASK" not in self.env: self.env["D_LOG_MASK"] = "INFO" - # if "D_IL_REPORT" not in self.env and il_report is not None: - # self.env["D_IL_REPORT"] = str(il_report) # Pass only processes or ppn to be compatible with previous behavior if ppn is not None: From 96a55d952211ae1c4499702a8162dffd7ec4f238 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Mon, 8 Dec 2025 09:17:00 -0500 Subject: [PATCH 06/32] Fix mdtest env. 
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/util/mdtest_utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index 7475744cd0a..1f89b8a368f 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -300,6 +300,22 @@ def get_unique_log(self, container): parts = [self.test_id, container.pool.identifier, container.identifier, label] return '.'.join(['_'.join(parts), 'log']) + def update_daos_params(self, pool, container): + """Set the mdtest parameters for the pool and container. + + Optionally also set the DAOS pool and container environment variables for mdtest. + + Args: + pool (TestPool): the pool to use with the mdtest command + container (TestContainer): the container to use with the mdtest command + """ + self.command.update_params(dfs_pool=pool.identifier, dfs_cont=container.identifier) + + if "mpirun" in str(self.manager) or "srun" in str(self.manager): + self.command.env["DAOS_POOL"] = self.command.dfs_pool.value + self.command.env["DAOS_CONT"] = self.command.dfs_cont.value + self.command.env["IOR_HINT__MPI__romio_daos_obj_class"] = self.command.dfs_oclass.value + def run(self, pool, container, processes, ppn=None, intercept=None, display_space=True, unique_log=True): # pylint: disable=too-many-arguments @@ -325,7 +341,7 @@ def run(self, pool, container, processes, ppn=None, intercept=None, display_spac result = None error_message = None - self.command.update_params(dfs_pool=pool.identifier, dfs_cont=container.identifier) + self.update_daos_params(pool, container) if intercept: self.env["LD_PRELOAD"] = intercept From 7d7112d82953ac30b69e5052cf3e594fb26aa97d Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Mon, 8 Dec 2025 23:57:30 -0500 Subject: [PATCH 07/32] Fix mdtest env again. 
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/util/mdtest_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index 1f89b8a368f..cd9d2a9fa84 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -312,9 +312,9 @@ def update_daos_params(self, pool, container): self.command.update_params(dfs_pool=pool.identifier, dfs_cont=container.identifier) if "mpirun" in str(self.manager) or "srun" in str(self.manager): - self.command.env["DAOS_POOL"] = self.command.dfs_pool.value - self.command.env["DAOS_CONT"] = self.command.dfs_cont.value - self.command.env["IOR_HINT__MPI__romio_daos_obj_class"] = self.command.dfs_oclass.value + self.env["DAOS_POOL"] = self.command.dfs_pool.value + self.env["DAOS_CONT"] = self.command.dfs_cont.value + self.env["IOR_HINT__MPI__romio_daos_obj_class"] = self.command.dfs_oclass.value def run(self, pool, container, processes, ppn=None, intercept=None, display_space=True, unique_log=True): From 2444880c3867626d51e58008428f485aea815399 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 9 Dec 2025 09:17:51 -0500 Subject: [PATCH 08/32] Use POSIX container. 
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 3d2d505f969..61a01227dc7 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -24,6 +24,7 @@ pool: !mux mem_ratio: 50 container: + type: POSIX oclass: S1 dir_oclass: SX From a463680f0867d736ee13a6d1d564557aceb30c53 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 9 Dec 2025 16:08:52 -0500 Subject: [PATCH 09/32] Update mdtest command Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 27 +++++++++++++++++----- src/tests/ftest/pool/eviction_metrics.yaml | 7 ++---- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 876a9bb8a9a..96748192d99 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -3,7 +3,9 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ -from mdtest_utils import write_mdtest_data +import json + +from mdtest_utils import MDTEST_NAMESPACE, run_mdtest from telemetry_test_base import TestWithTelemetry @@ -32,6 +34,13 @@ def test_eviction_metrics(self): self.log_step('Creating a pool (dmg pool create)') pool = self.get_pool(connect=False) + try: + _result = json.loads(pool.dmg.result.stdout) + mem_file_bytes = int(_result["response"]["mem_file_bytes"]) + except Exception as error: # pylint: disable=broad-except + self.fail(f"Error extracting mem_file_bytes for dmg pool create output: {error}") + + self.log_step('Creating a container (dmg container create)') container = self.get_container(pool) self.log_step( @@ -41,13 +50,20 @@ def 
test_eviction_metrics(self): for label in expected_ranges[metric]: expected_ranges[metric][label] = [0, 0] - self.log_step( - 'Verify pool eviction metrics after pool creation (dmg telemetry metrics query)') + self.log_step('Verify pool eviction metrics after pool creation') if not self.telemetry.verify_data(expected_ranges): self.fail('Pool eviction metrics verification failed after pool creation') self.log_step('Writing data to the pool (mdtest -a DFS)') - write_mdtest_data(self, container) + mdtest_params = { + "read_bytes": mem_file_bytes * 2, + "write_bytes": mem_file_bytes * 2 + } + processes = self.params.get('processes', MDTEST_NAMESPACE, None) + ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) + run_mdtest( + self, self.log, self.hostlist_clients, self.workdir, None, pool, container, processes, + ppn, **mdtest_params) self.log_step( 'Collect pool eviction metrics after writing data (dmg telemetry metrics query)') @@ -59,8 +75,7 @@ def test_eviction_metrics(self): else: expected_ranges[metric][label] = [0, 0] - self.log_step( - 'Verify pool eviction metrics after writing data (dmg telemetry metrics query)') + self.log_step('Verify pool eviction metrics after writing data') if not self.telemetry.verify_data(expected_ranges): self.fail('Pool eviction metrics verification failed after writing data') diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 61a01227dc7..1873bad122f 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -21,7 +21,7 @@ pool: !mux size: 10% md_on_ssd_p2: size: 10% - mem_ratio: 50 + mem_ratio: 25 container: type: POSIX @@ -38,7 +38,4 @@ mdtest: api: DFS branching_factor: 1 depth: 20 - flags: '' - num_of_files_dirs: 100 - read_bytes: 4096 - write_bytes: 4096 + num_of_files_dirs: 10000 From 3837dda3211699891ee8bd3e881a3469cd2c86b5 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 9 Dec 2025 17:27:43 -0500 Subject: [PATCH 
10/32] Fix Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 96748192d99..d63822cf10f 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -39,6 +39,7 @@ def test_eviction_metrics(self): mem_file_bytes = int(_result["response"]["mem_file_bytes"]) except Exception as error: # pylint: disable=broad-except self.fail(f"Error extracting mem_file_bytes for dmg pool create output: {error}") + self.log.debug("%s mem_file_bytes: %s", pool, mem_file_bytes) self.log_step('Creating a container (dmg container create)') container = self.get_container(pool) @@ -55,15 +56,12 @@ def test_eviction_metrics(self): self.fail('Pool eviction metrics verification failed after pool creation') self.log_step('Writing data to the pool (mdtest -a DFS)') - mdtest_params = { - "read_bytes": mem_file_bytes * 2, - "write_bytes": mem_file_bytes * 2 - } processes = self.params.get('processes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) + mdtest_params = {"read_bytes": mem_file_bytes * 2, "write_bytes": mem_file_bytes * 2} run_mdtest( self, self.log, self.hostlist_clients, self.workdir, None, pool, container, processes, - ppn, **mdtest_params) + ppn, None, True, MDTEST_NAMESPACE, mdtest_params) self.log_step( 'Collect pool eviction metrics after writing data (dmg telemetry metrics query)') From a17c79fc2b8a48fbc41ad8e02749314005a9a725 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 9 Dec 2025 23:42:40 -0500 Subject: [PATCH 11/32] Set mdtest log file name. 
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 4 ++-- src/tests/ftest/util/mdtest_utils.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index d63822cf10f..86c03f4fed1 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -60,8 +60,8 @@ def test_eviction_metrics(self): ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) mdtest_params = {"read_bytes": mem_file_bytes * 2, "write_bytes": mem_file_bytes * 2} run_mdtest( - self, self.log, self.hostlist_clients, self.workdir, None, pool, container, processes, - ppn, None, True, MDTEST_NAMESPACE, mdtest_params) + self, self.hostlist_clients, self.workdir, None, pool, container, processes, ppn, None, + None, True, MDTEST_NAMESPACE, mdtest_params) self.log_step( 'Collect pool eviction metrics after writing data (dmg telemetry metrics query)') diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index cd9d2a9fa84..f80ec4664d4 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -41,14 +41,13 @@ def get_mdtest(test, hosts, manager=None, path=None, slots=None, namespace=MDTES return mdtest -def run_mdtest(test, log, hosts, path, slots, pool, container, processes, ppn=None, manager=None, - display_space=True, namespace=MDTEST_NAMESPACE, mdtest_params=None): +def run_mdtest(test, hosts, path, slots, pool, container, processes, ppn=None, manager=None, + log_file=None, display_space=True, namespace=MDTEST_NAMESPACE, mdtest_params=None): # pylint: disable=too-many-arguments """Run Mdtest on multiple hosts. Args: test (Test): avocado Test object - log (str): log file. hosts (NodeSet): hosts on which to run the mdtest command path (str): hostfile path. 
slots (int): hostfile number of slots per host. @@ -59,6 +58,8 @@ def run_mdtest(test, log, hosts, path, slots, pool, container, processes, ppn=No the processes input. Defaults to None. manager (JobManager, optional): command to manage the multi-host execution of mdtest. Defaults to None, which will get a default job manager. + log_file (str, optional): log file name. Defaults to None, which will result in a log file + name containing the test, pool, and container IDs. display_space (bool, optional): Whether to display the pool space. Defaults to True. namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. mdtest_params (dict, optional): dictionary of MdtestCommand attributes to override from @@ -72,7 +73,9 @@ def run_mdtest(test, log, hosts, path, slots, pool, container, processes, ppn=No """ mdtest = get_mdtest(test, hosts, manager, path, slots, namespace, mdtest_params) - mdtest.update_log_file(log) + if log_file is None: + log_file = f"mdtest_{test.test_id}_{pool.identifier}_{container.identifier}.log" + mdtest.update_log_file(log_file) return mdtest.run(pool, container, processes, ppn, display_space, False) @@ -92,6 +95,8 @@ def write_mdtest_data(test, container, namespace=MDTEST_NAMESPACE, **mdtest_run_ Mdtest: the Mdtest object used to populate the container """ mdtest = get_mdtest(test, test.hostlist_clients, None, test.workdir, None, namespace) + mdtest.update_log_file( + f"mdtest_{test.test_id}_{container.pool.identifier}_{container.identifier}.log") if 'processes' not in mdtest_run_params: mdtest_run_params['processes'] = test.params.get('processes', namespace, None) From 9aeca071620836e0b73e4afe07252016d55c05e8 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 10 Dec 2025 08:41:28 -0500 Subject: [PATCH 12/32] Fix call. 
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 4 ++-- src/tests/ftest/util/mdtest_utils.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 86c03f4fed1..670193d8fd4 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -60,8 +60,8 @@ def test_eviction_metrics(self): ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) mdtest_params = {"read_bytes": mem_file_bytes * 2, "write_bytes": mem_file_bytes * 2} run_mdtest( - self, self.hostlist_clients, self.workdir, None, pool, container, processes, ppn, None, - None, True, MDTEST_NAMESPACE, mdtest_params) + self, self.hostlist_clients, self.workdir, None, pool, container, processes, ppn, + mdtest_params=mdtest_params) self.log_step( 'Collect pool eviction metrics after writing data (dmg telemetry metrics query)') diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index f80ec4664d4..fdc10948ad3 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -42,7 +42,8 @@ def get_mdtest(test, hosts, manager=None, path=None, slots=None, namespace=MDTES def run_mdtest(test, hosts, path, slots, pool, container, processes, ppn=None, manager=None, - log_file=None, display_space=True, namespace=MDTEST_NAMESPACE, mdtest_params=None): + log_file=None, intercept=None, display_space=True, namespace=MDTEST_NAMESPACE, + mdtest_params=None): # pylint: disable=too-many-arguments """Run Mdtest on multiple hosts. @@ -60,6 +61,7 @@ def run_mdtest(test, hosts, path, slots, pool, container, processes, ppn=None, m Defaults to None, which will get a default job manager. log_file (str, optional): log file name. 
Defaults to None, which will result in a log file name containing the test, pool, and container IDs. + intercept (str, optional): path to interception library. Defaults to None. display_space (bool, optional): Whether to display the pool space. Defaults to True. namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. mdtest_params (dict, optional): dictionary of MdtestCommand attributes to override from @@ -74,9 +76,9 @@ def run_mdtest(test, hosts, path, slots, pool, container, processes, ppn=None, m """ mdtest = get_mdtest(test, hosts, manager, path, slots, namespace, mdtest_params) if log_file is None: - log_file = f"mdtest_{test.test_id}_{pool.identifier}_{container.identifier}.log" + log_file = mdtest.get_unique_log(container) mdtest.update_log_file(log_file) - return mdtest.run(pool, container, processes, ppn, display_space, False) + return mdtest.run(pool, container, processes, ppn, intercept, display_space, False) def write_mdtest_data(test, container, namespace=MDTEST_NAMESPACE, **mdtest_run_params): From 1a4fa3fe0871cee1dc9a79e684fa9b41c85d3266 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 10 Dec 2025 15:47:47 -0500 Subject: [PATCH 13/32] Update mdtest params. 
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 1873bad122f..ea64b85acbd 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -29,7 +29,6 @@ container: dir_oclass: SX mdtest: - ppn: 32 dfs_oclass: S1 dfs_dir_oclass: SX manager: "MPICH" @@ -38,4 +37,4 @@ mdtest: api: DFS branching_factor: 1 depth: 20 - num_of_files_dirs: 10000 + num_of_files_dirs: 1000 From 295c098573fbd0b3718afeb37aedcd9b071e619e Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 10 Dec 2025 23:41:11 -0500 Subject: [PATCH 14/32] Cleanup. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 6 +++-- src/tests/ftest/pool/eviction_metrics.yaml | 3 ++- src/tests/ftest/util/mdtest_utils.py | 26 +++++++--------------- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 670193d8fd4..34f78869dd9 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -4,6 +4,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ import json +import math from mdtest_utils import MDTEST_NAMESPACE, run_mdtest from telemetry_test_base import TestWithTelemetry @@ -57,10 +58,11 @@ def test_eviction_metrics(self): self.log_step('Writing data to the pool (mdtest -a DFS)') processes = self.params.get('processes', MDTEST_NAMESPACE, None) + read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) - mdtest_params = {"read_bytes": mem_file_bytes * 2, "write_bytes": mem_file_bytes * 2} + mdtest_params 
= {"num_of_files_dirs": math.ceil(mem_file_bytes / read_bytes) + 1} run_mdtest( - self, self.hostlist_clients, self.workdir, None, pool, container, processes, ppn, + self, self.hostlist_clients, self.workdir, None, container, processes, ppn, mdtest_params=mdtest_params) self.log_step( diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index ea64b85acbd..23c674a0f1d 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -37,4 +37,5 @@ mdtest: api: DFS branching_factor: 1 depth: 20 - num_of_files_dirs: 1000 + write_bytes: 4096 + read_bytes: 4096 diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index fdc10948ad3..1a736649d39 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -41,7 +41,7 @@ def get_mdtest(test, hosts, manager=None, path=None, slots=None, namespace=MDTES return mdtest -def run_mdtest(test, hosts, path, slots, pool, container, processes, ppn=None, manager=None, +def run_mdtest(test, hosts, path, slots, container, processes, ppn=None, manager=None, log_file=None, intercept=None, display_space=True, namespace=MDTEST_NAMESPACE, mdtest_params=None): # pylint: disable=too-many-arguments @@ -52,7 +52,6 @@ def run_mdtest(test, hosts, path, slots, pool, container, processes, ppn=None, m hosts (NodeSet): hosts on which to run the mdtest command path (str): hostfile path. slots (int): hostfile number of slots per host. - pool (TestPool): DAOS test pool object container (TestContainer): DAOS test container object. processes (int): number of processes to run ppn (int, optional): number of processes per node to run. 
If specified it will override @@ -78,7 +77,7 @@ def run_mdtest(test, hosts, path, slots, pool, container, processes, ppn=None, m if log_file is None: log_file = mdtest.get_unique_log(container) mdtest.update_log_file(log_file) - return mdtest.run(pool, container, processes, ppn, intercept, display_space, False) + return mdtest.run(container, processes, ppn, intercept, display_space) def write_mdtest_data(test, container, namespace=MDTEST_NAMESPACE, **mdtest_run_params): @@ -97,15 +96,14 @@ def write_mdtest_data(test, container, namespace=MDTEST_NAMESPACE, **mdtest_run_ Mdtest: the Mdtest object used to populate the container """ mdtest = get_mdtest(test, test.hostlist_clients, None, test.workdir, None, namespace) - mdtest.update_log_file( - f"mdtest_{test.test_id}_{container.pool.identifier}_{container.identifier}.log") + mdtest.update_log_file(mdtest.get_unique_log(container)) if 'processes' not in mdtest_run_params: mdtest_run_params['processes'] = test.params.get('processes', namespace, None) elif 'ppn' not in mdtest_run_params: mdtest_run_params['ppn'] = test.params.get('ppn', namespace, None) - mdtest.run(container.pool, container, **mdtest_run_params) + mdtest.run(container, **mdtest_run_params) return mdtest @@ -323,21 +321,16 @@ def update_daos_params(self, pool, container): self.env["DAOS_CONT"] = self.command.dfs_cont.value self.env["IOR_HINT__MPI__romio_daos_obj_class"] = self.command.dfs_oclass.value - def run(self, pool, container, processes, ppn=None, intercept=None, display_space=True, - unique_log=True): - # pylint: disable=too-many-arguments + def run(self, container, processes, ppn=None, intercept=None, display_space=True): """Run mdtest. Args: - pool (TestPool): DAOS test pool object container (TestContainer): DAOS test container object. processes (int): number of processes to run ppn (int, optional): number of processes per node to run. If specified it will override the processes input. Defaults to None. 
intercept (str, optional): path to interception library. Defaults to None. display_space (bool, optional): Whether to display the pool space. Defaults to True. - unique_log (bool, optional): whether or not to update the log file with a new unique log - file name. Defaults to True. Raises: CommandFailure: if there is an error running the mdtest command @@ -348,7 +341,7 @@ def run(self, pool, container, processes, ppn=None, intercept=None, display_spac result = None error_message = None - self.update_daos_params(pool, container) + self.update_daos_params(container.pool, container) if intercept: self.env["LD_PRELOAD"] = intercept @@ -363,12 +356,9 @@ def run(self, pool, container, processes, ppn=None, intercept=None, display_spac self.manager.assign_environment(self.env) - if unique_log: - self.update_log_file(self.get_unique_log(container)) - try: if display_space: - pool.display_space() + container.pool.display_space() result = self.manager.run() except CommandFailure as error: @@ -376,7 +366,7 @@ def run(self, pool, container, processes, ppn=None, intercept=None, display_spac finally: if not self.manager.run_as_subprocess and display_space: - pool.display_space() + container.pool.display_space() if error_message: raise CommandFailure(error_message) From a9ca47c25f9a93501babc6b3c89939376367fcb3 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 11 Dec 2025 07:21:50 -0500 Subject: [PATCH 15/32] Increase mdtest timeout Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 4 +++- src/tests/ftest/pool/eviction_metrics.yaml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 34f78869dd9..565e6ba83df 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -6,6 +6,7 @@ import json import math +from 
job_manager_utils import get_job_manager from mdtest_utils import MDTEST_NAMESPACE, run_mdtest from telemetry_test_base import TestWithTelemetry @@ -57,12 +58,13 @@ def test_eviction_metrics(self): self.fail('Pool eviction metrics verification failed after pool creation') self.log_step('Writing data to the pool (mdtest -a DFS)') + manager = get_job_manager(self, subprocess=False, timeout=180) processes = self.params.get('processes', MDTEST_NAMESPACE, None) read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / read_bytes) + 1} run_mdtest( - self, self.hostlist_clients, self.workdir, None, container, processes, ppn, + self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, mdtest_params=mdtest_params) self.log_step( diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 23c674a0f1d..716791f2323 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -5,7 +5,7 @@ hosts: test_servers: 2 test_clients: 2 -timeout: 300 +timeout: 400 server_config: name: daos_server From f8295196d184cb9dfaba2baa9407037839d3fd20 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 11 Dec 2025 08:40:49 -0500 Subject: [PATCH 16/32] Further increase timeout.
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 2 +- src/tests/ftest/pool/eviction_metrics.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 565e6ba83df..3b134f92589 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -58,7 +58,7 @@ def test_eviction_metrics(self): self.fail('Pool eviction metrics verification failed after pool creation') self.log_step('Writing data to the pool (mdtest -a DFS)') - manager = get_job_manager(self, subprocess=False, timeout=180) + manager = get_job_manager(self, subprocess=False, timeout=1600) processes = self.params.get('processes', MDTEST_NAMESPACE, None) read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 716791f2323..e44ad6b4ec3 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -5,7 +5,7 @@ hosts: test_servers: 2 test_clients: 2 -timeout: 400 +timeout: 1600 server_config: name: daos_server From 1a5c295b3f96ad27f204ed8877d3de33305b3c84 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 11 Dec 2025 10:38:31 -0500 Subject: [PATCH 17/32] Increase pool size. 
Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index e44ad6b4ec3..f7ee0b1f609 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -18,9 +18,9 @@ server_config: pool: !mux default: - size: 10% + size: 100% md_on_ssd_p2: - size: 10% + size: 100% mem_ratio: 25 container: From 15e5b3233b6424f1ff86453c495abf442fa0cccb Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 11 Dec 2025 13:23:07 -0500 Subject: [PATCH 18/32] Further increase timeout Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 2 +- src/tests/ftest/pool/eviction_metrics.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 3b134f92589..87061c79ccd 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -58,7 +58,7 @@ def test_eviction_metrics(self): self.fail('Pool eviction metrics verification failed after pool creation') self.log_step('Writing data to the pool (mdtest -a DFS)') - manager = get_job_manager(self, subprocess=False, timeout=1600) + manager = get_job_manager(self, subprocess=False, timeout=None) processes = self.params.get('processes', MDTEST_NAMESPACE, None) read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index f7ee0b1f609..c0ef5d6e498 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml 
@@ -5,7 +5,7 @@ hosts: test_servers: 2 test_clients: 2 -timeout: 1600 +timeout: 16000 server_config: name: daos_server From 1438e0972a7375d1d042860955b407d1bde89a8e Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 12 Dec 2025 08:47:51 -0500 Subject: [PATCH 19/32] Adding more clients. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index c0ef5d6e498..266e072eb2c 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -2,8 +2,8 @@ launch: !filter-only : /run/pool/default # yamllint disable-line rule:colons hosts: - test_servers: 2 - test_clients: 2 + test_servers: 1 + test_clients: 3 timeout: 16000 @@ -32,10 +32,10 @@ mdtest: dfs_oclass: S1 dfs_dir_oclass: SX manager: "MPICH" - processes: 16 + ppn: 32 test_dir: "/" api: DFS branching_factor: 1 depth: 20 - write_bytes: 4096 - read_bytes: 4096 + write_bytes: 8192 + read_bytes: 8192 From 553dc6be8daa77cf713426dd93ebcd6eb24c3918 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 16 Dec 2025 09:17:29 -0500 Subject: [PATCH 20/32] Adding more clients; moving to HW Large Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 2 +- src/tests/ftest/pool/eviction_metrics.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 87061c79ccd..5fb24458f84 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -28,7 +28,7 @@ def test_eviction_metrics(self): 5. 
Verify page eviction :avocado: tags=all,daily_regression - :avocado: tags=hw,medium + :avocado: tags=hw,large :avocado: tags=pool :avocado: tags=EvictionMetrics,test_eviction_metrics """ diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 266e072eb2c..6ed938b6cac 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -3,7 +3,7 @@ launch: hosts: test_servers: 1 - test_clients: 3 + test_clients: 7 timeout: 16000 From ecbf012c3e3a9a5eab806affcc88758bc5a908d6 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 16 Dec 2025 23:57:45 -0500 Subject: [PATCH 21/32] Debug. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 2 ++ src/tests/ftest/pool/eviction_metrics.yaml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 5fb24458f84..263da7cd9da 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -63,6 +63,8 @@ def test_eviction_metrics(self): read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / read_bytes) + 1} + # Debug + mdtest_params["num_of_files_dirs"] /= 1000 run_mdtest( self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, mdtest_params=mdtest_params) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 6ed938b6cac..66a0ca51bd6 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -37,5 +37,5 @@ mdtest: api: DFS branching_factor: 1 depth: 20 - write_bytes: 8192 - read_bytes: 8192 + write_bytes: 4096 + read_bytes: 4096 From 
aac104c16b44f2d2b269a75611f61effb2367aea Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 19 Dec 2025 01:33:05 -0500 Subject: [PATCH 22/32] Updates. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 15 +++++++++++---- src/tests/ftest/pool/eviction_metrics.yaml | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 263da7cd9da..d9f14178880 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -51,7 +51,14 @@ def test_eviction_metrics(self): expected_ranges = self.telemetry.collect_data(evict_metrics) for metric in sorted(expected_ranges): for label in expected_ranges[metric]: - expected_ranges[metric][label] = [0, 0] + if pool.mem_ratio.value is not None and label.endswith('_hit'): + expected_ranges[metric][label] = [0, 100] + elif pool.mem_ratio.value is not None and label.endswith('_miss'): + expected_ranges[metric][label] = [0, 5] + elif pool.mem_ratio.value is not None and label.endswith('_ne'): + expected_ranges[metric][label] = [0, 5] + else: + expected_ranges[metric][label] = [0, 0] self.log_step('Verify pool eviction metrics after pool creation') if not self.telemetry.verify_data(expected_ranges): @@ -74,10 +81,10 @@ def test_eviction_metrics(self): expected_ranges = self.telemetry.collect_data(evict_metrics) for metric in sorted(expected_ranges): for label in expected_ranges[metric]: - if self.server_managers[0].manager.job.using_control_metadata: - expected_ranges[metric][label] = [1, 1] - else: + if pool.mem_ratio.value is None: expected_ranges[metric][label] = [0, 0] + else: + expected_ranges[metric][label] = [1, 1000] self.log_step('Verify pool eviction metrics after writing data') if not self.telemetry.verify_data(expected_ranges): diff --git a/src/tests/ftest/pool/eviction_metrics.yaml 
b/src/tests/ftest/pool/eviction_metrics.yaml index 66a0ca51bd6..bf1a4b0c895 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -35,6 +35,7 @@ mdtest: ppn: 32 test_dir: "/" api: DFS + flags: "-C -F -P -G 27 -N 1 -Y -v -u -L" branching_factor: 1 depth: 20 write_bytes: 4096 From b2df76006e15338cee263fa1a92f37b2bc80f797 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 19 Dec 2025 11:51:21 -0500 Subject: [PATCH 23/32] Add debug. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index d9f14178880..1e17ac85a4c 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -41,7 +41,8 @@ def test_eviction_metrics(self): mem_file_bytes = int(_result["response"]["mem_file_bytes"]) except Exception as error: # pylint: disable=broad-except self.fail(f"Error extracting mem_file_bytes for dmg pool create output: {error}") - self.log.debug("%s mem_file_bytes: %s", pool, mem_file_bytes) + self.log.debug("%s mem_file_bytes: %s", pool, mem_file_bytes) + self.log.debug("%s mem_ratio.value: %s", pool, pool.mem_ratio.value) self.log_step('Creating a container (dmg container create)') container = self.get_container(pool) @@ -59,6 +60,7 @@ def test_eviction_metrics(self): expected_ranges[metric][label] = [0, 5] else: expected_ranges[metric][label] = [0, 0] + self.log.debug("%s expected_ranges: %s", pool, expected_ranges) self.log_step('Verify pool eviction metrics after pool creation') if not self.telemetry.verify_data(expected_ranges): @@ -85,6 +87,7 @@ def test_eviction_metrics(self): expected_ranges[metric][label] = [0, 0] else: expected_ranges[metric][label] = [1, 1000] + self.log.debug("%s expected_ranges: %s", pool, 
expected_ranges) self.log_step('Verify pool eviction metrics after writing data') if not self.telemetry.verify_data(expected_ranges): From 76a4fa3469e8522538a39471466888f4343ab5b3 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 6 Jan 2026 12:02:32 -0500 Subject: [PATCH 24/32] Test fixes. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 8 ++++---- src/tests/ftest/pool/eviction_metrics.yaml | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 1e17ac85a4c..22fad224774 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -52,11 +52,11 @@ def test_eviction_metrics(self): expected_ranges = self.telemetry.collect_data(evict_metrics) for metric in sorted(expected_ranges): for label in expected_ranges[metric]: - if pool.mem_ratio.value is not None and label.endswith('_hit'): + if pool.mem_ratio.value is not None and metric.endswith('_hit'): expected_ranges[metric][label] = [0, 100] - elif pool.mem_ratio.value is not None and label.endswith('_miss'): + elif pool.mem_ratio.value is not None and metric.endswith('_miss'): expected_ranges[metric][label] = [0, 5] - elif pool.mem_ratio.value is not None and label.endswith('_ne'): + elif pool.mem_ratio.value is not None and metric.endswith('_ne'): expected_ranges[metric][label] = [0, 5] else: expected_ranges[metric][label] = [0, 0] diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index bf1a4b0c895..28861455eeb 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml 
@@ -31,11 +31,12 @@ container: mdtest: dfs_oclass: S1 dfs_dir_oclass: SX + dfs_destroy: False manager: "MPICH" ppn: 32 test_dir: "/" api: DFS - flags: "-C -F -P -G 27 -N 1 -Y -v -u -L" + flags: "-C -F -G 27 -N 1 -Y -u -L" branching_factor: 1 depth: 20 write_bytes: 4096 From d661c2dea3375f848f09b0058cf663e71cb242ef Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 7 Jan 2026 00:13:08 -0500 Subject: [PATCH 25/32] Move to HW Medium Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 2 +- src/tests/ftest/pool/eviction_metrics.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 22fad224774..ee0eae51c5b 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -28,7 +28,7 @@ def test_eviction_metrics(self): 5. Verify page eviction :avocado: tags=all,daily_regression - :avocado: tags=hw,large + :avocado: tags=hw,medium :avocado: tags=pool :avocado: tags=EvictionMetrics,test_eviction_metrics """ diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 28861455eeb..0f6578b421e 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -3,7 +3,7 @@ launch: hosts: test_servers: 1 - test_clients: 7 + test_clients: 3 timeout: 16000 From db2cada28e7470c18e4c4fec2f8133681f701e67 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 8 Jan 2026 07:58:07 -0500 Subject: [PATCH 26/32] Remove mdtest depth arg Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 
0f6578b421e..41c5c41c3d6 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -38,6 +38,5 @@ mdtest: api: DFS flags: "-C -F -G 27 -N 1 -Y -u -L" branching_factor: 1 - depth: 20 write_bytes: 4096 read_bytes: 4096 From f2b1552e88be8c847bdee5b738b00413b2ccfe28 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 8 Jan 2026 16:16:59 -0500 Subject: [PATCH 27/32] Remove debug. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index ee0eae51c5b..28041f69e65 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -72,8 +72,6 @@ def test_eviction_metrics(self): read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / read_bytes) + 1} - # Debug - mdtest_params["num_of_files_dirs"] /= 1000 run_mdtest( self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, mdtest_params=mdtest_params) @@ -86,7 +84,7 @@ def test_eviction_metrics(self): if pool.mem_ratio.value is None: expected_ranges[metric][label] = [0, 0] else: - expected_ranges[metric][label] = [1, 1000] + expected_ranges[metric][label] = [1, 10000000] self.log.debug("%s expected_ranges: %s", pool, expected_ranges) self.log_step('Verify pool eviction metrics after writing data') From 06a8c40d92cadec060165a7aa4ea21db1718db3c Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 13 Jan 2026 23:35:45 -0500 Subject: [PATCH 28/32] Debug Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 4 ++++ 1 file changed, 
4 insertions(+) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 28041f69e65..bb6a4d7ff56 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -72,6 +72,10 @@ def test_eviction_metrics(self): read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / read_bytes) + 1} + + # Debug + mdtest_params["num_of_files_dirs"] /= 100 + run_mdtest( self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, mdtest_params=mdtest_params) From 2f08c8ace24e742ca18183bbd4e9112236a0023a Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 14 Jan 2026 22:48:34 -0500 Subject: [PATCH 29/32] Smaller write size Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 6 +++--- src/tests/ftest/pool/eviction_metrics.yaml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index bb6a4d7ff56..e00bb17ccbd 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -69,12 +69,12 @@ def test_eviction_metrics(self): self.log_step('Writing data to the pool (mdtest -a DFS)') manager = get_job_manager(self, subprocess=False, timeout=None) processes = self.params.get('processes', MDTEST_NAMESPACE, None) - read_bytes = self.params.get('read_bytes', MDTEST_NAMESPACE, None) + write_bytes = self.params.get('write_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) - mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / read_bytes) + 1} + mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / write_bytes) + 1} # Debug - mdtest_params["num_of_files_dirs"] 
/= 100 + mdtest_params["num_of_files_dirs"] /= 500 run_mdtest( self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index 41c5c41c3d6..cdce64cd3ed 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -38,5 +38,5 @@ mdtest: api: DFS flags: "-C -F -G 27 -N 1 -Y -u -L" branching_factor: 1 - write_bytes: 4096 - read_bytes: 4096 + write_bytes: 1024 + read_bytes: 1024 From 2f54bd5014271bd6c6f1fac7ec7e0c0b772c398f Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 15 Jan 2026 23:52:59 -0500 Subject: [PATCH 30/32] Reducing pool size. Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 4 ---- src/tests/ftest/pool/eviction_metrics.yaml | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index e00bb17ccbd..4f487500bac 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -72,10 +72,6 @@ def test_eviction_metrics(self): write_bytes = self.params.get('write_bytes', MDTEST_NAMESPACE, None) ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / write_bytes) + 1} - - # Debug - mdtest_params["num_of_files_dirs"] /= 500 - run_mdtest( self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, mdtest_params=mdtest_params) diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml index cdce64cd3ed..b90e3515daa 100644 --- a/src/tests/ftest/pool/eviction_metrics.yaml +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -18,9 +18,9 @@ server_config: pool: !mux default: - size: 100% + size: 10% md_on_ssd_p2: - size: 
100% + size: 10% mem_ratio: 25 container: From 8f1e99c50d06b03eee7748cedd0b1aa5a91cd43a Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 16 Jan 2026 16:26:22 -0500 Subject: [PATCH 31/32] Updated mdtest -n calculation Skip-unit-tests: true Skip-fault-injection-test: true Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 42 ++++++++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 4f487500bac..8111e8246cd 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -32,17 +32,49 @@ def test_eviction_metrics(self): :avocado: tags=pool :avocado: tags=EvictionMetrics,test_eviction_metrics """ + write_bytes = self.params.get('write_bytes', MDTEST_NAMESPACE, None) + processes = self.params.get('processes', MDTEST_NAMESPACE, None) + ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) + evict_metrics = list(self.telemetry.ENGINE_POOL_VOS_CACHE_METRICS) self.log_step('Creating a pool (dmg pool create)') pool = self.get_pool(connect=False) try: _result = json.loads(pool.dmg.result.stdout) + tier_bytes_scm = int(_result["response"]["tier_bytes"][0]) mem_file_bytes = int(_result["response"]["mem_file_bytes"]) except Exception as error: # pylint: disable=broad-except - self.fail(f"Error extracting mem_file_bytes for dmg pool create output: {error}") - self.log.debug("%s mem_file_bytes: %s", pool, mem_file_bytes) - self.log.debug("%s mem_ratio.value: %s", pool, pool.mem_ratio.value) + self.fail(f"Error extracting data for dmg pool create output: {error}") + + # Calculate the mdtest files_per_process based upon the scm size and other mdtest params + _write_processes = processes + if ppn is not None: + _write_processes = ppn * len(self.host_info.clients.hosts) + files_per_process = math.floor(mem_file_bytes / (write_bytes * _write_processes)) + if tier_bytes_scm 
> mem_file_bytes: + # Write more files to exceed mem_file_bytes and cause eviction + mdtest_params = {"num_of_files_dirs": math.ceil(files_per_process * 1.10)} + else: + # Write less files to avoid out of space errors + mdtest_params = {"num_of_files_dirs": math.ceil(files_per_process * 0.9)} + + self.log.debug("-" * 60) + self.log.debug("Pool %s create data:", pool) + self.log.debug(" tier_bytes_scm: %s", tier_bytes_scm) + self.log.debug(" mem_file_bytes: %s", mem_file_bytes) + self.log.debug(" mem_ratio.value: %s", pool.mem_ratio.value) + self.log.debug("Mdtest write parameters:") + self.log.debug(" write_bytes: %s", write_bytes) + if ppn is not None: + self.log.debug(" ppn / nodes: %s / %s", ppn, len(self.host_info.clients.hosts)) + else: + self.log.debug(" processes: %s", processes) + self.log.debug(" files_per_process: %s", files_per_process) + self.log.debug(" num_of_files_dirs: %s", mdtest_params["num_of_files_dirs"]) + self.log.debug(" expected to write: %s", + _write_processes * write_bytes * mdtest_params["num_of_files_dirs"]) + self.log.debug("-" * 60) self.log_step('Creating a container (dmg container create)') container = self.get_container(pool) @@ -68,10 +100,6 @@ def test_eviction_metrics(self): self.log_step('Writing data to the pool (mdtest -a DFS)') manager = get_job_manager(self, subprocess=False, timeout=None) - processes = self.params.get('processes', MDTEST_NAMESPACE, None) - write_bytes = self.params.get('write_bytes', MDTEST_NAMESPACE, None) - ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) - mdtest_params = {"num_of_files_dirs": math.ceil(mem_file_bytes / write_bytes) + 1} run_mdtest( self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, mdtest_params=mdtest_params) From 92fc1780d09063b37ba374d49e0765b2cf612c32 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 16 Jan 2026 23:06:05 -0500 Subject: [PATCH 32/32] Adjusting num_of_files_dirs. 
Skip-unit-tests: true Skip-fault-injection-test: true Skip-func-hw-test-medium: false Test-tag: EvictionMetrics Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py index 8111e8246cd..33b5524327d 100644 --- a/src/tests/ftest/pool/eviction_metrics.py +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -53,11 +53,11 @@ def test_eviction_metrics(self): _write_processes = ppn * len(self.host_info.clients.hosts) files_per_process = math.floor(mem_file_bytes / (write_bytes * _write_processes)) if tier_bytes_scm > mem_file_bytes: - # Write more files to exceed mem_file_bytes and cause eviction - mdtest_params = {"num_of_files_dirs": math.ceil(files_per_process * 1.10)} + # Write more (125%) files to exceed mem_file_bytes and cause eviction + mdtest_params = {"num_of_files_dirs": math.ceil(files_per_process * 1.25)} else: - # Write less files to avoid out of space errors - mdtest_params = {"num_of_files_dirs": math.ceil(files_per_process * 0.9)} + # Write fewer (75%) files to avoid out-of-space errors + mdtest_params = {"num_of_files_dirs": math.floor(files_per_process * 0.75)} self.log.debug("-" * 60) self.log.debug("Pool %s create data:", pool) @@ -85,13 +85,13 @@ def test_eviction_metrics(self): for metric in sorted(expected_ranges): for label in expected_ranges[metric]: if pool.mem_ratio.value is not None and metric.endswith('_hit'): - expected_ranges[metric][label] = [0, 100] + expected_ranges[metric][label] = [0, 100] # 0-100 (phase 2) elif pool.mem_ratio.value is not None and metric.endswith('_miss'): - expected_ranges[metric][label] = [0, 5] + expected_ranges[metric][label] = [0, 5] # 0-5 (phase 2) elif pool.mem_ratio.value is not None and metric.endswith('_ne'): - expected_ranges[metric][label] = [0, 5] + expected_ranges[metric][label] = [0, 5] # 0-5 (phase 2) else: -
expected_ranges[metric][label] = [0, 0] + expected_ranges[metric][label] = [0, 0] # 0 only self.log.debug("%s expected_ranges: %s", pool, expected_ranges) self.log_step('Verify pool eviction metrics after pool creation') @@ -110,9 +110,11 @@ def test_eviction_metrics(self): for metric in sorted(expected_ranges): for label in expected_ranges[metric]: if pool.mem_ratio.value is None: - expected_ranges[metric][label] = [0, 0] + expected_ranges[metric][label] = [0, 0] # 0 only (phase 1) + elif metric.endswith('_page_flush'): + expected_ranges[metric][label] = [0] # 0 or greater (phase 2) else: - expected_ranges[metric][label] = [1, 10000000] + expected_ranges[metric][label] = [1, 10000000] # 1-10,000,000 (phase 2) self.log.debug("%s expected_ranges: %s", pool, expected_ranges) self.log_step('Verify pool eviction metrics after writing data')