diff --git a/src/access/parsers/cice5_profiling.py b/src/access/parsers/cice5_profiling.py new file mode 100644 index 0000000..8ed5d04 --- /dev/null +++ b/src/access/parsers/cice5_profiling.py @@ -0,0 +1,76 @@ +# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 + +"""Parser for CICE5 profiling data. +The data to be parsed is written in the following form, where bloc stats are discarded: + +Timer 1: Total 8133.37 seconds + Timer stats (node): min = 8133.36 seconds + max = 8133.37 seconds + mean= 8133.36 seconds + Timer stats(block): min = 0.00 seconds + max = 0.00 seconds + mean= 0.00 seconds +Timer 2: TimeLoop 8133.00 seconds + Timer stats (node): min = 8132.99 seconds + max = 8133.00 seconds + mean= 8132.99 seconds + Timer stats(block): min = 0.00 seconds + max = 0.00 seconds + mean= 0.00 seconds +""" + +from access.parsers.profiling import ProfilingParser +import re + + +class CICE5ProfilingParser(ProfilingParser): + """CICE5 profiling output parser.""" + + def __init__(self): + super().__init__() + self._metrics = ["min", "max", "mean"] + + @property + def metrics(self) -> list: + """Implements "metrics" abstract method/property. + + Returns: + list: the metric names captured by this parser. + """ + return self._metrics + + def read(self, stream: str) -> dict: + """Implements "read" abstract method to parse profiling data in CICE5 log output. + + Args: + stream (str): String containing the CICE5 log to be parsed. + + Returns: + dict: Parsed timing information. + + Raises: + ValueError: If matching timings aren't found. + """ + # Initialize result dictionary + result = {"region": [], "min": [], "max": [], "mean": []} + + # Regex pattern to match timer blocks + # This captures the region name and the three node timing values + pattern = r"Timer\s+\d+:\s+(\w+)\s+[\d.]+\s+seconds\s+Timer stats \(node\): min =\s+([\d.]+) seconds\s+max =\s+([\d.]+) seconds\s+mean=\s+([\d.]+) seconds" + + # Find all matches + matches = re.findall(pattern, stream, re.MULTILINE | re.DOTALL) + + if not matches: + raise ValueError("No CICE5 profiling data found") + + # Extract data from matches + for match in matches: + region, min_time, max_time, mean_time = match + result["region"].append(region) + result["min"].append(float(min_time)) + result["max"].append(float(max_time)) + result["mean"].append(float(mean_time)) + + return result diff --git a/tests/test_cice5_profiling.py b/tests/test_cice5_profiling.py new file mode 100644 index 0000000..a7608a8 --- /dev/null +++ b/tests/test_cice5_profiling.py @@ -0,0 +1,96 @@ +# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from access.parsers.cice5_profiling import CICE5ProfilingParser + + +@pytest.fixture(scope="module") +def cice5_required_metrics(): + return ("min", "max", "mean") + + +@pytest.fixture(scope="module") +def cice5_parser(): + """Fixture instantiating the CICE5 parser.""" + return CICE5ProfilingParser() + + +@pytest.fixture(scope="module") +def cice5_profiling(): + """Fixture returning a dict holding the parsed CICE5 timing content.""" + return { + "region": ["Total", "TimeLoop"], + "min": [16197.42, 16197.14], + "max": [16197.47, 16197.19], + "mean": [16197.44, 16197.16], + } + + +@pytest.fixture(scope="module") +def cice5_log_file(): + """Fixture returning the CICE5 timing content.""" + return """ -------------------------------- + CICE model diagnostic output + -------------------------------- + + Document ice_in namelist parameters: + ==================================== + + runtype = continue + Restart read/written 17520 58159382400.0000 + 0.000000000000000E+000 + +Timing information: + +Timer 1: Total 16197.47 seconds + Timer stats (node): min = 16197.42 seconds + max = 16197.47 seconds + mean= 16197.44 seconds + Timer stats(block): min = 0.00 seconds + max = 0.00 seconds + mean= 0.00 seconds +Timer 2: TimeLoop 16197.19 seconds + Timer stats (node): min = 16197.14 seconds + max = 16197.19 seconds + mean= 16197.16 seconds + Timer stats(block): min = 0.00 seconds + max = 0.00 seconds + mean= 0.00 seconds +""" + + +@pytest.fixture(scope="module") +def cice5_incorrect_log_file(): + """Fixture returning an incorrect CICE5 timing content.""" + return """Timer stats (node): min = 16197.42 seconds + max = 16197.47 seconds + mean= 16197.44 seconds + Timer stats(block): min = 0.00 seconds + max = 0.00 seconds + mean= 0.00 seconds""" + + +def test_cice5_profiling(cice5_required_metrics, cice5_parser, cice5_log_file, cice5_profiling): + """Test the correct parsing of CICE5 timing information.""" + parsed_log = cice5_parser.read(cice5_log_file) + + # check metrics are present in parser and parsed output + for metric in cice5_required_metrics: + assert metric in cice5_parser.metrics, f"{metric} metric not found in CICE5 parser metrics." + assert metric in parsed_log, f"{metric} metric not found in CICE5 parsed log." + + # check content for each metric is correct + for idx, region in enumerate(cice5_profiling["region"]): + assert region in parsed_log["region"], f"{region} not found in CICE5 parsed log" + for metric in cice5_required_metrics: + assert ( + cice5_profiling[metric][idx] == parsed_log[metric][idx] + ), f"Incorrect {metric} for region {region} (idx: {idx})." + + +def test_cice5_incorrect_profiling(cice5_parser, cice5_incorrect_log_file): + """Test the parsing of incirrect CICE5 timing information.""" + with pytest.raises(ValueError): + cice5_parser.read(cice5_incorrect_log_file)