Merge pull request #10 from ACCESS-NRI/profiling

edoyango · web-flow · commit 4c7acfd4959a · 2025-09-18T11:40:34.000+10:00
Add FMS profiling parsers
diff --git a/src/access/parsers/fms_profiling.py b/src/access/parsers/fms_profiling.py
@@ -0,0 +1,65 @@
+# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Parser for FMS profiling data, such as output by MOM5 and MOM6.
+The data to be parsed is written in the following form:
+
+                                    hits          tmin          tmax          tavg          tstd  tfrac grain pemin pemax
+Total runtime                         1    138.600364    138.600366    138.600365      0.000001  1.000     0     0    11
+Ocean Initialization                  2      2.344926      2.345701      2.345388      0.000198  0.017    11     0    11
+Ocean                                23     86.869466     86.871652     86.870450      0.000744  0.627     1     0    11
+Ocean dynamics                       96     43.721019     44.391032     43.957944      0.244785  0.317    11     0    11
+Ocean thermodynamics and tracers     72     27.377185     33.281659     29.950144      1.792324  0.216    11     0    11
+ MPP_STACK high water mark=           0
+"""
+
+from access.parsers.profiling import ProfilingParser, _convert_from_string
+import re
+
+
+class FMSProfilingParser(ProfilingParser):
+    """Parser for the mpp_clock timing table written by FMS, such as output by MOM5 and MOM6."""
+
+    def __init__(self, has_hits: bool = True):
+        """Instantiate FMS profiling parser.
+
+        Args:
+            has_hits (bool): whether the FMS timing table contains the "hits" column.
+        """
+        super().__init__()
+        # Metric names, in the order in which their columns appear in the FMS table header.
+        self._metrics = ["hits"] if has_hits else []
+        self._metrics += ["tmin", "tmax", "tavg", "tstd", "tfrac", "grain", "pemin", "pemax"]
+
+    @property
+    def metrics(self) -> list:
+        """list: Names of the metrics parsed for each region, in column order."""
+        return self._metrics
+
+    def read(self, stream: str) -> dict:
+        """Parse the FMS timing table contained in the given text.
+
+        Args:
+            stream (str): text to parse; the table must be followed by the "MPP_STACK high water mark" line.
+
+        Returns:
+            dict: region labels under "region", plus one list per metric holding one value per region.
+
+        Raises:
+            ValueError: if the text does not contain an FMS timing table with the expected columns.
+        """
+        # The table body is whatever lies between the column header and the MPP_STACK footer.
+        header = r"\s*" + r"\s*".join(self._metrics) + r"\s*"
+        footer = r" MPP_STACK high water mark=\s*\d*"
+        section = re.search(header + r"(.*)" + footer, stream, re.DOTALL)
+        if section is None:
+            raise ValueError("No FMS profiling data found: missing timing table header or MPP_STACK footer.")
+        # One line per region: a label that does not end in whitespace, then one number per metric.
+        columns = "".join(r"\s+(?P<" + metric + r">[0-9.]+)" for metric in self.metrics)
+        region_p = re.compile(r"^\s*(?P<region>[a-zA-Z:()_/\-*&\s]+(?<!\s))" + columns + r"$", re.MULTILINE)
+        stats = {"region": [], **{metric: [] for metric in self.metrics}}
+        for line in region_p.finditer(section.group(1)):
+            stats["region"].append(line.group("region"))
+            for metric in self.metrics:
+                stats[metric].append(_convert_from_string(line.group(metric)))
+        return stats
diff --git a/src/access/parsers/profiling.py b/src/access/parsers/profiling.py
@@ -0,0 +1,63 @@
+# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Classes and utilities to build profiling parsers for reading profiling data."""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+class ProfilingParser(ABC):
+    """Abstract parser of profiling data.
+
+    The main purpose of a parser of profiling data is to take the raw profiling output, passed to `read` as text, and
+    return it in a standard format.
+
+    Once parsed, the profiling data should be stored in a dict in the following way:
+
+    {
+        'region': ['region1', 'region2', ...],
+        'metric a': [val1a, val2a, ...],
+        'metric b': [val1b, val2b, ...],
+        ...
+    }
+
+    The 'region' values are the labels of the profiled regions. Then, for each metric, there is a list of values, one
+    for each profiling region, in the same order. Therefore, 'val1a' is the value for metric a of region 1.
+    """
+
+    def __init__(self):
+        pass
+
+    @property
+    @abstractmethod
+    def metrics(self) -> list:
+        """list: Metrics available when using this parser."""
+
+    @abstractmethod
+    def read(self, stream: str) -> dict:
+        """Parse the given text.
+
+        Args:
+            stream (str): text to parse.
+
+        Returns:
+            dict: profiling data, in the standard format described in the class docstring.
+        """
+
+
+def _convert_from_string(value: str) -> Any:
+    """Convert a string to an int if possible, else to a float; return it unchanged if neither conversion succeeds.
+
+    Args:
+        value (str): string to convert.
+
+    Returns:
+        Any: the value as an int or a float, or the original value if it is not numeric.
+    """
+    for type_conversion in (int, float):
+        try:
+            return type_conversion(value)
+        except (TypeError, ValueError):  # not a valid literal for this type: try the next, wider one
+            continue
+    return value
diff --git a/tests/test_fms_profiling.py b/tests/test_fms_profiling.py
@@ -0,0 +1,166 @@
+# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from access.parsers.fms_profiling import FMSProfilingParser
+
+
+@pytest.fixture(scope="module")
+def fms_hits_parser():
+    """Fixture instantiating the FMS parser for timing tables that include the "hits" column."""
+    return FMSProfilingParser()
+
+
+@pytest.fixture(scope="module")
+def fms_nohits_parser():
+    """Fixture instantiating the FMS parser for timing tables that lack the "hits" column."""
+    return FMSProfilingParser(has_hits=False)
+
+
+@pytest.fixture(scope="module")
+def fms_nohits_profiling():
+    """Fixture returning the expected parsed regions and tmin/tmax/tavg/tstd values for the log without hits."""
+    return {
+        "region": [
+            "Total runtime",
+            "Ocean",
+            "(Ocean initialization)",
+            "(Ocean ODA)",
+            "(Red Sea/Gulf Bay salinity fix)",
+            "OASIS init",
+            "oasis_recv",
+            "oasis_send",
+        ],
+        "tmin": [16282.797785, 15969.542784, 4.288529, 0.0, 0.024143, 0.231678, 168.797136, 2.468914],
+        "tmax": [16282.797792, 16000.704550, 4.296586, 0.0, 0.077235, 0.232671, 171.648384, 2.756777],
+        "tavg": [16282.797789, 15986.765795, 4.291991, 0.0, 0.040902, 0.232397, 170.460762, 2.593809],
+        "tstd": [0.000001, 8.643639, 0.001470, 0.0, 0.013836, 0.000242, 0.650894, 0.079459],
+    }
+
+
+@pytest.fixture(scope="module")
+def fms_nohits_log_file():
+    """Fixture returning an excerpt of a MOM5 log holding the FMS timing content without hits column."""
+    return """ MPP_DOMAINS_STACK high water mark=      747000
+
+Tabulating mpp_clock statistics across     49 PEs...
+
+                                          tmin          tmax          tavg          tstd  tfrac grain pemin pemax
+Total runtime                     16282.797785  16282.797792  16282.797789      0.000001  1.000     0     0    48
+Ocean                             15969.542784  16000.704550  15986.765795      8.643639  0.982     1     0    48
+(Ocean initialization)                4.288529      4.296586      4.291991      0.001470  0.000    11     0    48
+(Ocean ODA)                           0.000000      0.000000      0.000000      0.000000  0.000    11     0    48
+(Red Sea/Gulf Bay salinity fix)       0.024143      0.077235      0.040902      0.013836  0.000    31     0    48
+OASIS init                            0.231678      0.232671      0.232397      0.000242  0.000     1     0    48
+oasis_recv                          168.797136    171.648384    170.460762      0.650894  0.010    31     0    48
+oasis_send                            2.468914      2.756777      2.593809      0.079459  0.000    31     0    48
+ MPP_STACK high water mark=           0
+ MOM5: --- completed ---
+"""
+
+
+@pytest.fixture(scope="module")
+def fms_hits_profiling():
+    """Fixture returning the expected parsed regions and hits/tmin/tmax/tavg/tstd values for the log with hits."""
+    return {
+        "region": [
+            "Total runtime",
+            "Initialization",
+            "Main loop",
+            "Termination",
+            "Ocean Initialization",
+            "Ocean",
+            "Ocean dynamics",
+            "Ocean thermodynamics and tracers",
+            "Ocean grid generation and remapp",
+            "Ocean Other",
+            "(Ocean tracer advection)",
+        ],
+        "hits": [1, 1, 1, 1, 2, 24, 192, 72, 0, 192, 48],
+        "tmin": [
+            100.641190,
+            0.987726,
+            98.930085,
+            0.718969,
+            1.529830,
+            98.279247,
+            84.799971,
+            11.512013,
+            0.0,
+            1.710326,
+            4.427230,
+        ],
+        "tmax": [
+            100.641190,
+            0.987726,
+            98.930085,
+            0.718969,
+            1.529830,
+            98.279247,
+            84.799971,
+            11.512013,
+            0.0,
+            1.710326,
+            4.427230,
+        ],
+        "tavg": [
+            100.641190,
+            0.987726,
+            98.930085,
+            0.718969,
+            1.529830,
+            98.279247,
+            84.799971,
+            11.512013,
+            0.000000,
+            1.710326,
+            4.427230,
+        ],
+        "tstd": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    }
+
+
+@pytest.fixture(scope="module")
+def fms_hits_log_file():
+    """Fixture returning the FMS timing content with hits column."""
+    return """ MPP_DOMAINS_STACK high water mark=      380512
+
+Tabulating mpp_clock statistics across      1 PEs...
+
+                                      hits          tmin          tmax          tavg          tstd  tfrac grain pemin pemax
+Total runtime                            1    100.641190    100.641190    100.641190      0.000000  1.000     0     0     0
+Initialization                           1      0.987726      0.987726      0.987726      0.000000  0.010     0     0     0
+Main loop                                1     98.930085     98.930085     98.930085      0.000000  0.983     0     0     0
+Termination                              1      0.718969      0.718969      0.718969      0.000000  0.007     0     0     0
+Ocean Initialization                     2      1.529830      1.529830      1.529830      0.000000  0.015    11     0     0
+Ocean                                   24     98.279247     98.279247     98.279247      0.000000  0.977     1     0     0
+Ocean dynamics                         192     84.799971     84.799971     84.799971      0.000000  0.843    11     0     0
+Ocean thermodynamics and tracers        72     11.512013     11.512013     11.512013      0.000000  0.114    11     0     0
+Ocean grid generation and remapp         0      0.000000      0.000000      0.000000      0.000000  0.000    11     0     0
+Ocean Other                            192      1.710326      1.710326      1.710326      0.000000  0.017    11     0     0
+(Ocean tracer advection)                48      4.427230      4.427230      4.427230      0.000000  0.044    21     0     0
+ MPP_STACK high water mark=           0
+"""
+
+
+def test_fms_nohits_profiling(fms_nohits_parser, fms_nohits_log_file, fms_nohits_profiling):
+    """Test the correct parsing of FMS timing information without hits column."""
+    parsed_log = fms_nohits_parser.read(fms_nohits_log_file)
+    # Region labels must match exactly and in order, since metric values are compared by position.
+    assert parsed_log["region"] == fms_nohits_profiling["region"], "Incorrect regions in mom5 parsed log"
+    for idx, region in enumerate(fms_nohits_profiling["region"]):
+        for metric in ("tmin", "tmax", "tavg", "tstd"):
+            expected, parsed = fms_nohits_profiling[metric][idx], parsed_log[metric][idx]
+            assert expected == parsed, f"Incorrect {metric} for region {region} (idx: {idx})."
+
+
+def test_mom6_profiling(fms_hits_parser, fms_hits_log_file, fms_hits_profiling):
+    """Test the correct parsing of FMS timing information with hits column."""
+    parsed_log = fms_hits_parser.read(fms_hits_log_file)
+    # Region labels must match exactly and in order, since metric values are compared by position.
+    assert parsed_log["region"] == fms_hits_profiling["region"], "Incorrect regions in mom6 parsed log"
+    for idx, region in enumerate(fms_hits_profiling["region"]):
+        for metric in ("hits", "tmin", "tmax", "tavg", "tstd"):
+            expected, parsed = fms_hits_profiling[metric][idx], parsed_log[metric][idx]
+            assert expected == parsed, f"Incorrect {metric} for region {region} (idx: {idx})."
diff --git a/tests/test_profiling.py b/tests/test_profiling.py
@@ -0,0 +1,67 @@
+# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from access.parsers.profiling import ProfilingParser, _convert_from_string
+
+
+class MockProfilingParser(ProfilingParser):
+    """A mock concrete ProfilingParser whose read() returns canned data looked up by stream name."""
+
+    def __init__(self, data: dict):
+        self._metrics = ["hits", "tmin", "tmax", "tavg"]
+        self._data = data
+
+    @property
+    def metrics(self) -> list:
+        return self._metrics
+
+    def read(self, stream: str) -> dict:
+        return self._data[stream]
+
+
+@pytest.fixture(scope="module")
+def profiling_data():
+    """Fixture returning fake parsed profiling data in the standard parser format, keyed by stream name."""
+    return {
+        "1cpu_stream": {
+            "region": ["Total runtime", "Ocean Initialization"],
+            "hits": [1, 2],
+            "tmin": [138.600364, 2.344926],
+            "tmax": [138.600366, 2.345701],
+            "tavg": [138.600365, 2.345388],
+        },
+        "2cpu_stream": {
+            "region": ["Total runtime", "Ocean Initialization"],
+            "hits": [3, 4],
+            "tmin": [69.300182, 1.162463],
+            "tmax": [49.300182, 1.162463],
+            "tavg": [300182.5, 1.172694],
+        },
+    }
+
+
+def test_base_parser(profiling_data):
+    """Check the metrics property and read() of a concrete subclass of the abstract ProfilingParser."""
+
+    mock_parser = MockProfilingParser(profiling_data)
+
+    assert mock_parser.metrics == ["hits", "tmin", "tmax", "tavg"], "Incorrect metrics returned in MockProfilingParser!"
+    for name in ("1cpu_stream", "2cpu_stream"):
+        assert mock_parser.read(name) == profiling_data[name], f'Incorrect profiling stats returned for "{name}"'
+
+
+def test_str2num():
+    """Tests conversion of strings to the most appropriate type."""
+    str2int = _convert_from_string("42")
+    assert type(str2int) is int
+    assert str2int == 42
+    str2float = _convert_from_string("-1.23")
+    assert type(str2float) is float
+    assert str2float == -1.23
+    str2float = _convert_from_string("0.00000")
+    assert type(str2float) is float and str2float == 0.0  # must not collapse to the int 0
+    str2str = _convert_from_string("somestr")
+    assert type(str2str) is str
+    assert str2str == "somestr"