Skip to content

Commit 4c7acfd

Browse files
authored
Merge pull request #10 from ACCESS-NRI/profiling
Add FMS profiling parsers
2 parents a4767e3 + b6e6c44 commit 4c7acfd

File tree

4 files changed

+361
-0
lines changed

4 files changed

+361
-0
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""Parser for FMS profiling data, such as output by MOM5 and MOM6.
5+
The data to be parsed is written in the following form:
6+
7+
hits tmin tmax tavg tstd tfrac grain pemin pemax
8+
Total runtime 1 138.600364 138.600366 138.600365 0.000001 1.000 0 0 11
9+
Ocean Initialization 2 2.344926 2.345701 2.345388 0.000198 0.017 11 0 11
10+
Ocean 23 86.869466 86.871652 86.870450 0.000744 0.627 1 0 11
11+
Ocean dynamics 96 43.721019 44.391032 43.957944 0.244785 0.317 11 0 11
12+
Ocean thermodynamics and tracers 72 27.377185 33.281659 29.950144 1.792324 0.216 11 0 11
13+
MPP_STACK high water mark= 0
14+
"""
15+
16+
from access.parsers.profiling import ProfilingParser, _convert_from_string
17+
import re
18+
19+
20+
class FMSProfilingParser(ProfilingParser):
    """FMS profiling output parser.

    The profiling section is located between a header line listing the metric names and the
    "MPP_STACK high water mark" footer line. Each line in between holds a region name followed by one
    numeric value per metric.
    """

    def __init__(self, has_hits: bool = True):
        """Instantiate FMS profiling parser.

        Args:
            has_hits (bool): whether FMS timings contains "hits" column.
        """
        super().__init__()

        # FMS provides the following metrics; "hits" is only present in some outputs.
        timing_metrics = ["tmin", "tmax", "tavg", "tstd", "tfrac", "grain", "pemin", "pemax"]
        self._metrics = (["hits"] if has_hits else []) + timing_metrics

    @property
    def metrics(self) -> list:
        """list: Metrics available when using this parser."""
        return self._metrics

    def read(self, stream: str) -> dict:
        """Parse FMS profiling data from the given text.

        Args:
            stream (str): text containing the FMS profiling section.

        Returns:
            dict: profiling data, with a "region" list and one list of values per metric.

        Raises:
            ValueError: if no FMS profiling section (header followed by footer) is found in the text.
        """
        # Regular expression to extract the profiling section from the file
        header = r"\s*" + r"\s*".join(self._metrics) + r"\s*"
        footer = r" MPP_STACK high water mark=\s*\d*"
        profiling_section_p = re.compile(header + r"(.*)" + footer, re.DOTALL)

        # Regular expression to parse the data for each region. The region name may contain letters,
        # whitespace and some punctuation, but no digits, and must not end with whitespace.
        profile_line = r"^\s*(?P<region>[a-zA-Z:()_/\-*&\s]+(?<!\s))"
        for metric in self.metrics:
            profile_line += r"\s+(?P<" + metric + r">[0-9.]+)"
        profile_line += r"$"
        profiling_region_p = re.compile(profile_line, re.MULTILINE)

        # Locate the profiling section, failing explicitly rather than with an AttributeError on None.
        section_match = profiling_section_p.search(stream)
        if section_match is None:
            raise ValueError("No FMS profiling data found")
        profiling_section = section_match.group(1)

        # Parse data
        stats = {"region": []}
        stats.update({m: [] for m in self.metrics})
        for line in profiling_region_p.finditer(profiling_section):
            stats["region"].append(line.group("region"))
            for metric in self.metrics:
                stats[str(metric)].append(_convert_from_string(line.group(metric)))

        return stats

src/access/parsers/profiling.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""Classes and utilities to build profiling parsers for reading profiling data."""
5+
6+
from abc import ABC, abstractmethod
7+
from typing import Any
8+
9+
10+
class ProfilingParser(ABC):
    """Abstract base class for parsers of profiling data.

    A profiling parser reads profiling data from a file or directory and returns it in a standard format.

    The parsed profiling data should be stored in a dict laid out as follows:

        {
            'region': ['region1', 'region2', ...],
            'metric a': [val1a, val2a, ...],
            'metric b': [val1b, val2b, ...],
            ...
        }

    The 'region' entry lists the labels of the profile regions. Every metric entry is a list holding one value
    per profiling region, in the same order: 'val1a' is the value of metric a for region 1.
    """

    def __init__(self):
        """Instantiate the parser. The base class does nothing here."""

    @property
    @abstractmethod
    def metrics(self) -> list:
        """list: Metrics available when using this parser."""

    @abstractmethod
    def read(self, stream: str) -> dict:
        """Parse the given text.

        Args:
            stream (str): text to parse.

        Returns:
            dict: profiling data.
        """
47+
48+
49+
def _convert_from_string(value: str) -> Any:
50+
"""Tries to convert a string to the most appropriate numeric type. Leaves it unchanged if conversion does not succeed.
51+
52+
Args:
53+
value (str): string to convert.
54+
55+
Returns:
56+
Any: the converted string or the original string.
57+
"""
58+
for type_conversion in (int, float):
59+
try:
60+
return type_conversion(value)
61+
except:
62+
continue
63+
return value

tests/test_fms_profiling.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import pytest
5+
6+
from access.parsers.fms_profiling import FMSProfilingParser
7+
8+
9+
@pytest.fixture(scope="module")
def fms_hits_parser():
    """Fixture providing an FMS parser for timings that include the hits column."""
    parser = FMSProfilingParser()
    return parser
13+
14+
15+
@pytest.fixture(scope="module")
def fms_nohits_parser():
    """Fixture providing an FMS parser for timings that lack the hits column."""
    parser = FMSProfilingParser(has_hits=False)
    return parser
19+
20+
21+
@pytest.fixture(scope="module")
def fms_nohits_profiling():
    """Fixture returning a dict holding the expected parsed FMS timings when there is no hits column."""
    # One row per region: (region, tmin, tmax, tavg, tstd).
    rows = [
        ("Total runtime", 16282.797785, 16282.797792, 16282.797789, 0.000001),
        ("Ocean", 15969.542784, 16000.704550, 15986.765795, 8.643639),
        ("(Ocean initialization)", 4.288529, 4.296586, 4.291991, 0.001470),
        ("(Ocean ODA)", 0.0, 0.0, 0.0, 0.0),
        ("(Red Sea/Gulf Bay salinity fix)", 0.024143, 0.077235, 0.040902, 0.013836),
        ("OASIS init", 0.231678, 0.232671, 0.232397, 0.000242),
        ("oasis_recv", 168.797136, 171.648384, 170.460762, 0.650894),
        ("oasis_send", 2.468914, 2.756777, 2.593809, 0.079459),
    ]
    columns = ("region", "tmin", "tmax", "tavg", "tstd")

    # Transpose the rows into one list per column.
    return {name: list(values) for name, values in zip(columns, zip(*rows))}
40+
41+
42+
@pytest.fixture(scope="module")
def fms_nohits_log_file():
    """Fixture returning the raw FMS timing log text without hits column.

    The footer line keeps its leading space: the parser's footer pattern starts with a literal space.
    """
    return """ MPP_DOMAINS_STACK high water mark= 747000

Tabulating mpp_clock statistics across 49 PEs...

tmin tmax tavg tstd tfrac grain pemin pemax
Total runtime 16282.797785 16282.797792 16282.797789 0.000001 1.000 0 0 48
Ocean 15969.542784 16000.704550 15986.765795 8.643639 0.982 1 0 48
(Ocean initialization) 4.288529 4.296586 4.291991 0.001470 0.000 11 0 48
(Ocean ODA) 0.000000 0.000000 0.000000 0.000000 0.000 11 0 48
(Red Sea/Gulf Bay salinity fix) 0.024143 0.077235 0.040902 0.013836 0.000 31 0 48
OASIS init 0.231678 0.232671 0.232397 0.000242 0.000 1 0 48
oasis_recv 168.797136 171.648384 170.460762 0.650894 0.010 31 0 48
oasis_send 2.468914 2.756777 2.593809 0.079459 0.000 31 0 48
 MPP_STACK high water mark= 0
MOM5: --- completed ---
"""
61+
62+
63+
@pytest.fixture(scope="module")
def fms_hits_profiling():
    """Fixture returning a dict holding the expected parsed FMS timings when the hits column is present."""
    region_names = [
        "Total runtime",
        "Initialization",
        "Main loop",
        "Termination",
        "Ocean Initialization",
        "Ocean",
        "Ocean dynamics",
        "Ocean thermodynamics and tracers",
        "Ocean grid generation and remapp",
        "Ocean Other",
        "(Ocean tracer advection)",
    ]
    hit_counts = [1, 1, 1, 1, 2, 24, 192, 72, 0, 192, 48]

    # The expected minimum, maximum and average timings coincide for every region,
    # so they are built from a single list (copied, to keep the three lists independent).
    timings = [
        100.641190,
        0.987726,
        98.930085,
        0.718969,
        1.529830,
        98.279247,
        84.799971,
        11.512013,
        0.0,
        1.710326,
        4.427230,
    ]

    return {
        "region": region_names,
        "hits": hit_counts,
        "tmin": list(timings),
        "tmax": list(timings),
        "tavg": list(timings),
        "tstd": [0.0] * len(region_names),
    }
122+
123+
124+
@pytest.fixture(scope="module")
def fms_hits_log_file():
    """Fixture returning the raw FMS timing log text with hits column.

    The footer line keeps its leading space: the parser's footer pattern starts with a literal space.
    """
    return """ MPP_DOMAINS_STACK high water mark= 380512

Tabulating mpp_clock statistics across 1 PEs...

hits tmin tmax tavg tstd tfrac grain pemin pemax
Total runtime 1 100.641190 100.641190 100.641190 0.000000 1.000 0 0 0
Initialization 1 0.987726 0.987726 0.987726 0.000000 0.010 0 0 0
Main loop 1 98.930085 98.930085 98.930085 0.000000 0.983 0 0 0
Termination 1 0.718969 0.718969 0.718969 0.000000 0.007 0 0 0
Ocean Initialization 2 1.529830 1.529830 1.529830 0.000000 0.015 11 0 0
Ocean 24 98.279247 98.279247 98.279247 0.000000 0.977 1 0 0
Ocean dynamics 192 84.799971 84.799971 84.799971 0.000000 0.843 11 0 0
Ocean thermodynamics and tracers 72 11.512013 11.512013 11.512013 0.000000 0.114 11 0 0
Ocean grid generation and remapp 0 0.000000 0.000000 0.000000 0.000000 0.000 11 0 0
Ocean Other 192 1.710326 1.710326 1.710326 0.000000 0.017 11 0 0
(Ocean tracer advection) 48 4.427230 4.427230 4.427230 0.000000 0.044 21 0 0
 MPP_STACK high water mark= 0
"""
145+
146+
147+
def test_fms_nohits_profiling(fms_nohits_parser, fms_nohits_log_file, fms_nohits_profiling):
    """Test the correct parsing of FMS timing information without hits column."""
    parsed_log = fms_nohits_parser.read(fms_nohits_log_file)

    # Every expected column (region and metrics) must be present in the parsed output.
    for key in fms_nohits_profiling:
        assert key in parsed_log, f"{key} not found in mom5 parsed log"

    # Region labels must match, in order; this also checks that no region was skipped or added.
    expected_regions = fms_nohits_profiling["region"]
    assert parsed_log["region"] == expected_regions, "Incorrect regions in mom5 parsed log"

    # Iterate over the regions (not the dict keys) so that every row of the table is verified.
    for idx, region in enumerate(expected_regions):
        for metric in ("tmin", "tmax", "tavg", "tstd"):
            assert (
                fms_nohits_profiling[metric][idx] == parsed_log[metric][idx]
            ), f"Incorrect {metric} for region {region} (idx: {idx})."
156+
157+
158+
def test_mom6_profiling(fms_hits_parser, fms_hits_log_file, fms_hits_profiling):
    """Test the correct parsing of FMS timing information with hits column."""
    parsed_log = fms_hits_parser.read(fms_hits_log_file)

    # Every expected column (region and metrics) must be present in the parsed output.
    for key in fms_hits_profiling:
        assert key in parsed_log, f"{key} not found in mom6 parsed log"

    # Region labels must match, in order; this also checks that no region was skipped or added.
    expected_regions = fms_hits_profiling["region"]
    assert parsed_log["region"] == expected_regions, "Incorrect regions in mom6 parsed log"

    # Iterate over the regions (not the dict keys) so that every row of the table is verified.
    for idx, region in enumerate(expected_regions):
        for metric in ("hits", "tmin", "tmax", "tavg", "tstd"):
            assert (
                fms_hits_profiling[metric][idx] == parsed_log[metric][idx]
            ), f"Incorrect {metric} for region {region} (idx: {idx})."

tests/test_profiling.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import pytest
5+
6+
from access.parsers.profiling import ProfilingParser, _convert_from_string
7+
8+
9+
class MockProfilingParser(ProfilingParser):
    """Minimal concrete ProfilingParser that serves pre-built data instead of parsing text."""

    def __init__(self, data: dict):
        """Store the canned data, keyed by stream name, and the fixed list of metrics."""
        self._data = data
        self._metrics = ["hits", "tmin", "tmax", "tavg"]

    @property
    def metrics(self) -> list:
        """list: Metrics provided by the mock parser."""
        return self._metrics

    def read(self, stream: str) -> dict:
        """Return the canned profiling data stored under the key ``stream``."""
        canned_stats = self._data[stream]
        return canned_stats
22+
23+
24+
@pytest.fixture(scope="module")
def profiling_data():
    """Fixture instantiating fake parsed profiling data.

    The data is keyed by stream name. Each entry follows the layout documented by ProfilingParser: a
    "region" list plus one list of values per metric. The values themselves are fake.
    """
    return {
        "1cpu_stream": {
            "region": ["Total runtime", "Ocean Initialization"],
            "hits": [1, 2],
            "tmin": [138.600364, 2.344926],
            "tmax": [138.600366, 2.345701],
            "tavg": [600365, 2.345388],
        },
        "2cpu_stream": {
            "region": ["Total runtime", "Ocean Initialization"],
            "hits": [3, 4],
            "tmin": [69.300182, 1.162463],
            "tmax": [49.300182, 1.162463],
            "tavg": [300182.5, 1.172694],
        },
    }
43+
44+
45+
def test_base_parser(profiling_data):
    """Check the metrics property and read method of a concrete ProfilingParser subclass."""
    expected_metrics = ["hits", "tmin", "tmax", "tavg"]
    mock_parser = MockProfilingParser(profiling_data)

    assert mock_parser.metrics == expected_metrics, "Incorrect metrics returned in MockProfilingParser!"

    # The mock must hand back exactly the stats stored for each stream.
    for stream in ("1cpu_stream", "2cpu_stream"):
        returned_stats = mock_parser.read(stream)
        assert returned_stats == profiling_data[stream], f'Incorrect profiling stats returned for "{stream}"'
53+
54+
55+
def test_str2num():
    """Tests conversion of numbers to most appropriate type."""
    # Exact type checks use identity ("is") rather than "==".
    str2int = _convert_from_string("42")
    assert type(str2int) is int
    assert str2int == 42

    str2float = _convert_from_string("-1.23")
    assert type(str2float) is float
    assert str2float == -1.23

    # A zero written with decimals is not a valid int literal, so it must come back as a float.
    str2float = _convert_from_string("0.00000")
    assert type(str2float) is float
    assert str2float == 0.0

    # Non-numeric strings are returned unchanged.
    str2str = _convert_from_string("somestr")
    assert type(str2str) is str
    assert str2str == "somestr"

0 commit comments

Comments
 (0)