diff --git a/src/access/parsers/payujson_profiling.py b/src/access/parsers/payujson_profiling.py
new file mode 100644
index 0000000..44c50bd
--- /dev/null
+++ b/src/access/parsers/payujson_profiling.py
@@ -0,0 +1,91 @@
+# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Parser for JSON walltime data generated by payu.
+
+The data to be parsed is written in the following form:
+
+{
+    "scheduler_job_id": "149764665.gadi-pbs",
+    "scheduler_type": "pbs",
+    # ... many more fields ...
+    "timings": {
+        "payu_start_time": "2025-09-16T08:52:50.748807",
+        "payu_setup_duration_seconds": 47.73822930175811,
+        "payu_model_run_duration_seconds": 6776.044810215011,
+        "payu_run_duration_seconds": 6779.385873348918,
+        "payu_archive_duration_seconds": 8.063649574294686,
+        "payu_finish_time": "2025-09-16T10:46:48.974451",
+        "payu_total_duration_seconds": 6838.225644
+    },
+    # ... more fields
+}
+"""
+
+import json
+
+from access.parsers.profiling import ProfilingParser
+
+
+class PayuJSONProfilingParser(ProfilingParser):
+    """Payu JSON job output profiling parser."""
+
+    def __init__(self):
+        """Instantiate Payu JSON profiling parser."""
+        super().__init__()
+        self._metrics = ["walltime"]
+
+    @property
+    def metrics(self) -> list:
+        """Implements "metrics" abstract method/property.
+
+        Returns:
+            list: the metric names captured by this parser.
+        """
+        return self._metrics
+
+    def read(self, stream: str) -> dict:
+        """Implements "read" abstract method to parse a JSON file generated by Payu.
+
+        Args:
+            stream (str): String containing valid JSON to be parsed.
+
+        Returns:
+            dict: Parsed timing information, with timing names under "region" and the matching
+                values under "walltime".
+
+        Raises:
+            KeyError: when "timings" key is missing in input JSON.
+            ValueError: if stream is not a string with valid JSON, or if the parsed document
+                or its "timings" entry is not a JSON object.
+        """
+
+        try:
+            payload = json.loads(stream)
+        except (json.JSONDecodeError, TypeError) as err:
+            # TypeError covers non-string input (e.g. None), reported as ValueError per the contract.
+            raise ValueError("Invalid JSON supplied.") from err
+
+        # Valid JSON that is not an object (list, number, ...) cannot hold a "timings" entry.
+        if not isinstance(payload, dict):
+            raise ValueError("Invalid JSON supplied.")
+
+        if "timings" not in payload:
+            raise KeyError('"timings" key missing in stream.')
+        timings = payload["timings"]
+        if not isinstance(timings, dict):
+            raise ValueError('"timings" entry is not a JSON object.')
+
+        # known keys not relevant to profiling
+        unwanted_keys = ("payu_start_time", "payu_finish_time")
+
+        result = {"region": [], "walltime": []}
+
+        # transpose dict to be consistent with other profiling parsers.
+        for region, walltime in timings.items():
+            if region in unwanted_keys:
+                continue
+            result["region"].append(region)
+            result["walltime"].append(walltime)
+
+        return result
diff --git a/tests/test_payujson_profiling.py b/tests/test_payujson_profiling.py
new file mode 100644
index 0000000..6687cbc
--- /dev/null
+++ b/tests/test_payujson_profiling.py
@@ -0,0 +1,77 @@
+# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from access.parsers.payujson_profiling import PayuJSONProfilingParser
+
+
+@pytest.fixture(scope="module")
+def payujson_parser():
+    """Fixture instantiating the Payu JSON parser."""
+    return PayuJSONProfilingParser()
+
+
+@pytest.fixture(scope="module")
+def payujson_profiling():
+    """Fixture returning a dict holding the parsed Payu JSON timing content."""
+    return {
+        "region": [
+            "payu_setup_duration_seconds",
+            "payu_model_run_duration_seconds",
+            "payu_run_duration_seconds",
+            "payu_archive_duration_seconds",
+            "payu_total_duration_seconds",
+        ],
+        "walltime": [47.73822930175811, 6776.044810215011, 6779.385873348918, 8.063649574294686, 6838.225644],
+    }
+
+
+@pytest.fixture(scope="module")
+def payujson_log_file():
+    """Fixture returning the Payu JSON timing content."""
+    return """{
+    "scheduler_job_id": "149764665.gadi-pbs",
+    "timings": {
+        "payu_start_time": "2025-09-16T08:52:50.748807",
+        "payu_setup_duration_seconds": 47.73822930175811,
+        "payu_model_run_duration_seconds": 6776.044810215011,
+        "payu_run_duration_seconds": 6779.385873348918,
+        "payu_archive_duration_seconds": 8.063649574294686,
+        "payu_finish_time": "2025-09-16T10:46:48.974451",
+        "payu_total_duration_seconds": 6838.225644
+    },
+    "payu_run_id": "5c9027104cc39a5d39814624537c21440b68beb7",
+    "payu_model_run_status": 0,
+    "model_finish_time": "1844-01-01T00:00:00",
+    "model_start_time": "1843-01-01T00:00:00",
+    "model_calendar": "proleptic_gregorian",
+    "payu_run_status": 0
+}
+"""
+
+
+def test_payujson_profiling(payujson_parser, payujson_log_file, payujson_profiling):
+    """Test the correct parsing of Payu JSON timing information."""
+    assert "walltime" in payujson_parser.metrics, "walltime metric not found in parsed log."
+    parsed_log = payujson_parser.read(payujson_log_file)
+    assert parsed_log["region"] == payujson_profiling["region"], "Unexpected regions in Payu JSON parsed log."
+    # Compare region by region so that a failure names the offending timing.
+    for idx, region in enumerate(payujson_profiling["region"]):
+        assert (
+            payujson_profiling["walltime"][idx] == parsed_log["walltime"][idx]
+        ), f"Incorrect walltime for region {region} (idx: {idx})."
+
+
+def test_payujson_errors(payujson_parser):
+    """Test that exceptions get raised appropriately."""
+    with pytest.raises(KeyError):
+        payujson_parser.read('{"a": 123}')
+    with pytest.raises(ValueError):
+        payujson_parser.read("abc def")
+    with pytest.raises(ValueError):
+        payujson_parser.read("[1, 2, 3]")
+    with pytest.raises(ValueError):
+        payujson_parser.read(None)
+    with pytest.raises(ValueError):
+        payujson_parser.read('{"timings": 1}')