Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions src/access/parsers/payujson_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

"""Parser for payu JSON walltime data generated by payu.
The data to be parsed is written in the following form:

{
"scheduler_job_id": "149764665.gadi-pbs",
"scheduler_type": "pbs",
# ... many more fields ...
"timings": {
"payu_start_time": "2025-09-16T08:52:50.748807",
"payu_setup_duration_seconds": 47.73822930175811,
"payu_model_run_duration_seconds": 6776.044810215011,
"payu_run_duration_seconds": 6779.385873348918,
"payu_archive_duration_seconds": 8.063649574294686,
"payu_finish_time": "2025-09-16T10:46:48.974451",
"payu_total_duration_seconds": 6838.225644
},
# ... more fields
}
"""

from access.parsers.profiling import ProfilingParser
import json


class PayuJSONProfilingParser(ProfilingParser):
    """Payu JSON job output profiling parser.

    Reads the "timings" object of a Payu JSON document and exposes its entries
    as two parallel lists: the entry names ("region") and their values
    ("walltime").
    """

    def __init__(self):
        """Instantiate Payu JSON profiling parser."""
        super().__init__()
        self._metrics = ["walltime"]

    @property
    def metrics(self) -> list:
        """Implements "metrics" abstract method/property.

        Returns:
            list: the metric names captured by this parser.
        """
        return self._metrics

    def read(self, stream: str) -> dict:
        """Implements "read" abstract method to parse a JSON file generated by Payu.

        Args:
            stream (str): String containing valid JSON to be parsed.

        Returns:
            dict: Parsed timing information, with a "region" list holding the
                names of the timing entries and a "walltime" list holding the
                matching values, in the order they appear in the input.

        Raises:
            KeyError: when "timings" key is missing in input JSON.
            ValueError: if stream is not a string with valid JSON, if the
                top-level JSON value is not an object, or if the "timings"
                entry is not an object.
        """
        # Keep the try body down to the single call that can fail on bad input.
        # TypeError covers non-string streams (e.g. None), which json.loads
        # rejects before attempting to decode anything.
        try:
            payload = json.loads(stream)
        except (json.JSONDecodeError, TypeError) as err:
            raise ValueError("Invalid JSON supplied.") from err

        # Valid JSON may still be a list, number, string, ...; subscripting
        # those with "timings" would leak an undocumented TypeError.
        if not isinstance(payload, dict):
            raise ValueError("Top-level JSON value must be an object.")

        try:
            timings = payload["timings"]
        except KeyError as err:
            raise KeyError('"timings" key missing in stream.') from err

        if not isinstance(timings, dict):
            raise ValueError('"timings" entry must be a JSON object.')

        # known keys not relevant to profiling (timestamps, not durations)
        unwanted_keys = ("payu_start_time", "payu_finish_time")

        # transpose dict to be consistent with other profiling parsers.
        regions = [name for name in timings if name not in unwanted_keys]
        return {
            "region": regions,
            "walltime": [timings[name] for name in regions],
        }
70 changes: 70 additions & 0 deletions tests/test_payujson_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

import pytest

from access.parsers.payujson_profiling import PayuJSONProfilingParser


@pytest.fixture(scope="module")
def payujson_parser():
    """Provide a Payu JSON profiling parser instance shared by the tests of this module."""
    parser = PayuJSONProfilingParser()
    return parser


@pytest.fixture(scope="module")
def payujson_profiling():
    """Provide the dict the parser is expected to produce for the sample Payu JSON input."""
    # (region name, walltime) pairs, in the order they appear in the sample input.
    expected_timings = (
        ("payu_setup_duration_seconds", 47.73822930175811),
        ("payu_model_run_duration_seconds", 6776.044810215011),
        ("payu_run_duration_seconds", 6779.385873348918),
        ("payu_archive_duration_seconds", 8.063649574294686),
        ("payu_total_duration_seconds", 6838.225644),
    )
    regions, walltimes = zip(*expected_timings)
    return {"region": list(regions), "walltime": list(walltimes)}


@pytest.fixture(scope="module")
def payujson_log_file():
    """Provide a sample Payu JSON document, as a string, including a "timings" object."""
    sample_json = """{
"scheduler_job_id": "149764665.gadi-pbs",
"timings": {
"payu_start_time": "2025-09-16T08:52:50.748807",
"payu_setup_duration_seconds": 47.73822930175811,
"payu_model_run_duration_seconds": 6776.044810215011,
"payu_run_duration_seconds": 6779.385873348918,
"payu_archive_duration_seconds": 8.063649574294686,
"payu_finish_time": "2025-09-16T10:46:48.974451",
"payu_total_duration_seconds": 6838.225644
},
"payu_run_id": "5c9027104cc39a5d39814624537c21440b68beb7",
"payu_model_run_status": 0,
"model_finish_time": "1844-01-01T00:00:00",
"model_start_time": "1843-01-01T00:00:00",
"model_calendar": "proleptic_gregorian",
"payu_run_status": 0
}
"""
    return sample_json


def test_payujson_profiling(payujson_parser, payujson_log_file, payujson_profiling):
    """Test the correct parsing of Payu JSON timing information.

    Checks that the parser advertises the "walltime" metric, that the parsed
    result has the expected columns, that exactly the expected regions are
    present (i.e. the timestamp entries were dropped), and that every region
    carries the expected walltime.
    """
    assert "walltime" in payujson_parser.metrics, "walltime metric not found in parsed log."
    parsed_log = payujson_parser.read(payujson_log_file)

    # The parsed result must expose the same columns as the expected one.
    for column in payujson_profiling:
        assert column in parsed_log, f"{column} not found in Payu JSON parsed log."

    expected_regions = payujson_profiling["region"]
    parsed_regions = parsed_log["region"]
    assert len(parsed_regions) == len(
        expected_regions
    ), f"Expected {len(expected_regions)} regions, parsed {len(parsed_regions)}."

    # Iterate over the region names themselves (not the dict keys), so that
    # every region and every walltime value is actually verified.
    for idx, region in enumerate(expected_regions):
        assert region in parsed_regions, f"{region} not found in Payu JSON parsed log."
        parsed_idx = parsed_regions.index(region)
        assert (
            payujson_profiling["walltime"][idx] == parsed_log["walltime"][parsed_idx]
        ), f"Incorrect walltime for region {region} (idx: {idx})."


def test_payujson_errors(payujson_parser):
    """Check that malformed input makes the parser raise the expected exceptions."""
    failing_inputs = (
        (KeyError, '{"a": 123}'),  # valid JSON, but no "timings" key
        (ValueError, "abc def"),  # not JSON at all
    )
    for expected_error, payload in failing_inputs:
        with pytest.raises(expected_error):
            payujson_parser.read(payload)