Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion darshan-util/pydarshan/darshan/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,20 +306,24 @@ def __init__(self,
filename=None, dtype='numpy',
start_time=None, end_time=None,
automatic_summary=False,
read_all=True, lookup_name_records=True):
read_all=True, lookup_name_records=True,
strict=False):
"""
Args:
filename (str): filename to open (optional)
dtype (str): default dtype for internal structures
automatic_summary (bool): automatically generate summary after loading
read_all (bool): whether to read all records for log
lookup_name_records (bool): lookup and update name_records as records are loaded
strict (bool): raise an error if invalid log file content is detected (e.g., a
time counter whose value is below -1)

Return:
None

"""
self.filename = filename
self.strict = strict
self.log = None

# Behavioral Options
Expand Down Expand Up @@ -373,6 +377,26 @@ def __init__(self,
if filename:
self.open(filename, read_all=read_all)

if self.strict:
# TODO: more thorough checking for bad log data
for mod_name, mod in self.records.items():
mod_df_dict = mod.to_df()
mod_counters_df = mod_df_dict.get("counters")
mod_fcounters_df = mod_df_dict.get("fcounters")
for df in [mod_counters_df, mod_fcounters_df]:
# all time columns (name contains "time") should have values >= -1
# NOTE: darshan sometimes uses -1 for missing values,
# which we may eventually want to avoid, but for now
# we will not raise an error unless a time value that
# should be positive is below -1
for column_name in df.columns:
if "time" in column_name.lower():
if df[column_name].min() < -1:
# note: we may want to use a custom error type
# here for "invalid logs"
raise ValueError(f"Invalid log file; negative value in {column_name}")



@property
def metadata(self):
Expand Down
10 changes: 10 additions & 0 deletions darshan-util/pydarshan/darshan/tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,3 +497,13 @@ def test_heatmap_df_invalid_operation():
report = darshan.DarshanReport(log_path)
with pytest.raises(ValueError, match="invalid_op not in heatmap"):
report.heatmaps["POSIX"].to_df(ops=["invalid_op"])


@pytest.mark.parametrize("log_name, error_match", [
    # see: gh-562
    ("sample.darshan", "STDIO_F_WRITE_TIME"),
])
def test_detect_known_invalid_logs(log_name, error_match):
    """Check that opening a known-invalid log with ``strict=True`` raises.

    Args:
        log_name (str): name of the log file, resolved via ``get_log_path``
        error_match (str): text expected in the ``ValueError`` message after
            the "Invalid log file" prefix (interpolated into a regex)

    """
    log_path = get_log_path(log_name)
    with pytest.raises(ValueError, match=f"Invalid log file.*{error_match}"):
        # the constructor itself is expected to raise, so its result is
        # intentionally not bound to a name
        darshan.DarshanReport(log_path, strict=True)