diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 22faa3407..310000b39 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -559,35 +559,16 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): rec['write_count'] = wcnt rec['read_count'] = rcnt - rec['write_segments'] = [] - rec['read_segments'] = [] - - size_of = ffi.sizeof("struct dxt_file_record") segments = ffi.cast("struct segment_info *", buf[0] + size_of ) - - - for i in range(wcnt): - seg = { - "offset": segments[i].offset, - "length": segments[i].length, - "start_time": segments[i].start_time, - "end_time": segments[i].end_time - } - rec['write_segments'].append(seg) - - - for i in range(rcnt): - i = i + wcnt - seg = { - "offset": segments[i].offset, - "length": segments[i].length, - "start_time": segments[i].start_time, - "end_time": segments[i].end_time - } - rec['read_segments'].append(seg) - - + segments_buf = ffi.buffer(segments, (rcnt + wcnt) * 64 * 4) + segment_arr = np.frombuffer(buffer=segments_buf, + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)]) + rec['write_segments'] = segment_arr[:wcnt] + rec['read_segments'] = segment_arr[wcnt: rcnt + wcnt] if dtype == "pandas": rec['read_segments'] = pd.DataFrame(rec['read_segments']) rec['write_segments'] = pd.DataFrame(rec['write_segments']) diff --git a/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py b/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py index 08c49c599..09674a108 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/heatmap_handling.py @@ -132,6 +132,10 @@ def get_rd_wr_dfs( # ignore for the same reason as above seg_df = _dict[seg_key] # type: ignore if seg_df.size: + seg_df.columns = ["offset", + "length", + "start_time", + "end_time"] # drop unused columns from the dataframe seg_df = seg_df.drop(columns=drop_columns) # create new column for the ranks diff --git a/darshan-util/pydarshan/darshan/tests/test_moddxt.py b/darshan-util/pydarshan/darshan/tests/test_moddxt.py index 5352aca4d..1cdd7bd80 100644 --- a/darshan-util/pydarshan/darshan/tests/test_moddxt.py +++ b/darshan-util/pydarshan/darshan/tests/test_moddxt.py @@ -1,6 +1,8 @@ import os import pytest +import numpy as np +from numpy.testing import assert_allclose import darshan.backend.cffi_backend as backend from darshan.log_utils import get_log_path @@ -16,24 +18,52 @@ 'hostname': 'sn176.localdomain', 'write_count': 1, 'read_count': 0, - 'write_segments': [{'offset': 0, - 'length': 40, - 'start_time': 0.10337884305045009, - 'end_time': 0.10338771319948137}], - 'read_segments': []}), + 'write_segments': np.array([(0, + 40, + 0.10337884305045009, + 0.10338771319948137)], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)]), + 'read_segments': np.array([], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)])}), ('DXT_MPIIO', {'id': 9457796068806373448, 'rank': 0, 'hostname': 'sn176.localdomain', 'write_count': 1, 'read_count': 0, - 'write_segments': [{'offset': 0, - 'length': 4000, - 'start_time': 0.10368914622813463, - 'end_time': 0.1053433942142874}], - 'read_segments': []})]) + 'write_segments': np.array([(0, + 4000, + 0.10368914622813463, + 0.1053433942142874)], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)]), + 'read_segments': np.array([], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)])})]) def test_dxt_records(logfile, mod, expected_dict): - # regression guard for DXT records values + # regression guard for DXT records values; + # write_segments and read_segments are now NumPy + # recarrays, to save considerable memory + # per gh-779 logfile = get_log_path(logfile) log = backend.log_open(logfile) rec = backend.log_get_record(log, mod) - assert rec == expected_dict + for key in expected_dict.keys(): + if "segments" in key: + # careful, can't use assert_allclose directly + # on recarrays + assert_allclose(rec[key]["offset"], expected_dict[key]["offset"]) + assert_allclose(rec[key]["length"], expected_dict[key]["length"]) + assert_allclose(rec[key]["start_time"], expected_dict[key]["start_time"]) + assert_allclose(rec[key]["end_time"], expected_dict[key]["end_time"]) + else: + assert rec[key] == expected_dict[key]