Skip to content

Commit 60f9f78

Browse files
committed
Add TracersXmf: apply new parsing strategy to tracers xmf
Similarly to the changes previously implemented for the eulerian fields, the xdmf files for tracers is now cached and parsed in an iterative way only once instead of parsed and built fully each time a new query was performed. This dramatically reduces runtime and memory footprint.
1 parent ea8e729 commit 60f9f78

File tree

3 files changed

+135
-43
lines changed

3 files changed

+135
-43
lines changed

stagpy/_step.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -447,14 +447,10 @@ def __getitem__(self, name: str) -> Optional[List[ndarray]]:
447447
self.step.sdat.filename("tra", timestep=self.step.isnap, force_legacy=True)
448448
)
449449
if data is None and self.step.sdat.hdf5:
450-
position = any(axis not in self._data for axis in "xyz")
451-
self._data.update(
452-
stagyyparsers.read_tracers_h5(
453-
self.step.sdat.hdf5 / "DataTracers.xmf",
454-
name,
455-
self.step.isnap,
456-
position,
457-
)
450+
self._data[name] = stagyyparsers.read_tracers_h5(
451+
self.step.sdat._traxmf,
452+
name,
453+
self.step.isnap,
458454
)
459455
elif data is not None:
460456
self._data.update(data)

stagpy/stagyydata.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from . import _helpers, _step, conf, error, parfile, phyvars, stagyyparsers
2323
from ._step import Step
2424
from .datatypes import Rprof, Tseries, Vart
25-
from .stagyyparsers import FieldXmf
25+
from .stagyyparsers import FieldXmf, TracersXmf
2626

2727
if typing.TYPE_CHECKING:
2828
from os import PathLike
@@ -816,6 +816,13 @@ def _botxmf(self) -> FieldXmf:
816816
path=self.hdf5 / "DataBottom.xmf",
817817
)
818818

819+
@cached_property
820+
def _traxmf(self) -> TracersXmf:
821+
assert self.hdf5 is not None
822+
return TracersXmf(
823+
path=self.hdf5 / "DataTracers.xmf",
824+
)
825+
819826
@property
820827
def par(self) -> Namelist:
821828
"""Content of par file.

stagpy/stagyyparsers.py

Lines changed: 123 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from functools import cached_property, partial
1515
from itertools import product
1616
from operator import itemgetter
17-
from xml.etree import ElementTree as xmlET
1817

1918
import h5py
2019
import numpy as np
@@ -1114,48 +1113,138 @@ def read_field_h5(
11141113
return (header, flds) if data_found else None
11151114

11161115

1117-
def read_tracers_h5(
1118-
xdmf_file: Path, infoname: str, snapshot: int, position: bool
1119-
) -> Dict[str, List[ndarray]]:
1116+
@dataclass(frozen=True)
1117+
class TracerSub:
1118+
file: Path
1119+
dataset: str
1120+
icore: int
1121+
iblock: int
1122+
1123+
1124+
@dataclass(frozen=True)
1125+
class XmfTracersEntry:
1126+
isnap: int
1127+
time: Optional[float]
1128+
mo_lambda: Optional[float]
1129+
mo_thick_sol: Optional[float]
1130+
yin_yang: bool
1131+
range_yin: range
1132+
range_yang: range
1133+
fields: Mapping[str, int]
1134+
1135+
def _fsub(self, path_root: Path, name: str, icore: int, yang: bool) -> TracerSub:
1136+
ifile = self.fields[name]
1137+
yy_tag = "2" if yang else "_"
1138+
ic = icore + 1
1139+
return TracerSub(
1140+
file=path_root / f"Tracers_{name}_{ifile:05d}_{ic:05d}.h5",
1141+
dataset=f"{name}{yy_tag}{ic:05d}_{self.isnap:05d}",
1142+
icore=icore,
1143+
iblock=int(yang),
1144+
)
1145+
1146+
def tra_subdomains(self, path_root: Path, name: str) -> Iterator[TracerSub]:
1147+
if name not in self.fields:
1148+
return
1149+
for icore in self.range_yin:
1150+
yield self._fsub(path_root, name, icore, False)
1151+
for icore in self.range_yang:
1152+
yield self._fsub(path_root, name, icore, True)
1153+
1154+
1155+
@dataclass(frozen=True)
1156+
class TracersXmf:
1157+
path: Path
1158+
1159+
@cached_property
1160+
def _data(self) -> Mapping[int, XmfTracersEntry]:
1161+
xs = XmlStream(filepath=self.path)
1162+
data = {}
1163+
for _ in xs.iter_tag("Time"):
1164+
time = float(xs.current.attrib["Value"])
1165+
xs.advance()
1166+
extra: dict[str, float] = {}
1167+
while xs.current.tag != "Grid":
1168+
# mo_lambda, mo_thick_sol
1169+
extra[xs.current.tag] = float(xs.current.attrib["Value"])
1170+
xs.advance()
1171+
1172+
mesh_name = xs.current.attrib["Name"]
1173+
yin_yang = mesh_name.startswith("meshYin")
1174+
i0_yin = int(mesh_name[-5:]) - 1
1175+
1176+
fields_info = {}
1177+
1178+
xs.skip_to_tag("Geometry")
1179+
with xs.load() as elt_geom:
1180+
for name, data_item in zip("zyx", elt_geom):
1181+
data_text = _try_text(xs.filepath, data_item)
1182+
h5file, group = data_text.strip().split(":/", 1)
1183+
isnap = int(group[-5:])
1184+
ifile = int(h5file[-14:-9])
1185+
fields_info[name] = ifile
1186+
1187+
while xs.current.tag == "Attribute":
1188+
with xs.load() as elt_fvar:
1189+
name = elt_fvar.attrib["Name"]
1190+
elt_data = elt_fvar[0]
1191+
data_text = _try_text(xs.filepath, elt_data)
1192+
h5file, group = data_text.strip().split(":/", 1)
1193+
isnap = int(group[-5:])
1194+
ifile = int(h5file[-14:-9])
1195+
fields_info[name] = ifile
1196+
1197+
i1_yin = i0_yin + 1
1198+
i0_yang = 0
1199+
i1_yang = 0
1200+
for _ in xs.iter_tag("Grid"):
1201+
if xs.current.attrib["GridType"] == "Collection":
1202+
break
1203+
if (name := xs.current.attrib["Name"]).startswith("meshYang"):
1204+
if i1_yang == 0:
1205+
i0_yang = int(name[-5:]) - 1
1206+
i1_yang = i0_yang + (i1_yin - i0_yin)
1207+
else:
1208+
i1_yin += 1
1209+
xs.drop()
1210+
1211+
data[isnap] = XmfTracersEntry(
1212+
isnap=isnap,
1213+
time=time,
1214+
mo_lambda=extra.get("mo_lambda"),
1215+
mo_thick_sol=extra.get("mo_thick_sol"),
1216+
yin_yang=yin_yang,
1217+
range_yin=range(i0_yin, i1_yin),
1218+
range_yang=range(i0_yang, i1_yang),
1219+
fields=fields_info,
1220+
)
1221+
return data
1222+
1223+
def __getitem__(self, isnap: int) -> XmfTracersEntry:
1224+
try:
1225+
return self._data[isnap]
1226+
except KeyError:
1227+
raise ParsingError(self.path, f"no data for snapshot {isnap}")
1228+
1229+
1230+
def read_tracers_h5(xdmf: TracersXmf, infoname: str, snapshot: int) -> list[NDArray]:
11201231
"""Extract tracers data from hdf5 files.
11211232
11221233
Args:
11231234
xdmf_file: path of the xdmf file.
11241235
infoname: name of information to extract.
11251236
snapshot: snapshot number.
1126-
position: whether to extract position of tracers.
11271237
Returns:
11281238
Tracers data organized by attribute and block.
11291239
"""
1130-
xdmf_root = xmlET.parse(str(xdmf_file)).getroot()
1131-
tra: Dict[str, List[Dict[int, ndarray]]] = {}
1132-
tra[infoname] = [{}, {}] # two blocks, ordered by cores
1133-
if position:
1134-
for axis in "xyz":
1135-
tra[axis] = [{}, {}]
1136-
for elt_subdomain in xdmf_root[0][0][snapshot].findall("Grid"):
1137-
elt_name = _try_get(xdmf_file, elt_subdomain, "Name")
1138-
ibk = int(elt_name.startswith("meshYang"))
1139-
if position:
1140-
for data_attr in elt_subdomain.findall("Geometry"):
1141-
for data_item, axis in zip(data_attr.findall("DataItem"), "xyz"):
1142-
icore, data = _get_field(xdmf_file, data_item)
1143-
tra[axis][ibk][icore] = data
1144-
for data_attr in elt_subdomain.findall("Attribute"):
1145-
if data_attr.get("Name") != infoname:
1146-
continue
1147-
icore, data = _get_field(
1148-
xdmf_file, _try_find(xdmf_file, data_attr, "DataItem")
1149-
)
1150-
tra[infoname][ibk][icore] = data
1151-
tra_concat: Dict[str, List[ndarray]] = {}
1152-
for info in tra:
1153-
tra[info] = [trab for trab in tra[info] if trab] # remove empty blocks
1154-
tra_concat[info] = []
1155-
for trab in tra[info]:
1156-
tra_concat[info].append(
1157-
np.concatenate([trab[icore] for icore in range(len(trab))])
1158-
)
1240+
tra: list[list[NDArray]] = [[], []] # [block][core]
1241+
for tsub in xdmf[snapshot].tra_subdomains(xdmf.path.parent, infoname):
1242+
tra[tsub.iblock].append(_read_group_h5(tsub.file, tsub.dataset))
1243+
1244+
tra_concat: list[NDArray] = []
1245+
for trab in tra:
1246+
if trab:
1247+
tra_concat.append(np.concatenate(trab))
11591248
return tra_concat
11601249

11611250

0 commit comments

Comments
 (0)