Skip to content

Commit 662e617

Browse files
authored
Merge pull request #364 from EIT-ALIVE/data_access
Add data access object to sequences
2 parents debe0dc + c273dc8 commit 662e617

File tree

4 files changed

+355
-7
lines changed

4 files changed

+355
-7
lines changed

eitprocessing/datahandling/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
class DataContainer(Equivalence):
1111
"""Base class for data container classes."""
1212

13+
def __bool__(self) -> bool:
    """Treat every data container as truthy.

    NOTE(review): presumably this keeps truthiness checks on a container (e.g.
    `if sequence.data.get(label, None):`) from falling back to `__len__` of a subclass — confirm.
    """
    return True
15+
1316
def deepcopy(self) -> Self:
    """Return a deep copy of the object."""
    # Inside the method body the bare name `deepcopy` is resolved in module scope, not on the class,
    # so this does not recurse. Presumably it is `copy.deepcopy`; the import is not visible here.
    return deepcopy(self)

eitprocessing/datahandling/mixins/slicing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def select_by_index(
6262
return self._sliced_copy(start_index=start, end_index=end, newlabel=newlabel)
6363

6464
@abstractmethod
65-
def __len__(self): ...
65+
def __len__(self) -> int: ...
6666

6767
@abstractmethod
6868
def _sliced_copy(

eitprocessing/datahandling/sequence.py

Lines changed: 176 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from __future__ import annotations
22

3-
from dataclasses import dataclass, field
4-
from typing import TYPE_CHECKING
3+
import itertools
4+
import sys
5+
from dataclasses import MISSING, dataclass, field
6+
from typing import TYPE_CHECKING, Any, TypeVar, overload
57

68
from eitprocessing.datahandling.continuousdata import ContinuousData
79
from eitprocessing.datahandling.datacollection import DataCollection
@@ -12,20 +14,25 @@
1214
from eitprocessing.datahandling.sparsedata import SparseData
1315

1416
if TYPE_CHECKING:
17+
from collections.abc import Iterator
18+
1519
import numpy as np
1620
from typing_extensions import Self
1721

22+
from eitprocessing.parameters import DataContainer
23+
24+
T = TypeVar("T", bound=Any)
25+
1826

1927
@dataclass(eq=False)
2028
class Sequence(Equivalence, SelectByTime):
2129
"""Sequence of timepoints containing respiratory data.
2230
2331
A Sequence object is a representation of data points over time. These data can consist of any combination of EIT
2432
frames (`EITData`), waveform data (`ContinuousData`) from different sources, or individual events (`SparseData`)
25-
occurring at any given timepoint.
26-
A Sequence can consist of an entire measurement, a section of a measurement, a single breath, or even a portion of a
27-
breath. A Sequence can consist of multiple sets of each type of data from the same time-points or can be a single
28-
measurement from just one source.
33+
occurring at any given timepoint. A Sequence can consist of an entire measurement, a section of a measurement, a
34+
single breath, or even a portion of a breath. A Sequence can consist of multiple sets of each type of data from the
35+
same time-points or can be a single measurement from just one source.
2936
3037
A Sequence can be split up into separate sections of a measurement or multiple (similar) Sequence objects can be
3138
merged together to form a single Sequence.
@@ -167,3 +174,166 @@ def select_by_time(
167174
for key in ("eit_data", "continuous_data", "sparse_data", "interval_data")
168175
},
169176
)
177+
178+
@property
def data(self) -> _DataAccess:
    """Shortcut access to data stored in collections inside a sequence.

    This allows all data objects stored in a collection inside a sequence to be accessed.
    Instead of `sequence.continuous_data["global_impedance"]` you can use
    `sequence.data["global_impedance"]`. This works for getting (`sequence.data["label"]` or
    `sequence.data.get("label")`) and adding data (`sequence.data["label"] = obj` or
    `sequence.data.add(obj)`).

    Other dict-like behaviour is also supported:
    - `label in sequence.data` to check whether an object with a label exists;
    - `del sequence.data[label]` to remove an object from the sequence based on the label;
    - `for label in sequence.data` to iterate over the labels;
    - `sequence.data.items()` to retrieve a list of (label, object) pairs, especially useful for iteration;
    - `sequence.data.labels()` or `sequence.data.keys()` to get a list of data labels;
    - `sequence.data.objects()` or `sequence.data.values()` to get a list of data objects.

    This interface only works if the labels are unique among the data collections. An attempt
    to add a data object with an existing label will result in a KeyError.

    Returns:
        _DataAccess: a new access object wrapping this sequence; one is created on every access.

    Raises:
        KeyError: if the same label is present in more than one data collection of this sequence.
    """
    return _DataAccess(self)
200+
201+
202+
@dataclass
203+
class _DataAccess:
204+
_sequence: Sequence
205+
206+
def __post_init__(self):
207+
for a, b in itertools.combinations(self._collections, 2):
208+
if duplicates := set(a) & set(b):
209+
msg = f"Duplicate labels ({', '.join(sorted(duplicates))}) found in {a} and {b}."
210+
exc = KeyError(msg)
211+
if sys.version_info >= (3, 11):
212+
exc.add_note(
213+
"You can't use the `data` interface with duplicate labels. "
214+
"Use the explicit data collections (`eit_data`, `continuous_data`, `sparse_data`, "
215+
"`interval_data`) instead."
216+
)
217+
raise exc
218+
219+
@property
220+
def _collections(self) -> tuple[DataCollection, ...]:
221+
return (
222+
self._sequence.continuous_data,
223+
self._sequence.interval_data,
224+
self._sequence.sparse_data,
225+
self._sequence.eit_data,
226+
)
227+
228+
@overload
229+
def get(self, label: str) -> DataContainer: ...
230+
231+
@overload
232+
def get(self, label: str, default: T) -> DataContainer | T: ...
233+
234+
def get(self, label: str, default: object = MISSING) -> DataContainer | object:
235+
"""Get a DataContainer object by label.
236+
237+
Example:
238+
```
239+
if filtered_data := sequence.data.get("filtered data", None):
240+
print(filtered_data.values.mean())
241+
else:
242+
print("No filtered data was found.")
243+
244+
```
245+
246+
Args:
247+
label (str): label of the object to retrieve.
248+
default (optional): a default value that is returned if the object is not found.
249+
Defaults to MISSING.
250+
251+
Raises:
252+
KeyError: if the object is not found, and no default was set.
253+
254+
Returns:
255+
DataContainer: the requested DataContainer.
256+
"""
257+
for collection in self._collections:
258+
if label in collection:
259+
return collection[label]
260+
261+
if default is not MISSING:
262+
return default
263+
264+
msg = f"No object with label {label} was found."
265+
raise KeyError(msg)
266+
267+
def __getitem__(self, key: str) -> DataContainer:
268+
return self.get(key)
269+
270+
def add(self, *obj: DataContainer) -> None:
271+
"""Add a DataContainer object to the sequence.
272+
273+
Adds the object to the appropriate data collection. The label of the object must be unique
274+
among all data collections, otherwise a KeyError is raised.
275+
276+
Args:
277+
obj (DataContainer): the object to add to the Sequence.
278+
279+
Raises:
280+
KeyError: if the label of the object already exists in any of the data collections.
281+
"""
282+
for object_ in obj:
283+
if self.get(object_.label, None):
284+
msg = f"An object with the label {object_.label} already exists in this sequence."
285+
exc = KeyError(msg)
286+
if sys.version_info >= (3, 11):
287+
exc.add_note(
288+
"You can't add an object with the same label through the `data` interface. "
289+
"Use the explicit data collections (`eit_data`, `continuous_data`, `sparse_data`, "
290+
"`interval_data`) instead."
291+
)
292+
raise exc
293+
294+
match object_:
295+
case ContinuousData():
296+
self._sequence.continuous_data.add(object_)
297+
case IntervalData():
298+
self._sequence.interval_data.add(object_)
299+
case SparseData():
300+
self._sequence.sparse_data.add(object_)
301+
case EITData():
302+
self._sequence.eit_data.add(object_)
303+
304+
def __setitem__(self, label: str, obj: DataContainer):
305+
if obj.label != label:
306+
msg = f"Label {label} does not match object label {obj.label}."
307+
raise KeyError(msg)
308+
return self.add(obj)
309+
310+
def __contains__(self, label: str) -> bool:
311+
return any(label in container for container in self._collections)
312+
313+
def __delitem__(self, label: str) -> None:
314+
for container in self._collections:
315+
if label in container:
316+
del container[label]
317+
return
318+
319+
msg = f"Object with label {label} was not found."
320+
raise KeyError(msg)
321+
322+
def __iter__(self) -> Iterator[str]:
323+
return itertools.chain(*[collection.keys() for collection in self._collections])
324+
325+
def items(self) -> list[tuple[str, DataContainer]]:
326+
"""Return all data items (`(label, object)` pairs)."""
327+
return list(itertools.chain(*[collection.items() for collection in self._collections]))
328+
329+
def keys(self) -> list[str]:
330+
"""Return a list of all labels."""
331+
return list(self.__iter__())
332+
333+
labels = keys
334+
335+
def values(self) -> list[DataContainer]:
336+
"""Return all data objects."""
337+
return list(itertools.chain(*[collection.values() for collection in self._collections]))
338+
339+
objects = values

0 commit comments

Comments
 (0)