Skip to content

Commit 8d60957

Browse files
committed
2 parents 16c8ef8 + cb1fa81 commit 8d60957

File tree

12 files changed

+450
-125
lines changed

12 files changed

+450
-125
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[![contraqctor](./assets/logo-letter.svg)](https://allenneuraldynamics.github.io/contraqctor/)
22

33
[![contraqctor](https://tinyurl.com/zf46ufwa)](https://allenneuraldynamics.github.io/contraqctor/)
4-
![CI](https://github.com/AllenNeuralDynamics/contraqctor/actions/workflows/ci.yml/badge.svg)
4+
![CI](https://github.com/AllenNeuralDynamics/contraqctor/actions/workflows/contraqctor.yml/badge.svg)
55
[![PyPI - Version](https://img.shields.io/pypi/v/contraqctor)](https://pypi.org/project/contraqctor/)
66
[![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
77
[![ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)

pyproject.toml

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ build-backend = "uv_build"
55
[project]
66
name = "contraqctor"
77
description = "A library for managing data contracts and quality control in behavioral datasets."
8-
authors = [
9-
{ name = "Bruno Cruz", email = "[email protected]" },
10-
]
8+
authors = [{ name = "Bruno Cruz", email = "[email protected]" }]
119
requires-python = ">=3.11"
1210
license = "MIT"
1311

@@ -42,13 +40,7 @@ Changelog = "https://github.com/AllenNeuralDynamics/contraqctor/releases"
4240

4341
[dependency-groups]
4442

45-
dev = [
46-
'codespell',
47-
'pytest',
48-
'pytest-cov',
49-
'ruff',
50-
'interrogate'
51-
]
43+
dev = ['codespell', 'pytest', 'pytest-cov', 'ruff', 'interrogate']
5244

5345
docs = [
5446
'mkdocs',
@@ -81,10 +73,19 @@ testpaths = ["tests"]
8173
python_files = ["test_*.py"]
8274
python_classes = ["Test*"]
8375
python_functions = ["test_*"]
76+
env = ["MPLBACKEND=Agg"]
8477

8578
[tool.interrogate]
8679
ignore-init-method = true
8780
ignore-magic = true
8881
ignore_module = true
8982
fail-under = 100
90-
exclude = ["__init__.py", "tests", "docs", "build", "setup.py", "examples", "site"]
83+
exclude = [
84+
"__init__.py",
85+
"tests",
86+
"docs",
87+
"build",
88+
"setup.py",
89+
"examples",
90+
"site",
91+
]

src/contraqctor/_typing.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1-
from typing import Any, Generic, Protocol, TypeAlias, TypeVar, Union, cast, final
1+
from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeAlias, TypeVar, Union, cast, final
2+
3+
if TYPE_CHECKING:
4+
from contraqctor.contract.base import DataStream
5+
else:
6+
DataStream = Any # type: ignore
27

38
# Type variables
4-
TData = TypeVar("TData", bound=Union[Any, "_UnsetData"])
5-
"""TypeVar: Type variable bound to Union[Any, "_UnsetData"] for data types."""
9+
TData = TypeVar("TData", bound=Union[Any, "_UnsetData", "ErrorOnLoad"])
10+
"""TypeVar: Type variable bound to Union[Any, "_UnsetData", "ErrorOnLoad"] for data types."""
611

712
TReaderParams = TypeVar("TReaderParams", contravariant=True)
813
"""TypeVar: Contravariant type variable for reader parameters."""
@@ -157,3 +162,42 @@ def is_unset(obj: Any) -> bool:
157162
True if the object is an unset sentinel value, False otherwise.
158163
"""
159164
return (obj is UnsetReader) or (obj is UnsetParams) or (obj is UnsetData)
165+
166+
167+
@final
168+
class ErrorOnLoad:
169+
"""A class representing data that failed to load due to an error.
170+
171+
Attributes:
172+
data_stream: The data stream that failed to load.
173+
exception: The exception that occurred during data loading, or None if no exception was recorded.
174+
175+
This class is used to encapsulate information about data loading failures,
176+
allowing for graceful handling of errors in data processing workflows.
177+
"""
178+
179+
def __init__(self, data_stream: "DataStream", exception: Exception | None = None):
180+
self._data_stream = data_stream
181+
self._exception = exception
182+
183+
@property
184+
def data_stream(self) -> "DataStream":
185+
"""The data stream that failed to load."""
186+
return self._data_stream
187+
188+
@property
189+
def exception(self) -> Exception | None:
190+
"""The exception that occurred during data loading, if any."""
191+
return self._exception
192+
193+
def __repr__(self):
194+
return f"<ErrorData stream={self.data_stream} error={self.exception}>"
195+
196+
def raise_from_error(self):
197+
"""Raises the stored error if it exists.
198+
199+
Raises:
200+
The stored exception if it is not None.
201+
"""
202+
if self.exception is not None:
203+
raise self.exception

src/contraqctor/contract/base.py

Lines changed: 67 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
11
import abc
22
import dataclasses
33
import os
4-
from typing import Any, ClassVar, Dict, Generator, Generic, List, Optional, Protocol, Self, TypeVar, runtime_checkable
4+
from typing import (
5+
Any,
6+
ClassVar,
7+
Dict,
8+
Generator,
9+
Generic,
10+
List,
11+
Optional,
12+
Protocol,
13+
Self,
14+
TypeVar,
15+
cast,
16+
runtime_checkable,
17+
)
518

619
from semver import Version
720
from typing_extensions import override
@@ -201,7 +214,16 @@ def has_data(self) -> bool:
201214
Returns:
202215
bool: True if data has been loaded, False otherwise.
203216
"""
204-
return not _typing.is_unset(self._data)
217+
return not (_typing.is_unset(self._data) or self.has_error)
218+
219+
@property
220+
def has_error(self) -> bool:
221+
"""Check if the data stream encountered an error during loading.
222+
223+
Returns:
224+
bool: True if an error occurred, False otherwise.
225+
"""
226+
return isinstance(self._data, _typing.ErrorOnLoad)
205227

206228
@property
207229
def data(self) -> _typing.TData:
@@ -213,9 +235,22 @@ def data(self) -> _typing.TData:
213235
Raises:
214236
ValueError: If data has not been loaded yet.
215237
"""
238+
if self.has_error:
239+
cast(_typing.ErrorOnLoad, self._data).raise_from_error()
216240
if not self.has_data:
217241
raise ValueError("Data has not been loaded yet.")
218-
return self._data
242+
return cast(_typing.TData, self._data)
243+
244+
def clear(self) -> Self:
245+
"""Clear the loaded data from the data stream.
246+
247+
Resets the data to an unset state, allowing for reloading.
248+
249+
Returns:
250+
Self: The data stream instance for method chaining.
251+
"""
252+
self._data = _typing.UnsetData
253+
return self
219254

220255
def load(self) -> Self:
221256
"""Load data into the data stream.
@@ -239,7 +274,10 @@ def load(self) -> Self:
239274
print(f"Loaded {len(df)} rows")
240275
```
241276
"""
242-
self._data = self.read()
277+
try:
278+
self._data = self.read()
279+
except Exception as e: # pylint: disable=broad-except
280+
self._data = _typing.ErrorOnLoad(self, exception=e)
243281
return self
244282

245283
def __str__(self):
@@ -266,9 +304,27 @@ def __iter__(self) -> Generator["DataStream", None, None]:
266304
Yields:
267305
DataStream: Child data streams (none for base DataStream).
268306
"""
269-
yield
307+
return
308+
yield # This line is unreachable but needed for the generator type
309+
310+
def collect_errors(self) -> List[_typing.ErrorOnLoad]:
311+
"""Collect all errors from this stream and its children.
270312
271-
def load_all(self, strict: bool = False) -> list[tuple["DataStream", Exception], None, None]:
313+
Performs a depth-first traversal to gather all ErrorOnLoad instances.
314+
315+
Returns:
316+
List[ErrorOnLoad]: List of all errors raised on load encountered in the hierarchy.
317+
"""
318+
errors = []
319+
if self.has_error:
320+
errors.append(cast(_typing.ErrorOnLoad, self._data))
321+
for stream in self:
322+
if stream is None:
323+
continue
324+
errors.extend(stream.collect_errors())
325+
return errors
326+
327+
def load_all(self, strict: bool = False) -> Self:
272328
"""Recursively load this data stream and all child streams.
273329
274330
Performs depth-first traversal to load all streams in the hierarchy.
@@ -293,17 +349,13 @@ def load_all(self, strict: bool = False) -> list[tuple["DataStream", Exception],
293349
```
294350
"""
295351
self.load()
296-
exceptions = []
297352
for stream in self:
298353
if stream is None:
299354
continue
300-
try:
301-
exceptions += stream.load_all(strict=strict)
302-
except Exception as e:
303-
if strict:
304-
raise e
305-
exceptions.append((stream, e))
306-
return exceptions
355+
stream.load_all(strict=strict)
356+
if stream.has_error and strict:
357+
cast(_typing.ErrorOnLoad, stream.data).raise_from_error()
358+
return self
307359

308360

309361
TDataStream = TypeVar("TDataStream", bound=DataStream[Any, Any])
@@ -411,7 +463,7 @@ def at(self) -> _At[TDataStream]:
411463
return self._at
412464

413465
@override
414-
def load(self):
466+
def load(self) -> Self:
415467
"""Load data for this collection.
416468
417469
Overrides the base method to add validation that loaded data is a list of DataStreams.

0 commit comments

Comments
 (0)