Skip to content

Commit fb4f7b4

Browse files
authored
Merge branch 'main' into pipeline_with_processsteps
2 parents 4e4eeea + 09def97 commit fb4f7b4

25 files changed

+649
-132
lines changed

.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[flake8]
2-
ignore = E203, E266, E501, W503, F403, F401, F405
2+
ignore = E203, E266, E501, W503, F403, F401, F405, F821
33
max-line-length = 120
44
max-complexity = 18
55
select = C,E,F,W,B,B950

LICENSE.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ Redistribution and use in source and binary forms, with or without modification,
88

99
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
1010

11-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ https://BAMresearch.github.io/modacor
6666
Development
6767
===========
6868

69-
For coding contributions, we strongly recommend:
70-
- using flake8 and/or black for consistent formatting.
69+
For coding contributions, we strongly recommend:
70+
- using flake8 and/or black for consistent formatting.
7171
- writing tests for every added functionality -> towards test-driven coding practices.
7272

7373
To run all the tests run::

ci/update.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,7 @@ def main():
102102
cov_report_path=project_meta["tool"]["coverage"]["report"]["path"],
103103
# Python version to use for general tasks: docs (when tox did not set one)
104104
py_ver=".".join(sys.version.split(".")[:2]),
105-
pypi_token=(
106-
"_".join(pypi_host + ["token"]).upper()
107-
if len(pypi_host)
108-
else "TEST_PYPI_TOKEN"
109-
),
105+
pypi_token=("_".join(pypi_host + ["token"]).upper() if len(pypi_host) else "TEST_PYPI_TOKEN"),
110106
pypi_repo="".join(pypi_host) if len(pypi_host) else "testpypi",
111107
)
112108
)

docs/conf.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@
3636
release = version
3737
commit_id = None
3838
try:
39-
commit_id = (
40-
subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip().decode("ascii")
41-
)
39+
commit_id = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip().decode("ascii")
4240
except subprocess.CalledProcessError as e:
4341
print(e)
4442

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,12 @@ exclude_commit_patterns = ["chore", ".*\\bGHA\\b.*", ".*\\b[gG][hH] actions?\\b.
9999
upload_to_vcs_release = false
100100

101101
[tool.black]
102-
line-length = 100
102+
line-length = 120
103103
preview = true
104104

105105
[tool.isort]
106106
profile = "black"
107-
line_length = 100
107+
line_length = 120
108108
group_by_package = true
109109
known_first_party = "modacor"
110110
ensure_newline_before_comments = true

src/modacor/dataclasses/basedata.py

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
__all__ = ["BaseData"]
2+
13
# import tiled
24
# import tiled.client
35
import logging
46
from typing import Dict, List, Optional, Self
57

68
import numpy as np
7-
import pint
89
from attrs import define, field
910
from attrs import validators as v
1011

12+
from modacor import ureg
13+
1114
logger = logging.getLogger(__name__)
1215

1316

@@ -20,8 +23,7 @@ def validate_rank_of_data(instance, attribute, value):
2023
# This assumes that signal is provided and is a valid numpy array.
2124
if instance.signal is not None and value > instance.signal.ndim:
2225
raise ValueError(
23-
f"{attribute.name} ({value}) cannot exceed the dimensionality of signal "
24-
f"(ndim={instance.signal.ndim})."
26+
f"{attribute.name} ({value}) cannot exceed the dimensionality of signal (ndim={instance.signal.ndim})."
2527
)
2628

2729

@@ -32,13 +34,8 @@ class BaseData:
3234
It is designed to be used as a base class for more specialized data classes.
3335
"""
3436

35-
# Unit information using Pint units - required input (ingest, internal, and display)
36-
ingest_units: pint.Unit = field(validator=v.instance_of(pint.Unit))
37-
internal_units: pint.Unit = field(validator=v.instance_of(pint.Unit))
38-
display_units: pint.Unit = field(validator=v.instance_of(pint.Unit))
39-
4037
# Core data array stored as an xarray DataArray
41-
signal: np.ndarray = field(factory=np.ndarray, validator=[v.instance_of(np.ndarray)])
38+
signal: np.ndarray = field(default=np.array(()), validator=[v.instance_of(np.ndarray)])
4239

4340
# Dict of variances represented as xarray DataArray objects; defaulting to an empty dict
4441
variances: Dict[str, np.ndarray] = field(factory=dict, validator=[v.instance_of(dict)])
@@ -50,24 +47,30 @@ class BaseData:
5047
rank_of_data: int = field(factory=int, validator=[v.instance_of(int), validate_rank_of_data])
5148

5249
# Scalers to put on the denominator, separated from the array for distinct uncertainty
53-
normalization: Optional[np.ndarray] = field(
54-
default=None, validator=v.optional(v.instance_of(np.ndarray))
55-
)
50+
normalization: Optional[np.ndarray] = field(default=None, validator=v.optional(v.instance_of(np.ndarray)))
5651
normalization_factor: float = field(default=1.0, validator=v.instance_of(float))
5752
normalization_factor_variance: float = field(default=0.0, validator=v.instance_of(float))
58-
normalization_units: pint.Unit = field(
59-
default=pint.Unit("dimensionless"), validator=v.instance_of(pint.Unit)
53+
# Unit information using Pint units - required input (ingest, internal, and display)
54+
signal_units: ureg.Unit = field(
55+
default=ureg.Unit("dimensionless"), validator=v.instance_of(ureg.Unit)
56+
)
57+
58+
normalization_units: ureg.Unit = field(
59+
default=ureg.Unit("dimensionless"), validator=v.instance_of(ureg.Unit)
6060
)
61-
normalization_factor_units: pint.Unit = field(
62-
default=pint.Unit("dimensionless"), validator=v.instance_of(pint.Unit)
61+
normalization_factor_units: ureg.Unit = field(
62+
default=ureg.Unit("dimensionless"), validator=v.instance_of(ureg.Unit)
6363
)
6464
# array with some normalization (exposure time, solid-angle ....)
6565

66+
@property
67+
def shape(self):
68+
return self.signal.shape
69+
6670
def __attrs_post_init__(self):
6771
if self.normalization is None:
68-
self.normalization = np.ones(self.signal.shape)
72+
self.normalization = np.ones(self.shape)
6973

70-
@property
7174
def mean(self) -> np.ndarray:
7275
"""
7376
Returns the signal array with the normalization applied.
@@ -82,13 +85,6 @@ def std(self, kind) -> np.ndarray:
8285
"""
8386
return np.sqrt(self.variances[kind] / self.normalization)
8487

85-
def sem(self, kind) -> np.ndarray:
86-
"""
87-
Returns the uncertainties, i.e. standard deviation
88-
The result is cast to internal units.
89-
"""
90-
return np.sqrt(self.variances[kind]) / self.normalization
91-
9288
@property
9389
def _unit_scale(self, display_units) -> float:
9490
return (1 * self.internal_units).to(display_units).magnitude

src/modacor/dataclasses/integrated_data.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ class IntegratedData(BaseData):
2424
\left( \frac{\sigma_A^2}{\mu_A^2} + \frac{\sigma_B^2}{\mu_B^2} \right)
2525
```
2626
"""
27-
28-
average: np.ndarray = field(factory=np.ndarray, validator=[v.instance_of(np.ndarray)])
2927
std: Dict[str, np.ndarray] = field(factory=dict, validator=[v.instance_of(dict)])
3028
sem: Dict[str, np.ndarray] = field(factory=dict, validator=[v.instance_of(dict)])
3129
# Core data array stored as an xarray DataArray
@@ -36,6 +34,4 @@ class IntegratedData(BaseData):
3634

3735
# array with some normalization (exposure time, solid-angle ....)
3836
sum_normalization: np.ndarray = field(factory=np.ndarray, validator=[v.instance_of(np.ndarray)])
39-
sum_normalization_squared: np.ndarray = field(
40-
factory=np.ndarray, validator=[v.instance_of(np.ndarray)]
41-
)
37+
sum_normalization_squared: np.ndarray = field(factory=np.ndarray, validator=[v.instance_of(np.ndarray)])

src/modacor/dataclasses/process_step.py

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,26 @@
11
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright 2025 MoDaCor Authors
3+
#
4+
# Redistribution and use in source and binary forms, with or without modification,
5+
# are permitted provided that the following conditions are met:
6+
# 1. Redistributions of source code must retain the above copyright notice, this
7+
# list of conditions and the following disclaimer.
8+
# 2. Redistributions in binary form must reproduce the above copyright notice,
9+
# this list of conditions and the following disclaimer in the documentation
10+
# and/or other materials provided with the distribution.
11+
# 3. Neither the name of the copyright holder nor the names of its contributors
12+
# may be used to endorse or promote products derived from this software without
13+
# specific prior written permission.
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
15+
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18+
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21+
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
224

325

426
__all__ = ["ProcessStep"]
@@ -9,7 +31,7 @@
931
from abc import abstractmethod
1032
from numbers import Integral
1133
from pathlib import Path
12-
from typing import Any
34+
from typing import Any, Iterable, Type
1335

1436
from attrs import define, field
1537
from attrs import validators as v
@@ -18,13 +40,31 @@
1840
from .databundle import DataBundle
1941
from .messagehandler import MessageHandler
2042
from .process_step_describer import ProcessStepDescriber
43+
from .processing_data import ProcessingData
2144
from .validators import is_list_of_ints
2245

2346

2447
@define
2548
class ProcessStep:
2649
"""A base class defining a processing step"""
2750

51+
# Class attributes for the process step
52+
CONFIG_KEYS = {
53+
"with_processing_keys": {
54+
"type": str,
55+
"allow_iterable": True,
56+
"allow_none": True,
57+
"default": None,
58+
},
59+
"output_processing_key": {
60+
"type": str,
61+
"allow_iterable": False,
62+
"allow_none": True,
63+
"default": None,
64+
},
65+
}
66+
67+
# The configuration keys for the process step instantiation
2868
io_sources: IoSources = field()
2969

3070
# class attribute for a machine-readable description of the process step
@@ -36,7 +76,10 @@ class ProcessStep:
3676
)
3777

3878
# dynamic instance configuration
39-
configuration: dict = field(factory=dict, validator=v.instance_of(dict))
79+
configuration: dict = field(
80+
factory=dict,
81+
validator=lambda inst, attrs, val: inst.is_process_step_dict,
82+
)
4083

4184
# flags and attributes for running the pipeline
4285
requires_steps: list[int] = field(factory=list, validator=is_list_of_ints)
@@ -48,12 +91,19 @@ class ProcessStep:
4891

4992
# a message handler, supporting logging, warnings, errors, etc. emitted by the process
5093
# during execution
51-
message_handler: MessageHandler = field(
52-
default=MessageHandler(), validator=v.instance_of(MessageHandler)
53-
)
94+
message_handler: MessageHandler = field(default=MessageHandler(), validator=v.instance_of(MessageHandler))
5495

5596
# internal variables:
5697
__prepared: bool = field(default=False, validator=v.instance_of(bool))
98+
processing_data: ProcessingData = field(
99+
default=None, validator=v.optional(v.instance_of(ProcessingData))
100+
)
101+
102+
def __attrs_post_init__(self):
103+
"""
104+
Post-initialization method to set up the process step.
105+
"""
106+
self.configuration = self.default_config()
57107

58108
# add hash function. equality can be checked
59109
def __hash__(self):
@@ -68,30 +118,24 @@ def prepare_execution(self):
68118
"""
69119
pass
70120

71-
def can_execute(self, input_field_names: list[str]) -> bool:
72-
"""
73-
Check if the process step can be executed
74-
75-
The default implementation always returns True and any ProcessStep
76-
that has dependency checks should override this method.
77-
"""
78-
return True
79-
80121
@abstractmethod
81-
def calculate(self, data: DataBundle, **kwargs: Any) -> dict[str, Any]:
122+
def calculate(self) -> dict[str, DataBundle]:
82123
"""Calculate the process step on the given data"""
83124
raise NotImplementedError("Subclasses must implement this method")
84125

85-
def execute(self, data: DataBundle, **kwargs: Any) -> DataBundle:
126+
def execute(self, data: ProcessingData) -> None:
86127
"""Execute the process step on the given data"""
128+
self.processing_data = data
87129
if not self.__prepared:
88130
self.prepare_execution()
89131
self.__prepared = True
90-
self.produced_outputs = self.calculate(data, **kwargs)
132+
self.produced_outputs = self.calculate()
91133
for _key, value in self.produced_outputs.items():
92-
data[_key] = value
134+
if _key in data:
135+
data[_key].update(value)
136+
else:
137+
data[_key] = value
93138
self.executed = True
94-
return data
95139

96140
def reset(self):
97141
"""Reset the process step to its initial state"""
@@ -106,3 +150,36 @@ def modify_config(self, key: str, value: Any):
106150
else:
107151
raise KeyError(f"Key {key} not found in configuration") # noqa
108152
self.__prepared = False
153+
154+
@classmethod
155+
def is_process_step_dict(cls, instance: Type | None, attribute: str | None, item: Any) -> bool:
156+
"""
157+
Check if the value is a dictionary with the correct keys and types.
158+
"""
159+
if not isinstance(item, dict):
160+
return False
161+
for _key, _value in item.items():
162+
if _key not in cls.CONFIG_KEYS:
163+
return False
164+
_config = cls.CONFIG_KEYS[_key]
165+
if _value is None:
166+
if _config["allow_none"]:
167+
continue
168+
return False
169+
if isinstance(_value, Iterable) and not isinstance(_value, str):
170+
if not (
171+
_config["allow_iterable"]
172+
and all([isinstance(_i, _config["type"]) for _i in _value])
173+
):
174+
return False
175+
continue
176+
if not isinstance(_value, _config["type"]):
177+
return False
178+
return True
179+
180+
@classmethod
181+
def default_config(cls) -> dict[str, Any]:
182+
"""
183+
Create an initial dictionary for the process step configuration.
184+
"""
185+
return {_k: _v["default"] for _k, _v in cls.CONFIG_KEYS.items()}

src/modacor/dataclasses/process_step_describer.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,13 @@ def validate_required_data_keys(instance, attribute, value):
3333
@define
3434
class ProcessStepDescriber:
3535
calling_name: str = field() # short name to identify the calling process for the UI
36-
calling_id: str = (
37-
field()
38-
) # not sure what we were planning here. some UID perhaps? difference with calling_module
36+
calling_id: str = field() # not sure what we were planning here. some UID perhaps? difference with calling_module
3937
calling_module_path: Path = field(
4038
validator=v.instance_of(Path)
4139
) # partial path to the module from src/modacor/modules onwards
4240
calling_version: str = field() # module version being executed
4341
required_data_keys: list[str] = field(factory=list) # list of data keys required by the process
44-
required_arguments: list[str] = field(
45-
factory=list
46-
) # list of argument key-val combos required by the process
42+
required_arguments: list[str] = field(factory=list) # list of argument key-val combos required by the process
4743
calling_arguments: dict[str, Any] = field(factory=dict, validator=validate_required_keys)
4844
works_on: dict[str, list] = field(
4945
factory=dict, validator=v.instance_of(dict)

0 commit comments

Comments
 (0)