Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[flake8]
ignore = E203, E266, E501, W503, F403, F401, F405
ignore = E203, E266, E501, W503, F403, F401, F405, F821
max-line-length = 120
max-complexity = 18
select = C,E,F,W,B,B950
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ Redistribution and use in source and binary forms, with or without modification,

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ https://BAMresearch.github.io/modacor
Development
===========

For coding contributions, we strongly recommend:
- using flake8 and/or black for consistent formatting.
For coding contributions, we strongly recommend:
- using flake8 and/or black for consistent formatting.
- writing tests for every added functionality -> towards test-driven coding practices.

To run all the tests run::
Expand Down
6 changes: 1 addition & 5 deletions ci/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,7 @@ def main():
cov_report_path=project_meta["tool"]["coverage"]["report"]["path"],
# Python version to use for general tasks: docs (when tox did not set one)
py_ver=".".join(sys.version.split(".")[:2]),
pypi_token=(
"_".join(pypi_host + ["token"]).upper()
if len(pypi_host)
else "TEST_PYPI_TOKEN"
),
pypi_token=("_".join(pypi_host + ["token"]).upper() if len(pypi_host) else "TEST_PYPI_TOKEN"),
pypi_repo="".join(pypi_host) if len(pypi_host) else "testpypi",
)
)
Expand Down
4 changes: 1 addition & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@
release = version
commit_id = None
try:
commit_id = (
subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip().decode("ascii")
)
commit_id = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip().decode("ascii")
except subprocess.CalledProcessError as e:
print(e)

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,12 @@ exclude_commit_patterns = ["chore", ".*\\bGHA\\b.*", ".*\\b[gG][hH] actions?\\b.
upload_to_vcs_release = false

[tool.black]
line-length = 100
line-length = 120
preview = true

[tool.isort]
profile = "black"
line_length = 100
line_length = 120
group_by_package = true
known_first_party = "modacor"
ensure_newline_before_comments = true
Expand Down
21 changes: 7 additions & 14 deletions src/modacor/dataclasses/basedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ def validate_rank_of_data(instance, attribute, value):
# This assumes that signal is provided and is a valid numpy array.
if instance.signal is not None and value > instance.signal.ndim:
raise ValueError(
f"{attribute.name} ({value}) cannot exceed the dimensionality of signal "
f"(ndim={instance.signal.ndim})."
f"{attribute.name} ({value}) cannot exceed the dimensionality of signal (ndim={instance.signal.ndim})."
)


Expand All @@ -48,9 +47,7 @@ class BaseData:
rank_of_data: int = field(factory=int, validator=[v.instance_of(int), validate_rank_of_data])

# Scalers to put on the denominator, separated from the array for distinct uncertainty
normalization: Optional[np.ndarray] = field(
default=None, validator=v.optional(v.instance_of(np.ndarray))
)
normalization: Optional[np.ndarray] = field(default=None, validator=v.optional(v.instance_of(np.ndarray)))
normalization_factor: float = field(default=1.0, validator=v.instance_of(float))
normalization_factor_variance: float = field(default=0.0, validator=v.instance_of(float))
# Unit information using Pint units - required input (ingest, internal, and display)
Expand All @@ -66,11 +63,14 @@ class BaseData:
)
# array with some normalization (exposure time, solid-angle ....)

@property
def shape(self):
return self.signal.shape

def __attrs_post_init__(self):
if self.normalization is None:
self.normalization = np.ones(self.signal.shape)
self.normalization = np.ones(self.shape)

@property
def mean(self) -> np.ndarray:
"""
Returns the signal array with the normalization applied.
Expand All @@ -85,13 +85,6 @@ def std(self, kind) -> np.ndarray:
"""
return np.sqrt(self.variances[kind] / self.normalization)

def sem(self, kind) -> np.ndarray:
"""
Returns the uncertainties, i.e. standard deviation
The result is cast to internal units.
"""
return np.sqrt(self.variances[kind]) / self.normalization

@property
def _unit_scale(self, display_units) -> float:
return (1 * self.internal_units).to(display_units).magnitude
Expand Down
6 changes: 1 addition & 5 deletions src/modacor/dataclasses/integrated_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ class IntegratedData(BaseData):
\left( \frac{\sigma_A^2}{\mu_A^2} + \frac{\sigma_B^2}{\mu_B^2} \right)
```
"""

average: np.ndarray = field(factory=np.ndarray, validator=[v.instance_of(np.ndarray)])
std: Dict[str, np.ndarray] = field(factory=dict, validator=[v.instance_of(dict)])
sem: Dict[str, np.ndarray] = field(factory=dict, validator=[v.instance_of(dict)])
# Core data array stored as an xarray DataArray
Expand All @@ -36,6 +34,4 @@ class IntegratedData(BaseData):

# array with some normalization (exposure time, solid-angle ....)
sum_normalization: np.ndarray = field(factory=np.ndarray, validator=[v.instance_of(np.ndarray)])
sum_normalization_squared: np.ndarray = field(
factory=np.ndarray, validator=[v.instance_of(np.ndarray)]
)
sum_normalization_squared: np.ndarray = field(factory=np.ndarray, validator=[v.instance_of(np.ndarray)])
4 changes: 1 addition & 3 deletions src/modacor/dataclasses/process_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,7 @@ class ProcessStep:

# a message handler, supporting logging, warnings, errors, etc. emitted by the process
# during execution
message_handler: MessageHandler = field(
default=MessageHandler(), validator=v.instance_of(MessageHandler)
)
message_handler: MessageHandler = field(default=MessageHandler(), validator=v.instance_of(MessageHandler))

# internal variables:
__prepared: bool = field(default=False, validator=v.instance_of(bool))
Expand Down
8 changes: 2 additions & 6 deletions src/modacor/dataclasses/process_step_describer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,13 @@ def validate_required_data_keys(instance, attribute, value):
@define
class ProcessStepDescriber:
calling_name: str = field() # short name to identify the calling process for the UI
calling_id: str = (
field()
) # not sure what we were planning here. some UID perhaps? difference with calling_module
calling_id: str = field() # not sure what we were planning here. some UID perhaps? difference with calling_module
calling_module_path: Path = field(
validator=v.instance_of(Path)
) # partial path to the module from src/modacor/modules onwards
calling_version: str = field() # module version being executed
required_data_keys: list[str] = field(factory=list) # list of data keys required by the process
required_arguments: list[str] = field(
factory=list
) # list of argument key-val combos required by the process
required_arguments: list[str] = field(factory=list) # list of argument key-val combos required by the process
calling_arguments: dict[str, Any] = field(factory=dict, validator=validate_required_keys)
works_on: dict[str, list] = field(
factory=dict, validator=v.instance_of(dict)
Expand Down
4 changes: 1 addition & 3 deletions src/modacor/io/io_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ def register_as_io_source(cls):
raise TypeError("The class must be a subclass of IoSource to be registered.")
type_ref = getattr(cls, "type_reference", None)
if not isinstance(type_ref, str):
raise AttributeError(
"The class must have a class attribute 'type_reference' of type string."
)
raise AttributeError("The class must have a class attribute 'type_reference' of type string.")
if type_ref in IoRegistry:
raise ValueError(f"Class with type_reference '{type_ref}' is already registered.")
IoRegistry[type_ref] = cls
Expand Down
93 changes: 67 additions & 26 deletions src/modacor/modules/base_modules/azimuthal_integration.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
from __future__ import annotations

__author__ = "Jerome Kieffer"
__author__ = "Jérôme Kieffer"
__copyright__ = "MoDaCor team"
__license__ = "BSD3"
__date__ = "21/05/2025"
__date__ = "23/05/2025"

import warnings
from pathlib import Path

import numpy as np

# import pint
from scipy.sparse import csc_matrix

from ...dataclasses.integrated_data import IntegratedData
from ...dataclasses.process_step import ProcessStep
from ...dataclasses.process_step_describer import ProcessStepDescriber
from ...dataclasses.validators import check_data

# import pint
# from scipy.sparse import csc_matrix


class AzimuthalIntegration(ProcessStep):
"""
Expand All @@ -30,13 +30,11 @@ class AzimuthalIntegration(ProcessStep):
calling_module_path=Path(__file__),
calling_version="0.0.1",
required_data_keys=["signal"],
works_on={"signal": ["raw_data", "variances", "normalization", "normalization_factor"]},
step_keywords=["average"],
step_doc="Add azimuthal integration date with variance propagated",
works_on={"data": ["signal", "variances", "normalization"]},
step_keywords=["azimuthal integration"],
step_doc="Add azimuthal integration data with variance propagated",
step_reference="DOI 10.1107/S1600576724011038",
step_note=(
"This is a simple Azimuthal integration step based on sparse matrix multiplication"
),
step_note="This is a simple Azimuthal integration step based on sparse matrix multiplication",
)

def __attrs_post_init__(self):
Expand All @@ -47,27 +45,70 @@ def can_apply(self) -> bool:
"""
Check if the process can be applied to the given data.
"""
return check_data(self.bundle, "Signal", None, self.message_handler)

def apply(self, apply_scalers, **kwargs):
source = self.bundle["signal"]
signal = source.signal
normalization = source.normalization
integrated = self.bundle["integrated"] = IntegratedData(
sum_signal=self.sparse.dot(signal),
sum_normalization=self.sparse.dot(normalization),
sum_normalization_squared=self.sparse_squared.dot(normalization * normalization),
return check_data(self.bundle, "signal", None, self.message_handler)

def _build_sparse(self, name, npt, range_=None):
"""Method which build the two sparse arrays from the name
of the array in the databundle

:param name: name of the "Q" dataset in the databundle
:param npt: number of points expected in the histogram
:param range_: 2-list of the lower and upper bound in the Q-range
:return: the sparse matrix
"""
positions = self.data[name].ravel()
if range_ is None:
range_ = [positions.min(), positions.max()]
# increase slightly the range to include the upper bound pixel
range_ = [range_[0], range_[1] * (1.0 + np.finfo("float32").eps)]
bin_boundaries = np.histogram(positions, npt, range=range_)[1]
row = np.digitize(positions, bin_boundaries) - 1
size = row.size
col = np.arange(size)
dat = np.ones(size)
self.sparse = csc_matrix(dat, (row, col), shape=(npt, positions.size))
self.sparse_squared = self.sparse * self.sparse # actually 1*1 == 1
self.bin_centers = 0.5 * (bin_boundaries[1:] + bin_boundaries[:-1])
return self.sparse

def prepare(self):
self._build_sparse(**self.configuration)

def calculate(self, data: DataBundle, dataset="image", **kwargs: Any):
# work around for `prepare` not being called:
if "sparse" not in dir(self):
self.prepare()

source = data[dataset]
signal_img = source.signal.ravel()
normalization_img = source.normalization.ravel()

integrated = IntegratedData(
sum_signal=self.sparse.dot(signal_img),
sum_normalization=self.sparse.dot(normalization_img),
sum_normalization_squared=self.sparse_squared.dot(normalization_img * normalization_img),
normalization_factor=source.normalization_factor,
normalization_factor_variance=source.normalization_factor_variance,
sem={},
std={},
)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
integrated.mean = integrated.sum_signal / integrated.sum_normalization
integrated.signal = integrated.sum_signal / integrated.sum_normalization
integrated.normalization = np.ones_like(integrated.sum_signal)

for key, var in source.variances.items():
integrated.sum_variance[key] = self.sparse_squared.dot(var)
integrated.std = np.sqrt(integrated.sum_signal) / integrated.sum_normalization
integrated.sem = np.sqrt(
integrated.sum_signal / integrated.sum_normalization_squared
)
integrated.sem[key] = np.sqrt(integrated.sum_signal) / integrated.sum_normalization
integrated.std[key] = np.sqrt(integrated.sum_signal / integrated.sum_normalization_squared)
integrated.variance[key] = integrated.sum_variance[key] / integrated.sum_normalization**2

# now create the variance along an azimuthal ring
avg_img = self._sparse.T.dot(integrated.signal) # backproject the average value to the image
delta = np.divide(signal_img, normalization_img, where=normalization_img != 0) - avg_img
sum_var = self.sparse_squared.dot((delta * normalization_img) ** 2)
integrated.sum_variance["azim"] = sum_var
integrated.sem["azim"] = np.sqrt(sum_var) / integrated.sum_normalization
integrated.std["azim"] = np.sqrt(sum_var / integrated.sum_normalization_squared)
integrated.variance["azim"] = sum_var / (integrated.sum_normalization**2)
return integrated
58 changes: 58 additions & 0 deletions src/modacor/modules/base_modules/poisson_uncertainty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# src/modacor/modules/base_modules/poisson_uncertainty.py
# -*- coding: utf-8 -*-

from __future__ import annotations

from pathlib import Path

import pint

# import numpy as np
from ...dataclasses.process_step import ProcessStep
from ...dataclasses.process_step_describer import ProcessStepDescriber
from ...dataclasses.validators import check_data_element_and_units


class PoissonUncertainty(ProcessStep):
    """
    A process step that estimates Poisson (counting-statistics) uncertainty
    for a signal expressed in counts.

    This class is designed to be used as a processing step in the Modacor
    framework. The Poisson estimate is a lower bound: the uncertainty of a
    counting measurement cannot be smaller than sqrt(counts).
    """

    # Static description of the step (identity, requirements, provenance info)
    # consumed by the framework / UI.
    documentation = ProcessStepDescriber(
        calling_name="Poisson Uncertainty estimator",
        calling_id="PoissonUncertainty",
        calling_module_path=Path("src/modacor/modules/base_modules/poisson_uncertainty.py"),
        calling_version="0.1",
        required_data_keys=["Signal"],
        works_on={"Signal": ["internal_data", "uncertainties"]},
        step_keywords=["uncertainty", "poisson", "error", "estimation", "counting statistics"],
        step_doc="Adds the Poisson uncertainty on data if the internal_data is in units of counts",
        step_reference="DOI 10.1088/0953-8984/25/38/383201",
        step_note=(
            "This is a simple Poisson uncertainty calculator, the uncertainty of a measurement"
            " cannot be lower than this"
        ),
    )

    def __attrs_post_init__(self):
        # BUGFIX: super() returns a bound proxy, so the parent hook must be
        # called without an explicit `self` — passing it would supply `self`
        # twice and raise a TypeError.
        super().__attrs_post_init__()
        self.documentation.calling_arguments = self.kwargs

    def can_apply(self) -> bool:
        """
        Check if the process can be applied to the given data.

        Returns True when the "Signal" element exists and carries units of
        counts (delegated to check_data_element_and_units).
        """
        return check_data_element_and_units(self.data, "Signal", pint.Unit("counts"), self.message_handler)

    def apply(self, **kwargs):
        """
        Record the Poisson-uncertainty step on the "Signal" data element.

        NOTE(review): the actual uncertainty computation below is still
        commented out; currently this only appends the origin tag and the
        step's provenance record — confirm whether that is intended.
        """
        # intensity_object: BaseData = self.kwargs["Signal"]
        # self.start() # this timing doesn't make a lot of sense with dask delayed
        # self.data.data["Signal"].uncertainties += [
        #     dask.delayed(
        #         da.clip(self.data.data["Signal"].internal_data, 1, da.inf)**0.5
        #     )
        # ]
        self.data.data["Signal"].uncertainties_origins += ["PoissonUncertainty"]
        self.data.provenance += [self.documentation]  # should be enough to recreate?
        # self.stop()
Loading
Loading