Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
live
logging
nexus
normalization
streaming
time_of_flight
ui
Expand Down
4 changes: 2 additions & 2 deletions src/ess/reduce/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import importlib.metadata

from . import nexus, time_of_flight, uncertainty
from . import nexus, normalization, time_of_flight, uncertainty

try:
__version__ = importlib.metadata.version("essreduce")
Expand All @@ -13,4 +13,4 @@

del importlib

__all__ = ["nexus", "time_of_flight", "uncertainty"]
__all__ = ["nexus", "normalization", "time_of_flight", "uncertainty"]
200 changes: 200 additions & 0 deletions src/ess/reduce/normalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
"""Normalization routines for neutron data reduction."""

import functools

import scipp as sc

from .uncertainty import UncertaintyBroadcastMode, broadcast_uncertainties


def normalize_by_monitor_histogram(
    detector: sc.DataArray,
    *,
    monitor: sc.DataArray,
    uncertainty_broadcast_mode: UncertaintyBroadcastMode,
) -> sc.DataArray:
    """Normalize detector data by a normalized histogrammed monitor.

    This normalization accounts for both the (wavelength) profile of the incident beam
    and the integrated neutron flux, meaning measurement duration and source strength.

    - For *event* detectors, the monitor values are mapped to the detector
      using :func:`scipp.lookup`. That is, for detector event :math:`d_i`,
      :math:`m_i` is the monitor bin value at the same coordinate.
    - For *histogram* detectors, the monitor is rebinned to the detector
      binning using :func:`scipp.rebin`. Thus, detector value :math:`d_i` and
      monitor value :math:`m_i` correspond to the same bin.

    In both cases, let :math:`x_i` be the lower bound of monitor bin :math:`i`
    and let :math:`\\Delta x_i = x_{i+1} - x_i` be the width of that bin.

    The detector is normalized according to

    .. math::

        d_i^\\text{Norm} = \\frac{d_i}{m_i} \\Delta x_i

    Parameters
    ----------
    detector:
        Input detector data.
        Must have a coordinate named ``monitor.dim``, that is, the single
        dimension name of the **monitor**.
    monitor:
        A histogrammed monitor.
        Must be one-dimensional and have a dimension coordinate, typically "wavelength".
    uncertainty_broadcast_mode:
        Choose how uncertainties of the monitor are broadcast to the sample data.

    Returns
    -------
    :
        ``detector`` normalized by ``monitor``.
        If the monitor has masks or contains non-finite values, the output has a mask
        called '_monitor_mask' constructed from the monitor masks and non-finite values.

    See also
    --------
    normalize_by_monitor_integrated:
        Normalize by an integrated monitor.
    """
    _check_monitor_range_contains_detector(monitor=monitor, detector=detector)

    dim = monitor.dim

    if detector.bins is None:
        # Histogram detector: align the monitor with the detector binning so that
        # detector bin i and monitor bin i cover the same coordinate range.
        monitor = monitor.rebin({dim: detector.coords[dim]})
    detector = _mask_detector_for_norm(detector=detector, monitor=monitor)
    coord = monitor.coords[dim]
    # Bin widths Delta x_i; carry the monitor masks so that masked monitor bins
    # stay masked in the normalization term.
    delta_w = sc.DataArray(coord[1:] - coord[:-1], masks=monitor.masks)
    norm = broadcast_uncertainties(
        monitor / delta_w, prototype=detector, mode=uncertainty_broadcast_mode
    )

    if detector.bins is None:
        # NOTE(review): `norm` already has the detector binning here because the
        # monitor was rebinned above, so this rebin should be a no-op — confirm.
        return detector / norm.rebin({dim: detector.coords[dim]})
    # Event detector: look up the monitor bin value at each event's coordinate.
    return detector.bins / sc.lookup(norm, dim=dim)


def normalize_by_monitor_integrated(
    detector: sc.DataArray,
    *,
    monitor: sc.DataArray,
    uncertainty_broadcast_mode: UncertaintyBroadcastMode,
) -> sc.DataArray:
    """Normalize detector data by an integrated monitor.

    This normalization accounts only for the integrated neutron flux,
    meaning measurement duration and source strength.
    It does *not* account for the (wavelength) profile of the incident beam.
    For that, see :func:`normalize_by_monitor_histogram`.

    Let :math:`d_i` be a detector event or the counts in a detector bin.
    The normalized detector is

    .. math::

        d_i^\\text{Norm} = \\frac{d_i}{\\sum_j\\, m_j}

    where :math:`m_j` is the monitor counts in bin :math:`j`.
    Note that this is not a true integral but only a sum over monitor events.

    The result depends on the range of the monitor but not its
    binning within that range.

    Parameters
    ----------
    detector:
        Input detector data.
    monitor:
        A histogrammed monitor.
        Must be one-dimensional and have a dimension coordinate, typically "wavelength".
    uncertainty_broadcast_mode:
        Choose how uncertainties of the monitor are broadcast to the sample data.

    Returns
    -------
    :
        `detector` normalized by a monitor.
        If the monitor has masks or contains non-finite values, the output has a mask
        called '_monitor_mask' constructed from the monitor masks and non-finite values.

    See also
    --------
    normalize_by_monitor_histogram:
        Normalize by a monitor histogram.
    """
    _check_monitor_range_contains_detector(monitor=monitor, detector=detector)
    detector = _mask_detector_for_norm(detector=detector, monitor=monitor)
    if (mask := _monitor_mask(monitor)) is not None:
        # Exclude masked and non-finite monitor bins from the integral. The
        # corresponding detector regions were masked above, so those bins must
        # not contribute to the normalization of the remaining data either
        # (and a single NaN bin would otherwise poison the entire norm).
        norm = sc.where(mask, sc.zeros_like(monitor.data), monitor.data).sum()
    else:
        norm = monitor.data.sum()
    norm = broadcast_uncertainties(
        norm, prototype=detector, mode=uncertainty_broadcast_mode
    )
    return detector / norm


def _check_monitor_range_contains_detector(
    *, monitor: sc.DataArray, detector: sc.DataArray
) -> None:
    """Raise if the monitor's coordinate range does not cover the detector's.

    Parameters
    ----------
    monitor:
        One-dimensional histogrammed monitor with a bin-edge dimension coordinate.
    detector:
        Detector data with either a bin coordinate or an event coordinate
        for the monitor's dimension.

    Raises
    ------
    scipp.CoordError
        If the monitor coordinate is not bin-edges, or the detector has no
        coordinate for the monitor's dimension.
    ValueError
        If the monitor range is smaller than the detector range.
    """
    dim = monitor.dim
    if not monitor.coords.is_edges(dim):
        raise sc.CoordError(
            f"Monitor coordinate '{dim}' must be bin-edges to integrate the monitor."
        )

    # Prefer a bin coord over an event coord because this makes the behavior for binned
    # and histogrammed data consistent. If we used an event coord, we might allow a
    # monitor range that is less than the detector bins which is fine for the events,
    # but would be wrong if the detector was subsequently histogrammed.
    if (det_coord := detector.coords.get(dim)) is not None:
        # Bin-edge coord: range spans from the smallest lower edge to the
        # largest upper edge (nanmin/nanmax skip non-finite edges).
        lo = det_coord[dim, :-1].nanmin()
        hi = det_coord[dim, 1:].nanmax()
    elif detector.bins is not None and (
        det_coord := detector.bins.coords.get(dim)
    ) is not None:
        # Event coord: range spans the events themselves.
        # The `detector.bins is not None` guard avoids an AttributeError for
        # histogrammed detectors that lack the coordinate entirely.
        lo = det_coord.nanmin()
        hi = det_coord.nanmax()
    else:
        raise sc.CoordError(
            f"Missing '{dim}' coordinate in detector for monitor normalization."
        )

    if monitor.coords[dim].min() > lo or monitor.coords[dim].max() < hi:
        raise ValueError(
            f"Cannot normalize by monitor: The {dim} range of the monitor "
            f"({monitor.coords[dim].min():c} to {monitor.coords[dim].max():c}) "
            f"is smaller than the range of the detector ({lo:c} to {hi:c})."
        )


def _mask_detector_for_norm(
    *, detector: sc.DataArray, monitor: sc.DataArray
) -> sc.DataArray:
    """Return the detector with a mask derived from the monitor.

    For performance, the monitor mask is applied at the level of detector
    bins rather than individual events. This can mask more events than
    strictly necessary compared to an event-based mask.
    """
    combined = _monitor_mask(monitor)
    if combined is None:
        return detector

    dim = monitor.dim
    # Reshape the mask onto the detector binning via rebin: any detector bin
    # that overlaps a masked monitor bin picks up a nonzero value and is masked.
    mask_array = sc.DataArray(combined, coords={dim: monitor.coords[dim]})
    rebinned = mask_array.rebin({dim: detector.coords[dim]})
    detector_mask = rebinned.data != sc.scalar(0, unit=None)
    return detector.assign_masks({"_monitor_mask": detector_mask})


def _monitor_mask(monitor: sc.DataArray) -> sc.Variable | None:
    """Combine all monitor masks and a mask of non-finite values.

    Returns ``None`` when the monitor has no masks and all values are finite.
    """
    combined: sc.Variable | None = None
    for mask in monitor.masks.values():
        combined = mask if combined is None else sc.logical_or(combined, mask)

    finite = sc.isfinite(monitor.data)
    if not finite.all():
        bad = ~finite
        combined = bad if combined is None else sc.logical_or(combined, bad)

    return combined
Loading
Loading