diff --git a/src/pyhf/mixins.py b/src/pyhf/mixins.py index 0314188cc1..c390626748 100644 --- a/src/pyhf/mixins.py +++ b/src/pyhf/mixins.py @@ -3,6 +3,7 @@ import logging from typing import Any, Sequence +from pyhf import exceptions from pyhf.typing import Channel log = logging.getLogger(__name__) @@ -21,6 +22,10 @@ class _ChannelSummaryMixin: def __init__(self, *args: Any, **kwargs: Sequence[Channel]): channels = kwargs.pop('channels') super().__init__(*args, **kwargs) + + # check for duplicates + self._check_for_duplicates(channels) + self._channels: list[str] = [] self._samples: list[str] = [] self._modifiers: list[tuple[str, str]] = [] @@ -89,3 +94,45 @@ def channel_slices(self) -> dict[str, slice]: Dictionary mapping channel name to the bin slices in the model. """ return self._channel_slices + + def _check_for_duplicates(self, channels: Sequence[Channel]) -> None: + """ + Check for duplicate channels. + Check for duplicate samples within each channel. + Check for duplicate modifiers within each sample. + """ + channel_names = [channel['name'] for channel in channels] + if len(channel_names) != len(set(channel_names)): + duplicates = sorted( + set([f"'{x}'" for x in channel_names if channel_names.count(x) > 1]) + ) + raise exceptions.InvalidModel( + "Duplicate channels " + + ", ".join(duplicates) + + " found in the model. Rename one of them." + ) + for channel in channels: + sample_names = [samples['name'] for samples in channel['samples']] + if len(sample_names) != len(set(sample_names)): + duplicates = sorted( + set([f"'{x}'" for x in sample_names if sample_names.count(x) > 1]) + ) + raise exceptions.InvalidModel( + "Duplicate samples " + + ", ".join(duplicates) + + f" found in the channel '{channel['name']}'. Rename one of them." + ) + for sample in channel['samples']: + modifiers = [ + (modifier['name'], modifier['type']) + for modifier in sample['modifiers'] + ] + if len(modifiers) != len(set(modifiers)): + duplicates = sorted( + set([f"'{x[0]}'" for x in modifiers if modifiers.count(x) > 1]) + ) + raise exceptions.InvalidModel( + "Duplicate modifiers " + + ", ".join(duplicates) + + f" of the same type found in channel '{channel['name']}' and sample '{sample['name']}'. Rename one of them." + ) diff --git a/tests/test_schema.py b/tests/test_schema.py index 384fcf0276..a6a8a1c699 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -714,3 +714,18 @@ def test_schema_tensor_type_disallowed(mocker, backend): } with pytest.raises(pyhf.exceptions.InvalidSpecification): pyhf.schema.validate(spec, "model.json") + + +@pytest.mark.parametrize( + 'model_file', + [ + 'model_duplicate_channels.json', + 'model_duplicate_samples.json', + 'model_duplicate_modifiers.json', + ], +) +def test_schema_catch_duplicates(datadir, model_file): + with open(datadir.joinpath(model_file), encoding="utf-8") as spec_file: + model_spec = json.load(spec_file) + with pytest.raises(pyhf.exceptions.InvalidModel): + pyhf.Model(model_spec) diff --git a/tests/test_schema/model_duplicate_channels.json b/tests/test_schema/model_duplicate_channels.json new file mode 100644 index 0000000000..4db8824b92 --- /dev/null +++ b/tests/test_schema/model_duplicate_channels.json @@ -0,0 +1,50 @@ +{ + "channels": [ + { + "name": "singlechannel", + "samples": [ + { + "name": "signal", + "data": [10], + "modifiers": [ + {"name": "mu", "type": "normfactor", "data": null} + ] + }, + { + "name": "background", + "data": [15], + "modifiers": [ + { + "name": "uncorr_bkguncrt", + "type": "shapesys", + "data": [5] + } + ] + } + ] + }, + { + "name": "singlechannel", + "samples": [ + { + "name": "signal", + "data": [10], + "modifiers": [ + {"name": "mu", "type": "normfactor", "data": null} + ] + }, + { + "name": "background", + "data": [15], + "modifiers": [ + { + "name": "uncorr_bkguncrt", + "type": "shapesys", + "data": [5] + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/test_schema/model_duplicate_modifiers.json b/tests/test_schema/model_duplicate_modifiers.json new file mode 100644 index 0000000000..282d32dce9 --- /dev/null +++ b/tests/test_schema/model_duplicate_modifiers.json @@ -0,0 +1,28 @@ +{ + "channels": [ + { + "name": "singlechannel", + "samples": [ + { + "name": "signal", + "data": [10], + "modifiers": [ + {"name": "mu", "type": "normfactor", "data": null}, + {"name": "mu", "type": "normfactor", "data": null} + ] + }, + { + "name": "background", + "data": [15], + "modifiers": [ + { + "name": "uncorr_bkguncrt", + "type": "shapesys", + "data": [5] + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/test_schema/model_duplicate_samples.json b/tests/test_schema/model_duplicate_samples.json new file mode 100644 index 0000000000..8b3f3fec17 --- /dev/null +++ b/tests/test_schema/model_duplicate_samples.json @@ -0,0 +1,34 @@ +{ + "channels": [ + { + "name": "singlechannel", + "samples": [ + { + "name": "signal", + "data": [10], + "modifiers": [ + {"name": "mu", "type": "normfactor", "data": null} + ] + }, + { + "name": "signal", + "data": [10], + "modifiers": [ + {"name": "mu", "type": "normfactor", "data": null} + ] + }, + { + "name": "background", + "data": [15], + "modifiers": [ + { + "name": "uncorr_bkguncrt", + "type": "shapesys", + "data": [5] + } + ] + } + ] + } + ] +} \ No newline at end of file