From e712a28b4bca33d476692d855c0b791682205d07 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:45:15 +0200 Subject: [PATCH 01/10] search for custom attribute with open enum --- src/pynxtools/data/NXtest.nxdl.xml | 6 ++ src/pynxtools/dataconverter/helpers.py | 62 +++++++++++-------- src/pynxtools/dataconverter/validation.py | 32 +++++++++- tests/dataconverter/test_validation.py | 72 +++++++++++++++++++++-- 4 files changed, 139 insertions(+), 33 deletions(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index ee928e944..aeee79353 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -99,6 +99,12 @@ + + + + + + diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index a7600fca8..393201508 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -87,7 +87,8 @@ class ValidationProblem(Enum): UnitWithoutDocumentation = auto() InvalidUnit = auto() InvalidEnum = auto() - OpenEnumWithNewItem = auto() + OpenEnumWithCorrectNewItem = auto() + OpenEnumWithIncorrectNewItem = auto() MissingRequiredGroup = auto() MissingRequiredField = auto() MissingRequiredAttribute = auto() @@ -154,10 +155,21 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar logger.warning( f"The value at {path} should be one of the following: {value}." ) - elif log_type == ValidationProblem.OpenEnumWithNewItem: + elif log_type == ValidationProblem.OpenEnumWithCorrectNewItem: logger.info( - f"The value at {path} does not match with the enumerated items from the open enumeration: {value}." + f"The value '{args[0]}' at {path} does not match with the enumerated items from the open enumeration: {value}." ) + elif log_type == ValidationProblem.OpenEnumWithIncorrectNewItem: + actual_value, custom_attr = args + + log_text = f"The value '{actual_value}' at {path} does not match with the enumerated items from the open enumeration: {value}." + if custom_attr == "custom_missing": + log_text += f" When a different value is used, a boolean 'custom' attribute must be added." + logger.warning(log_text) + elif custom_attr == "custom_false": + log_text += f" When a different value is used, the boolean 'custom' attribute cannot be False." + logger.warning(log_text) + elif log_type == ValidationProblem.MissingRequiredGroup: logger.warning(f"The required group {path} hasn't been supplied.") elif log_type == ValidationProblem.MissingRequiredField: @@ -287,9 +299,9 @@ def collect_and_log( # info messages should not fail validation if log_type in ( ValidationProblem.UnitWithoutDocumentation, - ValidationProblem.OpenEnumWithNewItem, ValidationProblem.CompressionStrengthZero, ValidationProblem.MissingNXclass, + ValidationProblem.OpenEnumWithCorrectNewItem, ): if self.logging and message not in self.data["info"]: self._log(path, log_type, value, *args, **kwargs) @@ -804,9 +816,7 @@ def convert_int_to_float(value): return value -def is_valid_data_field( - value: Any, nxdl_type: str, nxdl_enum: list, nxdl_enum_open: bool, path: str -) -> Any: +def is_valid_data_field(value: Any, nxdl_type: str, path: str) -> Any: """Checks whether a given value is valid according to the type defined in the NXDL.""" def validate_data_value( @@ -843,25 +853,25 @@ def validate_data_value( path, ValidationProblem.InvalidDatetime, value ) - if nxdl_enum is not None: - if ( - isinstance(value, np.ndarray) - and isinstance(nxdl_enum, list) - and isinstance(nxdl_enum[0], list) - ): - enum_value = list(value) - else: - enum_value = value - - if enum_value not in nxdl_enum: - if nxdl_enum_open: - collector.collect_and_log( - path, ValidationProblem.OpenEnumWithNewItem, nxdl_enum - ) - else: - collector.collect_and_log( - path, ValidationProblem.InvalidEnum, nxdl_enum - ) + # if nxdl_enum is not None: + # if ( + # isinstance(value, np.ndarray) + # and isinstance(nxdl_enum, list) + # and isinstance(nxdl_enum[0], list) + # ): + # enum_value = list(value) + # else: + # enum_value = value + + # if enum_value not in nxdl_enum: + # if nxdl_enum_open: + # collector.collect_and_log( + # path, ValidationProblem.OpenEnumWithNewItem, nxdl_enum + # ) + # else: + # collector.collect_and_log( + # path, ValidationProblem.InvalidEnum, nxdl_enum + # ) return value diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index cf302df39..d58075b7d 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -1342,9 +1342,14 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): mapping[variant_path] = is_valid_data_field( keys[variant], node.dtype, + variant_path, + ) + is_valid_enum( + mapping[variant_path], node.items, node.open_enum, variant_path, + mapping, ) check_reserved_suffix(variant_path, keys) @@ -1410,9 +1415,14 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" ], node.dtype, + variant_path, + ) + is_valid_enum( + mapping[variant_path], node.items, node.open_enum, variant_path, + mapping, ) check_reserved_prefix( variant_path, get_definition(variant_path), "attribute" @@ -1641,7 +1651,16 @@ def is_documented(key: str, tree: NexusNode) -> bool: keys_to_remove.append(key) return False resolved_link[key] = is_valid_data_field( - resolved_link[key], node.dtype, node.items, node.open_enum, key + resolved_link[key], + node.dtype, + key, + ) + is_valid_enum( + resolved_link[key], + node.items, + node.open_enum, + key, + mapping, ) return True @@ -1656,7 +1675,16 @@ def is_documented(key: str, tree: NexusNode) -> bool: # Check general validity mapping[key] = is_valid_data_field( - mapping[key], node.dtype, node.items, node.open_enum, key + mapping[key], + node.dtype, + key, + ) + is_valid_enum( + mapping[key], + node.items, + node.open_enum, + key, + mapping, ) # Check main field exists for units diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 82e528af2..bae211505 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -875,16 +875,78 @@ def format_error_message(msg: str) -> str: ), pytest.param( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2", - "a very different type", + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2", + "a very different type", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@custom", + True, + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum", + "3rd option", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum_custom", + True, ), [ - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2 does not match with the " - "enumerated items from the open enumeration: ['1st type open', '2nd type open']." + "The value 'a very different type' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2 does not match " + "with the enumerated items from the open enumeration: ['1st type open', '2nd type open'].", + "The value '3rd option' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum " + "does not match with the enumerated items from the open enumeration: ['1st option', '2nd option'].", ], id="open-enum-with-new-item", ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2", + "a very different type", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@custom", + False, + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum", + "3rd option", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum_custom", + False, + ), + [ + "The value 'a very different type' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2 does not match " + "with the enumerated items from the open enumeration: ['1st type open', '2nd type open']. " + "When a different value is used, the boolean 'custom' attribute cannot be False.", + "The value '3rd option' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum " + "does not match with the enumerated items from the open enumeration: ['1st option', '2nd option']. " + "When a different value is used, the boolean 'custom' attribute cannot be False.", + ], + id="open-enum-with-new-item-custom-false", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2", + "a very different type", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum", + "3rd option", + ), + [ + "The value 'a very different type' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2 does not match " + "with the enumerated items from the open enumeration: ['1st type open', '2nd type open']. " + "When a different value is used, a boolean 'custom' attribute must be added.", + "The value '3rd option' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum " + "does not match with the enumerated items from the open enumeration: ['1st option', '2nd option']. " + "When a different value is used, a boolean 'custom' attribute must be added.", + ], + id="open-enum-with-new-item-custom-missing", + ), pytest.param( set_to_none_in_dict( TEMPLATE, "/ENTRY[my_entry]/optional_parent/required_child", "required" From c9fb43b0e6463499a26db0c3196449c4770bd67d Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:49:19 +0200 Subject: [PATCH 02/10] fix tests --- .../data/dataconverter/readers/example/testdata.json | 1 + tests/dataconverter/test_validation.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/data/dataconverter/readers/example/testdata.json b/tests/data/dataconverter/readers/example/testdata.json index bfeb4d1e8..7df903f89 100644 --- a/tests/data/dataconverter/readers/example/testdata.json +++ b/tests/data/dataconverter/readers/example/testdata.json @@ -16,6 +16,7 @@ "program_name": "Nexus Parser", "type": "2nd type", "type2": "2nd type", + "@attribute_with_open_enum":"1st option", "date_value": "2022-01-22T12:14:12.05018+00:00", "date_value_units": "", "required_child": 1, diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index bae211505..ac9189f2c 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1708,12 +1708,16 @@ def format_error_message(msg: str) -> str: ), pytest.param( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type", - "Wrong source type", + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type", + "Wrong source type", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type/@custom", + True, ), [ - "The value at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type does not match with the enumerated " + "The value 'Wrong source type' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/type does not match with the enumerated " "items from the open enumeration: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', 'Reactor Neutron Source', " "'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', 'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', " "'Optical Laser', 'Ion Source', 'UV Plasma Source', 'Metal Jet X-ray', 'Laser', 'Dye Laser', 'Broadband Tunable Light Source', " From a2f4b039d19db64f98e462342e96131b4878462c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 8 Aug 2025 16:34:19 +0200 Subject: [PATCH 03/10] use feature branch from pynxtools-xps --- .github/workflows/plugin_test.yaml | 2 +- src/pynxtools/dataconverter/helpers.py | 68 +++++++++----------------- tests/dataconverter/test_validation.py | 27 ++++++++++ 3 files changed, 50 insertions(+), 47 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 34913d5ce..5b6e6e4f2 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -42,7 +42,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-xps - branch: main + branch: custom-attr-for-open-enum tests_to_run: tests/. - plugin: pynxtools-xrd branch: main diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 393201508..d60391b44 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -87,8 +87,9 @@ class ValidationProblem(Enum): UnitWithoutDocumentation = auto() InvalidUnit = auto() InvalidEnum = auto() - OpenEnumWithCorrectNewItem = auto() - OpenEnumWithIncorrectNewItem = auto() + OpenEnumWithCustom = auto() + OpenEnumWithCustomFalse = auto() + OpenEnumWithMissingCustom = auto() MissingRequiredGroup = auto() MissingRequiredField = auto() MissingRequiredAttribute = auto() @@ -153,22 +154,22 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar elif log_type == ValidationProblem.InvalidEnum: logger.warning( - f"The value at {path} should be one of the following: {value}." + f"The value '{args[0]}' at {path} should be one of the following: {value}." ) - elif log_type == ValidationProblem.OpenEnumWithCorrectNewItem: + elif log_type == ValidationProblem.OpenEnumWithCustom: logger.info( f"The value '{args[0]}' at {path} does not match with the enumerated items from the open enumeration: {value}." ) - elif log_type == ValidationProblem.OpenEnumWithIncorrectNewItem: - actual_value, custom_attr = args - - log_text = f"The value '{actual_value}' at {path} does not match with the enumerated items from the open enumeration: {value}." - if custom_attr == "custom_missing": - log_text += f" When a different value is used, a boolean 'custom' attribute must be added." - logger.warning(log_text) - elif custom_attr == "custom_false": - log_text += f" When a different value is used, the boolean 'custom' attribute cannot be False." - logger.warning(log_text) + elif log_type == ValidationProblem.OpenEnumWithCustomFalse: + logger.warning( + f"The value '{args[0]}' at {path} does not match with the enumerated items from the open enumeration: {value}." + "When a different value is used, the boolean 'custom' attribute cannot be False." + ) + elif log_type == ValidationProblem.OpenEnumWithMissingCustom: + logger.info( + f"The value '{args[0]}' at {path} does not match with the enumerated items from the open enumeration: {value}. " + "When a different value is used, a boolean 'custom=True' attribute must be added. It was added here automatically." + ) elif log_type == ValidationProblem.MissingRequiredGroup: logger.warning(f"The required group {path} hasn't been supplied.") @@ -301,7 +302,8 @@ def collect_and_log( ValidationProblem.UnitWithoutDocumentation, ValidationProblem.CompressionStrengthZero, ValidationProblem.MissingNXclass, - ValidationProblem.OpenEnumWithCorrectNewItem, + ValidationProblem.OpenEnumWithCustom, + ValidationProblem.OpenEnumWithMissingCustom, ): if self.logging and message not in self.data["info"]: self._log(path, log_type, value, *args, **kwargs) @@ -819,10 +821,8 @@ def convert_int_to_float(value): def is_valid_data_field(value: Any, nxdl_type: str, path: str) -> Any: """Checks whether a given value is valid according to the type defined in the NXDL.""" - def validate_data_value( - value: Any, nxdl_type: str, nxdl_enum: list, nxdl_enum_open: bool, path: str - ) -> Any: - """Validate and possibly convert a primitive value according to NXDL type/enum rules.""" + def validate_data_value(value: Any, nxdl_type: str, path: str) -> Any: + """Validate and possibly convert a primitive value according to NXDL type rules.""" accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] original_value = value @@ -853,26 +853,6 @@ def validate_data_value( path, ValidationProblem.InvalidDatetime, value ) - # if nxdl_enum is not None: - # if ( - # isinstance(value, np.ndarray) - # and isinstance(nxdl_enum, list) - # and isinstance(nxdl_enum[0], list) - # ): - # enum_value = list(value) - # else: - # enum_value = value - - # if enum_value not in nxdl_enum: - # if nxdl_enum_open: - # collector.collect_and_log( - # path, ValidationProblem.OpenEnumWithNewItem, nxdl_enum - # ) - # else: - # collector.collect_and_log( - # path, ValidationProblem.InvalidEnum, nxdl_enum - # ) - return value if isinstance(value, dict) and set(value.keys()) == {"compress", "strength"}: @@ -888,18 +868,14 @@ def validate_data_value( path, ValidationProblem.InvalidCompressionStrength, value ) # In this case, we remove the compression. - return validate_data_value( - value["compress"], nxdl_type, nxdl_enum, nxdl_enum_open, path - ) + return validate_data_value(value["compress"], nxdl_type, path) # Apply standard validation to compressed value - value["compress"] = validate_data_value( - compressed_value, nxdl_type, nxdl_enum, nxdl_enum_open, path - ) + value["compress"] = validate_data_value(compressed_value, nxdl_type, path) return value - return validate_data_value(value, nxdl_type, nxdl_enum, nxdl_enum_open, path) + return validate_data_value(value, nxdl_type, path) def split_class_and_name_of(name: str) -> tuple[Optional[str], str]: diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index ac9189f2c..453ed0a59 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -899,6 +899,32 @@ def format_error_message(msg: str) -> str: ], id="open-enum-with-new-item", ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2", + {"compress": "a very different type", "strength": 2}, + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@custom", + True, + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum", + {"compress": "3rd option", "strength": 4}, + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum_custom", + True, + ), + [ + "The value 'a very different type' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2 does not match " + "with the enumerated items from the open enumeration: ['1st type open', '2nd type open'].", + "The value '3rd option' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum " + "does not match with the enumerated items from the open enumeration: ['1st option', '2nd option'].", + ], + id="open-enum-with-new-item-compressed", + ), pytest.param( alter_dict( alter_dict( @@ -2034,6 +2060,7 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): "field-with-illegal-unit", "baseclass-field-with-illegal-unit", "open-enum-with-new-item", + "open-enum-with-new-item-compressed", "baseclass-open-enum-with-new-item", "appdef-compressed-strength-0", ): From 2693161e77a3ad0bacb37b7ecea7eb9359c5c6d4 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 8 Aug 2025 16:44:39 +0200 Subject: [PATCH 04/10] reorganize error messages --- src/pynxtools/dataconverter/helpers.py | 4 ++-- tests/dataconverter/test_validation.py | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index d60391b44..d8957bb9d 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -154,7 +154,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar elif log_type == ValidationProblem.InvalidEnum: logger.warning( - f"The value '{args[0]}' at {path} should be one of the following: {value}." + f"The value {args[0]} at {path} should be one of the following: {value}." ) elif log_type == ValidationProblem.OpenEnumWithCustom: logger.info( @@ -162,7 +162,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar ) elif log_type == ValidationProblem.OpenEnumWithCustomFalse: logger.warning( - f"The value '{args[0]}' at {path} does not match with the enumerated items from the open enumeration: {value}." + f"The value '{args[0]}' at {path} does not match with the enumerated items from the open enumeration: {value}. " "When a different value is used, the boolean 'custom' attribute cannot be False." ) elif log_type == ValidationProblem.OpenEnumWithMissingCustom: diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 453ed0a59..acba436d3 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -867,9 +867,9 @@ def format_error_message(msg: str) -> str: TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type", "Wrong option" ), [ - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type should " - "be one of the following" - ": ['1st type', '2nd type', '3rd type', '4th type']." + "The value Wrong option at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type " + "should be one of the following: " + "['1st type', '2nd type', '3rd type', '4th type']." ], id="wrong-enum-choice", ), @@ -966,10 +966,12 @@ def format_error_message(msg: str) -> str: [ "The value 'a very different type' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2 does not match " "with the enumerated items from the open enumeration: ['1st type open', '2nd type open']. " - "When a different value is used, a boolean 'custom' attribute must be added.", + "When a different value is used, a boolean 'custom=True' attribute must be added. " + "It was added here automatically.", "The value '3rd option' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum " "does not match with the enumerated items from the open enumeration: ['1st option', '2nd option']. " - "When a different value is used, a boolean 'custom' attribute must be added.", + "When a different value is used, a boolean 'custom=True' attribute must be added. " + "It was added here automatically.", ], id="open-enum-with-new-item-custom-missing", ), @@ -1374,7 +1376,7 @@ def format_error_message(msg: str) -> str: ), [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following Python types: (, ), as defined in the NXDL as NX_INT.", - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]].", + "The value ['0', 1, 2] at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]].", ], id="wrong-type-array-in-attribute", ), @@ -1383,7 +1385,7 @@ def format_error_message(msg: str) -> str: TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2] ), [ - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]." + "The value [1, 2] at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]." ], id="wrong-value-array-in-attribute", ), @@ -1727,7 +1729,7 @@ def format_error_message(msg: str) -> str: "Cu", ), [ - "The value at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/target_material " + "The value Cu at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/target_material " "should be one of the following: ['Ta', 'W', 'depleted_U', 'enriched_U', 'Hg', 'Pb', 'C']." ], id="baseclass-wrong-enum", @@ -2061,6 +2063,7 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): "baseclass-field-with-illegal-unit", "open-enum-with-new-item", "open-enum-with-new-item-compressed", + "open-enum-with-new-item-custom-missing", "baseclass-open-enum-with-new-item", "appdef-compressed-strength-0", ): From c66e3252e9018567baa5b24fb94062306a764617 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:26:01 +0200 Subject: [PATCH 05/10] support custom attribute in validate_nexus --- src/pynxtools/dataconverter/helpers.py | 87 +++++++++++++++++++++-- src/pynxtools/dataconverter/validation.py | 24 ++++++- tests/dataconverter/test_validation.py | 83 ++++++++++++++++----- 3 files changed, 172 insertions(+), 22 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index d8957bb9d..bb6a4876b 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -21,7 +21,7 @@ import logging import os import re -from collections.abc import Mapping, Sequence +from collections.abc import Mapping, MutableMapping, Sequence from datetime import datetime, timezone from enum import Enum, auto from functools import cache, lru_cache @@ -154,7 +154,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar elif log_type == ValidationProblem.InvalidEnum: logger.warning( - f"The value {args[0]} at {path} should be one of the following: {value}." + f"The value '{args[0]}' at {path} should be one of the following: {value}." ) elif log_type == ValidationProblem.OpenEnumWithCustom: logger.info( @@ -166,11 +166,13 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar "When a different value is used, the boolean 'custom' attribute cannot be False." ) elif log_type == ValidationProblem.OpenEnumWithMissingCustom: - logger.info( + log_text = ( f"The value '{args[0]}' at {path} does not match with the enumerated items from the open enumeration: {value}. " - "When a different value is used, a boolean 'custom=True' attribute must be added. It was added here automatically." + "When a different value is used, a boolean 'custom=True' attribute must be added." ) - + if args[1] is True: + log_text += " It was added here automatically." + logger.info(log_text) elif log_type == ValidationProblem.MissingRequiredGroup: logger.warning(f"The required group {path} hasn't been supplied.") elif log_type == ValidationProblem.MissingRequiredField: @@ -878,6 +880,81 @@ def validate_data_value(value: Any, nxdl_type: str, path: str) -> Any: return validate_data_value(value, nxdl_type, path) +def get_custom_attr_path(path: str) -> str: + if path.split("/")[-1].startswith("@"): + attr_name = path.split("/")[-1][1:] # remove "@" + return f"{path}_custom" + return f"{path}/@custom" + + +def is_valid_enum( + value: Any, + nxdl_enum: list, + nxdl_enum_open: bool, + path: str, + mapping: MutableMapping, +): + """Check enumeration.""" + + if isinstance(value, dict) and set(value.keys()) == {"compress", "strength"}: + value = value["compress"] + + if nxdl_enum is not None: + if ( + isinstance(value, np.ndarray) + and isinstance(nxdl_enum, list) + and isinstance(nxdl_enum[0], list) + ): + enum_value = list(value) + else: + enum_value = value + + if enum_value not in nxdl_enum: + if nxdl_enum_open: + custom_path = get_custom_attr_path(path) + + if isinstance(mapping, h5py.Group): + parent_path, attr_name = custom_path.rsplit("@", 1) + custom_attr = mapping.get(parent_path).attrs.get(attr_name) + custom_added_auto = False + else: + custom_attr = mapping.get(custom_path) + custom_added_auto = True + + if custom_attr == True: # noqa: E712 + collector.collect_and_log( + path, + ValidationProblem.OpenEnumWithCustom, + nxdl_enum, + value, + ) + elif custom_attr == False: # noqa: E712 + collector.collect_and_log( + path, + ValidationProblem.OpenEnumWithCustomFalse, + nxdl_enum, + value, + ) + + elif custom_attr is None: + try: + mapping[custom_path] = True + except ValueError: + # we are in the HDF5 validation, cannot set custom attribute. + pass + collector.collect_and_log( + path, + ValidationProblem.OpenEnumWithMissingCustom, + nxdl_enum, + value, + custom_added_auto, + ) + else: + collector.collect_and_log( + path, ValidationProblem.InvalidEnum, nxdl_enum, value + ) + + def split_class_and_name_of(name: str) -> tuple[Optional[str], str]: """ Return the class and the name of a data dict entry of the form diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index d58075b7d..4e695bf0c 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -41,7 +41,9 @@ clean_str_attr, collector, convert_nexus_to_caps, + get_custom_attr_path, is_valid_data_field, + is_valid_enum, split_class_and_name_of, ) from pynxtools.dataconverter.nexus_tree import ( @@ -644,9 +646,14 @@ def handle_field( is_valid_data_field( clean_str_attr(dataset[()]), node.dtype, + full_path, + ) + is_valid_enum( + clean_str_attr(dataset[()]), node.items, node.open_enum, full_path, + data, ) units = dataset.attrs.get("units") @@ -695,7 +702,12 @@ def handle_attributes( for attr_name in attrs: full_path = f"{entry_name}/{path}/@{attr_name}" - if attr_name in ("NX_class", "units", "target"): + if attr_name in ( + "NX_class", + "units", + "target", + "custom", + ) or attr_name.endswith("_custom"): # Ignore special attrs continue @@ -734,9 +746,15 @@ def handle_attributes( is_valid_data_field( attr_data, node.dtype, + full_path, + ) + + is_valid_enum( + attr_data, node.items, node.open_enum, full_path, + data, ) def validate(path: str, h5_obj: Union[h5py.Group, h5py.Dataset]): @@ -1351,6 +1369,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): variant_path, mapping, ) + remove_from_not_visited(get_custom_attr_path(variant_path)) check_reserved_suffix(variant_path, keys) check_reserved_prefix(variant_path, get_definition(variant_path), "field") @@ -1424,6 +1443,7 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): variant_path, mapping, ) + remove_from_not_visited(get_custom_attr_path(variant_path)) check_reserved_prefix( variant_path, get_definition(variant_path), "attribute" ) @@ -1662,6 +1682,7 @@ def is_documented(key: str, tree: NexusNode) -> bool: key, mapping, ) + remove_from_not_visited(get_custom_attr_path(key)) return True @@ -1686,6 +1707,7 @@ def is_documented(key: str, tree: NexusNode) -> bool: key, mapping, ) + remove_from_not_visited(get_custom_attr_path(key)) # Check main field exists for units if ( diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index acba436d3..d7fe4d0d1 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -867,7 +867,7 @@ def format_error_message(msg: str) -> str: TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type", "Wrong option" ), [ - "The value Wrong option at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type " + "The value 'Wrong option' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type " "should be one of the following: " "['1st type', '2nd type', '3rd type', '4th type']." ], @@ -1376,7 +1376,7 @@ def format_error_message(msg: str) -> str: ), [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following Python types: (, ), as defined in the NXDL as NX_INT.", - "The value ['0', 1, 2] at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]].", + "The value '['0', 1, 2]' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]].", ], id="wrong-type-array-in-attribute", ), @@ -1385,7 +1385,7 @@ def format_error_message(msg: str) -> str: TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2] ), [ - "The value [1, 2] at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]." + "The value '[1, 2]' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array should be one of the following: [[0, 1, 2], [2, 3, 4]]." ], id="wrong-value-array-in-attribute", ), @@ -1729,7 +1729,7 @@ def format_error_message(msg: str) -> str: "Cu", ), [ - "The value Cu at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/target_material " + "The value 'Cu' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[my_source]/target_material " "should be one of the following: ['Ta', 'W', 'depleted_U', 'enriched_U', 'Hg', 'Pb', 'C']." ], id="baseclass-wrong-enum", @@ -2444,7 +2444,7 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type", "Wrong option" ), [ - "The value at /my_entry/nxodd_name/type should " + "The value 'Wrong option' at /my_entry/nxodd_name/type should " "be one of the following: " "['1st type', '2nd type', '3rd type', '4th type']." ], @@ -2457,11 +2457,60 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): "a very different type", ), [ - "The value at /my_entry/nxodd_name/type2 does not match with the " - "enumerated items from the open enumeration: ['1st type open', '2nd type open']." + "The value 'a very different type' at /my_entry/nxodd_name/type2 does not match with the " + "enumerated items from the open enumeration: ['1st type open', '2nd type open']. When a " + "different value is used, a boolean 'custom=True' attribute must be added." ], id="open-enum-with-new-item", ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2", + "a very different type", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@custom", + False, + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum", + "3rd option", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum_custom", + False, + ), + [ + "The value 'a very different type' at /my_entry/nxodd_name/type2 does not match " + "with the enumerated items from the open enumeration: ['1st type open', '2nd type open']. " + "When a different value is used, the boolean 'custom' attribute cannot be False.", + "The value '3rd option' at /my_entry/nxodd_name/type2/@attribute_with_open_enum " + "does not match with the enumerated items from the open enumeration: ['1st option', '2nd option']. " + "When a different value is used, the boolean 'custom' attribute cannot be False.", + ], + id="open-enum-with-new-item-custom-false", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2", + "a very different type", + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type2/@attribute_with_open_enum", + "3rd option", + ), + [ + "The value 'a very different type' at /my_entry/nxodd_name/type2 does not match " + "with the enumerated items from the open enumeration: ['1st type open', '2nd type open']. " + "When a different value is used, a boolean 'custom=True' attribute must be added.", + "The value '3rd option' at /my_entry/nxodd_name/type2/@attribute_with_open_enum " + "does not match with the enumerated items from the open enumeration: ['1st option', '2nd option']. " + "When a different value is used, a boolean 'custom=True' attribute must be added.", + ], + id="open-enum-with-new-item-custom-missing", + ), pytest.param( set_to_none_in_dict( TEMPLATE, "/ENTRY[my_entry]/optional_parent/required_child", "required" @@ -2794,7 +2843,7 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type/@array", [1, 2] ), [ - "The value at /my_entry/nxodd_name/type/@array should be one of the following: " + "The value '[1 2]' at /my_entry/nxodd_name/type/@array should be one of the following: " "[[0, 1, 2], [2, 3, 4]]." ], id="wrong-value-array-in-attribute", @@ -3114,7 +3163,7 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): "Cu", ), [ - "The value at /my_entry/my_instrument/my_source/target_material should be one of the following: " + "The value 'Cu' at /my_entry/my_instrument/my_source/target_material should be one of the following: " "['Ta', 'W', 'depleted_U', 'enriched_U', 'Hg', 'Pb', 'C']." ], id="baseclass-wrong-enum", @@ -3126,11 +3175,12 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): "Wrong source type", ), [ - "The value at /my_entry/my_instrument/my_source/type does not match with the enumerated " + "The value 'Wrong source type' at /my_entry/my_instrument/my_source/type does not match with the enumerated " "items from the open enumeration: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', 'Reactor Neutron Source', " "'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', 'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', " "'Optical Laser', 'Ion Source', 'UV Plasma Source', 'Metal Jet X-ray', 'Laser', 'Dye Laser', 'Broadband Tunable Light Source', " - "'Halogen Lamp', 'LED', 'Mercury Cadmium Telluride Lamp', 'Deuterium Lamp', 'Xenon Lamp', 'Globar']." + "'Halogen Lamp', 'LED', 'Mercury Cadmium Telluride Lamp', 'Deuterium Lamp', 'Xenon Lamp', 'Globar']. " + "When a different value is used, a boolean 'custom=True' attribute must be added." ], id="baseclass-open-enum-with-new-item", ), @@ -3430,6 +3480,7 @@ def test_validate_nexus_file(data_dict, error_messages, caplog, tmp_path, reques "field-with-illegal-unit", "baseclass-field-with-illegal-unit", "open-enum-with-new-item", + "open-enum-with-new-item-custom-missing", "baseclass-open-enum-with-new-item", "namefitting-of-group-with-typo-and-new-field", "bad-namefitting", @@ -3518,16 +3569,16 @@ def test_validate_nexus_file(data_dict, error_messages, caplog, tmp_path, reques "Field /entry/instrument/source/burst_number_end has no documentation.", "Reserved suffix '_end' was used in /entry/instrument/source/burst_number_end, but there is no associated field burst_number.", "Field /entry/instrument/source/burst_number_start has no documentation.", - "The value at /entry/instrument/source/mode does not match with the enumerated items from the open enumeration: ['Single Bunch', 'Multi Bunch'].", + "The value 'Burst' at /entry/instrument/source/mode does not match with the enumerated items from the open enumeration: ['Single Bunch', 'Multi Bunch']. When a different value is used, a boolean 'custom=True' attribute must be added.", "Field /entry/instrument/source/number_of_bursts has no documentation.", - "The value at /entry/instrument/source/type does not match with the enumerated items from the open enumeration: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', 'Reactor Neutron Source', 'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', 'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', 'Optical Laser', 'Ion Source', 'UV Plasma Source', 'Metal Jet X-ray', 'Laser', 'Dye Laser', 'Broadband Tunable Light Source', 'Halogen Lamp', 'LED', 'Mercury Cadmium Telluride Lamp', 'Deuterium Lamp', 'Xenon Lamp', 'Globar'].", + "The value 'Free Electron Laser' at /entry/instrument/source/type does not match with the enumerated items from the open enumeration: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', 'Reactor Neutron Source', 'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', 'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', 'Optical Laser', 'Ion Source', 'UV Plasma Source', 'Metal Jet X-ray', 'Laser', 'Dye Laser', 'Broadband Tunable Light Source', 'Halogen Lamp', 'LED', 'Mercury Cadmium Telluride Lamp', 'Deuterium Lamp', 'Xenon Lamp', 'Globar']. When a different value is used, a boolean 'custom=True' attribute must be added.", "Field /entry/instrument/source_pump/burst_distance has no documentation.", "Field /entry/instrument/source_pump/burst_length has no documentation.", - "The value at /entry/instrument/source_pump/mode does not match with the enumerated items from the open enumeration: ['Single Bunch', 'Multi Bunch'].", + "The value 'Burst' at /entry/instrument/source_pump/mode does not match with the enumerated items from the open enumeration: ['Single Bunch', 'Multi Bunch']. When a different value is used, a boolean 'custom=True' attribute must be added.", "Field /entry/instrument/source_pump/number_of_bursts has no documentation.", - "The value at /entry/instrument/source_pump/probe should be one of the following: ['x-ray'].", + "The value 'NIR' at /entry/instrument/source_pump/probe should be one of the following: ['x-ray'].", "Field /entry/instrument/source_pump/rms_jitter has no documentation.", - "The value at /entry/instrument/source_pump/type does not match with the enumerated items from the open enumeration: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', 'Reactor Neutron Source', 'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', 'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', 'Optical Laser', 'Ion Source', 'UV Plasma Source', 'Metal Jet X-ray', 'Laser', 'Dye Laser', 'Broadband Tunable Light Source', 'Halogen Lamp', 'LED', 'Mercury Cadmium Telluride Lamp', 'Deuterium Lamp', 'Xenon Lamp', 'Globar'].", + "The value 'OPCPA' at /entry/instrument/source_pump/type does not match with the enumerated items from the open enumeration: ['Spallation Neutron Source', 'Pulsed Reactor Neutron Source', 'Reactor Neutron Source', 'Synchrotron X-ray Source', 'Pulsed Muon Source', 'Rotating Anode X-ray', 'Fixed Tube X-ray', 'UV Laser', 'Free-Electron Laser', 'Optical Laser', 'Ion Source', 'UV Plasma Source', 'Metal Jet X-ray', 'Laser', 'Dye Laser', 'Broadband Tunable Light Source', 'Halogen Lamp', 'LED', 'Mercury Cadmium Telluride Lamp', 'Deuterium Lamp', 'Xenon Lamp', 'Globar']. When a different value is used, a boolean 'custom=True' attribute must be added.", "Field /entry/instrument/spatial_resolution has no documentation.", "Field /entry/instrument/temporal_resolution has no documentation.", "Field /entry/sample/bias has no documentation.", From 7f2d084117c0e2b19f6ce1a9cc7d4853504f5f6f Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:49:40 +0200 Subject: [PATCH 06/10] use pynxtools-xps main branch again --- .github/workflows/plugin_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 5b6e6e4f2..34913d5ce 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -42,7 +42,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-xps - branch: custom-attr-for-open-enum + branch: main tests_to_run: tests/. - plugin: pynxtools-xrd branch: main From 6d927f54b9ad0560a906e1b7423058046aa18bc3 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 1 Oct 2025 11:26:34 +0200 Subject: [PATCH 07/10] differntiate between HDF5 and template validation in the case of missing custom attribute --- src/pynxtools/dataconverter/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index bb6a4876b..f1c93daf6 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -172,7 +172,9 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar ) if args[1] is True: log_text += " It was added here automatically." - logger.info(log_text) + logger.info(log_text) + else: + logger.warning(log_text) elif log_type == ValidationProblem.MissingRequiredGroup: logger.warning(f"The required group {path} hasn't been supplied.") elif log_type == ValidationProblem.MissingRequiredField: From 141979a03d4d1d09b3f0dd66c1f2e29994a753b6 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 1 Oct 2025 12:42:55 +0200 Subject: [PATCH 08/10] simplify logic for testing ignored sections --- src/pynxtools/testing/nexus_conversion.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index a3ee4844f..64b78e104 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -241,17 +241,24 @@ def load_logs( def compare_logs(gen_lines: list[str], ref_lines: list[str]) -> None: """Compare log lines, ignoring specific differences.""" + def get_section_ignore_lines(line: str) -> list[str]: + """Return ignore lines for a section if the line starts with the section.""" + section = line.rsplit(SECTION_SEPARATOR, 1)[-1].strip() + for key, ignore_lines in IGNORE_SECTIONS.items(): + if section.startswith(key): + return ignore_lines + + return [] + def extra_lines( lines1: list[str], lines2: list[str] ) -> list[Optional[str]]: - """Return lines in lines1 but not in lines2, with line numbers and ignoring specified lines.""" - diffs: list[Optional[str]] = [] + """Return lines in lines1 but not in lines2 with line numbers.""" + diffs = [] section_ignore_lines = [] - section = None for ind, line in enumerate(lines1): if line.startswith(SECTION_SEPARATOR): - section = line.rsplit(SECTION_SEPARATOR)[-1].strip() - section_ignore_lines = IGNORE_SECTIONS.get(section, []) + section_ignore_lines = get_section_ignore_lines(line) if line not in lines2 and not should_skip_line( line, ignore_lines=IGNORE_LINES + section_ignore_lines ): @@ -282,13 +289,12 @@ def extra_lines( # Case 2: same line counts, check for diffs diffs = [] section_ignore_lines = [] - section = None + for ind, (gen_l, ref_l) in enumerate(zip(gen_lines, ref_lines)): if gen_l.startswith(SECTION_SEPARATOR) and ref_l.startswith( SECTION_SEPARATOR ): - section = gen_l.rsplit(SECTION_SEPARATOR)[-1].strip() - section_ignore_lines = IGNORE_SECTIONS.get(section, []) + section_ignore_lines = get_section_ignore_lines(gen_l) if gen_l != ref_l and not should_skip_line( gen_l, ref_l, ignore_lines=IGNORE_LINES + section_ignore_lines ): From b7550961b716b4b570cc63fd6a7f971574253dfd Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 1 Oct 2025 15:19:52 +0200 Subject: [PATCH 09/10] function docstrings --- src/pynxtools/dataconverter/helpers.py | 33 ++++++++++++++++++++++- src/pynxtools/testing/nexus_conversion.py | 3 ++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index f1c93daf6..1d63c3c42 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -883,6 +883,24 @@ def validate_data_value(value: Any, nxdl_type: str, path: str) -> Any: def get_custom_attr_path(path: str) -> str: + """ + Generate the path for the 'custom' attribute for open enumerations for a + given path. + + If a NeXus concept has an open enumeration and a different value than the suggested ones are used, + + - for fields, an attribute @custom=True. + - for attributes, an additional attribute @my_attribute_custom=True (where my_attribute is the name + of the attribute with the open enumeration) + + shall be added to the file. This function creates the path for this custom attribute. + + Args: + path (str): The original path string. + + Returns: + str: The modified path string representing the custom attribute path. + """ if path.split("/")[-1].startswith("@"): attr_name = path.split("/")[-1][1:] # remove "@" return f"{path}_custom" @@ -896,7 +914,20 @@ def is_valid_enum( path: str, mapping: MutableMapping, ): - """Check enumeration.""" + """Validate a value against an NXDL enumeration and handle custom attributes. + + This function checks whether a given value conforms to the specified NXDL + enumeration. If the enumeration is open (`nxdl_enum_open`), it may create or + check a corresponding custom attribute in the `mapping`. + + Args: + value (Any): The value to validate. + nxdl_enum (list): The NXDL enumeration to validate against. + nxdl_enum_open (bool): Whether the enumeration is open to custom values. + path (str): The path of the value in the dataset. + mapping (MutableMapping): The object (dict or HDF5 group) holding custom attributes. + + """ if isinstance(value, dict) and set(value.keys()) == {"compress", "strength"}: value = value["compress"] diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index 64b78e104..cc9ef3041 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -253,7 +253,8 @@ def get_section_ignore_lines(line: str) -> list[str]: def extra_lines( lines1: list[str], lines2: list[str] ) -> list[Optional[str]]: - """Return lines in lines1 but not in lines2 with line numbers.""" + """Return lines in lines1 but not in lines2, with line numbers, and ignoring + specified lines.""" diffs = [] section_ignore_lines = [] for ind, line in enumerate(lines1): From 39c03a781dd351712a4204d158db0ef1557e7f45 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 2 Oct 2025 17:07:26 +0200 Subject: [PATCH 10/10] add docs for ignore_sections in test framework --- .../using-pynxtools-test-framework.md | 41 ++++++++++++++++--- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/docs/how-tos/pynxtools/using-pynxtools-test-framework.md b/docs/how-tos/pynxtools/using-pynxtools-test-framework.md index 301a57add..56db744dc 100644 --- a/docs/how-tos/pynxtools/using-pynxtools-test-framework.md +++ b/docs/how-tos/pynxtools/using-pynxtools-test-framework.md @@ -63,17 +63,48 @@ def test_foo_reader(nxdl, reader_name, files_or_dir, tmp_path, caplog): # of the log files of the reference -nxs file and the one created in the test. ``` -Alongside the test data in `tests/data`, it is also possible to add other types of test data inside the test directory of the plugin. +The `ReaderTest.convert_to_nexus` method tries to convert all files in the `files_or_dir` directory to a NeXus file that is compliant with the application definition (`nxdl`), using a specific pynxtools reader (`reader_name`). In this example, the `foo` reader is used to convert to files following the `NXfoo` application definition. -You can also pass additional parameters to `test.convert_to_nexus`: +There are some possibilities to configure this test for your specific plugin: -- `caplog_level` (str): This parameter determines the level at which the caplog is set during testing. This can be either "ERROR" (by default) or "WARNING". If it is "WARNING", the test will also fail if any warnings are reported by the reader. +- You can configure the test data that is used. Typically, this data should be located in `tests/data`, but it is also possible to use other data inside or even outside the test directory of the plugin. +- You can also pass additional parameters to `test.convert_to_nexus`: + - `caplog_level` (str): This parameter determines the level at which the caplog is set during testing. This can be either "ERROR" (by default) or "WARNING". If it is "WARNING", the test will also fail if any warnings are reported by the reader. -- `ignore_undocumented` (boolean): If true, the test skips the verification of undocumented keys. Otherwise, a warning message for undocumented keys is logged. + - `ignore_undocumented` (boolean): If true, the test skips the verification of undocumented keys. Otherwise, a warning message for undocumented keys is logged. + +Afterwards, the `ReaderTest.convert_to_nexus` method uses the NeXus annotator tool [`read_nexus`](../../learn/pynxtools/nexus-validation.md#read_nexus-nexus-file-reader-and-debugger) (which is part of `pynxtools`) to create log files both of the reference NeXus file located in `files_or_dir` as well as the freshly created NeXus files. These log files are compared line-by-line to check that the created NeXus file is indeed the same as the reference file. + +This test can also be configured: + +- You can pass a keyword argument `ignore_lines` to `check_reproducibility_of_nexus`. `ignore_lines` is expected to be a list of lines for which the comparison shall be skipped. Specifically, any line that starts with any of the strings in `ignore_lines` is ignored. +- In adddition, you can disable the comparison for a given line for a NeXus concept in the `read_nexus` output using the `ignore_sections` keyword. As an example, a typical section for a NeXus field in the output looks like this: + + ``` + DEBUG: + ===== FIELD (//entry/start_time): + DEBUG: ===== FIELD (//entry/start_time): + value: 2018-05-01T07:22:00+02:00 + DEBUG: value: 2018-05-01T07:22:00+02:00 + classpath: ['NXentry', 'NX_DATE_TIME'] + DEBUG: classpath: ['NXentry', 'NX_DATE_TIME'] + classes: + NXarpes.nxdl.xml:/ENTRY/start_time + NXentry.nxdl.xml:/start_time + DEBUG: classes: + NXarpes.nxdl.xml:/ENTRY/start_time + NXentry.nxdl.xml:/start_time + <> + DEBUG: <> + documentation (NXarpes.nxdl.xml:/ENTRY/start_time): + DEBUG: documentation (NXarpes.nxdl.xml:/ENTRY/start_time): + ``` + + If you do want to disable the comparison for the value of `entry/start_time`, you can pass a dictionary to `ignore_sections`. In this example, the dictionary `{"FIELD (//entry/start_time)": ["value:"]}` would disable the comparison of the `value` line. Any other line in this section can be disabled by adding more strings to the list (e.g. `DEBUG - value:`), whereas additional sections can be ignored by adding to the `ignore_sections` dictionary. ## How to write an integration test for a NOMAD example in a reader plugin -It is also possible to ship NOMAD Example Uploads directly with the reader plugin. As an example, `pynxtools-mpes` comes with its own NOMAD example (see [here](https://github.com/FAIRmat-NFDI/pynxtools-mpes/tree/bring-in-examples/src/pynxtools_mpes/nomad)) using the `ExampleUploadEntryPoint` of NOMAD (see [here](https://nomad-lab.eu/prod/v1/staging/docs/howto/plugins/example_uploads.html) for more documentation). +It is also possible to ship NOMAD Example Uploads directly with the reader plugin. As an example, `pynxtools-mpes` comes with its own [NOMAD examplehere](https://github.com/FAIRmat-NFDI/pynxtools-mpes/tree/bring-in-examples/src/pynxtools_mpes/nomad) using the [`ExampleUploadEntryPoint`](https://nomad-lab.eu/prod/v1/staging/docs/howto/plugins/example_uploads.html) of NOMAD. The `testing` sub-package of `pynxtools` provides two functionalities for testing the `ExampleUploadEntryPoint` defined in a `pynxtools` plugin: