diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
index cbb484bea..215dacd40 100644
--- a/.github/workflows/build_docs.yml
+++ b/.github/workflows/build_docs.yml
@@ -4,7 +4,6 @@ on:
   push:
     branches:
       - master # Triggers deployment on push to the master branch
-      - restructure-docs
 
 env:
   python-version: 3.12
diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py
index c186ff6af..82bf5e748 100644
--- a/src/pynxtools/dataconverter/helpers.py
+++ b/src/pynxtools/dataconverter/helpers.py
@@ -46,6 +46,12 @@
 logger = logging.getLogger("pynxtools")
 
 
+ISO8601 = re.compile(
+    r"^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}(?:"
+    r"\.\d*)?)(((?!-00:00)(\+|-)(\d{2}):(\d{2})|Z){1})$"
+)
+
+
 class ValidationProblem(Enum):
     DifferentVariadicNodesWithTheSameName = auto()
     UnitWithoutDocumentation = auto()
@@ -76,6 +82,9 @@ class ValidationProblem(Enum):
     ReservedPrefixInWrongContext = auto()
     InvalidNexusTypeForNamedConcept = auto()
     KeysWithAndWithoutConcept = auto()
+    InvalidCompressionStrength = auto()
+    CompressionStrengthZero = auto()
+    # DoNotCompressStringsBoolean = auto()
 
 
 class Collector:
@@ -200,6 +209,28 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
             logger.warning(
                 f"The key '{path}' uses the valid concept name '{args[0]}', but there is another valid key {value} that uses the non-variadic name of the node.'"
             )
+        elif log_type == ValidationProblem.CompressionStrengthZero:
+            value = cast(dict, value)
+            logger.info(
+                f"Compression strength for {path} is 0. The value '{value['compress']}' will be written uncompressed."
+            )
+        elif log_type == ValidationProblem.InvalidCompressionStrength:
+            value = cast(dict, value)
+            logger.warning(
+                f"Compression strength for {path} = {value} should be between 0 and 9."
+            )
+        # elif log_type == ValidationProblem.DoNotCompressStringsBoolean:
+        #     value = cast(dict, value)
+        #     dtype = type(value["compress"]).__name__
+        #     dtype_map = {
+        #         "str": "string",
+        #         "bool": "boolean",
+        #     }
+        #     dtype_str = dtype_map.get(dtype, dtype)
+
+        #     logger.info(
+        #         f"Compression for {path} = {value} should not be used for {dtype_str} values."
+        #     )
 
     def collect_and_log(
         self,
@@ -222,6 +253,7 @@ def collect_and_log(
         if log_type not in (
             ValidationProblem.UnitWithoutDocumentation,
             ValidationProblem.OpenEnumWithNewItem,
+            ValidationProblem.CompressionStrengthZero,
         ):
             self.data.add(path + str(log_type) + str(value))
 
@@ -723,6 +755,8 @@ def convert_int_to_float(value):
         return {convert_int_to_float(v) for v in value}
     elif isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.integer):
         return value.astype(float)
+    elif isinstance(value, np.generic) and np.issubdtype(type(value), np.integer):
+        return float(value)
     else:
         return value
 
@@ -730,71 +764,94 @@ def convert_int_to_float(value):
 def is_valid_data_field(
     value: Any, nxdl_type: str, nxdl_enum: list, nxdl_enum_open: bool, path: str
 ) -> Any:
-    # todo: Check this function and write test for it. It seems the function is not
-    # working as expected.
-    """Checks whether a given value is valid according to the type defined in the NXDL.
+    """Checks whether a given value is valid according to the type defined in the NXDL.
+
+    A dict with exactly the keys "compress" and "strength" is handled as a compressed
+    payload. If the strength is outside 1..9, the problem is reported and only the
+    validated payload is returned, i.e., the compression is dropped.
+
+    Return:
+        value: the possibly converted data value (for a compressed payload with a
+            strength in 1..9, the dict with its "compress" entry validated)
+    """
+
+    def validate_data_value(
+        value: Any, nxdl_type: str, nxdl_enum: list, nxdl_enum_open: bool, path: str
+    ) -> Any:
+        """Validate and possibly convert a primitive value according to NXDL type/enum rules."""
+        accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]
+        original_value = value
+
+        # Do not count other dicts as they represent a link value
+        if not isinstance(value, dict):
+            # Attempt type conversion
+            if accepted_types[0] is bool and isinstance(value, str):
+                try:
+                    value = convert_str_to_bool_safe(value)
+                except (ValueError, TypeError):
+                    value = original_value
+            elif accepted_types[0] is float:
+                value = convert_int_to_float(value)
 
-    This function only tries to convert boolean value in str format (e.g. "true" ) to
-    python Boolean (True). In case, it fails to convert, it raises an Exception.
+            if not is_valid_data_type(value, accepted_types):
+                collector.collect_and_log(
+                    path, ValidationProblem.InvalidType, accepted_types, nxdl_type
+                )
 
-    Return:
-        value: the possibly converted data value
-    """
+            # Type-specific validation
+            if nxdl_type == "NX_POSINT" and not is_positive_int(value):
+                collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value)
+
+            if nxdl_type in ("ISO8601", "NX_DATE_TIME"):
+                results = ISO8601.search(value)
+                if results is None:
+                    collector.collect_and_log(
+                        path, ValidationProblem.InvalidDatetime, value
+                    )
+
+            if nxdl_enum is not None and value not in nxdl_enum:
+                if nxdl_enum_open:
+                    collector.collect_and_log(
+                        path, ValidationProblem.OpenEnumWithNewItem, nxdl_enum
+                    )
+                else:
+                    collector.collect_and_log(
+                        path, ValidationProblem.InvalidEnum, nxdl_enum
+                    )
 
-    accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]
+        return value
 
     if isinstance(value, dict) and set(value.keys()) == {"compress", "strength"}:
-        value = value["compress"]
+        compressed_value = value["compress"]
 
-    # Do not count other dicts as they represent a link value
-    if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types):
-        # try to convert string to bool
-        if accepted_types[0] is bool and isinstance(value, str):
-            try:
-                value = convert_str_to_bool_safe(value)
-            except (ValueError, TypeError):
+        if not (1 <= value["strength"] <= 9):
+            if value["strength"] == 0:
                 collector.collect_and_log(
-                    path, ValidationProblem.InvalidType, accepted_types, nxdl_type
+                    path, ValidationProblem.CompressionStrengthZero, value
                 )
-        elif accepted_types[0] is float:
-            value = convert_int_to_float(value)
-            if not is_valid_data_type(value, accepted_types):
+            else:
                 collector.collect_and_log(
-                    path, ValidationProblem.InvalidType, accepted_types, nxdl_type
+                    path, ValidationProblem.InvalidCompressionStrength, value
                 )
-        else:
-            collector.collect_and_log(
-                path, ValidationProblem.InvalidType, accepted_types, nxdl_type
+            # In this case, we remove the compression.
+            return validate_data_value(
+                value["compress"], nxdl_type, nxdl_enum, nxdl_enum_open, path
             )
 
-    if nxdl_type == "NX_POSINT" and not is_positive_int(value):
-        collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value)
+        # TODO: Do we need to issue a warning if string/bool compression is used
+        # # elif isinstance(compressed_value, (str, bool)):
+        #     collector.collect_and_log(
+        #         path, ValidationProblem.DoNotCompressStringsBoolean, value
+        #     )
 
-    if nxdl_type in ("ISO8601", "NX_DATE_TIME"):
-        iso8601 = re.compile(
-            r"^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}(?:"
-            r"\.\d*)?)(((?!-00:00)(\+|-)(\d{2}):(\d{2})|Z){1})$"
+        # Apply standard validation to compressed value
+        value["compress"] = validate_data_value(
+            compressed_value, nxdl_type, nxdl_enum, nxdl_enum_open, path
         )
-        results = iso8601.search(value)
-        if results is None:
-            collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value)
-
-    # Check enumeration
-    if nxdl_enum is not None and value not in nxdl_enum:
-        if nxdl_enum_open:
-            collector.collect_and_log(
-                path,
-                ValidationProblem.OpenEnumWithNewItem,
-                nxdl_enum,
-            )
-        else:
-            collector.collect_and_log(
-                path,
-                ValidationProblem.InvalidEnum,
-                nxdl_enum,
-            )
 
-    return value
+        return value
+
+    return validate_data_value(value, nxdl_type, nxdl_enum, nxdl_enum_open, path)
 
 
 @cache
diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py
index 9e4f20d43..fc0fef42f 100644
--- a/tests/dataconverter/test_validation.py
+++ b/tests/dataconverter/test_validation.py
@@ -52,6 +52,28 @@ def set_whole_group_to_none(
     return internal_dict
 
 
+def compress_paths_in_dict(data_dict: Template, paths: list[str]):
+    """For each path, compress the value in data_dict using a strength of 3.
+
+    Python ints and floats are converted to np.int64 and np.float32 beforehand;
+    paths whose value is None are left untouched. Returns None if data_dict is None.
+    """
+    types = {
+        "int": np.int64,
+        "float": np.float32,
+    }
+    if data_dict is not None:
+        internal_dict = Template(data_dict)
+        for path in paths:
+            if (value := internal_dict.get(path)) is not None:
+                if np_type := types.get(type(value).__name__):
+                    value = np_type(value)
+                internal_dict[path] = {"compress": value, "strength": 3}
+        return internal_dict
+
+    return None
+
+
 def remove_from_dict(data_dict: Template, key: str, optionality: str = "optional"):
     """Helper function to remove a key from dict"""
     if data_dict is not None and key in data_dict[optionality]:
@@ -1694,27 +1716,63 @@ def listify_template(data_dict: Template):
             id="reserved-prefix",
         ),
         pytest.param(
-            alter_dict(
+            compress_paths_in_dict(
                 TEMPLATE,
-                "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value",
-                {"compress": np.float32(2.0), "strength": 1},
+                [
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value",
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value",
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value",
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value",
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value",
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value",
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value",
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/type",
+                ],
             ),
             [],
-            id="appdef-compressed-payload",
+            id="appdef-compressed",
+        ),
+        pytest.param(
+            alter_dict(
+                alter_dict(
+                    TEMPLATE,
+                    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value",
+                    {"compress": np.int64(2.0), "strength": 1},
+                ),
+                "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value",
+                {"compress": np.float32(2.0), "strength": 1},
+            ),
+            [
+                "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value "
+                "should be one of the following Python types: "
+                "(<class 'int'>, <class 'numpy.integer'>), as defined in the "
+                "NXDL as NX_INT."
+            ],
+            id="appdef-compressed-wrong-type",
+        ),
+        pytest.param(
+            alter_dict(
+                TEMPLATE,
+                "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value",
+                {"compress": np.int64(2), "strength": 11},
+            ),
+            [
+                "Compression strength for /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value = "
+                "{'compress': 2, 'strength': 11} should be between 0 and 9.",
+            ],
+            id="appdef-compressed-invalid-strength",
         ),
         pytest.param(
             alter_dict(
                 TEMPLATE,
                 "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value",
-                {"compress": np.int64(2.0), "strength": 1},
+                {"compress": np.float32(2.0), "strength": 0},
             ),
             [
-                "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value "
-                "should be one of the following Python types: "
-                "(<class 'float'>, <class 'numpy.floating'>), as defined in the "
-                "NXDL as NX_FLOAT."
+                "Compression strength for /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value "
+                "is 0. The value '2.0' will be written uncompressed.",
             ],
-            id="appdef-compressed-payload-wrong-type",
+            id="appdef-compressed-strength-0",
         ),
         pytest.param(
             alter_dict(
@@ -1723,7 +1781,7 @@ def listify_template(data_dict: Template):
                 {"compress": np.int64(2), "strength": 1},
             ),
             [],
-            id="baseclass-compressed-payload",
+            id="baseclass-compressed",
         ),
         pytest.param(
             alter_dict(
@@ -1737,7 +1795,7 @@ def listify_template(data_dict: Template):
                 "(<class 'int'>, <class 'numpy.integer'>), as defined in the "
                 "NXDL as NX_INT."
             ],
-            id="baseclass-compressed-payload-wrong-type",
+            id="baseclass-compressed-wrong-type",
         ),
     ],
 )
@@ -1760,6 +1818,7 @@ def format_error_message(msg: str) -> str:
         "baseclass-field-with-illegal-unit",
         "open-enum-with-new-item",
         "baseclass-open-enum-with-new-item",
+        "appdef-compressed-strength-0",
     ):
         with caplog.at_level(logging.INFO):
             assert validate_dict_against("NXtest", data_dict)