logger = logging.getLogger("pynxtools")


# Pre-compiled pattern used to check date-time strings. It is anchored at both
# ends and requires an explicit UTC designator ("Z") or a numeric "+hh:mm" /
# "-hh:mm" offset; the offset "-00:00" is rejected by the negative lookahead.
ISO8601 = re.compile(
    # Date: YYYY-MM-DD
    r"^(\d{4})-(\d{2})-(\d{2})"
    # Time: "T" followed by hh:mm:ss with an optional fractional part
    r"T(\d{2}):(\d{2}):(\d{2}(?:\.\d*)?)"
    # Mandatory time zone: signed hh:mm offset (but not "-00:00") or "Z"
    r"(((?!-00:00)(\+|-)(\d{2}):(\d{2})|Z){1})$"
)
53+
54+
4955class ValidationProblem (Enum ):
5056 DifferentVariadicNodesWithTheSameName = auto ()
5157 UnitWithoutDocumentation = auto ()
@@ -76,6 +82,9 @@ class ValidationProblem(Enum):
7682 ReservedPrefixInWrongContext = auto ()
7783 InvalidNexusTypeForNamedConcept = auto ()
7884 KeysWithAndWithoutConcept = auto ()
85+ InvalidCompressionStrength = auto ()
86+ CompressionStrengthZero = auto ()
87+ # DoNotCompressStringsBoolean = auto()
7988
8089
8190class Collector :
@@ -200,6 +209,28 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
200209 logger .warning (
201210 f"The key '{ path } ' uses the valid concept name '{ args [0 ]} ', but there is another valid key { value } that uses the non-variadic name of the node.'"
202211 )
212+ elif log_type == ValidationProblem .CompressionStrengthZero :
213+ value = cast (dict , value )
214+ logger .info (
215+ f"Compression strength for { path } is 0. The value '{ value ['compress' ]} ' will be written uncompressed."
216+ )
217+ elif log_type == ValidationProblem .InvalidCompressionStrength :
218+ value = cast (dict , value )
219+ logger .warning (
220+ f"Compression strength for { path } = { value } should be between 0 and 9."
221+ )
222+ # elif log_type == ValidationProblem.DoNotCompressStringsBoolean:
223+ # value = cast(dict, value)
224+ # dtype = type(value["compress"]).__name__
225+ # dtype_map = {
226+ # "str": "string",
227+ # "bool": "boolean",
228+ # }
229+ # dtype_str = dtype_map.get(dtype, dtype)
230+
231+ # logger.info(
232+ # f"Compression for {path} = {value} should not be used for {dtype_str} values."
233+ # )
203234
204235 def collect_and_log (
205236 self ,
@@ -222,6 +253,7 @@ def collect_and_log(
222253 if log_type not in (
223254 ValidationProblem .UnitWithoutDocumentation ,
224255 ValidationProblem .OpenEnumWithNewItem ,
256+ ValidationProblem .CompressionStrengthZero ,
225257 ):
226258 self .data .add (path + str (log_type ) + str (value ))
227259
@@ -723,78 +755,94 @@ def convert_int_to_float(value):
723755 return {convert_int_to_float (v ) for v in value }
724756 elif isinstance (value , np .ndarray ) and np .issubdtype (value .dtype , np .integer ):
725757 return value .astype (float )
758+ elif isinstance (value , np .generic ) and np .issubdtype (type (value ), np .integer ):
759+ return float (value )
726760 else :
727761 return value
728762
729763
def _validate_data_value(
    value: Any,
    nxdl_type: str,
    nxdl_enum: Optional[list],
    nxdl_enum_open: bool,
    path: str,
) -> Any:
    """Validate and possibly convert a single value against NXDL type/enum rules.

    Problems are reported through ``collector.collect_and_log``; nothing is
    raised for an invalid value.

    Args:
        value: The value to check. Dicts are treated as link values and are
            exempt from the type check.
        nxdl_type: NXDL type name, used as key into NEXUS_TO_PYTHON_DATA_TYPES.
        nxdl_enum: Allowed values, or None if the concept has no enumeration.
        nxdl_enum_open: Whether values outside ``nxdl_enum`` are tolerated
            (reported as OpenEnumWithNewItem instead of InvalidEnum).
        path: Path of the entry, used in the reported problems.

    Returns:
        The value, converted where a conversion was applied (boolean strings
        to bool, integers to float for float types), otherwise unchanged.
    """
    accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]

    # Do not type-check dicts: at this point they represent a link value.
    if not isinstance(value, dict):
        if accepted_types[0] is bool and isinstance(value, str):
            try:
                value = convert_str_to_bool_safe(value)
            except (ValueError, TypeError):
                # Keep the unconverted string; the type check below reports it.
                pass
        elif accepted_types[0] is float:
            value = convert_int_to_float(value)

        if not is_valid_data_type(value, accepted_types):
            collector.collect_and_log(
                path, ValidationProblem.InvalidType, accepted_types, nxdl_type
            )

    if nxdl_type == "NX_POSINT" and not is_positive_int(value):
        collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value)

    # Only strings can be matched against the pattern: calling ``search`` on
    # any other object raises TypeError and would abort the validation.
    # NOTE(review): non-string, non-link values are presumably already reported
    # by the type check above - confirm against NEXUS_TO_PYTHON_DATA_TYPES.
    if (
        nxdl_type in ("ISO8601", "NX_DATE_TIME")
        and isinstance(value, str)
        and ISO8601.search(value) is None
    ):
        collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value)

    if nxdl_enum is not None and value not in nxdl_enum:
        enum_problem = (
            ValidationProblem.OpenEnumWithNewItem
            if nxdl_enum_open
            else ValidationProblem.InvalidEnum
        )
        collector.collect_and_log(path, enum_problem, nxdl_enum)

    return value


def is_valid_data_field(
    value: Any,
    nxdl_type: str,
    nxdl_enum: Optional[list],
    nxdl_enum_open: bool,
    path: str,
) -> Any:
    """Checks whether a given value is valid according to the type defined in the NXDL.

    A dict with exactly the keys ``"compress"`` and ``"strength"`` is treated
    as a compressed payload: the strength is checked first, then the payload
    under ``"compress"`` is validated like any other value.

    Args:
        value: The data value, or a ``{"compress": ..., "strength": ...}`` dict.
        nxdl_type: NXDL type name the value is validated against.
        nxdl_enum: Allowed values, or None if there is no enumeration.
        nxdl_enum_open: Whether the enumeration is open.
        path: Path of the entry, used in the reported problems.

    Returns:
        The possibly converted data value. For a compressed payload with a
        strength between 1 and 9, the same dict is returned with its
        ``"compress"`` entry replaced in place by the validated payload. For
        any other strength, the compression is dropped and only the validated
        payload is returned.
    """
    is_compressed = isinstance(value, dict) and set(value.keys()) == {
        "compress",
        "strength",
    }
    if not is_compressed:
        return _validate_data_value(
            value, nxdl_type, nxdl_enum, nxdl_enum_open, path
        )

    payload = value["compress"]
    strength = value["strength"]

    # A strength that cannot be compared with a number (e.g. a string or None)
    # or that is not a scalar must be reported as invalid, not raise.
    try:
        strength_in_range = bool(1 <= strength <= 9)
        strength_is_zero = bool(strength == 0)
    except (TypeError, ValueError):
        strength_in_range = False
        strength_is_zero = False

    if not strength_in_range:
        strength_problem = (
            ValidationProblem.CompressionStrengthZero
            if strength_is_zero
            else ValidationProblem.InvalidCompressionStrength
        )
        collector.collect_and_log(path, strength_problem, value)
        # In this case, we remove the compression.
        return _validate_data_value(
            payload, nxdl_type, nxdl_enum, nxdl_enum_open, path
        )

    # TODO: Do we need to issue a warning if string/bool compression is used?

    # Apply standard validation to the compressed payload.
    value["compress"] = _validate_data_value(
        payload, nxdl_type, nxdl_enum, nxdl_enum_open, path
    )
    return value
798846
799847
800848@cache
0 commit comments