Skip to content

Commit 769bfd8

Browse files
committed
use a unified function for checking reserved suffixes
1 parent e149934 commit 769bfd8

File tree

2 files changed

+69
-100
lines changed

2 files changed

+69
-100
lines changed

src/pynxtools/dataconverter/helpers.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import logging
2222
import os
2323
import re
24-
from collections.abc import Sequence
24+
from collections.abc import Mapping, Sequence
2525
from datetime import datetime, timezone
2626
from enum import Enum, auto
2727
from functools import cache, lru_cache
@@ -892,6 +892,70 @@ def validate_data_value(
892892
return validate_data_value(value, nxdl_type, nxdl_enum, nxdl_enum_open, path)
893893

894894

895+
def split_class_and_name_of(name: str) -> tuple[Optional[str], str]:
    """
    Split a data dict entry into its class part and its instance name.

    An entry written as `CLASS[name]` yields both parts, e.g.
    `split_class_and_name_of("ENTRY[entry]")` returns `("ENTRY", "entry")`.
    An entry without a bracketed part is handed back unchanged, e.g.
    `split_class_and_name_of("entry")` returns `(None, "entry")`.
    Anything starting with `@` directly after the closing bracket stays
    attached to the name, e.g. `split_class_and_name_of("DATA[data]@units")`
    returns `("DATA", "data@units")`.

    Args:
        name (str): The data dict entry

    Returns:
        tuple[Optional[str], str]:
            The class name (None when the entry has no `CLASS[...]` part)
            followed by the instance name.
    """
    matched = re.search(r"([^\[]+)\[([^\]]+)\](\@.*)?", name)
    if matched is None:
        return None, name

    # The third group is optional and is None when no "@..." part follows "]".
    class_name, instance_name, attribute_part = matched.groups()
    if attribute_part is not None:
        instance_name += attribute_part
    return class_name, instance_name
918+
919+
920+
def check_reserved_suffix(
    path: str,
    mapping: Mapping[str, Any],
) -> None:
    """
    Check if an associated field exists for a key with a reserved suffix.

    Reserved suffixes imply the presence of an associated base field (e.g.,
    "temperature_errors" implies "temperature" must exist in the mapping).
    When the base field cannot be found, a
    `ValidationProblem.ReservedSuffixWithoutField` is reported through
    `collector`; nothing is raised or returned.

    The base field counts as present when either

    - its bare name is a key of `mapping` (the mapping holds the names of
      the siblings of `path`), or
    - `mapping` contains a `/` separated key that lies directly below the
      same parent as `path` and whose last component is the base name or
      ends with `[<base name>]` (the `CLASS[name]` notation).

    Only the first entry of `RESERVED_SUFFIXES` that matches the name is
    evaluated.

    Args:
        path (str):
            The full path of the field to check
            (e.g., "/entry1/sample/temperature_errors" or
            "/ENTRY[entry1]/sample/temperature_errors").
        mapping (Mapping[str, Any]):
            Either a mapping keyed by sibling names or a mapping keyed by
            full `/` separated paths.
    """
    # rpartition (unlike unpacking rsplit) also copes with a path without "/".
    parent_path, _, name = path.rpartition("/")
    _, instance_name = split_class_and_name_of(name)

    for suffix in RESERVED_SUFFIXES:
        if not instance_name.endswith(suffix):
            continue

        associated_field = instance_name.removesuffix(suffix)

        # Mapping keyed by sibling names: a direct lookup is sufficient.
        if associated_field in mapping:
            return

        # Mapping keyed by full paths: the key must be a direct child of the
        # same parent and its last component must name the base field. Plain
        # startswith/endswith checks on the whole key would also accept
        # "/entry/data2/x" for parent "/entry/data" and "other_x" for "x".
        bracketed_field = f"[{associated_field}]"
        for key in mapping:
            key_parent, _, key_name = key.rpartition("/")
            if key_parent == parent_path and (
                key_name == associated_field or key_name.endswith(bracketed_field)
            ):
                return

        collector.collect_and_log(
            path,
            ValidationProblem.ReservedSuffixWithoutField,
            associated_field,
            suffix,
        )
        return
957+
958+
895959
def check_reserved_prefix(
896960
path: str,
897961
appdef_name: str,

src/pynxtools/dataconverter/validation.py

Lines changed: 4 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,15 @@
3434
from cachetools.keys import hashkey
3535

3636
from pynxtools.dataconverter.helpers import (
37-
RESERVED_SUFFIXES,
3837
Collector,
3938
ValidationProblem,
4039
check_reserved_prefix,
40+
check_reserved_suffix,
4141
clean_str_attr,
4242
collector,
4343
convert_nexus_to_caps,
4444
is_valid_data_field,
45+
split_class_and_name_of,
4546
)
4647
from pynxtools.dataconverter.nexus_tree import (
4748
NexusEntity,
@@ -110,31 +111,6 @@ def default_to_regular_dict(d):
110111
return default_to_regular_dict(data_tree)
111112

112113

113-
def split_class_and_name_of(name: str) -> tuple[Optional[str], str]:
114-
"""
115-
Return the class and the name of a data dict entry of the form
116-
`split_class_and_name_of("ENTRY[entry]")`, which will return `("ENTRY", "entry")`.
117-
If this is a simple string it will just return this string, i.e.
118-
`split_class_and_name_of("entry")` will return `None, "entry"`.
119-
120-
Args:
121-
name (str): The data dict entry
122-
123-
Returns:
124-
tuple[Optional[str], str]:
125-
First element is the class name of the entry, second element is the name.
126-
The class name will be None if it is not present.
127-
"""
128-
name_match = re.search(r"([^\[]+)\[([^\]]+)\](\@.*)?", name)
129-
if name_match is None:
130-
return None, name
131-
132-
prefix = name_match.group(3)
133-
return name_match.group(
134-
1
135-
), f"{name_match.group(2)}{'' if prefix is None else prefix}"
136-
137-
138114
def is_valid_unit_for_node(
139115
node: NexusEntity, unit: str, unit_path: str, hints: dict[str, Any]
140116
) -> None:
@@ -464,37 +440,6 @@ def _check_for_nxcollection_parent(node: NexusNode):
464440

465441
return False
466442

467-
def check_reserved_suffix(path: str, parent_data: h5py.Group):
468-
"""
469-
Check if an associated field exists for a key with a reserved suffix.
470-
471-
Reserved suffixes imply the presence of an associated base field (e.g.,
472-
"temperature_errors" implies "temperature" must exist in the mapping).
473-
474-
Args:
475-
path (str):
476-
The full path in the HDF5 file (e.g., "/entry1/sample/temperature_errors").
477-
parent_data (h5py.Group):
478-
The parent group of the field/attribute path to check.
479-
"""
480-
481-
name = path.strip("/").split("/")[-1]
482-
483-
for suffix in RESERVED_SUFFIXES:
484-
if name.endswith(suffix):
485-
associated_field = name.rsplit(suffix, 1)[0]
486-
487-
if associated_field not in parent_data:
488-
collector.collect_and_log(
489-
path,
490-
ValidationProblem.ReservedSuffixWithoutField,
491-
associated_field,
492-
suffix,
493-
)
494-
return
495-
break # We found the suffix and it passed
496-
return
497-
498443
def has_breakpoint(key_path: str) -> bool:
499444
"""
500445
Walk up the path hierarchy and check if a parent is an NXcollection
@@ -805,8 +750,7 @@ def validate(path: str, h5_obj: Union[h5py.Group, h5py.Dataset]):
805750
handle_group(path, h5_obj)
806751
elif isinstance(h5_obj, h5py.Dataset):
807752
handle_field(path, h5_obj)
808-
parent_path = path.strip("/").rsplit("/", 1)[0]
809-
check_reserved_suffix(f"{entry_name}/{path}", data[parent_path])
753+
check_reserved_suffix(f"{entry_name}/{path}", h5_obj.parent)
810754
handle_attributes(path, h5_obj.attrs, h5_obj)
811755

812756
def visititems(group: h5py.Group, path: str = "", filename: str = ""):
@@ -1336,7 +1280,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
13361280
variant_path,
13371281
)
13381282

1339-
check_reserved_suffix(variant_path, mapping)
1283+
check_reserved_suffix(variant_path, keys)
13401284
check_reserved_prefix(variant_path, get_definition(variant_path), "field")
13411285

13421286
# Check unit category
@@ -1801,45 +1745,6 @@ def find_instance_name_conflicts(mapping: MutableMapping[str, str]) -> None:
18011745
)
18021746
keys_to_remove.append(valid_key)
18031747

1804-
def check_reserved_suffix(key: str, mapping: MutableMapping[str, Any]):
1805-
"""
1806-
Check if an associated field exists for a key with a reserved suffix.
1807-
1808-
Reserved suffixes imply the presence of an associated base field (e.g.,
1809-
"temperature_errors" implies "temperature" must exist in the mapping).
1810-
1811-
Args:
1812-
key (str):
1813-
The full key path (e.g., "/ENTRY[entry1]/sample/temperature_errors").
1814-
mapping (MutableMapping[str, Any]):
1815-
The mapping containing the data to validate.
1816-
This should be a dict of `/` separated paths.
1817-
"""
1818-
1819-
parent_path, name = key.rsplit("/", 1)
1820-
concept_name, instance_name = split_class_and_name_of(name)
1821-
1822-
for suffix in RESERVED_SUFFIXES:
1823-
if instance_name.endswith(suffix):
1824-
associated_field = instance_name.rsplit(suffix, 1)[0]
1825-
1826-
if not any(
1827-
k.startswith(parent_path + "/")
1828-
and (
1829-
k.endswith(associated_field)
1830-
or k.endswith(f"[{associated_field}]")
1831-
)
1832-
for k in mapping
1833-
):
1834-
collector.collect_and_log(
1835-
key,
1836-
ValidationProblem.ReservedSuffixWithoutField,
1837-
associated_field,
1838-
suffix,
1839-
)
1840-
return
1841-
break # We found the suffix and it passed
1842-
18431748
def get_definition(
18441749
key: str,
18451750
) -> Optional[str]:

0 commit comments

Comments
 (0)