Skip to content

Commit 1d5e94b

Browse files
committed
ignore groups without NX_class attribute (and fields/attributes) within
1 parent 0bbbe5b commit 1d5e94b

File tree

4 files changed

+281
-94
lines changed

4 files changed

+281
-94
lines changed

src/pynxtools/data/NXtest.nxdl.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,14 @@
4545
</group>
4646
<group type="NXdata" name="specified_group" nameType="specified">
4747
<doc>A group with a name and nameType="specified".</doc>
48-
<field name="specified_field" nameType="specified" type="NX_FLOAT" units="NX_ANY">
48+
<field name="specified_field" nameType="specified" optional="true" type="NX_FLOAT" units="NX_ANY">
4949
<attribute name="specified_attr_in_field" nameType="specified"/>
5050
</field>
5151
<attribute name="specified_attr"/>
5252
</group>
5353
<group type="NXdata" name="any_groupGROUP" nameType="any">
5454
<doc>A group with a name and nameType="any".</doc>
55-
<field name="any_fieldFIELD" nameType="any" optional="true" type="NX_FLOAT" units="NX_ANY">
55+
<field name="any_fieldFIELD" nameType="any" type="NX_FLOAT" units="NX_ANY">
5656
<attribute name="any_attrATTR_in_field" nameType="any"/>
5757
</field>
5858
<attribute name="any_attrATTR" nameType="any"/>

src/pynxtools/dataconverter/helpers.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class ValidationProblem(Enum):
8787
KeysWithAndWithoutConcept = auto()
8888
InvalidCompressionStrength = auto()
8989
CompressionStrengthZero = auto()
90+
MissingNXclass = auto()
9091

9192

9293
class Collector:
@@ -231,6 +232,10 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
231232
logger.warning(
232233
f"Compression strength for {path} = {value} should be between 0 and 9."
233234
)
235+
elif log_type == ValidationProblem.MissingNXclass:
236+
logger.info(
237+
f"Group '{path}' does not have an NX_class attribute and will therefore not be validated."
238+
)
234239

235240
def collect_and_log(
236241
self,
@@ -255,6 +260,7 @@ def collect_and_log(
255260
ValidationProblem.UnitWithoutDocumentation,
256261
ValidationProblem.OpenEnumWithNewItem,
257262
ValidationProblem.CompressionStrengthZero,
263+
ValidationProblem.MissingNXclass,
258264
):
259265
if self.logging and message not in self.data["info"]:
260266
self._log(path, log_type, value, *args, **kwargs)
@@ -710,8 +716,7 @@ def is_value_valid_element_of_enum(value, elist) -> tuple[bool, list]:
710716

711717
def is_valid_data_type(value: Any, accepted_types: Sequence) -> bool:
712718
"""Checks whether the given value or its children are of an accepted type."""
713-
if isinstance(value, tuple) and len(value) == 1:
714-
value = value[0]
719+
715720
if not isinstance(value, np.ndarray):
716721
value = np.array(value)
717722
# Handle 'object' dtype separately (for lists from HDF5 files)

src/pynxtools/dataconverter/validation.py

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ def find_node_for(
301301
current.search_add_child_for(child)
302302
for child in current.get_all_direct_children_names(
303303
node_type=other_node_type,
304-
# nx_class=nx_class,
304+
nx_class=nx_class,
305305
)
306306
]
307307
other_node = best_namefit_of(last_elem, children_to_check)
@@ -327,7 +327,6 @@ def find_node_for(
327327
# )
328328
# raise TypeError("Expected field for {path}")
329329
# elif node_type == "field" and other_node_type == "group":
330-
# print(other_node)
331330
# collector.collect_and_log(
332331
# f"{entry_name}/{path}",
333332
# ValidationProblem.ExpectedGroup,
@@ -596,6 +595,32 @@ def check_reserved_prefix(
596595

597596
return
598597

598+
def has_breakpoint(key_path: str) -> bool:
    """
    Report whether an ancestor of ``key_path`` stops validation.

    Walks up the path one segment at a time and looks each ancestor up in
    ``data`` (taken from the enclosing scope; presumably the HDF5 group
    that is being validated -- confirm against the caller). The walk stops
    with ``True`` as soon as an ancestor is an ``NXcollection``, has no
    (or an empty) ``NX_class`` attribute, or cannot be found at all.

    Ancestors that are datasets are skipped: for an attribute of a
    dataset, the first ancestor is the dataset itself, and the check
    continues with the dataset's parent group.

    Args:
        key_path (str): Path to start from, relative to ``data``. Only the
            ancestors of this path are inspected, never the path itself.

    Returns:
        bool: True if a breakpoint was found, False otherwise.
    """
    while "/" in key_path:
        # Drop the last segment to step up to the parent object.
        key_path = key_path.rsplit("/", 1)[0]
        parent_data = data.get(key_path)

        if isinstance(parent_data, h5py.Dataset):
            # Datasets carry no NX_class; keep walking up to the group.
            continue

        if parent_data is None:
            # An ancestor that cannot be resolved has no NX_class either.
            return True

        nx_class = parent_data.attrs.get("NX_class")
        if isinstance(nx_class, bytes):
            # Fixed-length HDF5 strings are read back as bytes by h5py.
            nx_class = nx_class.decode("utf-8", errors="replace")

        # Falsy check (None or empty) mirrors the test used in handle_group,
        # so that an ignored group also hides everything below it.
        if not nx_class or nx_class == "NXcollection":
            return True

    return False
623+
599624
def handle_group(path: str, group: h5py.Group):
600625
"""
601626
Handle validation logic for HDF5 groups.
@@ -610,6 +635,10 @@ def handle_group(path: str, group: h5py.Group):
610635

611636
if not group.attrs.get("NX_class"):
612637
# We ignore additional groups that don't have an NX_class
638+
if not ignore_undocumented and full_path == group.name:
639+
collector.collect_and_log(
640+
full_path, ValidationProblem.MissingNXclass, None
641+
)
613642
return
614643

615644
try:
@@ -730,27 +759,21 @@ def handle_field(
730759
if it is an AXISNAME or a DATA.
731760
"""
732761
full_path = f"{entry_name}/{path}"
762+
key_path = path.replace("@", "")
763+
parent_node = None
764+
765+
if has_breakpoint(key_path):
766+
# We are inside an NXcollection or a group without NX_class.
767+
return
768+
733769
check_reserved_prefix(full_path, appdef_node.name, "field")
770+
734771
try:
735772
node = find_node_for(path, node_type="field", hint=hint)
736773
except TypeError:
737774
return
738775

739776
if node is None:
740-
key_path = path.replace("@", "")
741-
parent_node = None
742-
while "/" in key_path:
743-
key_path = key_path.rsplit("/", 1)[0] # Remove last segment
744-
parent_data = data.get(key_path)
745-
nx_class = (
746-
parent_data.attrs.get("NX_class")
747-
if parent_data is not None
748-
else None
749-
)
750-
if nx_class == "NXcollection":
751-
# Collection found for parents, mark as documented
752-
return
753-
754777
# Only report undocumented if the group is not linked
755778
if not ignore_undocumented and full_path == dataset.name:
756779
collector.collect_and_log(
@@ -831,31 +854,20 @@ def handle_attributes(
831854
# Ignore special attrs
832855
continue
833856

857+
key_path = f"{path}/{attr_name}"
858+
parent_node = None
859+
860+
if has_breakpoint(key_path):
861+
# We are inside an NXcollection or a group without NX_class.
862+
continue # This continues the outer attr_name loop
863+
834864
check_reserved_prefix(attr_name, appdef_node.name, "attribute")
835865

836866
try:
837867
node = find_node_for(f"{path}/{attr_name}", node_type="attribute")
838868
except TypeError:
839869
return
840870

841-
key_path = f"{path}/{attr_name}"
842-
parent_node = None
843-
found_collection = False
844-
while "/" in key_path:
845-
key_path = key_path.rsplit("/", 1)[0] # Remove last segment
846-
parent_data = data.get(key_path)
847-
nx_class = (
848-
parent_data.attrs.get("NX_class")
849-
if parent_data is not None
850-
else None
851-
)
852-
if nx_class == "NXcollection":
853-
# Collection found for parents, mark as documented
854-
found_collection = True
855-
break
856-
if found_collection:
857-
continue # This continues the outer attr_name loop
858-
859871
if node is None:
860872
# Only report undocumented if the parent object is not linked
861873
if not ignore_undocumented and full_path.startswith(parent_obj.name):

0 commit comments

Comments
 (0)