Skip to content

Commit 56d13c1

Browse files
authored
Validation fixes (#350)
* Fix adding nodes from naming convention * Pin numpy<2.0.0 * Fix pyproject * Namefit named groups in their parent concepts * Add `is_a` and `parent_of` relationships * Use parent_of concept in validation * Fix recursion errors and tests * Add docstrings and fixes recursion problem * fix: use findall instead of xpath
1 parent 70d74b3 commit 56d13c1

File tree

5 files changed

+236
-75
lines changed

5 files changed

+236
-75
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ dependencies = [
2828
"h5py>=3.6.0",
2929
"xarray>=0.20.2",
3030
"PyYAML>=6.0",
31-
"numpy>=1.21.2",
31+
"numpy>=1.21.2,<2.0.0",
3232
"pandas>=1.3.2",
3333
"ase>=3.19.0",
3434
"mergedeep",

src/pynxtools/dataconverter/nexus_tree.py

Lines changed: 166 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
It also allows for adding further nodes from the inheritance chain on the fly.
2929
"""
3030

31+
from functools import reduce
3132
from typing import Any, List, Literal, Optional, Set, Tuple, Union
3233

3334
import lxml.etree as ET
@@ -41,6 +42,7 @@
4142
is_appdef,
4243
remove_namespace_from_tag,
4344
)
45+
from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nx_namefit
4446

4547
NexusType = Literal[
4648
"NX_BINARY",
@@ -139,15 +141,28 @@ class NexusNode(NodeMixin):
139141
optionality: Literal["required", "recommended", "optional"] = "required"
140142
variadic: bool = False
141143
inheritance: List[ET._Element]
144+
is_a: List["NexusNode"]
145+
parent_of: List["NexusNode"]
142146

143147
def _set_optionality(self):
148+
"""
149+
Sets the optionality of the current node
150+
if `recommended`, `required` or `optional` is set.
151+
Also sets a group to optional if `minOccurs == 0` or to required
152+
if `minOccurs > 0`.
153+
"""
144154
if not self.inheritance:
145155
return
146156
if self.inheritance[0].attrib.get("recommended"):
147157
self.optionality = "recommended"
148-
elif (
149-
self.inheritance[0].attrib.get("optional")
150-
or self.inheritance[0].attrib.get("minOccurs") == "0"
158+
elif self.inheritance[0].attrib.get("required") or (
159+
isinstance(self, NexusGroup)
160+
and self.occurrence_limits[0] is not None
161+
and self.occurrence_limits[0] > 0
162+
):
163+
self.optionality = "required"
164+
elif self.inheritance[0].attrib.get("optional") or (
165+
isinstance(self, NexusGroup) and self.occurrence_limits[0] == 0
151166
):
152167
self.optionality = "optional"
153168

@@ -172,8 +187,13 @@ def __init__(
172187
else:
173188
self.inheritance = []
174189
self.parent = parent
190+
self.is_a = []
191+
self.parent_of = []
175192

176193
def _construct_inheritance_chain_from_parent(self):
194+
"""
195+
Builds the inheritance chain of the current node based on the parent node.
196+
"""
177197
if self.parent is None:
178198
return
179199
for xml_elem in self.parent.inheritance:
@@ -221,18 +241,33 @@ def search_child_with_name(
221241
direct_child = next((x for x in self.children if x.name == name), None)
222242
if direct_child is not None:
223243
return direct_child
224-
if name in self.get_all_children_names():
244+
if name in self.get_all_direct_children_names():
225245
return self.add_inherited_node(name)
226246
return None
227247

228-
def get_all_children_names(
229-
self, depth: Optional[int] = None, only_appdef: bool = False
248+
def get_all_direct_children_names(
249+
self,
250+
node_type: Optional[str] = None,
251+
nx_class: Optional[str] = None,
252+
depth: Optional[int] = None,
253+
only_appdef: bool = False,
230254
) -> Set[str]:
231255
"""
232256
Get all children names of the current node up to a certain depth.
233257
Only `field`, `group`, `choice` or `attribute` are considered as children.
234258
235259
Args:
260+
node_type (Optional[str], optional):
261+
The tags of the children to consider.
262+
This should either be "field", "group", "choice" or "attribute".
263+
If None all tags are considered.
264+
Defaults to None.
265+
nx_class (Optional[str], optional):
266+
The NeXus class of the group to consider.
267+
This is only used if `node_type` is "group".
268+
It should contain the preceding `NX` and the class name in lowercase,
269+
e.g., "NXentry".
270+
Defaults to None.
236271
depth (Optional[int], optional):
237272
The inheritance depth up to which to get children names.
238273
`depth=1` will return only the children of the current node.
@@ -251,18 +286,24 @@ def get_all_children_names(
251286
if depth is not None and (not isinstance(depth, int) or depth < 0):
252287
raise ValueError("Depth must be a positive integer or None")
253288

289+
tag_type = ""
290+
if node_type == "group" and nx_class is not None:
291+
tag_type = f"[@type='{nx_class}']"
292+
293+
if node_type is not None:
294+
search_tags = f"nx:{node_type}{tag_type}"
295+
else:
296+
search_tags = (
297+
"*[self::nx:field or self::nx:group "
298+
"or self::nx:attribute or self::nx:choice]"
299+
)
300+
254301
names = set()
255302
for elem in self.inheritance[:depth]:
256303
if only_appdef and not is_appdef(elem):
257304
break
258305

259-
for subelems in elem.xpath(
260-
(
261-
r"*[self::nx:field or self::nx:group "
262-
r"or self::nx:attribute or self::nx:choice]"
263-
),
264-
namespaces=namespaces,
265-
):
306+
for subelems in elem.xpath(search_tags, namespaces=namespaces):
266307
if "name" in subelems.attrib:
267308
names.add(subelems.attrib["name"])
268309
elif "type" in subelems.attrib:
@@ -351,15 +392,54 @@ def get_docstring(self, depth: Optional[int] = None) -> List[str]:
351392
return docstrings
352393

353394
def _build_inheritance_chain(self, xml_elem: ET._Element) -> List[ET._Element]:
395+
"""
396+
Builds the inheritance chain based on the given xml node and the inheritance
397+
chain of this node.
398+
399+
Args:
400+
xml_elem (ET._Element): The xml element to build the inheritance chain for.
401+
402+
Returns:
403+
List[ET._Element]:
404+
The list of xml nodes representing the inheritance chain.
405+
This represents the direct field or group inside the specific xml file.
406+
"""
354407
name = xml_elem.attrib.get("name")
355408
inheritance_chain = [xml_elem]
356409
for elem in self.inheritance:
357410
inherited_elem = elem.xpath(
358411
f"nx:group[@type='{xml_elem.attrib['type']}' and @name='{name}']"
359412
if name is not None
360-
else f"nx:group[@type='{xml_elem.attrib['type']}']",
413+
else f"nx:group[@type='{xml_elem.attrib['type']}' and not(@name)]",
361414
namespaces=namespaces,
362415
)
416+
if not inherited_elem and name is not None:
417+
# Try to namefit
418+
groups = elem.findall(
419+
f"nx:group[@type='{xml_elem.attrib['type']}']",
420+
namespaces=namespaces,
421+
)
422+
best_group = None
423+
best_score = -1
424+
for group in groups:
425+
if name in group.attrib and not contains_uppercase(
426+
group.attrib["name"]
427+
):
428+
continue
429+
group_name = (
430+
group.attrib.get("name")
431+
if "name" in group.attrib
432+
else group.attrib["type"][2:].upper()
433+
)
434+
435+
score = get_nx_namefit(name, group_name)
436+
if get_nx_namefit(name, group_name) >= best_score:
437+
best_group = group
438+
best_score = score
439+
440+
if best_group is not None:
441+
inherited_elem = [best_group]
442+
363443
if inherited_elem and inherited_elem[0] not in inheritance_chain:
364444
inheritance_chain.append(inherited_elem[0])
365445
bc_xml_root, _ = get_nxdl_root_and_path(xml_elem.attrib["type"])
@@ -432,18 +512,19 @@ def add_inherited_node(self, name: str) -> Optional["NexusNode"]:
432512
"""
433513
for elem in self.inheritance:
434514
xml_elem = elem.xpath(
435-
f"*[self::nx:field or self::nx:group or self::nx:attribute][@name='{name}']",
515+
"*[self::nx:field or self::nx:group or"
516+
f" self::nx:attribute or self::nx:choice][@name='{name}']",
436517
namespaces=namespaces,
437518
)
438519
if not xml_elem:
439520
# Find group by naming convention
440521
xml_elem = elem.xpath(
441-
f"*[self::nx:group][@type='NX{name.lower()}']",
522+
"*[self::nx:group or self::nx:choice]"
523+
f"[@type='NX{name.lower()}' and not(@name)]",
442524
namespaces=namespaces,
443525
)
444526
if xml_elem:
445-
new_node = self.add_node_from(xml_elem[0])
446-
return new_node
527+
return self.add_node_from(xml_elem[0])
447528
return None
448529

449530

@@ -462,7 +543,7 @@ class NexusChoice(NexusNode):
462543
type: Literal["choice"] = "choice"
463544

464545
def __init__(self, **data) -> None:
465-
super().__init__(**data)
546+
super().__init__(type=self.type, **data)
466547
self._construct_inheritance_chain_from_parent()
467548
self._set_optionality()
468549

@@ -489,7 +570,54 @@ class NexusGroup(NexusNode):
489570
Optional[int],
490571
] = (None, None)
491572

573+
def _check_sibling_namefit(self):
574+
"""
575+
Namefits siblings at the current tree level if they are not part of the same
576+
appdef or base class.
577+
The function fills the `parent_of` property of this node and the `is_a` property
578+
of the connected nodes to represent the relation.
579+
It also adapts the optionality if enough required children are present.
580+
"""
581+
if not self.variadic:
582+
return
583+
584+
for sibling in self.parent.get_all_direct_children_names(
585+
node_type=self.type, nx_class=self.nx_class
586+
):
587+
if sibling == self.name or contains_uppercase(sibling):
588+
continue
589+
if sibling.lower() == self.name.lower():
590+
continue
591+
592+
if get_nx_namefit(sibling, self.name) >= -1:
593+
fit = self.parent.search_child_with_name(sibling)
594+
if (
595+
self.inheritance[0] != fit.inheritance[0]
596+
and self.inheritance[0] in fit.inheritance
597+
):
598+
fit.is_a.append(self)
599+
self.parent_of.append(fit)
600+
601+
min_occurs = (
602+
0 if self.occurrence_limits[0] is None else self.occurrence_limits[0]
603+
)
604+
min_occurs = 1 if self.optionality == "required" else min_occurs
605+
606+
required_children = reduce(
607+
lambda x, y: x + (1 if y.optionality == "required" else 0),
608+
self.parent_of,
609+
0,
610+
)
611+
612+
if required_children >= min_occurs:
613+
self.optionality = "optional"
614+
492615
def _set_occurence_limits(self):
616+
"""
617+
Sets the occurrence limits of the current group.
618+
Searches the inheritance chain until a value is found.
619+
Otherwise, the `occurrence_limits` are set to (None, None).
620+
"""
493621
if not self.inheritance:
494622
return
495623
xml_elem = self.inheritance[0]
@@ -511,6 +639,7 @@ def __init__(self, nx_class: str, **data) -> None:
511639
self.nx_class = nx_class
512640
self._set_occurence_limits()
513641
self._set_optionality()
642+
self._check_sibling_namefit()
514643

515644
def __repr__(self) -> str:
516645
return (
@@ -561,18 +690,31 @@ class NexusEntity(NexusNode):
561690
shape: Optional[Tuple[Optional[int], ...]] = None
562691

563692
def _set_type(self):
693+
"""
694+
Sets the dtype of the current entity based on the values in the inheritance chain.
695+
The first value found is used.
696+
"""
564697
for elem in self.inheritance:
565698
if "type" in elem.attrib:
566699
self.dtype = elem.attrib["type"]
567700
return
568701

569702
def _set_unit(self):
703+
"""
704+
Sets the unit of the current entity based on the values in the inheritance chain.
705+
The first value found is used.
706+
"""
570707
for elem in self.inheritance:
571708
if "units" in elem.attrib:
572709
self.unit = elem.attrib["units"]
573710
return
574711

575712
def _set_items(self):
713+
"""
714+
Sets the enumeration items of the current entity
715+
based on the values in the inheritance chain.
716+
The first value found is used.
717+
"""
576718
if not self.dtype == "NX_CHAR":
577719
return
578720
for elem in self.inheritance:
@@ -584,6 +726,10 @@ def _set_items(self):
584726
return
585727

586728
def _set_shape(self):
729+
"""
730+
Sets the shape of the current entity based on the values in the inheritance chain.
731+
The first value found is used.
732+
"""
587733
for elem in self.inheritance:
588734
dimension = elem.find(f"nx:dimensions", namespaces=namespaces)
589735
if dimension is not None:
@@ -638,7 +784,7 @@ def populate_tree_from_parents(node: NexusNode):
638784
node (NexusNode):
639785
The current node from which to populate the tree.
640786
"""
641-
for child in node.get_all_children_names(only_appdef=True):
787+
for child in node.get_all_direct_children_names(only_appdef=True):
642788
child_node = node.search_child_with_name(child)
643789
populate_tree_from_parents(child_node)
644790

0 commit comments

Comments
 (0)