Skip to content

Commit da81bdf

Browse files
authored
Merge pull request #621 from FAIRmat-NFDI/sibling-inheritance
2 parents 842e778 + e2c0677 commit da81bdf

File tree

6 files changed

+298
-42
lines changed

6 files changed

+298
-42
lines changed

.github/workflows/plugin_test.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ jobs:
3636
branch: main
3737
tests_to_run: tests/.
3838
- plugin: pynxtools-raman
39-
branch: main
39+
branch: sibling-inheritance
4040
tests_to_run: tests/.
4141
- plugin: pynxtools-spm
42-
branch: main
42+
branch: field-inheritance
4343
tests_to_run: tests/.
4444
- plugin: pynxtools-xps
4545
branch: main

src/pynxtools/data/NXtest.nxdl.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@
123123
</group>
124124
<group name="identified_calibration" type="NXcalibration" optional="true">
125125
<field name="identifier_1"/>
126+
<field name="identifier_2" optional="True"/>
126127
</group>
127128
<group name="named_collection" type="NXcollection" optional="true"/>
128129
</group>

src/pynxtools/dataconverter/helpers.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from datetime import datetime, timezone
2525
from enum import Enum
2626
from functools import lru_cache
27-
from typing import Any, Callable, List, Optional, Tuple, Union, Sequence
27+
from typing import Any, Callable, List, Optional, Tuple, Union, Sequence, cast
2828

2929
import h5py
3030
import lxml.etree as ET
@@ -67,6 +67,7 @@ class ValidationProblem(Enum):
6767
NXdataMissingAxisData = 19
6868
NXdataAxisMismatch = 20
6969
KeyToBeRemoved = 21
70+
InvalidConceptForNonVariadic = 22
7071

7172

7273
class Collector:
@@ -150,6 +151,12 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
150151
)
151152
elif log_type == ValidationProblem.KeyToBeRemoved:
152153
logger.warning(f"The attribute {path} will not be written.")
154+
elif log_type == ValidationProblem.InvalidConceptForNonVariadic:
155+
value = cast(Any, value)
156+
log_text = f"Given {value.type} name '{path}' conflicts with the non-variadic name '{value}'"
157+
if value.type == "group":
158+
log_text += f", which should be of type {value.nx_class}."
159+
logger.warning(log_text)
153160

154161
def collect_and_log(
155162
self,

src/pynxtools/dataconverter/nexus_tree.py

Lines changed: 161 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
is_variadic,
4242
is_appdef,
4343
remove_namespace_from_tag,
44+
NEXUS_TO_PYTHON_DATA_TYPES,
4445
)
4546
from pynxtools.definitions.dev_tools.utils.nxdl_utils import (
4647
get_nx_namefit,
@@ -214,19 +215,6 @@ def __init__(
214215
self.is_a = []
215216
self.parent_of = []
216217

217-
def _construct_inheritance_chain_from_parent(self):
218-
"""
219-
Builds the inheritance chain of the current node based on the parent node.
220-
"""
221-
if self.parent is None:
222-
return
223-
for xml_elem in self.parent.inheritance:
224-
elem = xml_elem.find(
225-
f"nx:{self.type}/[@name='{self.name}']", namespaces=namespaces
226-
)
227-
if elem is not None:
228-
self.inheritance.append(elem)
229-
230218
def get_path(self) -> str:
231219
"""
232220
Gets the path of the current node based on the node name.
@@ -356,6 +344,7 @@ def get_all_direct_children_names(
356344
Returns:
357345
Set[str]: A set of children names.
358346
"""
347+
359348
if depth is not None and (not isinstance(depth, int) or depth < 0):
360349
raise ValueError("Depth must be a positive integer or None")
361350

@@ -601,6 +590,7 @@ def add_node_from(self, xml_elem: ET._Element) -> Optional["NexusNode"]:
601590
type=tag,
602591
optionality=default_optionality,
603592
nxdl_base=xml_elem.base,
593+
inheritance=[xml_elem],
604594
)
605595
elif tag == "group":
606596
name = xml_elem.attrib.get("name")
@@ -684,6 +674,19 @@ def __init__(self, **data) -> None:
684674
self._construct_inheritance_chain_from_parent()
685675
self._set_optionality()
686676

677+
def _construct_inheritance_chain_from_parent(self):
    """
    Extends the inheritance chain of this node from the chain of its parent.

    Each element in the parent's inheritance chain is searched for a direct
    child of this node's kind (`nx:<type>`) whose `name` attribute equals the
    name of this node. Every match is appended to `self.inheritance`.
    A node without a parent is left untouched.
    """
    parent = self.parent
    if parent is None:
        return

    for ancestor_elem in parent.inheritance:
        # The lookup path is rebuilt per element, exactly as many times as
        # there are elements in the parent's chain.
        lookup = f"nx:{self.type}/[@name='{self.name}']"
        match = ancestor_elem.find(lookup, namespaces=namespaces)
        if match is None:
            continue
        self.inheritance.append(match)
689+
687690

688691
class NexusGroup(NexusNode):
689692
"""
@@ -864,6 +867,145 @@ class NexusEntity(NexusNode):
864867
open_enum: bool = False
865868
shape: Optional[Tuple[Optional[int], ...]] = None
866869

870+
def _check_compatibility_with(self, xml_elem: ET._Element) -> bool:
    """
    Check compatibility of this node with an XML element from the (possible) inheritance.

    The element is accepted only if every enabled check passes: name fit,
    data type, units and enumeration. A dimension check is implemented as
    well, but it is currently not part of the enabled checks (see the TODO
    next to the list of checks at the end of this method).

    Args:
        xml_elem (ET._Element): The candidate element to compare this node against.

    Returns:
        bool: True if all enabled checks pass, False otherwise.

    Raises:
        Exception: If a bracketed (list-like) enumeration item of `xml_elem`
            cannot be parsed as a Python literal.
    """
    # Imported locally (once per call) instead of inside the item loop.
    import ast

    def _check_name_fit(xml_elem: ET._Element) -> bool:
        """The name of this node must fit the element name (namefit score >= 0)."""
        elem_name = xml_elem.attrib.get("name")
        name_any = is_name_type(xml_elem, "any")
        name_partial = is_name_type(xml_elem, "partial")
        return get_nx_namefit(self.name, elem_name, name_any, name_partial) >= 0

    def _check_type_fit(xml_elem: ET._Element) -> bool:
        """The Python types allowed for this node must be a subset of the element's."""
        elem_type = xml_elem.attrib.get("type")
        if not elem_type:
            # No type on the element: nothing to compare against.
            return True
        return set(NEXUS_TO_PYTHON_DATA_TYPES[self.dtype]).issubset(
            NEXUS_TO_PYTHON_DATA_TYPES[elem_type]
        )

    def _check_units_fit(xml_elem: ET._Element) -> bool:
        """Compare the unit category of the element with the one of this node."""
        elem_units = xml_elem.attrib.get("units")
        if not elem_units or elem_units == "NX_ANY" or elem_units == self.unit:
            return True
        # NOTE(review): this is the literal meaning of the previous
        # `not a == b and c in [...]` condition (`not` binds tighter than
        # `and`). As written, a mismatch is only rejected when this node's
        # unit is one of the three categories below; every other mismatch,
        # including a node without unit, is accepted. Confirm that this is
        # the intended rule before tightening it.
        if elem_units != "NX_TRANSFORMATION" and self.unit in (
            "NX_LENGTH",
            "NX_ANGLE",
            "NX_UNITLESS",
        ):
            return False
        return True

    def _check_enum_fit(xml_elem: ET._Element) -> bool:
        """An enumerated element requires this node to be enumerated as well."""
        elem_enum = xml_elem.find("nx:enumeration", namespaces=namespaces)
        if elem_enum is None:
            return True
        if self.items is None:
            # Case where inherited entity is enumerated, but current node isn't
            return False
        if elem_enum.attrib.get("open", "false") == "true":
            return True

        # Bracketed items are still parsed so that a malformed NXDL item is
        # reported; startswith/endswith also cope with an empty value.
        for item in elem_enum.findall("nx:item", namespaces=namespaces):
            value = item.attrib["value"]
            if value.startswith("[") and value.endswith("]"):
                try:
                    ast.literal_eval(value)
                except (ValueError, SyntaxError) as err:
                    raise Exception(
                        f"Error parsing enumeration item in the provided NXDL: {value}"
                    ) from err

        # TODO: should we be this strict here? Or can appdefs define additional terms?
        # The items of this node are deliberately not required to be a subset
        # of the element's items; a closed enumeration is accepted as it is.
        return True

    def _check_dimensions_fit(xml_elem: ET._Element) -> bool:
        """The shape declared by the element must equal the shape of this node."""
        if not self.shape:
            return True
        elem_dimensions = xml_elem.find("nx:dimensions", namespaces=namespaces)
        if elem_dimensions is None:
            return True

        elem_dim = elem_dimensions.findall("nx:dim", namespaces=namespaces)
        rank = elem_dimensions.attrib.get("rank")
        if rank is None:
            # Without an explicit rank, fall back to the number of dim elements.
            rank_value = len(elem_dim)
        else:
            try:
                rank_value = int(rank)
            except ValueError:
                # TODO: Handling of symbols
                return True

        dims: List[Optional[int]] = [None] * rank_value
        for dim in elem_dim:
            # NOTE(review): `index` is used directly as a position in the
            # shape - confirm the index base against the NXDL convention.
            idx = int(dim.attrib["index"])
            if value := dim.attrib.get("value", None):
                # If not, this is probably an old dim element with ref.
                try:
                    dims[idx] = int(value)
                except ValueError:
                    # TODO: Handling of symbols
                    pass
        elem_shape = tuple(dims)

        if elem_shape and elem_shape != self.shape:
            return False
        return True

    check_functions = (
        _check_name_fit,
        _check_type_fit,
        _check_units_fit,
        _check_enum_fit,
        # TODO: check if any inheritance is wrongfully assigned without dim checks
        # _check_dimensions_fit,
    )

    # Stops at the first failing check, in the order listed above.
    return all(check(xml_elem) for check in check_functions)
996+
def _construct_inheritance_chain_from_parent(self):
    """
    Builds the inheritance chain of the current node based on the parent node.

    For every element in the parent's inheritance chain, all direct child
    elements of this node's kind (`nx:<type>`) are examined. Those that pass
    `_check_compatibility_with` are appended to `self.inheritance`, in the
    order in which they are found. A node without a parent is left untouched.
    """
    if self.parent is None:
        return
    for xml_elem in self.parent.inheritance:
        # findall always returns a list (possibly empty), so no None guard
        # is needed before iterating.
        for elem in xml_elem.findall(f"nx:{self.type}", namespaces=namespaces):
            if self._check_compatibility_with(elem):
                self.inheritance.append(elem)
1008+
8671009
def _set_type(self):
8681010
"""
8691011
Sets the dtype of the current entity based on the values in the inheritance chain.
@@ -950,7 +1092,13 @@ def _set_shape(self):
9501092

9511093
def __init__(self, **data) -> None:
9521094
super().__init__(**data)
1095+
self._set_unit()
1096+
self._set_type()
1097+
self._set_items_and_enum_type()
1098+
self._set_optionality()
1099+
self._set_shape()
9531100
self._construct_inheritance_chain_from_parent()
1101+
# Set all parameters again based on the acquired inheritance
9541102
self._set_unit()
9551103
self._set_type()
9561104
self._set_items_and_enum_type()

src/pynxtools/dataconverter/validation.py

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,31 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode
155155
for node in nodes:
156156
if not node.variadic:
157157
if instance_name == node.name:
158+
if concept_name and concept_name != node.name:
159+
inherited_names = [
160+
name
161+
if (name := elem.attrib.get("name")) is not None
162+
else type_attr[2:].upper()
163+
for elem in node.inheritance
164+
if (name := elem.attrib.get("name")) is not None
165+
or (type_attr := elem.attrib.get("type"))
166+
and len(type_attr) > 2
167+
]
168+
if concept_name not in inherited_names:
169+
if node.type == "group":
170+
if concept_name != node.nx_class[2:].upper():
171+
collector.collect_and_log(
172+
concept_name,
173+
ValidationProblem.InvalidConceptForNonVariadic,
174+
node,
175+
)
176+
else:
177+
collector.collect_and_log(
178+
concept_name,
179+
ValidationProblem.InvalidConceptForNonVariadic,
180+
node,
181+
)
182+
return None
158183
return node
159184
else:
160185
if concept_name and concept_name == node.name:
@@ -194,16 +219,32 @@ def validate_dict_against(
194219
"""
195220

196221
def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]:
222+
variations = []
223+
224+
prefix = f"{'@' if node.type == 'attribute' else ''}"
197225
if not node.variadic:
198-
if f"{'@' if node.type == 'attribute' else ''}{node.name}" in keys:
199-
return [node.name]
226+
if f"{prefix}{node.name}" in keys:
227+
variations += [node.name]
200228
elif (
201229
hasattr(node, "nx_class")
202230
and f"{convert_nexus_to_caps(node.nx_class)}[{node.name}]" in keys
203231
):
204-
return [f"{convert_nexus_to_caps(node.nx_class)}[{node.name}]"]
205-
206-
variations = []
232+
variations += [f"{convert_nexus_to_caps(node.nx_class)}[{node.name}]"]
233+
234+
# Also add all variations like CONCEPT[node.name] for inherited concepts
235+
inherited_names = []
236+
for elem in node.inheritance:
237+
inherited_name = elem.attrib.get("name")
238+
if not inherited_name:
239+
inherited_name = elem.attrib.get("type")[2:].upper()
240+
if inherited_name.startswith("NX"):
241+
inherited_name = inherited_name[2:].upper()
242+
inherited_names += [inherited_name]
243+
for name in set(inherited_names):
244+
if f"{prefix}{name}[{prefix}{node.name}]" in keys:
245+
variations += [f"{prefix}{name}[{prefix}{node.name}]"]
246+
247+
return variations
207248

208249
for key in keys:
209250
concept_name, instance_name = split_class_and_name_of(key)

0 commit comments

Comments
 (0)