Skip to content

Commit c040e52

Browse files
committed
Improved get_children_names function
1 parent 4fff6a2 commit c040e52

File tree

2 files changed

+64
-29
lines changed

2 files changed

+64
-29
lines changed

pynxtools/dataconverter/nexus_tree.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -204,16 +204,32 @@ def search_child_with_name(
204204
direct_child = next((x for x in self.children if x.name == name), None)
205205
if direct_child is not None:
206206
return direct_child
207-
if name in self.get_all_children_names():
207+
if name in self.get_all_direct_children_names():
208208
return self.add_inherited_node(name)
209209
return None
210210

211-
def get_all_children_names(self, depth: Optional[int] = None) -> Set[str]:
211+
def get_all_direct_children_names(
212+
self,
213+
node_type: Optional[str] = None,
214+
nx_class: Optional[str] = None,
215+
depth: Optional[int] = None,
216+
) -> Set[str]:
212217
"""
213218
Get all children names of the current node up to a certain depth.
214219
Only `field`, `group` `choice` or `attribute` are considered as children.
215220
216221
Args:
222+
node_type (Optional[str], optional):
223+
The tags of the children to consider.
224+
This should either be "field", "group", "choice" or "attribute".
225+
If None all tags are considered.
226+
Defaults to None.
227+
nx_class (Optional[str], optional):
228+
The NeXus class of the group to consider.
229+
This is only used if `node_type` is "group".
230+
It should contain the preceding `NX` and the class name in lowercase,
231+
e.g., "NXentry".
232+
Defaults to None.
217233
depth (Optional[int], optional):
218234
The inheritance depth up to which get children names.
219235
`depth=1` will return only the children of the current node.
@@ -229,15 +245,21 @@ def get_all_children_names(self, depth: Optional[int] = None) -> Set[str]:
229245
if depth is not None and (not isinstance(depth, int) or depth < 0):
230246
raise ValueError("Depth must be a positive integer or None")
231247

248+
tag_type = ""
249+
if node_type == "group" and nx_class is not None:
250+
tag_type = f"[@type='{nx_class}']"
251+
252+
if node_type is not None:
253+
search_tags = f"*[self::nx:{node_type}{tag_type}]"
254+
else:
255+
search_tags = (
256+
r"*[self::nx:field or self::nx:group "
257+
r"or self::nx:attribute or self::nx:choice]"
258+
)
259+
232260
names = set()
233261
for elem in self.inheritance[:depth]:
234-
for subelems in elem.xpath(
235-
(
236-
r"*[self::nx:field or self::nx:group "
237-
r"or self::nx:attribute or self::nx:choice]"
238-
),
239-
namespaces=namespaces,
240-
):
262+
for subelems in elem.xpath(search_tags, namespaces=namespaces):
241263
if "name" in subelems.attrib:
242264
names.add(subelems.attrib["name"])
243265
elif "type" in subelems.attrib:

pynxtools/dataconverter/validation.py

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,20 @@
4444
from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nx_namefit
4545

4646

47-
def best_namefit_of_(
48-
name: str, concepts: Set[str], nx_class: Optional[str] = None
49-
) -> str:
50-
# TODO: Find the best namefit of name in concepts
51-
# Consider nx_class if it is not None
52-
...
47+
def best_namefit_of_(name: str, concepts: Set[str]) -> Optional[str]:
    """
    Get the concept name which fits `name` best.

    Args:
        name (str): The name to find a matching concept for.
        concepts (Set[str]): The concept names to match against.

    Returns:
        Optional[str]:
            `name` itself if it is contained in `concepts`, otherwise the
            concept with the highest `get_nx_namefit` score.
            None if `concepts` is empty or if the best score is negative.
    """
    if not concepts:
        return None

    # An exact match always wins, no scoring is needed.
    if name in concepts:
        return name

    # Iterate in sorted order so that ties between equally scored concepts
    # are resolved deterministically (set iteration order is arbitrary).
    best_match, score = max(
        ((concept, get_nx_namefit(name, concept)) for concept in sorted(concepts)),
        key=lambda scored: scored[1],
    )

    # A negative score is treated as "no concept fits".
    return best_match if score >= 0 else None
5361

5462

5563
def validate_hdf_group_against(appdef: str, data: h5py.Group):
@@ -64,9 +72,11 @@ def validate_hdf_group_against(appdef: str, data: h5py.Group):
6472
# Allow for 10000 cache entries. This should be enough for most cases
6573
@cached(
6674
cache=LRUCache(maxsize=10000),
67-
key=lambda path, _: hashkey(path),
75+
key=lambda path, *_: hashkey(path),
6876
)
69-
def find_node_for(path: str, nx_class: Optional[str] = None) -> Optional[NexusNode]:
77+
def find_node_for(
78+
path: str, node_type: Optional[str] = None, nx_class: Optional[str] = None
79+
) -> Optional[NexusNode]:
7080
if path == "":
7181
return tree
7282

@@ -75,10 +85,7 @@ def find_node_for(path: str, nx_class: Optional[str] = None) -> Optional[NexusNo
7585

7686
best_child = best_namefit_of_(
7787
last_elem,
78-
# TODO: Consider renaming `get_all_children_names` to
79-
# `get_all_direct_children_names`. Because that's what it is.
80-
node.get_all_children_names(),
81-
nx_class,
88+
node.get_all_direct_children_names(nx_class=nx_class, node_type=node_type),
8289
)
8390
if best_child is None:
8491
return None
@@ -92,15 +99,19 @@ def remove_from_req_fields(path: str):
9299
def handle_group(path: str, data: h5py.Group):
    """
    Check a group against the tree of the application definition.

    The matching node is looked up with `find_node_for`, restricted to
    `group` nodes of the NeXus class given by the `NX_class` attribute
    of `data`. If no node is found, `path` is reported as
    `ValidationProblem.MissingDocumentation`.

    Args:
        path (str): The path of the group to check.
        data (h5py.Group): The group found at `path`.
    """
    # `find_node_for` takes (path, node_type, nx_class). The NX_class has to
    # be passed as third argument, otherwise it is used as the node type and
    # no child will ever match.
    # The arguments are passed positionally because the cache key function
    # of `find_node_for` only accepts positional arguments.
    node = find_node_for(path, "group", data.attrs.get("NX_class"))
    if node is None:
        collector.collect_and_log(
            path, ValidationProblem.MissingDocumentation, None
        )
        return

    # TODO: Do actual group checks
99108

100109
def handle_field(path: str, data: h5py.Dataset):
101110
node = find_node_for(path)
102111
if node is None:
103-
# TODO: Log undocumented
112+
collector.collect_and_log(
113+
path, ValidationProblem.MissingDocumentation, None
114+
)
104115
return
105116
remove_from_req_fields(f"{path}")
106117

@@ -110,7 +121,9 @@ def handle_attributes(path: str, attribute_names: h5py.AttributeManager):
110121
for attr_name in attribute_names:
111122
node = find_node_for(f"{path}/{attr_name}")
112123
if node is None:
113-
# TODO: Log undocumented
124+
collector.collect_and_log(
125+
path, ValidationProblem.MissingDocumentation, None
126+
)
114127
continue
115128
remove_from_req_fields(f"{path}/@{attr_name}")
116129

@@ -282,7 +295,7 @@ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]:
282295
continue
283296
if (
284297
get_nx_namefit(name2fit, node.name) >= 0
285-
and key not in node.parent.get_all_children_names()
298+
and key not in node.parent.get_all_direct_children_names()
286299
):
287300
variations.append(key)
288301
if nx_name is not None and not variations:
@@ -315,7 +328,7 @@ def check_nxdata():
315328
data_node = node.search_child_with_name((signal, "DATA"))
316329
data_bc_node = node.search_child_with_name("DATA")
317330
data_node.inheritance.append(data_bc_node.inheritance[0])
318-
for child in data_node.get_all_children_names():
331+
for child in data_node.get_all_direct_children_names():
319332
data_node.search_child_with_name(child)
320333

321334
handle_field(
@@ -347,7 +360,7 @@ def check_nxdata():
347360
axis_node = node.search_child_with_name((axis, "AXISNAME"))
348361
axis_bc_node = node.search_child_with_name("AXISNAME")
349362
axis_node.inheritance.append(axis_bc_node.inheritance[0])
350-
for child in axis_node.get_all_children_names():
363+
for child in axis_node.get_all_direct_children_names():
351364
axis_node.search_child_with_name(child)
352365

353366
handle_field(
@@ -575,7 +588,7 @@ def is_documented(key: str, node: NexusNode) -> bool:
575588
return True
576589

577590
for name in key[1:].replace("@", "").split("/"):
578-
children = node.get_all_children_names()
591+
children = node.get_all_direct_children_names()
579592
best_name = best_namefit_of(name, children)
580593
if best_name is None:
581594
return False
@@ -688,7 +701,7 @@ def populate_full_tree(node: NexusNode, max_depth: Optional[int] = 5, depth: int
688701
# but it does while recursing the tree and it should
689702
# be fixed.
690703
return
691-
for child in node.get_all_children_names():
704+
for child in node.get_all_direct_children_names():
692705
print(child)
693706
child_node = node.search_child_with_name(child)
694707
populate_full_tree(child_node, max_depth=max_depth, depth=depth + 1)

0 commit comments

Comments
 (0)