Skip to content

Commit 9eab38c

Browse files
committed
Improved get_children_names function
1 parent a96d2d2 commit 9eab38c

File tree

2 files changed

+63
-28
lines changed

2 files changed

+63
-28
lines changed

src/pynxtools/dataconverter/nexus_tree.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -219,16 +219,32 @@ def search_child_with_name(
219219
direct_child = next((x for x in self.children if x.name == name), None)
220220
if direct_child is not None:
221221
return direct_child
222-
if name in self.get_all_children_names():
222+
if name in self.get_all_direct_children_names():
223223
return self.add_inherited_node(name)
224224
return None
225225

226-
def get_all_children_names(self, depth: Optional[int] = None) -> Set[str]:
226+
def get_all_direct_children_names(
227+
self,
228+
node_type: Optional[str] = None,
229+
nx_class: Optional[str] = None,
230+
depth: Optional[int] = None,
231+
) -> Set[str]:
227232
"""
228233
Get all children names of the current node up to a certain depth.
229234
Only `field`, `group`, `choice` or `attribute` elements are considered as children.
230235
231236
Args:
237+
node_type (Optional[str], optional):
238+
The tag of the children to consider.
239+
This should either be "field", "group", "choice" or "attribute".
240+
If None all tags are considered.
241+
Defaults to None.
242+
nx_class (Optional[str], optional):
243+
The NeXus class of the group to consider.
244+
This is only used if `node_type` is "group".
245+
It should contain the preceding `NX` and the class name in lowercase,
246+
e.g., "NXentry".
247+
Defaults to None.
232248
depth (Optional[int], optional):
233249
The inheritance depth up to which children names are collected.
234250
`depth=1` will return only the children of the current node.
@@ -244,15 +260,21 @@ def get_all_children_names(self, depth: Optional[int] = None) -> Set[str]:
244260
if depth is not None and (not isinstance(depth, int) or depth < 0):
245261
raise ValueError("Depth must be a positive integer or None")
246262

263+
tag_type = ""
264+
if node_type == "group" and nx_class is not None:
265+
tag_type = f"[@type='{nx_class}']"
266+
267+
if node_type is not None:
268+
search_tags = f"*[self::nx:{node_type}{tag_type}]"
269+
else:
270+
search_tags = (
271+
r"*[self::nx:field or self::nx:group "
272+
r"or self::nx:attribute or self::nx:choice]"
273+
)
274+
247275
names = set()
248276
for elem in self.inheritance[:depth]:
249-
for subelems in elem.xpath(
250-
(
251-
r"*[self::nx:field or self::nx:group "
252-
r"or self::nx:attribute or self::nx:choice]"
253-
),
254-
namespaces=namespaces,
255-
):
277+
for subelems in elem.xpath(search_tags, namespaces=namespaces):
256278
if "name" in subelems.attrib:
257279
names.add(subelems.attrib["name"])
258280
elif "type" in subelems.attrib:

src/pynxtools/dataconverter/validation.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,20 @@
4444
from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nx_namefit
4545

4646

47-
def best_namefit_of_(
48-
name: str, concepts: Set[str], nx_class: Optional[str] = None
49-
) -> str:
50-
# TODO: Find the best namefit of name in concepts
51-
# Consider nx_class if it is not None
52-
...
47+
def best_namefit_of_(name: str, concepts: Set[str]) -> Optional[str]:
    """
    Get the best fitting concept name for `name`.

    Args:
        name (str): The name to find a match for.
        concepts (Set[str]): The candidate concept names.

    Returns:
        Optional[str]:
            `name` itself if it is contained in `concepts`, otherwise the
            candidate with the highest `get_nx_namefit` score.
            None if `concepts` is empty or if the best score is negative.
    """
    if not concepts:
        return None

    # An exact match always wins; the other candidates are not scored.
    if name in concepts:
        return name

    best_match, score = max(
        ((concept, get_nx_namefit(name, concept)) for concept in concepts),
        key=lambda scored: scored[1],
    )
    # A negative best score is treated as "no candidate fits".
    if score < 0:
        return None

    return best_match
5361

5462

5563
def validate_hdf_group_against(appdef: str, data: h5py.Group):
@@ -64,9 +72,11 @@ def validate_hdf_group_against(appdef: str, data: h5py.Group):
6472
# Allow for 10000 cache entries. This should be enough for most cases
6573
@cached(
6674
cache=LRUCache(maxsize=10000),
67-
key=lambda path, _: hashkey(path),
75+
key=lambda path, *_: hashkey(path),
6876
)
69-
def find_node_for(path: str, nx_class: Optional[str] = None) -> Optional[NexusNode]:
77+
def find_node_for(
78+
path: str, node_type: Optional[str] = None, nx_class: Optional[str] = None
79+
) -> Optional[NexusNode]:
7080
if path == "":
7181
return tree
7282

@@ -75,10 +85,7 @@ def find_node_for(path: str, nx_class: Optional[str] = None) -> Optional[NexusNo
7585

7686
best_child = best_namefit_of_(
7787
last_elem,
78-
# TODO: Consider renaming `get_all_children_names` to
79-
# `get_all_direct_children_names`. Because that's what it is.
80-
node.get_all_children_names(),
81-
nx_class,
88+
node.get_all_direct_children_names(nx_class=nx_class, node_type=node_type),
8289
)
8390
if best_child is None:
8491
return None
@@ -92,15 +99,19 @@ def remove_from_req_fields(path: str):
9299
def handle_group(path: str, data: h5py.Group):
93100
node = find_node_for(path, data.attrs.get("NX_class"))
94101
if node is None:
95-
# TODO: Log undocumented
102+
collector.collect_and_log(
103+
path, ValidationProblem.MissingDocumentation, None
104+
)
96105
return
97106

98107
# TODO: Do actual group checks
99108

100109
def handle_field(path: str, data: h5py.Dataset):
101110
node = find_node_for(path)
102111
if node is None:
103-
# TODO: Log undocumented
112+
collector.collect_and_log(
113+
path, ValidationProblem.MissingDocumentation, None
114+
)
104115
return
105116
remove_from_req_fields(f"{path}")
106117

@@ -110,7 +121,9 @@ def handle_attributes(path: str, attribute_names: h5py.AttributeManager):
110121
for attr_name in attribute_names:
111122
node = find_node_for(f"{path}/{attr_name}")
112123
if node is None:
113-
# TODO: Log undocumented
124+
collector.collect_and_log(
125+
path, ValidationProblem.MissingDocumentation, None
126+
)
114127
continue
115128
remove_from_req_fields(f"{path}/@{attr_name}")
116129

@@ -282,7 +295,7 @@ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]:
282295
continue
283296
if (
284297
get_nx_namefit(name2fit, node.name) >= 0
285-
and key not in node.parent.get_all_children_names()
298+
and key not in node.parent.get_all_direct_children_names()
286299
):
287300
variations.append(key)
288301
if nx_name is not None and not variations:
@@ -315,7 +328,7 @@ def check_nxdata():
315328
data_node = node.search_child_with_name((signal, "DATA"))
316329
data_bc_node = node.search_child_with_name("DATA")
317330
data_node.inheritance.append(data_bc_node.inheritance[0])
318-
for child in data_node.get_all_children_names():
331+
for child in data_node.get_all_direct_children_names():
319332
data_node.search_child_with_name(child)
320333

321334
handle_field(
@@ -347,7 +360,7 @@ def check_nxdata():
347360
axis_node = node.search_child_with_name((axis, "AXISNAME"))
348361
axis_bc_node = node.search_child_with_name("AXISNAME")
349362
axis_node.inheritance.append(axis_bc_node.inheritance[0])
350-
for child in axis_node.get_all_children_names():
363+
for child in axis_node.get_all_direct_children_names():
351364
axis_node.search_child_with_name(child)
352365

353366
handle_field(
@@ -575,7 +588,7 @@ def is_documented(key: str, node: NexusNode) -> bool:
575588
return True
576589

577590
for name in key[1:].replace("@", "").split("/"):
578-
children = node.get_all_children_names()
591+
children = node.get_all_direct_children_names()
579592
best_name = best_namefit_of(name, children)
580593
if best_name is None:
581594
return False

0 commit comments

Comments
 (0)