Skip to content

Commit f50a827

Browse files
authored
Merge pull request #639 from FAIRmat-NFDI/reserved-prefixes-suffixes
Validation check for reserved prefixes and suffixes
2 parents 476fe0e + 4d14410 commit f50a827

File tree

4 files changed

+253
-5
lines changed

4 files changed

+253
-5
lines changed

src/pynxtools/data/NXtest.nxdl.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@
2323
<field name="required_field" required="true" type="NX_INT">
2424
<doc>A dummy entry to test optional parent check for a required child.</doc>
2525
</field>
26+
<field name="required_field_set" optional="true" type="NX_INT">
27+
<doc>A dummy entry to test reserved suffixes.</doc>
28+
</field>
29+
<field name="some_field_set" optional="true" type="NX_INT">
30+
<doc>
31+
A dummy entry to test reserved suffixes where the actual field is not given.
32+
Note that this is not allowed by NeXus, but we do this here to test the validation.
33+
</doc>
34+
</field>
2635
<field name="optional_field" optional="true" type="NX_INT">
2736
<doc>A dummy entry to test optional parent check for an optional child.</doc>
2837
</field>

src/pynxtools/dataconverter/helpers.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ class ValidationProblem(Enum):
6868
NXdataAxisMismatch = 20
6969
KeyToBeRemoved = 21
7070
InvalidConceptForNonVariadic = 22
71+
ReservedSuffixWithoutField = 23
72+
ReservedPrefixInWrongContext = 24
7173

7274

7375
class Collector:
@@ -157,6 +159,16 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
157159
if value.type == "group":
158160
log_text += f", which should be of type {value.nx_class}."
159161
logger.warning(log_text)
162+
elif log_type == ValidationProblem.ReservedSuffixWithoutField:
163+
logger.warning(
164+
f"Reserved suffix '{args[0]}' was used in {path}, but there is no associated field {value}."
165+
)
166+
elif log_type == ValidationProblem.ReservedPrefixInWrongContext:
167+
log_text = f"Reserved prefix {path} was used in key {args[0] if args else '<unknown>'}, but is not valid here."
168+
# Note that value=None" gets converted to "<unknown>"
169+
if value != "<unknown>":
170+
log_text += f" It is only valid in the context of {value}."
171+
logger.error(log_text)
160172

161173
def collect_and_log(
162174
self,

src/pynxtools/dataconverter/validation.py

Lines changed: 166 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,18 @@
2020
from collections import defaultdict
2121
from functools import reduce
2222
from operator import getitem
23-
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
23+
from typing import (
24+
Any,
25+
Iterable,
26+
List,
27+
Mapping,
28+
MutableMapping,
29+
Optional,
30+
Tuple,
31+
Union,
32+
Dict,
33+
Literal,
34+
)
2435

2536
import h5py
2637
import lxml.etree as ET
@@ -515,6 +526,9 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
515526
f"{prev_path}/{variant}",
516527
)
517528

529+
_ = check_reserved_suffix(f"{prev_path}/{variant}", mapping)
530+
_ = check_reserved_prefix(f"{prev_path}/{variant}", mapping, "field")
531+
518532
# Check unit category
519533
if node.unit is not None:
520534
remove_from_not_visited(f"{prev_path}/{variant}/@units")
@@ -532,9 +546,6 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
532546
prev_path=f"{prev_path}/{variant}",
533547
)
534548

535-
# TODO: Build variadic map for fields and attributes
536-
# Introduce variadic siblings in NexusNode?
537-
538549
def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
539550
full_path = remove_from_not_visited(f"{prev_path}/@{node.name}")
540551
variants = get_variations_of(node, keys)
@@ -559,6 +570,7 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
559570
node.open_enum,
560571
f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}",
561572
)
573+
_ = check_reserved_prefix(f"{prev_path}/{variant}", mapping, "attribute")
562574

563575
def handle_choice(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
564576
global collector
@@ -847,6 +859,146 @@ def startswith_with_variations(
847859
# default
848860
return (False, 0)
849861

862+
def check_reserved_suffix(key: str, mapping: MutableMapping[str, Any]) -> bool:
863+
"""
864+
Check if an associated field exists for a key with a reserved suffix.
865+
866+
Reserved suffixes imply the presence of an associated base field (e.g.,
867+
"temperature_errors" implies "temperature" must exist in the mapping).
868+
869+
Args:
870+
key (str):
871+
The full key path (e.g., "/ENTRY[entry1]/sample/temperature_errors").
872+
mapping (MutableMapping[str, Any]):
873+
The mapping containing the data to validate.
874+
This should be a dict of `/` separated paths.
875+
876+
Returns:
877+
bool:
878+
True if the suffix usage is valid or not applicable.
879+
False if the suffix is used without the expected associated base field.
880+
"""
881+
reserved_suffixes = (
882+
"_end",
883+
"_increment_set",
884+
"_errors",
885+
"_indices",
886+
"_mask",
887+
"_set",
888+
"_weights",
889+
"_scaling_factor",
890+
"_offset",
891+
)
892+
893+
parent_path, name = key.rsplit("/", 1)
894+
concept_name, instance_name = split_class_and_name_of(name)
895+
896+
for suffix in reserved_suffixes:
897+
if instance_name.endswith(suffix):
898+
associated_field = instance_name.rsplit(suffix, 1)[0]
899+
900+
if not any(
901+
k.startswith(parent_path + "/")
902+
and (
903+
k.endswith(associated_field)
904+
or k.endswith(f"[{associated_field}]")
905+
)
906+
for k in mapping
907+
):
908+
collector.collect_and_log(
909+
key,
910+
ValidationProblem.ReservedSuffixWithoutField,
911+
associated_field,
912+
suffix,
913+
)
914+
return False
915+
break # We found the suffix and it passed
916+
917+
return True
918+
919+
def check_reserved_prefix(
920+
key: str,
921+
mapping: MutableMapping[str, Any],
922+
nx_type: Literal["group", "field", "attribute"],
923+
) -> bool:
924+
"""
925+
Check if a reserved prefix was used in the correct context.
926+
927+
Args:
928+
key (str): The full key path (e.g., "/ENTRY[entry1]/instrument/detector/@DECTRIS_config").
929+
mapping (MutableMapping[str, Any]):
930+
The mapping containing the data to validate.
931+
This should be a dict of `/` separated paths.
932+
Attributes are denoted with `@` in front of the last element.
933+
nx_type (Literal["group", "field", "attribute"]):
934+
The NeXus type the key represents. Determines which reserved prefixes are relevant.
935+
936+
937+
Returns:
938+
bool:
939+
True if the prefix usage is valid or not applicable.
940+
False if an invalid or misapplied reserved prefix is detected.
941+
"""
942+
reserved_prefixes = {
943+
"attribute": {
944+
"@BLUESKY_": None, # do not use anywhere
945+
"@DECTRIS_": "NXmx",
946+
"@IDF_": None, # do not use anywhere
947+
"@NDAttr": None,
948+
"@NX_": "all",
949+
"@PDBX_": None, # do not use anywhere
950+
"@SAS_": "NXcanSAS",
951+
"@SILX_": None, # do not use anywhere
952+
},
953+
"field": {
954+
"DECTRIS_": "NXmx",
955+
},
956+
}
957+
958+
prefixes = reserved_prefixes.get(nx_type)
959+
if not prefixes:
960+
return True
961+
962+
name = key.rsplit("/", 1)[-1]
963+
964+
if not name.startswith(tuple(prefixes)):
965+
return False # Irrelevant prefix, no check needed
966+
967+
for prefix, allowed_context in prefixes.items():
968+
if not name.startswith(prefix):
969+
continue
970+
971+
if allowed_context is None:
972+
# This prefix is disallowed entirely
973+
collector.collect_and_log(
974+
prefix,
975+
ValidationProblem.ReservedPrefixInWrongContext,
976+
None,
977+
key,
978+
)
979+
return False
980+
if allowed_context == "all":
981+
# We can freely use this prefix everywhere.
982+
return True
983+
984+
# Check that the prefix is used in the correct context.
985+
match = re.match(r"(/ENTRY\[[^]]+])", key)
986+
definition_value = None
987+
if match:
988+
definition_key = f"{match.group(1)}/definition"
989+
definition_value = mapping.get(definition_key)
990+
991+
if definition_value != allowed_context:
992+
collector.collect_and_log(
993+
prefix,
994+
ValidationProblem.ReservedPrefixInWrongContext,
995+
allowed_context,
996+
key,
997+
)
998+
return False
999+
1000+
return True
1001+
8501002
missing_type_err = {
8511003
"field": ValidationProblem.MissingRequiredField,
8521004
"group": ValidationProblem.MissingRequiredGroup,
@@ -937,6 +1089,16 @@ def startswith_with_variations(
9371089
keys_to_remove.append(not_visited_key)
9381090
continue
9391091

1092+
if "@" not in not_visited_key.rsplit("/", 1)[-1]:
1093+
check_reserved_suffix(not_visited_key, mapping)
1094+
check_reserved_prefix(not_visited_key, mapping, "field")
1095+
1096+
else:
1097+
associated_field = not_visited_key.rsplit("/", 1)[-2]
1098+
# Check the prefix both for this attribute and the field it belongs to
1099+
check_reserved_prefix(not_visited_key, mapping, "attribute")
1100+
check_reserved_prefix(associated_field, mapping, "field")
1101+
9401102
if is_documented(not_visited_key, tree):
9411103
continue
9421104

tests/dataconverter/test_validation.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1216,13 +1216,78 @@ def listify_template(data_dict: Template):
12161216
[],
12171217
id="nonvariadic-nxcollection",
12181218
),
1219+
pytest.param(
1220+
alter_dict(
1221+
alter_dict(
1222+
TEMPLATE,
1223+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field_set",
1224+
1,
1225+
),
1226+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field_set",
1227+
1,
1228+
),
1229+
[
1230+
"Reserved suffix '_set' was used in /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field_set, "
1231+
"but there is no associated field some_field."
1232+
],
1233+
id="reserved-suffix-from-appdef",
1234+
),
1235+
pytest.param(
1236+
alter_dict(
1237+
alter_dict(
1238+
TEMPLATE,
1239+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/FIELDNAME_weights[required_field_weights]",
1240+
0.1,
1241+
),
1242+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/FIELDNAME_weights[some_random_field_weights]",
1243+
0.1,
1244+
),
1245+
[
1246+
"Reserved suffix '_weights' was used in /ENTRY[my_entry]/OPTIONAL_group[my_group]/FIELDNAME_weights[some_random_field_weights], but there is no associated field some_random_field.",
1247+
],
1248+
id="reserved-suffix-from-base-class",
1249+
),
1250+
pytest.param(
1251+
alter_dict(
1252+
alter_dict(
1253+
alter_dict(
1254+
alter_dict(
1255+
TEMPLATE,
1256+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr",
1257+
"some text",
1258+
),
1259+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr",
1260+
"some text",
1261+
),
1262+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field",
1263+
"some text",
1264+
),
1265+
"/ENTRY[my_entry]/OPTIONAL_group[my_group]/@NX_attr",
1266+
"some text",
1267+
),
1268+
[
1269+
"Reserved prefix @BLUESKY_ was used in key /ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr, but is not valid here.",
1270+
"Attribute /ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr written without documentation.",
1271+
"Reserved prefix @DECTRIS_ was used in key /ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr, but is not valid here. "
1272+
"It is only valid in the context of NXmx.",
1273+
"Attribute /ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr written without documentation.",
1274+
"Reserved prefix DECTRIS_ was used in key /ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field, but is not valid here. "
1275+
"It is only valid in the context of NXmx.",
1276+
"Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field written without documentation.",
1277+
"Attribute /ENTRY[my_entry]/OPTIONAL_group[my_group]/@NX_attr written without documentation.",
1278+
],
1279+
id="reserved-prefix",
1280+
),
12191281
],
12201282
)
12211283
def test_validate_data_dict(caplog, data_dict, error_messages, request):
12221284
"""Unit test for the data validation routine."""
12231285

12241286
def format_error_message(msg: str) -> str:
1225-
return msg[msg.rfind("G: ") + 3 :].rstrip("\n")
1287+
for prefix in ("ERROR:", "WARNING:"):
1288+
if msg.startswith(prefix):
1289+
return msg[len(prefix) :].lstrip()
1290+
return msg
12261291

12271292
if not error_messages:
12281293
with caplog.at_level(logging.WARNING):

0 commit comments

Comments
 (0)