Skip to content

Commit 73498d9

Browse files
authored
Merge pull request #647 from FAIRmat-NFDI/pint-units
2 parents 92f3fe3 + 5e86fe5 commit 73498d9

File tree

11 files changed

+1145
-113
lines changed

11 files changed

+1145
-113
lines changed
Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: NOMAD dependencies compatibility
1+
name: NOMAD compatibility
22

33
on:
44
push:
@@ -16,7 +16,7 @@ env:
1616
python-version: 3.11
1717

1818
jobs:
19-
validate_dependencies:
19+
validate_compatibility:
2020
runs-on: ubuntu-latest
2121

2222
steps:
@@ -28,6 +28,18 @@ jobs:
2828
git clone --depth 1 --branch develop --recurse-submodules https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git nomad
2929
git submodule update --init --recursive --depth 1
3030
31+
- name: Compare unit definition files
  run: |
    # Each diff compares the copy inside the freshly cloned NOMAD checkout
    # (left) with the copy shipped in this repository (right). A non-zero
    # diff exit status emits an error annotation attached to the local file
    # and fails the job.
    diff --unified=3 nomad/nomad/units/default_en.txt src/pynxtools/units/default_en.txt || {
      echo "::error file=src/pynxtools/units/default_en.txt::default_en.txt differs from NOMAD";
      exit 1;
    }

    # The annotation must name the same file that was diffed
    # (constants_en.txt), otherwise it points at a path that does not exist.
    diff --unified=3 nomad/nomad/units/constants_en.txt src/pynxtools/units/constants_en.txt || {
      echo "::error file=src/pynxtools/units/constants_en.txt::constants_en.txt differs from NOMAD";
      exit 1;
    }
42+
3143
- name: Add pynxtools dependency in NOMAD test_plugins.txt
3244
working-directory: ./nomad
3345
run: |

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ cython_debug/
203203
!mkdocs-requirements.txt
204204
!src/pynxtools/nexus-version.txt
205205
!src/pynxtools/remote_definitions_url.txt
206+
!src/pynxtools/units/constants_en.txt
207+
!src/pynxtools/units/default_en.txt
206208
build/
207209
nexusparser.egg-info/PKG-INFO
208210
.python-version

CITATION.cff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ message:
44
If you use this software, please cite it using the
55
metadata from this file.
66
type: software
7-
version: 0.10.8
7+
version: 0.10.9
88
authors:
99
- given-names: Sherjeel
1010
family-names: Shabih

src/pynxtools/dataconverter/helpers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
class ValidationProblem(Enum):
5050
DifferentVariadicNodesWithTheSameName = auto()
5151
UnitWithoutDocumentation = auto()
52+
InvalidUnit = auto()
5253
InvalidEnum = auto()
5354
OpenEnumWithNewItem = auto()
5455
MissingRequiredGroup = auto()
@@ -99,6 +100,13 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
99100
logger.info(
100101
f"The unit, {path} = {value}, is being written but has no documentation."
101102
)
103+
if log_type == ValidationProblem.InvalidUnit:
104+
value = cast(Any, value)
105+
log_text = f"The unit '{args[0]}' at {path} does not match with the unit category {value.unit} of '{value.name}'."
106+
if len(args) == 2 and args[1] is not None:
107+
log_text += f" Based on the 'transformation_type' of the field {path.replace('/@units', '')}, it should match with '{args[1]}'."
108+
logger.warning(log_text)
109+
102110
elif log_type == ValidationProblem.InvalidEnum:
103111
logger.warning(
104112
f"The value at {path} should be one of the following: {value}."

src/pynxtools/dataconverter/validation.py

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
generate_tree_from,
4343
)
4444
from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nx_namefit
45+
from pynxtools.units import NXUnitSet, ureg
4546

4647

4748
def validate_hdf_group_against(appdef: str, data: h5py.Group):
@@ -216,6 +217,49 @@ def best_namefit_of(
216217
return best_match
217218

218219

220+
def is_valid_unit_for_node(
    node: NexusNode, unit: str, unit_path: str, hints: dict[str, Any]
) -> None:
    """
    Check a unit string against the unit category declared on a NeXus node.

    The expected category is normally `node.unit`. When the node declares
    "NX_TRANSFORMATION", the expected category is instead derived from
    `hints["transformation_type"]`:

    - "translation" -> "NX_LENGTH"
    - "rotation"    -> "NX_ANGLE"
    - anything else, or no hint at all -> "NX_UNITLESS"

    The function returns nothing. If `NXUnitSet.matches` rejects the unit for
    the expected category, a `ValidationProblem.InvalidUnit` is reported
    through `collector.collect_and_log`.

    Args:
        node (NexusNode): Node whose `unit` attribute names the unit category.
        unit (str): The unit string to check (e.g., "m", "eV", "1", "").
        unit_path (str): Path of the unit entry, used as the location of the
            reported problem.
        hints (dict[str, Any]): Extra context for the check. Only the
            "transformation_type" key is read here.
    """
    # NOTE(review): callers obtain `unit` via a dict `.get(...)`, so it may be
    # None when no `@units` entry exists — confirm NXUnitSet.matches copes.
    transformation_categories: dict[str, str] = {
        "translation": "NX_LENGTH",
        "rotation": "NX_ANGLE",
    }

    expected_category = node.unit
    # Only set for transformations, so the logged problem can state which
    # category was derived from the transformation type.
    resolved_for_log = None

    if expected_category == "NX_TRANSFORMATION":
        # A missing hint yields None from `.get`, which is not a key of the
        # table and therefore falls through to "NX_UNITLESS" like any other
        # unrecognised transformation type.
        expected_category = transformation_categories.get(
            hints.get("transformation_type"), "NX_UNITLESS"
        )
        resolved_for_log = expected_category

    if not NXUnitSet.matches(expected_category, unit):
        collector.collect_and_log(
            unit_path, ValidationProblem.InvalidUnit, node, unit, resolved_for_log
        )
261+
262+
219263
def validate_dict_against(
220264
appdef: str, mapping: MutableMapping[str, Any], ignore_undocumented: bool = False
221265
) -> bool:
@@ -607,14 +651,30 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
607651

608652
# Check unit category
609653
if node.unit is not None:
610-
remove_from_not_visited(f"{prev_path}/{variant}/@units")
611-
if f"{variant}@units" not in keys:
612-
collector.collect_and_log(
613-
variant_path,
614-
ValidationProblem.MissingUnit,
615-
node.unit,
616-
)
617-
# TODO: Check unit with pint
654+
unit_path = f"{variant_path}/@units"
655+
if node.unit != "NX_UNITLESS":
656+
remove_from_not_visited(unit_path)
657+
if f"{variant}@units" not in keys and (
658+
node.unit != "NX_TRANSFORMATION"
659+
or mapping.get(f"{variant_path}/@transformation_type")
660+
in ("translation", "rotation")
661+
):
662+
collector.collect_and_log(
663+
variant_path,
664+
ValidationProblem.MissingUnit,
665+
node.unit,
666+
)
667+
break
668+
669+
unit = keys.get(f"{variant}@units")
670+
# Special case: NX_TRANSFORMATION unit depends on `@transformation_type` attribute
671+
if (
672+
transformation_type := keys.get(f"{variant}@transformation_type")
673+
) is not None:
674+
hints = {"transformation_type": transformation_type}
675+
else:
676+
hints = {}
677+
is_valid_unit_for_node(node, unit, unit_path, hints)
618678

619679
field_attributes = get_field_attributes(variant, keys)
620680
field_attributes = _follow_link(field_attributes, variant_path)
@@ -820,9 +880,13 @@ def is_documented(key: str, tree: NexusNode) -> bool:
820880
and node.unit is not None
821881
and f"{key}/@units" not in mapping
822882
):
823-
collector.collect_and_log(
824-
f"{key}", ValidationProblem.MissingUnit, node.unit
825-
)
883+
# Workaround for NX_UNITLESS of NX_TRANSFORMATION unit category
884+
if node.unit != "NX_TRANSFORMATION" or mapping.get(
885+
f"{key}/@transformation_type"
886+
) in ("translation", "rotation"):
887+
collector.collect_and_log(
888+
f"{key}", ValidationProblem.MissingUnit, node.unit
889+
)
826890

827891
return True
828892

@@ -1298,6 +1362,10 @@ def check_reserved_prefix(
12981362
check_attributes_of_nonexisting_field(tree)
12991363

13001364
for not_visited_key in not_visited:
1365+
if mapping.get(not_visited_key) is None:
1366+
# This value is not really set. Skip checking its validity.
1367+
continue
1368+
13011369
# TODO: remove again if "@target"/"@reference" is sorted out by NIAC
13021370
always_allowed_attributes = ("@target", "@reference")
13031371
if not_visited_key.endswith(always_allowed_attributes):
@@ -1344,6 +1412,20 @@ def check_reserved_prefix(
13441412
mapping[not_visited_key],
13451413
)
13461414

1415+
if node.unit is not None:
1416+
# Special case: NX_TRANSFORMATION unit depends on `@transformation_type` attribute
1417+
if (
1418+
transformation_type := mapping.get(
1419+
not_visited_key.replace("/@units", "/@transformation_type")
1420+
)
1421+
) is not None:
1422+
hints = {"transformation_type": transformation_type}
1423+
else:
1424+
hints = {}
1425+
is_valid_unit_for_node(
1426+
node, mapping[not_visited_key], not_visited_key, hints
1427+
)
1428+
13471429
# parent key will be checked on its own if it exists, because it is in the list
13481430
continue
13491431

src/pynxtools/nomad/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from nomad.metainfo import MEnum, MSection
3434
from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name
3535
from nomad.parsing import MatchingParser
36-
from nomad.units import ureg
3736
from nomad.utils import get_logger
3837
from pint.errors import UndefinedUnitError
3938
except ImportError as exc:
@@ -49,6 +48,7 @@
4948
get_quantity_base_name,
5049
)
5150
from pynxtools.nomad.utils import _rename_nx_for_nomad as rename_nx_for_nomad
51+
from pynxtools.units import ureg
5252

5353

5454
def _to_group_name(nx_node: ET.Element):

src/pynxtools/nomad/schema.py

Lines changed: 11 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import h5py
3232
import numpy as np
3333
import pandas as pd
34-
import pint
3534
from ase import Atoms
3635
from ase.data import atomic_numbers
3736
from scipy.spatial import cKDTree
@@ -74,7 +73,6 @@
7473
from nomad.metainfo.metainfo import resolve_variadic_name
7574
from nomad.normalizing.common import nomad_atoms_from_ase_atoms
7675
from nomad.normalizing.topology import add_system, add_system_info
77-
from nomad.units import ureg
7876
from nomad.utils import get_logger, hash, strip
7977

8078
except ImportError as exc:
@@ -91,6 +89,7 @@
9189
_rename_nx_for_nomad,
9290
get_quantity_base_name,
9391
)
92+
from pynxtools.units import NXUnitSet, ureg
9493

9594
# URL_REGEXP from
9695
# https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
@@ -327,68 +326,6 @@ def get_nx_type(nx_type: str) -> Optional[Datatype]:
327326
return None
328327

329328

330-
class NXUnitSet:
331-
"""
332-
maps from `NX_` token to dimensionality
333-
None -> disable dimensionality check
334-
'1' -> dimensionless quantities
335-
'transformation' -> Specially handled in metainfo
336-
"""
337-
338-
mapping: dict = {
339-
"NX_ANGLE": "[angle]",
340-
"NX_ANY": None,
341-
"NX_AREA": "[area]",
342-
"NX_CHARGE": "[charge]",
343-
"NX_COUNT": "1",
344-
"NX_CROSS_SECTION": "[area]",
345-
"NX_CURRENT": "[current]",
346-
"NX_DIMENSIONLESS": "1",
347-
"NX_EMITTANCE": "[length] * [angle]",
348-
"NX_ENERGY": "[energy]",
349-
"NX_FLUX": "1 / [time] / [area]",
350-
"NX_FREQUENCY": "[frequency]",
351-
"NX_LENGTH": "[length]",
352-
"NX_MASS": "[mass]",
353-
"NX_MASS_DENSITY": "[mass] / [volume]",
354-
"NX_MOLECULAR_WEIGHT": "[mass] / [substance]",
355-
"NX_PERIOD": "[time]",
356-
"NX_PER_AREA": "1 / [area]",
357-
"NX_PER_LENGTH": "1 / [length]",
358-
"NX_POWER": "[power]",
359-
"NX_PRESSURE": "[pressure]",
360-
"NX_PULSES": "1",
361-
"NX_SCATTERING_LENGTH_DENSITY": "1 / [area]",
362-
"NX_SOLID_ANGLE": "[angle] * [angle]",
363-
"NX_TEMPERATURE": "[temperature]",
364-
"NX_TIME": "[time]",
365-
"NX_TIME_OF_FLIGHT": "[time]",
366-
"NX_TRANSFORMATION": "transformation",
367-
"NX_UNITLESS": "1",
368-
"NX_VOLTAGE": "[energy] / [current] / [time]",
369-
"NX_VOLUME": "[volume]",
370-
"NX_WAVELENGTH": "[length]",
371-
"NX_WAVENUMBER": "1 / [length]",
372-
}
373-
374-
@staticmethod
375-
def normalise(value: str) -> str:
376-
"""
377-
Normalise the given token
378-
"""
379-
value = value.upper()
380-
if not value.startswith("NX_"):
381-
value = "NX_" + value
382-
return value
383-
384-
@staticmethod
385-
def is_nx_token(value: str) -> bool:
386-
"""
387-
Check if a given token is one of NX tokens
388-
"""
389-
return NXUnitSet.normalise(value) in NXUnitSet.mapping.keys()
390-
391-
392329
# def _to_camel_case(snake_str: str, upper: bool = False) -> str:
393330
# """
394331
# Take as input a snake case variable and return a camel case one
@@ -747,21 +684,16 @@ def _create_field(xml_node: ET.Element, container: Section) -> Quantity:
747684
# dimensionality
748685
nx_dimensionality = xml_attrs.get("units", None)
749686
if nx_dimensionality:
750-
dimensionality = NXUnitSet.mapping.get(nx_dimensionality)
751-
if not dimensionality and nx_dimensionality != "NX_ANY":
752-
try:
753-
quantity = 1 * ureg(nx_dimensionality)
754-
if quantity.dimensionality == "dimensionless":
755-
dimensionality = "1"
756-
else:
757-
dimensionality = str(quantity.dimensionality)
758-
except (
759-
pint.errors.UndefinedUnitError,
760-
pint.errors.DefinitionSyntaxError,
761-
) as err:
762-
raise NotImplementedError(
763-
f"Unit {nx_dimensionality} is not supported for {name}."
764-
) from err
687+
if nx_dimensionality == "NX_TRANSFORMATION":
688+
# TODO: Remove workaround for NX_TRANSFORMATION
689+
nx_dimensionality = "NX_ANY"
690+
dimensionality = NXUnitSet.get_dimensionality(nx_dimensionality)
691+
if dimensionality is not None:
692+
dimensionality = str(dimensionality)
693+
elif nx_dimensionality != "NX_ANY":
694+
raise NotImplementedError(
695+
f"Unit {nx_dimensionality} is not supported for {name}."
696+
)
765697
else:
766698
dimensionality = None
767699

0 commit comments

Comments
 (0)