diff --git a/.gitignore b/.gitignore index 6d47622be..d75a6ed60 100644 --- a/.gitignore +++ b/.gitignore @@ -201,6 +201,8 @@ cython_debug/ *.txt !requirements.txt !dev-requirements.txt +!pynxtools/dataconverter/units/default_en.txt +!pynxtools/dataconverter/units/constants_en.txt !mkdocs-requirements.txt !pynxtools/nexus-version.txt build/ diff --git a/MANIFEST.in b/MANIFEST.in index ac1dc91cb..0acf3f983 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,5 +7,6 @@ recursive-include pynxtools/definitions/base_classes/ *.xml recursive-include pynxtools/definitions/applications/ *.xml recursive-include pynxtools/definitions/contributed_definitions/ *.xml include pynxtools/definitions/*.xsd +include pynxtools/dataconverter/units *.txt include pynxtools/nexus-version.txt -include pynxtools/definitions/NXDL_VERSION \ No newline at end of file +include pynxtools/definitions/NXDL_VERSION diff --git a/dev-requirements.txt b/dev-requirements.txt index 6f5f67de1..8bd1fc35f 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --extra=dev --extra=docs --output-file=dev-requirements.txt pyproject.toml @@ -34,6 +34,8 @@ cycler==0.12.1 # via matplotlib distlib==0.3.8 # via virtualenv +exceptiongroup==1.2.1 + # via pytest filelock==3.13.3 # via virtualenv fonttools==4.50.0 @@ -120,6 +122,8 @@ pathspec==0.12.1 # via mkdocs pillow==10.2.0 # via matplotlib +pint==0.23 + # via pynxtools (pyproject.toml) pip-tools==7.4.1 # via pynxtools (pyproject.toml) platformdirs==4.2.0 @@ -181,6 +185,14 @@ structlog==24.1.0 # via pynxtools (pyproject.toml) termcolor==2.4.0 # via mkdocs-macros-plugin +tomli==2.0.1 + # via + # build + # coverage + # mypy + # pip-tools + # pyproject-hooks + # pytest types-pytz==2024.1.0.20240203 # via pynxtools (pyproject.toml) types-pyyaml==6.0.12.20240311 @@ -188,7 +200,9 @@ types-pyyaml==6.0.12.20240311 types-requests==2.31.0.20240311 # via pynxtools (pyproject.toml) typing-extensions==4.10.0 - # via mypy + # via + # mypy + # pint tzdata==2024.1 # via pandas urllib3==2.2.1 diff --git a/pynxtools/dataconverter/convert.py b/pynxtools/dataconverter/convert.py index 74670bfa5..a6a65ffb7 100644 --- a/pynxtools/dataconverter/convert.py +++ b/pynxtools/dataconverter/convert.py @@ -37,7 +37,7 @@ from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter.template import Template from pynxtools.dataconverter.writer import Writer -from pynxtools.nexus import nexus +from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -120,7 +120,7 @@ def get_nxdl_root_and_path(nxdl: str): Error if no file with the given nxdl name is found. """ # Reading in the NXDL and generating a template - definitions_path = nexus.get_nexus_definitions_path() + definitions_path = get_nexus_definitions_path() if nxdl == "NXtest": nxdl_f_path = os.path.join( f"{os.path.abspath(os.path.dirname(__file__))}/../../", diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index 591db424e..3a6b2b944 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -29,12 +29,21 @@ import lxml.etree as ET import numpy as np from ase.data import chemical_symbols +from pint import UndefinedUnitError +import pynxtools.definitions.dev_tools.utils.nxdl_utils as nexus from pynxtools import get_nexus_version, get_nexus_version_hash from pynxtools.dataconverter.template import Template -from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_inherited_nodes -from pynxtools.nexus import nexus -from pynxtools.nexus.nexus import NxdlAttributeNotFoundError +from pynxtools.dataconverter.units import ureg +from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( + NxdlAttributeNotFoundError, + get_enums, + get_inherited_nodes, + get_node_at_nxdl_path, +) +from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( + get_required_string as nexus_get_required_string, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -50,6 +59,8 @@ class ValidationProblem(Enum): InvalidType = 7 InvalidDatetime = 8 IsNotPosInt = 9 + InvalidUnit = 10 + InvalidTransformationType = 11 class Collector: @@ -109,6 +120,16 @@ def insert_and_log( logger.warning( f"The value at {path} should be a positive int, but is {value}." ) + elif log_type == ValidationProblem.InvalidUnit: + logger.warning( + f"Invalid unit in {path}. {value} " + f"is not in unit category {args[0] if args else ''}" + ) + elif log_type == ValidationProblem.InvalidTransformationType: + logger.warning( + f"Invalid transformation type in {path}: {value}. " + "Should be either not present or have the value 'translation' or 'rotation'." + ) self.data.add(path) def has_validation_problems(self): @@ -195,7 +216,7 @@ def get_all_defined_required_children(nxdl_path, nxdl_name): if nxdl_name == "NXtest": return [] - elist = nexus.get_inherited_nodes(nxdl_path, nx_name=nxdl_name)[2] + elist = get_inherited_nodes(nxdl_path, nx_name=nxdl_name)[2] list_of_children_to_add = set() for elem in elist: list_of_children_to_add.update(get_all_defined_required_children_for_elem(elem)) @@ -298,7 +319,7 @@ def generate_template_from_nxdl( def get_required_string(elem): """Helper function to return nicely formatted names for optionality.""" - return nexus.get_required_string(elem)[2:-2].lower() + return nexus_get_required_string(elem)[2:-2].lower() def convert_nexus_to_caps(nexus_name): @@ -369,7 +390,7 @@ def convert_data_dict_path_to_hdf5_path(path) -> str: def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: """Checks whether a value has to be specific from the NXDL enumeration and returns options.""" for elem in elist: - enums = nexus.get_enums(elem) + enums = get_enums(elem) if enums is not None: return value in enums, enums return True, [] @@ -448,6 +469,25 @@ def convert_str_to_bool_safe(value): return None +def clean_str_attr( + attr: Optional[Union[str, bytes]], encoding="utf-8" +) -> Optional[str]: + """ + Cleans the string attribute which means it will decode bytes to str if necessary. + If `attr` is not str, bytes or None it raises a TypeError. + """ + if attr is None: + return attr + if isinstance(attr, bytes): + return attr.decode(encoding) + if isinstance(attr, str): + return attr + + raise TypeError( + "Invalid type {type} for attribute. Should be either None, bytes or str." + ) + + def is_valid_data_field(value, nxdl_type, path): """Checks whether a given value is valid according to what is defined in the NXDL. @@ -487,6 +527,46 @@ def is_valid_data_field(value, nxdl_type, path): return value +def is_valid_unit( + unit: str, nx_category: str, transformation_type: Optional[str] +) -> bool: + """ + The provided unit belongs to the provided nexus unit category. + + Args: + unit (str): The unit to check. Should be according to pint. + nx_category (str): A nexus unit category, e.g. `NX_LENGTH`, + or derived unit category, e.g., `NX_LENGTH ** 2`. + transformation_type (Optional[str]): + The transformation type of an NX_TRANSFORMATION. + This parameter is ignored if the `nx_category` is not `NX_TRANSFORMATION`. + If `transformation_type` is not present this should be set to None. + + Returns: + bool: The unit belongs to the provided category + """ + unit = clean_str_attr(unit) + try: + if nx_category in ("NX_ANY"): + ureg(unit) # Check if unit is generally valid + return True + nx_category = re.sub(r"(NX_[A-Z]+)", r"[\1]", nx_category) + if nx_category == "[NX_TRANSFORMATION]": + # NX_TRANSFORMATIONS is a pseudo unit + # and can be either an angle, a length or unitless + # depending on the transformation type. + if transformation_type is None: + return ureg(unit).check("[NX_UNITLESS]") + if transformation_type == "translation": + return ureg(unit).check("[NX_LENGTH]") + if transformation_type == "rotation": + return ureg(unit).check("[NX_ANGLE]") + return False + return ureg(unit).check(f"{nx_category}") + except UndefinedUnitError: + return False + + @lru_cache(maxsize=None) def path_in_data_dict(nxdl_path: str, data_keys: Tuple[str, ...]) -> List[str]: """Checks if there is an accepted variation of path in the dictionary & returns the path.""" @@ -505,9 +585,9 @@ def check_for_optional_parent(path: str, nxdl_root: ET.Element) -> str: return "<>" parent_nxdl_path = convert_data_converter_dict_to_nxdl_path(parent_path) - elem = nexus.get_node_at_nxdl_path(nxdl_path=parent_nxdl_path, elem=nxdl_root) + elem = get_node_at_nxdl_path(nxdl_path=parent_nxdl_path, elem=nxdl_root) - if nexus.get_required_string(elem) in ("<>", "<>"): + if get_required_string(elem) in ("<>", "<>"): return parent_path return check_for_optional_parent(parent_path, nxdl_root) @@ -522,8 +602,8 @@ def is_node_required(nxdl_key, nxdl_root): nxdl_key[0 : nxdl_key.rindex("/") + 1] + nxdl_key[nxdl_key.rindex("/") + 2 :] ) - node = nexus.get_node_at_nxdl_path(nxdl_key, elem=nxdl_root, exc=False) - return nexus.get_required_string(node) == "<>" + node = get_node_at_nxdl_path(nxdl_key, elem=nxdl_root, exc=False) + return get_required_string(node) == "<>" def all_required_children_are_set(optional_parent_path, data, nxdl_root): @@ -753,7 +833,7 @@ def try_undocumented(data, nxdl_root: ET.Element): field_path = path.rsplit("/", 1)[0] if field_path in data.get_documented() and path in data.undocumented: field_requiredness = get_required_string( - nexus.get_node_at_nxdl_path( + get_node_at_nxdl_path( nxdl_path=convert_data_converter_dict_to_nxdl_path(field_path), elem=nxdl_root, ) @@ -767,7 +847,7 @@ def try_undocumented(data, nxdl_root: ET.Element): nxdl_path = nxdl_path[0:index_of_at] + nxdl_path[index_of_at + 1 :] try: - elem = nexus.get_node_at_nxdl_path(nxdl_path=nxdl_path, elem=nxdl_root) + elem = get_node_at_nxdl_path(nxdl_path=nxdl_path, elem=nxdl_root) optionality = get_required_string(elem) data[optionality][path] = data.undocumented[path] del data.undocumented[path] @@ -786,7 +866,7 @@ def validate_data_dict(template, data, nxdl_root: ET.Element): @lru_cache(maxsize=None) def get_xml_node(nxdl_path: str) -> ET.Element: - return nexus.get_node_at_nxdl_path(nxdl_path=nxdl_path, elem=nxdl_root) + return get_node_at_nxdl_path(nxdl_path=nxdl_path, elem=nxdl_root) # Make sure all required fields exist. ensure_all_required_fields_exist(template, data, nxdl_root) @@ -814,21 +894,33 @@ def get_xml_node(nxdl_path: str) -> ET.Element: ) continue - # TODO: If we want we could also enable unit validation here - # field = nexus.get_node_at_nxdl_path( - # nxdl_path=convert_data_converter_dict_to_nxdl_path( - # # The part below is the backwards compatible version of - # # nxdl_path.removesuffix("/units") - # nxdl_path[:-6] if nxdl_path.endswith("/units") else nxdl_path - # ), - # elem=nxdl_root, - # ) - # nxdl_unit = field.attrib.get("units", "") - # if not is_valid_unit(data[path], nxdl_unit): - # raise ValueError( - # f"Invalid unit in {path}. {data[path]} " - # f"is not in unit category {nxdl_unit}" - # ) + field = get_node_at_nxdl_path( + nxdl_path=convert_data_converter_dict_to_nxdl_path( + # The part below is the backwards compatible version of + # nxdl_path.removesuffix("/units") + nxdl_path[:-6] if nxdl_path.endswith("/units") else nxdl_path + ), + elem=nxdl_root, + ) + nxdl_unit = field.attrib.get("units", "") + transformation_type = ( + field.attrib.get("transformation_type") + if nxdl_unit == "[NX_TRANSFORMATION]" + else None + ) + if not is_valid_unit(data[path], nxdl_unit, transformation_type): + if transformation_type is not None and transformation_type not in ( + "rotation", + "translation", + ): + collector.insert_and_log( + path, + ValidationProblem.InvalidTransformationType, + transformation_type, + ) + collector.insert_and_log( + path, ValidationProblem.InvalidUnit, data[path], nxdl_unit + ) continue elem = get_xml_node(nxdl_path) @@ -851,7 +943,7 @@ def get_xml_node(nxdl_path: str) -> ET.Element: else "NXDL_TYPE_UNAVAILABLE" ) data[path] = is_valid_data_field(data[path], nxdl_type, path) - elist = nexus.get_inherited_nodes( + elist = get_inherited_nodes( nxdl_path, path.rsplit("/", 1)[-1], nxdl_root )[2] is_valid_enum, enums = is_value_valid_element_of_enum(data[path], elist) @@ -934,6 +1026,7 @@ def update_and_warn(key: str, value: str): f"blob/{get_nexus_version_hash()}", ) update_and_warn("/@NeXus_version", get_nexus_version()) + # pylint: disable=c-extension-no-member update_and_warn("/@HDF5_version", ".".join(map(str, h5py.h5.get_libversion()))) update_and_warn("/@h5py_version", h5py.__version__) diff --git a/pynxtools/dataconverter/readers/ellips/reader.py b/pynxtools/dataconverter/readers/ellips/reader.py index 7793604f9..76275bb4c 100644 --- a/pynxtools/dataconverter/readers/ellips/reader.py +++ b/pynxtools/dataconverter/readers/ellips/reader.py @@ -450,6 +450,7 @@ def read( # MK:: Carola, Ron, Flo, Tamas, Sandor refactor the following line template[f"/ENTRY[entry]/plot/DATA[{key}_errors]/@units"] = "degree" + template["/ENTRY[entry]/data_collection/measured_data/@units"] = "" # Define default plot showing Psi and Delta at all angles: template["/@default"] = "entry" template["/ENTRY[entry]/@default"] = "plot" diff --git a/pynxtools/dataconverter/readers/json_map/README.md b/pynxtools/dataconverter/readers/json_map/README.md index 85fde4834..88094d5cc 100644 --- a/pynxtools/dataconverter/readers/json_map/README.md +++ b/pynxtools/dataconverter/readers/json_map/README.md @@ -39,7 +39,7 @@ Example: ```json "/ENTRY[entry]/DATA[data]/current_295C": "/entry/data/current_295C", - "/ENTRY[entry]/NXODD_name/posint_value": "/a_level_down/another_level_down/posint_value", + "/ENTRY[entry]/NXODD_name[odd_name]/posint_value": "/a_level_down/another_level_down/posint_value", ``` * Write the values directly in the mapping file for missing data from your data file. diff --git a/pynxtools/dataconverter/units/__init__.py b/pynxtools/dataconverter/units/__init__.py new file mode 100644 index 000000000..3d9a62fb1 --- /dev/null +++ b/pynxtools/dataconverter/units/__init__.py @@ -0,0 +1,23 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""A unit registry for nexus units""" + +import os +from pint import UnitRegistry + +ureg = UnitRegistry(os.path.join(os.path.dirname(__file__), "default_en.txt")) diff --git a/pynxtools/dataconverter/units/constants_en.txt b/pynxtools/dataconverter/units/constants_en.txt new file mode 100644 index 000000000..7b386b509 --- /dev/null +++ b/pynxtools/dataconverter/units/constants_en.txt @@ -0,0 +1,73 @@ +# Default Pint constants definition file +# Based on the International System of Units +# Language: english +# Source: https://physics.nist.gov/cuu/Constants/ +# https://physics.nist.gov/PhysRefData/XrayTrans/Html/search.html +# :copyright: 2013,2019 by Pint Authors, see AUTHORS for more details. + +#### MATHEMATICAL CONSTANTS #### +# As computed by Maxima with fpprec:50 + +pi = 3.1415926535897932384626433832795028841971693993751 = π # pi +tansec = 4.8481368111333441675396429478852851658848753880815e-6 # tangent of 1 arc-second ~ arc_second/radian +ln10 = 2.3025850929940456840179914546843642076011014886288 # natural logarithm of 10 +wien_x = 4.9651142317442763036987591313228939440555849867973 # solution to (x-5)*exp(x)+5 = 0 => x = W(5/exp(5))+5 +wien_u = 2.8214393721220788934031913302944851953458817440731 # solution to (u-3)*exp(u)+3 = 0 => u = W(3/exp(3))+3 + +#### DEFINED EXACT CONSTANTS #### + +speed_of_light = 299792458 m/s = c = c_0 # since 1983 +planck_constant = 6.62607015e-34 J s = h # since May 2019 +elementary_charge = 1.602176634e-19 C = e # since May 2019 +avogadro_number = 6.02214076e23 # since May 2019 +boltzmann_constant = 1.380649e-23 J K^-1 = k = k_B # since May 2019 +standard_gravity = 9.80665 m/s^2 = g_0 = g0 = g_n = gravity # since 1901 +standard_atmosphere = 1.01325e5 Pa = atm = atmosphere # since 1954 +conventional_josephson_constant = 4.835979e14 Hz / V = K_J90 # since Jan 1990 +conventional_von_klitzing_constant = 2.5812807e4 ohm = R_K90 # since Jan 1990 + +#### DERIVED EXACT CONSTANTS #### +# Floating-point conversion may introduce inaccuracies + +zeta = c / (cm/s) = ζ +dirac_constant = h / (2 * π) = ħ = h_bar = atomic_unit_of_action = a_u_action +avogadro_constant = avogadro_number * mol^-1 = N_A +molar_gas_constant = k * N_A = R +faraday_constant = e * N_A +conductance_quantum = 2 * e ** 2 / h = G_0 +magnetic_flux_quantum = h / (2 * e) = Φ_0 = Phi_0 +josephson_constant = 2 * e / h = K_J +von_klitzing_constant = h / e ** 2 = R_K +stefan_boltzmann_constant = 2 / 15 * π ** 5 * k ** 4 / (h ** 3 * c ** 2) = σ = sigma +first_radiation_constant = 2 * π * h * c ** 2 = c_1 +second_radiation_constant = h * c / k = c_2 +wien_wavelength_displacement_law_constant = h * c / (k * wien_x) +wien_frequency_displacement_law_constant = wien_u * k / h + +#### MEASURED CONSTANTS #### +# Recommended CODATA-2018 values +# To some extent, what is measured and what is derived is a bit arbitrary. +# The choice of measured constants is based on convenience and on available uncertainty. +# The uncertainty in the last significant digits is given in parentheses as a comment. + +newtonian_constant_of_gravitation = 6.67430e-11 m^3/(kg s^2) = _ = gravitational_constant # (15) +rydberg_constant = 1.0973731568160e7 * m^-1 = R_∞ = R_inf # (21) +electron_g_factor = -2.00231930436256 = g_e # (35) +atomic_mass_constant = 1.66053906660e-27 kg = m_u # (50) +electron_mass = 9.1093837015e-31 kg = m_e = atomic_unit_of_mass = a_u_mass # (28) +proton_mass = 1.67262192369e-27 kg = m_p # (51) +neutron_mass = 1.67492749804e-27 kg = m_n # (95) +K_alpha_Cu_d_220 = 0.80232719 # (22) +K_alpha_Mo_d_220 = 0.36940604 # (19) +K_alpha_W_d_220 = 0.108852175 # (98) + +#### DERIVED CONSTANTS #### + +fine_structure_constant = (2 * h * R_inf / (m_e * c)) ** 0.5 = α = alpha +vacuum_permeability = 2 * α * h / (e ** 2 * c) = µ_0 = mu_0 = mu0 = magnetic_constant +vacuum_permittivity = e ** 2 / (2 * α * h * c) = ε_0 = epsilon_0 = eps_0 = eps0 = electric_constant +impedance_of_free_space = 2 * α * h / e ** 2 = Z_0 = characteristic_impedance_of_vacuum +coulomb_constant = α * h_bar * c / e ** 2 = k_C +classical_electron_radius = α * h_bar / (m_e * c) = r_e +thomson_cross_section = 8 / 3 * π * r_e ** 2 = σ_e = sigma_e + diff --git a/pynxtools/dataconverter/units/default_en.txt b/pynxtools/dataconverter/units/default_en.txt new file mode 100644 index 000000000..0cd39d7de --- /dev/null +++ b/pynxtools/dataconverter/units/default_en.txt @@ -0,0 +1,634 @@ +# Default Pint units definition file +# Based on the International System of Units +# Language: english +# :copyright: 2013,2019 by Pint Authors, see AUTHORS for more details. + +# Syntax +# ====== +# Units +# ----- +# = [= ] [= ] [ = ] [...] +# +# The canonical name and aliases should be expressed in singular form. +# Pint automatically deals with plurals built by adding 's' to the singular form; plural +# forms that don't follow this rule should be instead explicitly listed as aliases. +# +# If a unit has no symbol and one wants to define aliases, then the symbol should be +# conventionally set to _. +# +# Example: +# millennium = 1e3 * year = _ = millennia +# +# +# Prefixes +# -------- +# - = [= ] [= ] [ = ] [...] +# +# Example: +# deca- = 1e+1 = da- = deka- +# +# +# Derived dimensions +# ------------------ +# [dimension name] = +# +# Example: +# [density] = [mass] / [volume] +# +# Note that primary dimensions don't need to be declared; they can be +# defined for the first time in a unit definition. +# E.g. see below `meter = [length]` +# +# +# Additional aliases +# ------------------ +# @alias = [ = ] [...] +# +# Used to add aliases to already existing unit definitions. +# Particularly useful when one wants to enrich definitions +# from defaults_en.txt with custom aliases. +# +# Example: +# @alias meter = my_meter + +# See also: https://pint.readthedocs.io/en/latest/defining.html + +@defaults + group = international + system = mks +@end + + +#### PREFIXES #### + +# decimal prefixes +yocto- = 1e-24 = y- +zepto- = 1e-21 = z- +atto- = 1e-18 = a- +femto- = 1e-15 = f- +pico- = 1e-12 = p- +nano- = 1e-9 = n- +micro- = 1e-6 = µ- = u- +milli- = 1e-3 = m- +centi- = 1e-2 = c- +deci- = 1e-1 = d- +deca- = 1e+1 = da- = deka- +hecto- = 1e2 = h- +kilo- = 1e3 = k- +mega- = 1e6 = M- +giga- = 1e9 = G- +tera- = 1e12 = T- +peta- = 1e15 = P- +exa- = 1e18 = E- +zetta- = 1e21 = Z- +yotta- = 1e24 = Y- + +# binary_prefixes +kibi- = 2**10 = Ki- +mebi- = 2**20 = Mi- +gibi- = 2**30 = Gi- +tebi- = 2**40 = Ti- +pebi- = 2**50 = Pi- +exbi- = 2**60 = Ei- +zebi- = 2**70 = Zi- +yobi- = 2**80 = Yi- + +#### BASE UNITS #### + +meter = [length] = m = metre +second = [time] = s = sec +ampere = [current] = A = amp +candela = [luminosity] = cd = candle +gram = [mass] = g +mole = [substance] = mol +kelvin = [temperature]; offset: 0 = K = degK = °K = degree_Kelvin = degreeK # older names supported for compatibility +radian = [angle] = rad +bit = [information] +pixel = [digital_image_resolution] = px = pel + +#### NEXUS UNIT CATEGORIES #### +[NX_ANGLE] = [angle] +[NX_ANY] = [] +[NX_AREA] = [area] +[NX_CHARGE] = [charge] +[NX_COUNT] = [] +[NX_CROSS_SECTION] = [area] +[NX_CURRENT] = [current] +[NX_DIMENSIONLESS] = [] +[NX_EMITTANCE] = [length] * [angle] +[NX_ENERGY] = [energy] +[NX_FLUX] = 1 / [time] / [length] ** 2 +[NX_FREQUENCY] = [frequency] +[NX_LENGTH] = [length] +[NX_MASS] = [mass] +[NX_MASS_DENSITY] = [mass] / [length] ** 3 +[NX_MOLECULAR_WEIGHT] = [mass] / [substance] +[NX_PERIOD] = [time] +[NX_PER_AREA] = 1 / [length] ** 2 +[NX_PER_LENGTH] = 1 / [length] +[NX_POWER] = [power] +[NX_PRESSURE] = [pressure] +[NX_PULSES] = [] +[NX_SCATTERING_LENGTH_DENSITY] = 1 / [length] ** 2 +[NX_SOLID_ANGLE] = [angle] ** 2 +[NX_TEMPERATURE] = [temperature] +[NX_TIME] = [time] +[NX_TIME_OF_FLIGHT] = [time] +[NX_UNITLESS] = [] +[NX_VOLTAGE] = [electric_potential] +[NX_VOLUME] = [length] ** 3 +[NX_WAVELENGTH] = [length] +[NX_WAVENUMBER] = [wavenumber] + + +#### CONSTANTS #### + +@import constants_en.txt + + +#### UNITS #### +# Common and less common, grouped by quantity. +# Conversion factors are exact (except when noted), +# although floating-point conversion may introduce inaccuracies + +# Unitless +counts = [] + +# Angle +degree = π / 180 * radian = ° = deg = arcdeg = arcdegree = angular_degree +arcminute = degree / 60 = arcmin = arc_minute = angular_minute +arcsecond = arcminute / 60 = arcsec = arc_second = angular_second +milliarcsecond = 1e-3 * arcsecond = mas +grade = π / 200 * radian = grad = gon +mil = π / 32000 * radian + +# Solid angle +steradian = radian ** 2 = sr +square_degree = (π / 180) ** 2 * sr = sq_deg = sqdeg + +# Information +byte = 8 * bit = B = octet +baud = bit / second = Bd = bps + +# Length +angstrom = 1e-10 * meter = Å = ångström = Å +micron = micrometer = µ +fermi = femtometer +atomic_unit_of_length = h_bar / (alpha * m_e * c) = bohr = a_0 = a0 = bohr_radius = a_u_length +planck_length = (h_bar * gravitational_constant / c ** 3) ** 0.5 + +# Mass +metric_ton = 1e3 * kilogram = tonne +unified_atomic_mass_unit = atomic_mass_constant = u = amu +dalton = atomic_mass_constant = Da +grain = 64.79891 * milligram = gr +gamma_mass = microgram +carat = 200 * milligram = ct = karat +planck_mass = (h_bar * c / gravitational_constant) ** 0.5 + +# Time +minute = 60 * second +hour = 60 * minute = hr +atomic_unit_of_time = h_bar / E_h = a_u_time +planck_time = (h_bar * gravitational_constant / c ** 5) ** 0.5 + +# Temperature +degree_Celsius = kelvin; offset: 273.15 = °C = celsius = degC = degreeC +degree_Rankine = 5 / 9 * kelvin; offset: 0 = °R = rankine = degR = degreeR +degree_Fahrenheit = 5 / 9 * kelvin; offset: 233.15 + 200 / 9 = °F = fahrenheit = degF = degreeF +degree_Reaumur = 4 / 5 * kelvin; offset: 273.15 = °Re = reaumur = degRe = degreeRe = degree_Réaumur = réaumur +atomic_unit_of_temperature = E_h / k = a_u_temp +planck_temperature = (h_bar * c ** 5 / gravitational_constant / k ** 2) ** 0.5 + +# Area +[area] = [length] ** 2 +barn = 1e-28 * meter ** 2 = b +darcy = centipoise * centimeter ** 2 / (second * atmosphere) + +# Volume +[volume] = [length] ** 3 +liter = decimeter ** 3 = l = L = litre +lambda = microliter = λ + +# Frequency +[frequency] = 1 / [time] +hertz = 1 / second = Hz +revolutions_per_minute = 1 / minute = rpm +revolutions_per_second = 1 / second = rps +counts_per_second = 1 / second = cps + +# Wavenumber +[wavenumber] = 1 / [length] +reciprocal_centimeter = 1 / cm = cm_1 = kayser + +# Speed +[speed] = [length] / [time] +mile_per_hour = mile / hour = mph = MPH +kilometer_per_hour = kilometer / hour = kph = KPH +kilometer_per_second = kilometer / second = kps +meter_per_second = meter / second = mps +foot_per_second = foot / second = fps + +# Acceleration +[acceleration] = [speed] / [time] +galileo = centimeter / second ** 2 = Gal + +# Force +[force] = [mass] * [acceleration] +newton = kilogram * meter / second ** 2 = N +dyne = gram * centimeter / second ** 2 = dyn +force_kilogram = g_0 * kilogram = pond +force_gram = g_0 * gram = gf = gram_force +force_metric_ton = g_0 * metric_ton = tf = metric_ton_force = force_t = t_force +atomic_unit_of_force = E_h / a_0 = a_u_force + +# Energy +[energy] = [force] * [length] +joule = newton * meter = J +erg = dyne * centimeter +watt_hour = watt * hour = Wh = watthour +electron_volt = e * volt = eV +rydberg = h * c * R_inf = Ry +hartree = 2 * rydberg = Ha = E_h = hartree_energy = atomic_unit_of_energy = a_u_energy +calorie = 4.184 * joule = cal = thermochemical_calorie = cal_th +international_calorie = 4.1868 * joule = cal_it = international_steam_table_calorie +fifteen_degree_calorie = 4.1855 * joule = cal_15 + +# Power +[power] = [energy] / [time] +watt = joule / second = W +volt_ampere = volt * ampere = VA +horsepower = 550 * foot * force_pound / second = hp = UK_horsepower = hydraulic_horsepower +metric_horsepower = 75 * force_kilogram * meter / second +electrical_horsepower = 746 * watt +standard_liter_per_minute = atmosphere * liter / minute = slpm = slm +conventional_watt_90 = K_J90 ** 2 * R_K90 / (K_J ** 2 * R_K) * watt = W_90 + +# Momentum +[momentum] = [length] * [mass] / [time] + +# Density (as auxiliary for pressure) +[density] = [mass] / [volume] +mercury = 13.5951 * kilogram / liter = Hg = Hg_0C = Hg_32F = conventional_mercury +water = 1.0 * kilogram / liter = H2O = conventional_water +mercury_60F = 13.5568 * kilogram / liter = Hg_60F # approximate +water_39F = 0.999972 * kilogram / liter = water_4C # approximate +water_60F = 0.999001 * kilogram / liter # approximate + +# Pressure +[pressure] = [force] / [area] +pascal = newton / meter ** 2 = Pa +barye = dyne / centimeter ** 2 = Ba = barie = barad = barrie = baryd +bar = 1e5 * pascal +torr = atm / 760 +pound_force_per_square_inch = force_pound / inch ** 2 = psi +kip_per_square_inch = kip / inch ** 2 = ksi +millimeter_Hg = millimeter * Hg * g_0 = mmHg = mm_Hg = millimeter_Hg_0C +centimeter_Hg = centimeter * Hg * g_0 = cmHg = cm_Hg = centimeter_Hg_0C +inch_Hg = inch * Hg * g_0 = inHg = in_Hg = inch_Hg_32F +inch_Hg_60F = inch * Hg_60F * g_0 +inch_H2O_39F = inch * water_39F * g_0 +inch_H2O_60F = inch * water_60F * g_0 +foot_H2O = foot * water * g_0 = ftH2O = feet_H2O +centimeter_H2O = centimeter * water * g_0 = cmH2O = cm_H2O +atomic_unit_of_pressure = E_h / bohr_radius ** 3 = a_u_pressure + +# Viscosity +[viscosity] = [pressure] * [time] +poise = 0.1 * Pa * second = P +reyn = psi * second + +# Kinematic viscosity +[kinematic_viscosity] = [area] / [time] +stokes = centimeter ** 2 / second = St + +# Fluidity +[fluidity] = 1 / [viscosity] +rhe = 1 / poise + +# Amount of substance +particle = 1 / N_A = _ = molec = molecule + +# Concentration +[concentration] = [substance] / [volume] +molar = mole / liter = M + +# Catalytic activity +[activity] = [substance] / [time] +katal = mole / second = kat +enzyme_unit = micromole / minute = U = enzymeunit + +# Entropy +[entropy] = [energy] / [temperature] +clausius = calorie / kelvin = Cl + +# Molar entropy +[molar_entropy] = [entropy] / [substance] +entropy_unit = calorie / kelvin / mole = eu + +# Radiation +becquerel = counts_per_second = Bq +curie = 3.7e10 * becquerel = Ci +rutherford = 1e6 * becquerel = Rd +gray = joule / kilogram = Gy +sievert = joule / kilogram = Sv +rem = 0.01 * sievert +roentgen = 2.58e-4 * coulomb / kilogram = _ = röntgen # approximate, depends on medium + +# Luminance +[luminance] = [luminosity] / [area] +nit = candela / meter ** 2 +stilb = candela / centimeter ** 2 +lambert = 1 / π * candela / centimeter ** 2 + +# Luminous flux +[luminous_flux] = [luminosity] * [angle] ** 2 +lumen = candela * steradian = lm + +# Illuminance +[illuminance] = [luminous_flux] / [area] +lux = lumen / meter ** 2 = lx + +# Intensity +[intensity] = [power] / [area] +atomic_unit_of_intensity = 0.5 * ε_0 * c * atomic_unit_of_electric_field ** 2 = a_u_intensity + +# Current +biot = 10 * ampere = Bi +abampere = biot = abA +atomic_unit_of_current = e / atomic_unit_of_time = a_u_current +mean_international_ampere = mean_international_volt / mean_international_ohm = A_it +US_international_ampere = US_international_volt / US_international_ohm = A_US +conventional_ampere_90 = K_J90 * R_K90 / (K_J * R_K) * ampere = A_90 +planck_current = (c ** 6 / gravitational_constant / k_C) ** 0.5 + +# Charge +[charge] = [current] * [time] +coulomb = ampere * second = C +abcoulomb = 10 * C = abC +faraday = e * N_A * mole +conventional_coulomb_90 = K_J90 * R_K90 / (K_J * R_K) * coulomb = C_90 + +# Electric potential +[electric_potential] = [energy] / [charge] +volt = joule / coulomb = V +abvolt = 1e-8 * volt = abV +mean_international_volt = 1.00034 * volt = V_it # approximate +US_international_volt = 1.00033 * volt = V_US # approximate +conventional_volt_90 = K_J90 / K_J * volt = V_90 + +# Electric field +[electric_field] = [electric_potential] / [length] +atomic_unit_of_electric_field = e * k_C / a_0 ** 2 = a_u_electric_field + +# Electric displacement field +[electric_displacement_field] = [charge] / [area] + +# Resistance +[resistance] = [electric_potential] / [current] +ohm = volt / ampere = Ω +abohm = 1e-9 * ohm = abΩ +mean_international_ohm = 1.00049 * ohm = Ω_it = ohm_it # approximate +US_international_ohm = 1.000495 * ohm = Ω_US = ohm_US # approximate +conventional_ohm_90 = R_K / R_K90 * ohm = Ω_90 = ohm_90 + +# Resistivity +[resistivity] = [resistance] * [length] + +# Conductance +[conductance] = [current] / [electric_potential] +siemens = ampere / volt = S = mho +absiemens = 1e9 * siemens = abS = abmho + +# Capacitance +[capacitance] = [charge] / [electric_potential] +farad = coulomb / volt = F +abfarad = 1e9 * farad = abF +conventional_farad_90 = R_K90 / R_K * farad = F_90 + +# Inductance +[inductance] = [magnetic_flux] / [current] +henry = weber / ampere = H +abhenry = 1e-9 * henry = abH +conventional_henry_90 = R_K / R_K90 * henry = H_90 + +# Magnetic flux +[magnetic_flux] = [electric_potential] * [time] +weber = volt * second = Wb +unit_pole = µ_0 * biot * centimeter + +# Magnetic field +[magnetic_field] = [magnetic_flux] / [area] +tesla = weber / meter ** 2 = T +gamma = 1e-9 * tesla = γ +gauss = 1e-4 * tesla = G + +# Magnetic field strength +[magnetic_field_strength] = [current] / [length] + +# Electric dipole moment +[electric_dipole] = [charge] * [length] +debye = 1e-9 / ζ * coulomb * angstrom = D # formally 1 D = 1e-10 Fr*Å, but we generally want to use it outside the Gaussian context + +# Electric quadrupole moment +[electric_quadrupole] = [charge] * [area] +buckingham = debye * angstrom + +# Magnetic dipole moment +[magnetic_dipole] = [current] * [area] +bohr_magneton = e * h_bar / (2 * m_e) = µ_B = mu_B +nuclear_magneton = e * h_bar / (2 * m_p) = µ_N = mu_N + +# Pixel density +[pixel_density] = [digital_image_resolution] / [length] +pixels_per_inch = px / inch = PPI = ppi +pixels_per_centimeter = px / cm = PPCM = ppcm + +#### UNIT GROUPS #### +# Mostly for length, area, volume, mass, force +# (customary or specialized units) + +@group USCSLengthInternational + thou = 1e-3 * inch = th = mil_length + inch = yard / 36 = in = international_inch = inches = international_inches + hand = 4 * inch + foot = yard / 3 = ft = international_foot = feet = international_feet + yard = 0.9144 * meter = yd = international_yard # since Jul 1959 + mile = 1760 * yard = mi = international_mile + + square_inch = inch ** 2 = sq_in = square_inches + square_foot = foot ** 2 = sq_ft = square_feet + square_yard = yard ** 2 = sq_yd + square_mile = mile ** 2 = sq_mi + + cubic_inch = in ** 3 = cu_in + cubic_foot = ft ** 3 = cu_ft = cubic_feet + cubic_yard = yd ** 3 = cu_yd +@end + +@group USCSLengthSurvey + link = 1e-2 * chain = li = survey_link + survey_foot = 1200 / 3937 * meter = sft + fathom = 6 * survey_foot + rod = 16.5 * survey_foot = rd = pole = perch + chain = 4 * rod + furlong = 40 * rod = fur + cables_length = 120 * fathom + survey_mile = 5280 * survey_foot = smi = us_statute_mile + league = 3 * survey_mile + + square_rod = rod ** 2 = sq_rod = sq_pole = sq_perch + acre = 10 * chain ** 2 + square_survey_mile = survey_mile ** 2 = _ = section + square_league = league ** 2 + + acre_foot = acre * survey_foot = _ = acre_feet +@end + +@group USCSLiquidVolume + minim = pint / 7680 + fluid_dram = pint / 128 = fldr = fluidram = US_fluid_dram = US_liquid_dram + fluid_ounce = pint / 16 = floz = US_fluid_ounce = US_liquid_ounce + gill = pint / 4 = gi = liquid_gill = US_liquid_gill + pint = quart / 2 = liquid_pint = US_pint + fifth = gallon / 5 = _ = US_liquid_fifth + quart = gallon / 4 = qt = liquid_quart = US_liquid_quart + gallon = 231 * cubic_inch = gal = liquid_gallon = US_liquid_gallon +@end + +@group Avoirdupois + dram = pound / 256 = dr = avoirdupois_dram = avdp_dram = drachm + ounce = pound / 16 = oz = avoirdupois_ounce = avdp_ounce + pound = 7e3 * grain = lb = avoirdupois_pound = avdp_pound + stone = 14 * pound + quarter = 28 * stone + bag = 94 * pound + hundredweight = 100 * pound = cwt = short_hundredweight + long_hundredweight = 112 * pound + ton = 2e3 * pound = _ = short_ton + long_ton = 2240 * pound + slug = g_0 * pound * second ** 2 / foot + slinch = g_0 * pound * second ** 2 / inch = blob = slugette + + force_ounce = g_0 * ounce = ozf = ounce_force + force_pound = g_0 * pound = lbf = pound_force + force_ton = g_0 * ton = _ = ton_force = force_short_ton = short_ton_force + force_long_ton = g_0 * long_ton = _ = long_ton_force + kip = 1e3 * force_pound + poundal = pound * foot / second ** 2 = pdl +@end + +@group AvoirdupoisUK using Avoirdupois + UK_hundredweight = long_hundredweight = UK_cwt + UK_ton = long_ton + UK_force_ton = force_long_ton = _ = UK_ton_force +@end + +@group AvoirdupoisUS using Avoirdupois + US_hundredweight = hundredweight = US_cwt + US_ton = ton + US_force_ton = force_ton = _ = US_ton_force +@end + +@group Troy + pennyweight = 24 * grain = dwt + troy_ounce = 480 * grain = toz = ozt + troy_pound = 12 * troy_ounce = tlb = lbt +@end + +@group ImperialVolume + imperial_minim = imperial_fluid_ounce / 480 + imperial_fluid_scruple = imperial_fluid_ounce / 24 + imperial_fluid_drachm = imperial_fluid_ounce / 8 = imperial_fldr = imperial_fluid_dram + imperial_fluid_ounce = imperial_pint / 20 = imperial_floz = UK_fluid_ounce + imperial_gill = imperial_pint / 4 = imperial_gi = UK_gill + imperial_cup = imperial_pint / 2 = imperial_cp = UK_cup + imperial_pint = imperial_gallon / 8 = imperial_pt = UK_pint + imperial_quart = imperial_gallon / 4 = imperial_qt = UK_quart + imperial_gallon = 4.54609 * liter = imperial_gal = UK_gallon + imperial_peck = 2 * imperial_gallon = imperial_pk = UK_pk + imperial_bushel = 8 * imperial_gallon = imperial_bu = UK_bushel + imperial_barrel = 36 * imperial_gallon = imperial_bbl = UK_bbl +@end + + +#### CONVERSION CONTEXTS #### + +@context(n=1) spectroscopy = sp + # n index of refraction of the medium. + [length] <-> [frequency]: speed_of_light / n / value + [frequency] -> [energy]: planck_constant * value + [energy] -> [frequency]: value / planck_constant + # allow wavenumber / kayser + [wavenumber] <-> [length]: 1 / value +@end + +@context boltzmann + [temperature] -> [energy]: boltzmann_constant * value + [energy] -> [temperature]: value / boltzmann_constant +@end + +@context energy + [energy] -> [energy] / [substance]: value * N_A + [energy] / [substance] -> [energy]: value / N_A + [energy] -> [mass]: value / c ** 2 + [mass] -> [energy]: value * c ** 2 +@end + +@context(mw=0,volume=0,solvent_mass=0) chemistry = chem + # mw is the molecular weight of the species + # volume is the volume of the solution + # solvent_mass is the mass of solvent in the solution + + # moles -> mass require the molecular weight + [substance] -> [mass]: value * mw + [mass] -> [substance]: value / mw + + # moles/volume -> mass/volume and moles/mass -> mass/mass + # require the molecular weight + [substance] / [volume] -> [mass] / [volume]: value * mw + [mass] / [volume] -> [substance] / [volume]: value / mw + [substance] / [mass] -> [mass] / [mass]: value * mw + [mass] / [mass] -> [substance] / [mass]: value / mw + + # moles/volume -> moles requires the solution volume + [substance] / [volume] -> [substance]: value * volume + [substance] -> [substance] / [volume]: value / volume + + # moles/mass -> moles requires the solvent (usually water) mass + [substance] / [mass] -> [substance]: value * solvent_mass + [substance] -> [substance] / [mass]: value / solvent_mass + + # moles/mass -> moles/volume require the solvent mass and the volume + [substance] / [mass] -> [substance]/[volume]: value * solvent_mass / volume + [substance] / [volume] -> [substance] / [mass]: value / solvent_mass * volume + +@end + + +#### SYSTEMS OF UNITS #### + +@system SI + second + meter + kilogram + ampere + kelvin + mole + candela +@end + +@system mks using international + meter + kilogram + second +@end + +@system atomic using international + # based on unit m_e, e, h_bar, k_C, k + bohr: meter + electron_mass: gram + atomic_unit_of_time: second + atomic_unit_of_current: ampere + atomic_unit_of_temperature: kelvin +@end diff --git a/pynxtools/dataconverter/verify.py b/pynxtools/dataconverter/verify.py new file mode 100644 index 000000000..43f8aa131 --- /dev/null +++ b/pynxtools/dataconverter/verify.py @@ -0,0 +1,163 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Verifies a nxs file""" + +import logging +import os +import sys +import xml.etree.ElementTree as ET +from os import path +from typing import Dict, Union + +import click +from h5py import Dataset, File, Group, is_hdf5 + +from pynxtools.dataconverter import helpers +from pynxtools.dataconverter.template import Template +from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path + +logger = logging.getLogger(__name__) + +DEBUG_TEMPLATE = 9 +logger.setLevel(logging.INFO) +logger.addHandler(logging.StreamHandler(sys.stdout)) + + +def _replace_group_names(class_map: Dict[str, str], path: str): + for class_path, nx_class in class_map.items(): + if f"/{class_path}/" in path or path.startswith(f"{class_path}/"): + path = path.replace(f"{class_path}/", f"{nx_class}[{class_path}]/") + return path + + +def _get_def_map(file: str) -> Dict[str, str]: + def_map: Dict[str, str] = {} + with File(file, "r") as h5file: + for entry_name, dataset in h5file.items(): + if ( + helpers.clean_str_attr(dataset.attrs.get("NX_class")) == "NXentry" + and f"/{entry_name}/definition" in h5file + ): + def_map.update( + { + entry_name: ( + definition := h5file[f"/{entry_name}/definition"][ + () + ].decode("utf8") + ) + } + ) + logger.debug("Reading entry '%s': '%s'", entry_name, definition) + + return def_map + + +def _get_nxdl_root(nxdl: str) -> ET.Element: + definitions_path = get_nexus_definitions_path() + nxdl_path = os.path.join( + definitions_path, "contributed_definitions", f"{nxdl}.nxdl.xml" + ) + if not os.path.exists(nxdl_path): + nxdl_path = os.path.join(definitions_path, "applications", f"{nxdl}.nxdl.xml") + if not os.path.exists(nxdl_path): + raise FileNotFoundError(f"The nxdl file, {nxdl}, was not found.") + + return ET.parse(nxdl_path).getroot() + + +@click.command() +@click.argument("file") +def verify(file: str): + """Verifies a nexus file""" + + if not path.exists(file): + raise click.FileError(file, hint=f'File "{file}" does not exist.') + + if not path.isfile(file): + raise click.FileError(file, hint=f'"{file}" is not a file.') + + if not is_hdf5(file): + raise click.FileError(file, hint=f'"{file}" is not a valid HDF5 file.') + + def collect_entries(name: str, dataset: Union[Group, Dataset]): + clean_name = _replace_group_names(class_map, name) + if isinstance(dataset, Group) and ( + nx_class := helpers.clean_str_attr(dataset.attrs.get("NX_class")) + ): + entry_name = name.rsplit("/", 1)[-1] + clean_nx_class = nx_class[2:].upper() + + is_variadic = True + clean_name = _replace_group_names(class_map, name) + for ref_entry in ref_template: + if ref_entry.startswith(f"{entry_path}/{clean_name}"): + is_variadic = False + break + + if is_variadic: + class_map[entry_name] = clean_nx_class + logger.debug("Adding class %s to %s", clean_nx_class, entry_name) + + if isinstance(dataset, Dataset): + logger.debug("Adding field %s/%s", entry_path, clean_name) + if isinstance(read_data := dataset[()], bytes): + read_data = read_data.decode("utf-8") + data_template[f"{entry_path}/{clean_name}"] = read_data + + for attr_name, val in dataset.attrs.items(): + if attr_name == "NX_class": + continue + logger.debug( + "Adding attribute %s/%s/@%s", entry_path, clean_name, attr_name + ) + data_template[f"{entry_path}/{clean_name}/@{attr_name}"] = val + + def_map = _get_def_map(file) + + if not def_map: + logger.info("Could not find any valid entry in file %s", file) + + for entry, nxdl in def_map.items(): + data_template = Template() + class_map: Dict[str, str] = {} + entry_path = f"/ENTRY[{entry}]" + + ref_template = Template() + nxdl_root = _get_nxdl_root(nxdl) + helpers.generate_template_from_nxdl(nxdl_root, ref_template) + logger.log(DEBUG_TEMPLATE, "Reference template: %s", ref_template) + + with File(file, "r") as h5file: + h5file[f"/{entry}"].visititems(collect_entries) + + logger.debug("Class map: %s", class_map) + logger.log(DEBUG_TEMPLATE, "Processed template %s", data_template) + is_valid = helpers.validate_data_dict( + ref_template, Template(data_template), nxdl_root + ) + + if is_valid: + logger.info( + f"The entry `{entry}` in file `{file}` is a valid file" + f" according to the `{nxdl}` application definition.", + ) + else: + logger.info( + f"Invalid: The entry `{entry}` in file `{file}` is NOT a valid file" + f" according to the `{nxdl}` application definition.", + ) diff --git a/pynxtools/dataconverter/writer.py b/pynxtools/dataconverter/writer.py index 75ebf97ec..bc53861da 100644 --- a/pynxtools/dataconverter/writer.py +++ b/pynxtools/dataconverter/writer.py @@ -29,6 +29,10 @@ from pynxtools.dataconverter import helpers from pynxtools.dataconverter.exceptions import InvalidDictProvided +from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( + NxdlAttributeNotFoundError, + get_node_at_nxdl_path, +) from pynxtools.nexus import nexus logger = logging.getLogger(__name__) # pylint: disable=C0103 @@ -219,10 +223,8 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict: nxdl_path = helpers.convert_data_converter_dict_to_nxdl_path(path) try: - elem = nexus.get_node_at_nxdl_path( - nxdl_path, elem=copy.deepcopy(self.nxdl_data) - ) - except nexus.NxdlAttributeNotFoundError: + elem = get_node_at_nxdl_path(nxdl_path, elem=copy.deepcopy(self.nxdl_data)) + except NxdlAttributeNotFoundError: return None # Remove the name attribute as we only use it to name the HDF5 entry diff --git a/pynxtools/nexus/nexus.py b/pynxtools/nexus/nexus.py index 41b619520..5702dfe1b 100644 --- a/pynxtools/nexus/nexus.py +++ b/pynxtools/nexus/nexus.py @@ -814,7 +814,7 @@ def process_nexus_master_file(self, parser): ) def main(nexus_file, documentation, concept): """The main function to call when used as a script.""" - logging_format = "%(levelname)s: %(message)s" + logging_format = "%(message)s" stdout_handler = logging.StreamHandler(sys.stdout) stdout_handler.setLevel(logging.DEBUG) logging.basicConfig( diff --git a/pyproject.toml b/pyproject.toml index 223608597..e71aae0b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,8 @@ dependencies = [ "ase>=3.19.0", "mergedeep", "importlib-metadata", - "lxml>=4.9.1", + "lxml>=4.9.1", + "pint>=0.17", ] [project.urls] @@ -86,6 +87,7 @@ xrd = [ read_nexus = "pynxtools.nexus.nexus:main" dataconverter = "pynxtools.dataconverter.convert:main_cli" generate_eln = "pynxtools.eln_mapper.eln_mapper:get_eln" +verify_nexus = "pynxtools.dataconverter.verify:verify" [tool.setuptools.package-data] pynxtools = ["definitions/**/*.xml", "definitions/**/*.xsd"] diff --git a/tests/data/dataconverter/readers/example/testdata.json b/tests/data/dataconverter/readers/example/testdata.json index 114e38cf2..ca31a424d 100644 --- a/tests/data/dataconverter/readers/example/testdata.json +++ b/tests/data/dataconverter/readers/example/testdata.json @@ -2,11 +2,11 @@ "bool_value": true, "char_value": "A random string!", "float_value": 0.1, - "float_value_units": "Units are always strings.", + "float_value_units": "eV", "int_value": -3, - "int_value_units": "m/s^2", + "int_value_units": "nm", "posint_value": 7, - "posint_value_units": "V", + "posint_value_units": "m", "definition": "NXtest", "definition_version": "0.0.1", "program_name": "Nexus Parser", diff --git a/tests/data/dataconverter/readers/json_map/data.json b/tests/data/dataconverter/readers/json_map/data.json index ae0cf6c88..40d8d82ad 100644 --- a/tests/data/dataconverter/readers/json_map/data.json +++ b/tests/data/dataconverter/readers/json_map/data.json @@ -3,14 +3,14 @@ "bool_value": true, "char_value": "A random string!", "float_value": 0.1, - "float_value_units": "Units are always strings.", + "float_value_units": "eV", "int_value": -3, "another_level_down":{ - "int_value_units": "m/s^2", + "int_value_units": "nm", "posint_value": 7 } }, - "posint_value_units": "V", + "posint_value_units": "m", "definition": "NXtest", "definition_version": "0.0.1", "program_name": "Nexus Parser", diff --git a/tests/data/dataconverter/readers/json_map/data.mapping.json b/tests/data/dataconverter/readers/json_map/data.mapping.json index 055b0977e..5c9d8e39a 100644 --- a/tests/data/dataconverter/readers/json_map/data.mapping.json +++ b/tests/data/dataconverter/readers/json_map/data.mapping.json @@ -1,14 +1,14 @@ { - "/ENTRY[entry]/NXODD_name/bool_value": "/a_level_down/bool_value", - "/ENTRY[entry]/NXODD_name/char_value": "/a_level_down/char_value", - "/ENTRY[entry]/NXODD_name/date_value": "/date_value", - "/ENTRY[entry]/NXODD_name/float_value": "/a_level_down/float_value", - "/ENTRY[entry]/NXODD_name/float_value/@units": "/a_level_down/float_value_units", - "/ENTRY[entry]/NXODD_name/int_value": "/a_level_down/int_value", - "/ENTRY[entry]/NXODD_name/int_value/@units": "/a_level_down/another_level_down/int_value_units", - "/ENTRY[entry]/NXODD_name/posint_value": "/a_level_down/another_level_down/posint_value", - "/ENTRY[entry]/NXODD_name/posint_value/@units": "/posint_value_units", - "/ENTRY[entry]/NXODD_name/type": "/type", + "/ENTRY[entry]/NXODD_name[nxodd_name]/bool_value": "/a_level_down/bool_value", + "/ENTRY[entry]/NXODD_name[nxodd_name]/char_value": "/a_level_down/char_value", + "/ENTRY[entry]/NXODD_name[nxodd_name]/date_value": "/date_value", + "/ENTRY[entry]/NXODD_name[nxodd_name]/float_value": "/a_level_down/float_value", + "/ENTRY[entry]/NXODD_name[nxodd_name]/float_value/@units": "/a_level_down/float_value_units", + "/ENTRY[entry]/NXODD_name[nxodd_name]/int_value": "/a_level_down/int_value", + "/ENTRY[entry]/NXODD_name[nxodd_name]/int_value/@units": "/a_level_down/another_level_down/int_value_units", + "/ENTRY[entry]/NXODD_name[nxodd_name]/posint_value": "/a_level_down/another_level_down/posint_value", + "/ENTRY[entry]/NXODD_name[nxodd_name]/posint_value/@units": "/posint_value_units", + "/ENTRY[entry]/NXODD_name[nxodd_name]/type": "/type", "/ENTRY[entry]/definition": "/definition", "/ENTRY[entry]/definition/@version": "/definition_version", "/ENTRY[entry]/optional_parent/optional_child": { diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 723aca5cf..e435f0fba 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -163,16 +163,16 @@ def fixture_filled_test_data(template, tmp_path): template.clear() template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = "nm" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = "eV" template["/ENTRY[my_entry]/optional_parent/required_child"] = 1 template["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "nm" template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( [1, 2, 3], dtype=np.int8 ) - template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = "kg" + template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = "m" template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = "just chars" template["/ENTRY[my_entry]/definition"] = "NXtest" template["/ENTRY[my_entry]/definition/@version"] = "2.4.6" @@ -193,19 +193,19 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE = Template() TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 # pylint: disable=E1126 TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = ( - "nm" # pylint: disable=E1126 + "eV" # pylint: disable=E1126 ) TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/required_child"] = 1 # pylint: disable=E1126 TEMPLATE["optional"]["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "nm" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( [1, 2, 3], # pylint: disable=E1126 dtype=np.int8, ) # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = ( - "kg" # pylint: disable=E1126 + "m" # pylint: disable=E1126 ) TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = ( "just chars" # pylint: disable=E1126 @@ -213,7 +213,7 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/bool_value"] = True # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value"] = 2 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value/@units"] = ( - "eV" # pylint: disable=E1126 + "nm" # pylint: disable=E1126 ) TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value"] = ( np.array( @@ -223,7 +223,7 @@ def fixture_filled_test_data(template, tmp_path): ) # pylint: disable=E1126 TEMPLATE["required"][ "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value/@units" -] = "kg" # pylint: disable=E1126 +] = "m" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value"] = ( "just chars" # pylint: disable=E1126 ) @@ -496,7 +496,8 @@ def fixture_filled_test_data(template, tmp_path): "required", ), ( - "The required group, /ENTRY[entry]/optional_parent/req_group_in_opt_group, hasn't been " + "The required group, /ENTRY[entry]/optional_parent/req_group_in_opt_group," + " hasn't been " "supplied while its optional parent, /ENTRY[entry]/optional_parent, is supplied." ), id="req-group-in-opt-parent-removed", diff --git a/tests/dataconverter/test_writer.py b/tests/dataconverter/test_writer.py index 506940c46..66cdfac13 100644 --- a/tests/dataconverter/test_writer.py +++ b/tests/dataconverter/test_writer.py @@ -55,7 +55,7 @@ def test_write(writer): writer.write() test_nxs = h5py.File(writer.output_path, "r") assert test_nxs["/my_entry/nxodd_name/int_value"][()] == 2 - assert test_nxs["/my_entry/nxodd_name/int_value"].attrs["units"] == "eV" + assert test_nxs["/my_entry/nxodd_name/int_value"].attrs["units"] == "nm" assert test_nxs["/my_entry/nxodd_name/posint_value"].shape == (3,) # pylint: disable=no-member diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index 2553abeb1..0143d4b66 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -22,6 +22,13 @@ import lxml.etree as ET +from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( + get_inherited_nodes, + get_node_at_nxdl_path, + get_nx_attribute_type, + get_nx_classes, + get_nx_units, +) from pynxtools.nexus import nexus logger = logging.getLogger(__name__) @@ -35,16 +42,16 @@ def test_get_nexus_classes_units_attributes(): the tested functions can be found in nexus.py file""" # Test 1 - nexus_classes_list = nexus.get_nx_classes() + nexus_classes_list = get_nx_classes() assert "NXbeam" in nexus_classes_list # Test 2 - nexus_units_list = nexus.get_nx_units() + nexus_units_list = get_nx_units() assert "NX_TEMPERATURE" in nexus_units_list # Test 3 - nexus_attribute_list = nexus.get_nx_attribute_type() + nexus_attribute_list = get_nx_attribute_type() assert "NX_FLOAT" in nexus_attribute_list @@ -86,59 +93,57 @@ def test_get_node_at_nxdl_path(): local_dir = os.path.abspath(os.path.dirname(__file__)) nxdl_file_path = os.path.join(local_dir, "../data/dataconverter/NXtest.nxdl.xml") elem = ET.parse(nxdl_file_path).getroot() - node = nexus.get_node_at_nxdl_path("/ENTRY/NXODD_name", elem=elem) + node = get_node_at_nxdl_path("/ENTRY/NXODD_name", elem=elem) assert node.attrib["type"] == "NXdata" assert node.attrib["name"] == "NXODD_name" - node = nexus.get_node_at_nxdl_path("/ENTRY/NXODD_name/float_value", elem=elem) + node = get_node_at_nxdl_path("/ENTRY/NXODD_name/float_value", elem=elem) assert node.attrib["type"] == "NX_FLOAT" assert node.attrib["name"] == "float_value" - node = nexus.get_node_at_nxdl_path( - "/ENTRY/NXODD_name/AXISNAME/long_name", elem=elem - ) + node = get_node_at_nxdl_path("/ENTRY/NXODD_name/AXISNAME/long_name", elem=elem) assert node.attrib["name"] == "long_name" nxdl_file_path = os.path.join(local_dir, "../data/nexus/NXtest2.nxdl.xml") elem = ET.parse(nxdl_file_path).getroot() - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/measurement/EVENT_DATA_EM/USER/affiliation", elem=elem ) assert node.attrib["name"] == "affiliation" - node = nexus.get_node_at_nxdl_path("/ENTRY/measurement", elem=elem) + node = get_node_at_nxdl_path("/ENTRY/measurement", elem=elem) assert node.attrib["type"] == "NXevent_data_em_set" - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/measurement/EVENT_DATA_EM/SPECTRUM_SET/collection", elem=elem ) assert node.attrib["type"] == "NXdata" - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/measurement/EVENT_DATA_EM/SPECTRUM_SET/collection/DATA", elem=elem ) assert node.attrib["type"] == "NX_NUMBER" - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/measurement/EVENT_DATA_EM/SPECTRUM_SET/collection/AXISNAME_indices", elem=elem, ) assert node.attrib["name"] == "AXISNAME_indices" - node = nexus.get_node_at_nxdl_path("/ENTRY/COORDINATE_SYSTEM_SET", elem=elem) + node = get_node_at_nxdl_path("/ENTRY/COORDINATE_SYSTEM_SET", elem=elem) assert node.attrib["type"] == "NXcoordinate_system_set" - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/COORDINATE_SYSTEM_SET/TRANSFORMATIONS", elem=elem ) assert node.attrib["type"] == "NXtransformations" - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/COORDINATE_SYSTEM_SET/TRANSFORMATIONS/AXISNAME", elem=elem ) assert node.attrib["type"] == "NX_NUMBER" - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/COORDINATE_SYSTEM_SET/TRANSFORMATIONS/AXISNAME/transformation_type", elem=elem, ) @@ -149,12 +154,12 @@ def test_get_node_at_nxdl_path(): "../../pynxtools/definitions/contributed_definitions/NXiv_temp.nxdl.xml", ) elem = ET.parse(nxdl_file_path).getroot() - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", elem=elem ) assert node.attrib["name"] == "voltage_controller" - node = nexus.get_node_at_nxdl_path( + node = get_node_at_nxdl_path( "/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller/calibration_time", elem=elem ) assert node.attrib["name"] == "calibration_time" @@ -168,17 +173,17 @@ def test_get_inherited_nodes(): "../../pynxtools/definitions/contributed_definitions/NXiv_temp.nxdl.xml", ) elem = ET.parse(nxdl_file_path).getroot() - (_, _, elist) = nexus.get_inherited_nodes( + (_, _, elist) = get_inherited_nodes( nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT", elem=elem ) assert len(elist) == 3 - (_, _, elist) = nexus.get_inherited_nodes( + (_, _, elist) = get_inherited_nodes( nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", elem=elem ) assert len(elist) == 4 - (_, _, elist) = nexus.get_inherited_nodes( + (_, _, elist) = get_inherited_nodes( nxdl_path="/ENTRY/INSTRUMENT/ENVIRONMENT/voltage_controller", nx_name="NXiv_temp", )