eisenforschung · mkuehbach · Feb 7, 2025 · Apr 7, 2025
diff --git a/compositionspace/autophase.py b/compositionspace/autophase.py
@@ -73,9 +73,10 @@ def automated_phase_assignment(self):
         if self.verbose:
             print(f"sorted_indices {sorted_indices} in decreasing feature importance")
             print(f"sorted_index, feature_importance[sorted_index]")
-            for idx in sorted_indices:
-                descending_importances.append(feature_importances[idx])
-                print(f"{idx}, {feature_importances[idx]}")
+        for idx in sorted_indices:
+            descending_importances.append(feature_importances[idx])
+            if self.verbose:
+                print(f"{idx}, {feature_importances[idx]}, {descending_importances[-1]}")
         del feature_importances
 
         h5w = h5py.File(self.config["results_file_path"], "a")
@@ -86,12 +87,12 @@ def automated_phase_assignment(self):
         trg = f"/entry{self.config['entry_id']}/autophase/result"
         grp = h5w.create_group(trg)
         grp.attrs["NX_class"] = "NXdata"
-        grp.attrs["axes"] = "axis_feature_identifier"
-        grp.attrs["axis_feature_identifier_indices"] = np.uint64(0)
+        grp.attrs["axes"] = "axis_feature_indices"
+        grp.attrs["axis_feature_indices_indices"] = np.uint64(0)
         grp.attrs["signal"] = "axis_feature_importance"
         # further attributes, to render it a proper NeXus NXdata object
         dst = h5w.create_dataset(
-            f"{trg}/axis_feature_identifier",
+            f"{trg}/axis_feature_indices",
             compression="gzip",
             compression_opts=1,
             data=np.asarray(sorted_indices, APT_UINT),

diff --git a/compositionspace/clustering.py b/compositionspace/clustering.py
@@ -61,7 +61,7 @@ def run(self):
         dst = h5w.create_dataset(f"{trg}/sequence_index", data=np.uint64(sequence_idx))
         trg = f"/entry{self.config['entry_id']}/clustering/ic_opt"
         grp = h5w.create_group(trg)
-        grp.attrs["NX_class"] = "NXobject"
+        grp.attrs["NX_class"] = "NXprocess"
         h5w.close()
 
         # n_ic_runs = sum(1 for grpnm in ic_results_group_names if grpnm.startswith("cluster_analysis"))

diff --git a/compositionspace/meshing.py b/compositionspace/meshing.py
@@ -51,7 +51,7 @@ def meshes_normals(config_file_path, input_file_path, output_file_path):
     # and other parameter from yaml config file like shown in other *.py files
     Node_list_plot = []
     no_elements = config["voxelization/edge_length"]
-    dist_cut = config["meshing/dist_cut"]
+    dist_cut = config["meshing/distance_cut"]
     normal_end_length = config["meshing/normal_end_length"]
     nodes_edit_lst = []
     simp_plot_edit_lst = []

diff --git a/compositionspace/preparation.py b/compositionspace/preparation.py
@@ -17,6 +17,7 @@
 )
 from compositionspace.utils import (
     APT_UINT,
+    get_sha256,
     ceil_to_multiple,
     floor_to_multiple,
     get_chemical_element_multiplicities,
@@ -88,6 +89,14 @@ def write_init_results(self):
         dst.attrs["url"] = (
             f"https://github.com/eisenforschung/CompositionSpace/releases/tag/{__version__}"
         )
+        trg = f"/entry{self.config['entry_id']}/config"
+        grp = h5w.create_group(trg)
+        grp.attrs["NX_class"] = "NXnote"
+        dst = h5w.create_dataset(f"{trg}/type", data="file")
+        dst = h5w.create_dataset(f"{trg}/file_name", data=self.config["config_file_path"])
+        dst = h5w.create_dataset(f"{trg}/checksum", data=get_sha256(self.config["config_file_path"]))
+        dst = h5w.create_dataset(f"{trg}/algorithm", data="sha256")
+
         h5w.close()
 
     def define_voxelization_grid(self, xyz):
@@ -228,12 +237,12 @@ def write_voxelization_grid_info(self):
         )
         dst.attrs["units"] = "nm"
         dst = h5w.create_dataset(f"{trg}/extent", data=self.extent)
-        identifier_offset = 0  # we count cells starting from this value
+        index_offset = 0  # we count cells starting from this value
         dst = h5w.create_dataset(
-            f"{trg}/identifier_offset", data=np.uint64(identifier_offset)
+            f"{trg}/index_offset", data=np.uint64(index_offset)
         )
 
-        voxel_id = identifier_offset
+        voxel_id = index_offset
         position = np.zeros([c, 3], np.float64)
         for k in np.arange(0, self.extent[2]):
             z = self.aabb3d[2, 0] + (0.5 + k) * dedge
@@ -249,7 +258,7 @@ def write_voxelization_grid_info(self):
         dst.attrs["units"] = "nm"
         del position
 
-        voxel_id = identifier_offset
+        voxel_id = index_offset
         coordinate = np.zeros([c, 3], np.uint64)
         for k in np.arange(0, self.extent[2]):
             for j in np.arange(0, self.extent[1]):
@@ -286,7 +295,7 @@ def write_voxelization_results(self):
         h5w = h5py.File(self.config["results_file_path"], "a")
         trg = f"/entry{self.config['entry_id']}/voxelization/cg_grid"
         dst = h5w.create_dataset(
-            f"{trg}/voxel_identifier",
+            f"{trg}/indices_voxel",
             compression="gzip",
             compression_opts=1,
             data=self.voxel_identifier,
@@ -308,12 +317,14 @@ def write_voxelization_results(self):
                     # alternatively, one could make two loops where in the first an offset lookup table is generated
                     # after this point one can drop the iontype and evap_id columns from the lu_ityp_voxel_id_evap_id lookup table
 
+        atom_types = set()
         for symbol in elem_cnts:
             # atom/molecular ion-type-specific contribution/intensity/count in each voxel/cell
             trg = f"/entry{self.config['entry_id']}/voxelization/element{elem_id[symbol] + 1}"
             print(f"{trg}, {symbol}")
             grp = h5w.create_group(f"{trg}")
             grp.attrs["NX_class"] = "NXion"
+            atom_types.add(str(symbol))
             dst = h5w.create_dataset(f"{trg}/name", data=str(symbol))
             dst = h5w.create_dataset(
                 f"{trg}/weight",
@@ -332,6 +343,17 @@ def write_voxelization_results(self):
         dst = h5w.create_dataset(
             f"{trg}/weight", compression="gzip", compression_opts=1, data=total_cnts
         )
+
+        # specimen group
+        if "specimen/is_simulation" in self.config:
+            trg = f"/entry{self.config['entry_id']}/specimen"
+            grp = h5w.create_group(f"{trg}")
+            grp.attrs["NX_class"] = "NXsample"
+            if self.config["specimen/is_simulation"] is True:
+                dst = h5w.create_dataset(f"{trg}/is_simulation", data=True)
+            else:
+                dst = h5w.create_dataset(f"{trg}/is_simulation", data=False)
+            dst = h5w.create_dataset(f"{trg}/atom_types", data=str(", ".join(list(atom_types))))
         h5w.close()
 
     def run(self, recon_file_path: str, range_file_path: str):

diff --git a/compositionspace/segmentation.py b/compositionspace/segmentation.py
@@ -110,15 +110,15 @@ def perform_bics_minimization_and_write_results(self):
         aics = []
         bics = []
         n_clusters_queue = list(
-            range(1, self.config["segmentation/n_max_ic_cluster"] + 1)
+            range(1, self.config["segmentation/ic_opt/n_max_ic_cluster"] + 1)
         )
         for n_bics_cluster in n_clusters_queue:
             X_train = None
             C_mod = None
             if self.config["autophase/use"]:
                 print("Using results with automated phase assignment")
                 with h5py.File(self.config["results_file_path"], "r") as h5r:
-                    trg = f"/entry{self.config['entry_id']}/autophase/result/axis_feature_identifier"
+                    trg = f"/entry{self.config['entry_id']}/autophase/result/axis_feature_indices"
                     if trg in h5r:
                         descending_indices = h5r[trg][:]
                         # print(descending_indices)
@@ -193,8 +193,8 @@ def perform_bics_minimization_and_write_results(self):
         axis_dim = np.asarray(
             np.linspace(
                 1,
-                self.config["segmentation/n_max_ic_cluster"],
-                num=self.config["segmentation/n_max_ic_cluster"],
+                self.config["segmentation/ic_opt/n_max_ic_cluster"],
+                num=self.config["segmentation/ic_opt/n_max_ic_cluster"],
                 endpoint=True,
             ),
             APT_UINT,

diff --git a/compositionspace/utils.py b/compositionspace/utils.py
@@ -3,6 +3,7 @@
 import os
 import numpy as np
 import h5py
+import hashlib
 from ase.data import chemical_symbols
 
 
@@ -12,6 +13,19 @@
 PRNG_SEED = 42
 
 
+def get_sha256(file_path: str) -> str:
+    """Return the hex SHA256 digest of the file at file_path.
+
+    A path that does not exist yields the digest of empty input.
+    """
+    digest = hashlib.sha256()
+    if os.path.exists(file_path):
+        with open(file_path, "rb") as stream:
+            for chunk in iter(lambda: stream.read(digest.block_size), b""):
+                digest.update(chunk)
+    return digest.hexdigest()
+
+
 def ceil_to_multiple(number, multiple):
     return multiple * np.ceil(number / multiple)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -11,9 +11,8 @@ authors = [
 description = "APT analysis tools"
 readme = "README.md"
 license = { file = "LICENSE" }
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 classifiers = [
-    "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
@@ -26,7 +25,7 @@ dependencies = [
     "h5py",
     "scikit-learn",
     "trimesh",
-    "ifes_apt_tc_data_modeling>=0.2.1",
+    "ifes_apt_tc_data_modeling>=0.2.2",
     "pyyaml",
     "flatdict",
     "lxml",

diff --git a/tests/CompositionSpace.Results.1.nxs.xdmf b/tests/CompositionSpace.Results.1.nxs.xdmf
@@ -0,0 +1,46 @@
+<?xml version='1.0' encoding='utf-8'?>
+<!DOCTYPE Xdmf SYSTEM "Xdmf.dtd" []>
+<Xdmf Version="2.0">
+  <Domain>
+    <Grid Name="entry1/voxelization/cg_grid" GridType="Uniform">
+      <Topology TopologyType="3DCoRectMesh" NumberOfElements="80 44 45"/>
+      <Geometry GeometryType="ORIGIN_DXDYDZ">
+        <DataItem Format="HDF" Dimensions="3" NumberType="Float" Precision="8" Name="Origin">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/cg_grid/origin</DataItem>
+        <DataItem Format="HDF" Dimensions="3" NumberType="Float" Precision="8" Name="Spacing">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/cg_grid/cell_dimensions</DataItem>
+      </Geometry>
+      <Attribute Name="total, weight" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/weight</DataItem>
+      </Attribute>
+      <Attribute Name="C, 1, weight" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/element1/weight</DataItem>
+      </Attribute>
+      <Attribute Name="Cr, 2, weight" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/element2/weight</DataItem>
+      </Attribute>
+      <Attribute Name="Cu, 3, weight" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/element3/weight</DataItem>
+      </Attribute>
+      <Attribute Name="O, 4, weight" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/element4/weight</DataItem>
+      </Attribute>
+      <Attribute Name="Si, 5, weight" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/voxelization/element5/weight</DataItem>
+      </Attribute>
+      <Attribute Name="ic_opt, 0, y_pred" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/segmentation/ic_opt/cluster_analysis0/y_pred</DataItem>
+      </Attribute>
+      <Attribute Name="ic_opt, 1, y_pred" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/segmentation/ic_opt/cluster_analysis1/y_pred</DataItem>
+      </Attribute>
+      <Attribute Name="ic_opt, 2, y_pred" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/segmentation/ic_opt/cluster_analysis2/y_pred</DataItem>
+      </Attribute>
+      <Attribute Name="ic_opt, 3, y_pred" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/segmentation/ic_opt/cluster_analysis3/y_pred</DataItem>
+      </Attribute>
+      <Attribute Name="ic_opt, 4, y_pred" AttributeType="Scalar" Center="Node">
+        <DataItem Format="HDF" Dimensions="45 44 80" NumberType="UInt" Precision="8">/home/kaiobach/Research/hu_hu_hu/sprint26/compspace/CompositionSpace/tests/CompositionSpace.Results.1.nxs:/entry1/segmentation/ic_opt/cluster_analysis4/y_pred</DataItem>
+      </Attribute>
+    </Grid>
+  </Domain>
+</Xdmf>