|
import numpy as np
import matplotlib.pyplot as plt
import pymatgen
from scipy.ndimage import binary_erosion, gaussian_filter

from py4DSTEM.process.utils import tqdmnd
| 8 | + |
class Cluster:
    """
    Cluster 4D-STEM data by grouping real-space probe positions whose
    diffraction patterns are similar.

    Similarity is the cosine similarity between a probe position's
    diffraction pattern and each of its 8 nearest real-space neighbors;
    clusters are then grown with a region-growing ("marching") algorithm.

    Typical workflow::

        c = Cluster(datacube)
        c.find_similarity(mask=...)
        c.indexing_clusters_all(mask=..., threshold=...)
        c.create_cluster_cube(min_cluster_size=...)
    """

    # Offsets of the 8 nearest real-space neighbors. Defined once so that
    # find_similarity and indexing_clusters_all always agree on the
    # neighbor ordering (the third similarity axis is indexed by this order).
    # NOTE: (-1,-1) is equivalent to (1,1) seen from the other pixel.
    _NEIGHBOR_OFFSETS = np.array(
        (
            (-1, -1),
            (-1, 0),
            (-1, 1),
            (0, -1),
            (1, 1),
            (1, 0),
            (1, -1),
            (0, 1),
        )
    )

    def __init__(
        self,
        datacube,
    ):
        """
        Args:
            datacube (py4DSTEM.DataCube): 4D-STEM data, indexed as
                datacube[rx, ry, qx, qy] (real-space scan position,
                diffraction-space pixel) — assumed 4D; TODO confirm.
        """
        self.datacube = datacube

    def find_similarity(
        self,
        mask=None,  # by default, use the full diffraction pattern
    ):
        """
        Compute the cosine similarity between the diffraction pattern at
        every probe position and each of its 8 nearest neighbors.

        Populates ``self.similarity`` with shape (R_Nx, R_Ny, 8). Entries
        remain -1 where the neighbor falls outside the scan, and where a
        pattern has zero total intensity (which would otherwise produce
        NaN via a 0/0 division).

        Args:
            mask (np.ndarray of bool, optional): diffraction-space mask;
                when given, only the masked pixels enter the similarity.
        """
        # Kept as an instance attribute for backward compatibility.
        self.dxy = self._NEIGHBOR_OFFSETS.copy()

        # Initialize to -1 so out-of-bounds neighbors are distinguishable.
        self.similarity = -1 * np.ones(
            (self.datacube.shape[0], self.datacube.shape[1], self.dxy.shape[0])
        )

        # Loop over probe positions
        for rx, ry in tqdmnd(
            range(self.datacube.shape[0]),
            range(self.datacube.shape[1]),
        ):
            if mask is None:
                diff_ref = self.datacube[rx, ry]
            else:
                diff_ref = self.datacube[rx, ry][mask]

            # Hoisted out of the neighbor loop: the reference norm does
            # not depend on the neighbor.
            norm_ref = np.sqrt(np.sum(diff_ref * diff_ref))

            # Loop over neighbors
            for ind in range(self.dxy.shape[0]):
                x_ind = rx + self.dxy[ind, 0]
                y_ind = ry + self.dxy[ind, 1]
                if (
                    x_ind >= 0
                    and y_ind >= 0
                    and x_ind < self.datacube.shape[0]
                    and y_ind < self.datacube.shape[1]
                ):
                    if mask is None:
                        diff = self.datacube[x_ind, y_ind]
                    else:
                        diff = self.datacube[x_ind, y_ind][mask]

                    norm = np.sqrt(np.sum(diff * diff))

                    # Cosine similarity (normalized correlation).
                    # Guard against zero-intensity patterns: previously a
                    # 0/0 produced NaN, which poisoned the averaged
                    # similarity map used by indexing_clusters_all.
                    if norm > 0 and norm_ref > 0:
                        self.similarity[rx, ry, ind] = (
                            np.sum(diff * diff_ref) / norm / norm_ref
                        )

    def get_color(self, cluster_index):
        """Map a 1-based cluster index to a matplotlib color name (cycles
        through a fixed 10-color palette)."""
        colors = [
            "slategray",
            "lightcoral",
            "gold",
            "darkorange",
            "yellowgreen",
            "lightseagreen",
            "cornflowerblue",
            "royalblue",
            "lightsteelblue",
            "darkseagreen",
        ]
        return colors[(cluster_index - 1) % len(colors)]

    def indexing_clusters_all(
        self,
        mask,
        threshold,
    ):
        """
        Grow clusters over the whole scan.

        Each cluster is seeded at the not-yet-visited pixel with the
        highest neighbor-averaged similarity, then grown by repeatedly
        absorbing neighbors whose pairwise similarity exceeds
        ``threshold`` (marching algorithm). Requires ``find_similarity``
        to have been run first.

        Args:
            mask: currently unused; kept for interface compatibility.
            threshold (float): cosine-similarity cutoff above which a
                neighbor is absorbed into the current cluster.

        Populates:
            self.cluster_map (np.ndarray): (R_Nx, R_Ny, 4) RGBA image,
                one color per cluster; single-pixel clusters are opaque
                black; uncolored pixels stay (0, 0, 0, 0).
            self.cluster_list (list of np.ndarray): one (n_pixels, 2)
                array of (rx, ry) indices per cluster.
        """
        # Kept as an instance attribute for backward compatibility.
        self.dxy = self._NEIGHBOR_OFFSETS.copy()

        # Per-pixel similarity averaged over the 8 neighbors.
        # NOTE(review): out-of-bounds neighbors contribute -1 to this
        # mean, which biases edge pixels low — confirm this is intended.
        sim_averaged = np.mean(self.similarity, axis=2)
        nx, ny = sim_averaged.shape

        # RGBA image colored per cluster; (0,0,0,0) == not yet clustered.
        self.cluster_map = np.zeros((nx, ny, 4), dtype=np.float64)

        # One (n_pixels, 2) index array per cluster.
        self.cluster_list = []

        cluster_count_ind = 0

        # sim_averaged[rx, ry] is set to -1 once that pixel's neighbors
        # have been searched; loop until every pixel has been visited.
        while np.any(sim_averaged != -1):

            # Seed the new cluster at the most self-similar unvisited pixel.
            rx0, ry0 = np.unravel_index(sim_averaged.argmax(), sim_averaged.shape)

            cluster_indices = np.array([[rx0, ry0]], dtype=np.int32)

            color = self.get_color(cluster_count_ind + 1)
            self.cluster_map[rx0, ry0] = plt.cm.colors.to_rgba(color)

            # Marching: keep sweeping the cluster's pixels, absorbing
            # similar neighbors, until a sweep visits no new pixel.
            while True:
                counting_added_pixel = 0

                for rx0, ry0 in cluster_indices:

                    if sim_averaged[rx0, ry0] == -1:
                        # This pixel's neighbors were already searched.
                        continue

                    counting_added_pixel += 1
                    sim_averaged[rx0, ry0] = -1  # mark as searched

                    for ind in range(self.dxy.shape[0]):
                        x_ind = rx0 + self.dxy[ind, 0]
                        y_ind = ry0 + self.dxy[ind, 1]

                        # Bug fix: reject out-of-range neighbors before
                        # indexing cluster_map. Previously a negative
                        # index silently wrapped around the array (and a
                        # too-large one could raise IndexError when
                        # threshold <= -1).
                        if not (0 <= x_ind < nx and 0 <= y_ind < ny):
                            continue

                        # Absorb if similar enough and not already in any
                        # cluster (uncolored pixels are (0,0,0,0)).
                        if self.similarity[
                            rx0, ry0, ind
                        ] > threshold and np.array_equal(
                            self.cluster_map[x_ind, y_ind], [0, 0, 0, 0]
                        ):
                            cluster_indices = np.append(
                                cluster_indices, [[x_ind, y_ind]], axis=0
                            )
                            color = self.get_color(cluster_count_ind + 1)
                            self.cluster_map[x_ind, y_ind] = plt.cm.colors.to_rgba(
                                color
                            )

                # No pixel was searched this sweep -> cluster complete.
                if counting_added_pixel == 0:
                    break

            # Single-pixel clusters are painted opaque black.
            if cluster_indices.shape[0] == 1:
                self.cluster_map[cluster_indices[0, 0], cluster_indices[0, 1]] = [
                    0,
                    0,
                    0,
                    1,
                ]

            self.cluster_list.append(cluster_indices)
            cluster_count_ind += 1

    def create_cluster_cube(
        self,
        min_cluster_size,
        return_cluster_datacube=False,
    ):
        """
        Average the diffraction patterns of each sufficiently large cluster.

        Requires ``indexing_clusters_all`` to have been run first.

        Args:
            min_cluster_size (int): clusters with fewer pixels than this
                are dropped.
            return_cluster_datacube (bool): if True, return
                (cluster_cube, filtered_cluster_list); otherwise results
                are only stored on self.

        Populates:
            self.filtered_cluster_list (list): clusters passing the size cut.
            self.cluster_cube (np.ndarray): shape
                (n_clusters, 1, Q_Nx, Q_Ny) — axis 1 is a placeholder so
                each cluster's mean pattern can be treated like a datacube.
        """
        self.filtered_cluster_list = [
            arr for arr in self.cluster_list if arr.shape[0] >= min_cluster_size
        ]

        self.cluster_cube = np.empty(
            [
                len(self.filtered_cluster_list),
                1,
                self.datacube.shape[2],
                self.datacube.shape[3],
            ]
        )

        for i in tqdmnd(range(len(self.filtered_cluster_list))):
            # Convert once (was converted twice per iteration before).
            inds = np.asarray(self.filtered_cluster_list[i])
            self.cluster_cube[i, 0] = self.datacube[inds[:, 0], inds[:, 1]].mean(
                axis=0
            )

        if return_cluster_datacube:
            return self.cluster_cube, self.filtered_cluster_list