updating cluster module and ACOM to use cluster dataset for strain mapping

Serin Lee · Serin Lee · commit 2ca59016c372 · 2025-09-23T10:18:15.000-07:00
diff --git a/py4DSTEM/process/diffraction/crystal_ACOM.py b/py4DSTEM/process/diffraction/crystal_ACOM.py
@@ -2212,8 +2212,6 @@ def calculate_strain(
     deformation tensor which transforms the simulated diffraction pattern
     into the experimental pattern, for all probe positons.
 
-    TODO: add robust fitting?
-
     Parameters
     ----------
     bragg_peaks_array (PointListArray):
@@ -2334,71 +2332,72 @@ def calculate_strain(
                     inds_match[a0] = ind_min
                     keep[a0] = True
 
-            # Get all paired peaks
-            qxy = np.vstack((p.data["qx"][keep], p.data["qy"][keep])).T
-            qxy_ref = np.vstack(
-                (p_ref.data["qx"][inds_match[keep]], p_ref.data["qy"][inds_match[keep]])
-            ).T
+            if np.sum(keep) >= min_num_peaks:
+                # Get all paired peaks
+                qxy = np.vstack((p.data["qx"][keep], p.data["qy"][keep])).T
+                qxy_ref = np.vstack(
+                    (p_ref.data["qx"][inds_match[keep]], p_ref.data["qy"][inds_match[keep]])
+                ).T
 
-            # Fit transformation matrix
-            # Note - not sure about transpose here
-            # (though it might not matter if rotation isn't included)
-            if intensity_weighting:
-                weights = np.sqrt(p.data["intensity"][keep, None]) * 0 + 1
-                m = lstsq(
-                    qxy_ref * weights,
-                    qxy * weights,
-                    rcond=None,
-                )[0].T
-            else:
-                m = lstsq(
-                    qxy_ref,
-                    qxy,
-                    rcond=None,
-                )[0].T
-
-            # Robust fitting
-            if robust:
-                for a0 in range(5):
-                    # calculate new weights
-                    qxy_fit = qxy_ref @ m
-                    diff2 = np.sum((qxy_fit - qxy) ** 2, axis=1)
-
-                    weights = np.exp(
-                        diff2 / ((-2 * robust_thresh**2) * np.median(diff2))
-                    )[:, None]
-                    if intensity_weighting:
-                        weights *= np.sqrt(p.data["intensity"][keep, None])
-
-                    # calculate new fits
+                # Fit transformation matrix
+                # Note - not sure about transpose here
+                # (though it might not matter if rotation isn't included)
+                if intensity_weighting:
+                    weights = np.sqrt(p.data["intensity"][keep, None]) * 0 + 1
                     m = lstsq(
                         qxy_ref * weights,
                         qxy * weights,
                         rcond=None,
                     )[0].T
+                else:
+                    m = lstsq(
+                        qxy_ref,
+                        qxy,
+                        rcond=None,
+                    )[0].T
 
-            # Set values into the infinitesimal strain matrix
-            strain_map.get_slice("e_xx").data[rx, ry] = 1 - m[0, 0]
-            strain_map.get_slice("e_yy").data[rx, ry] = 1 - m[1, 1]
-            strain_map.get_slice("e_xy").data[rx, ry] = -(m[0, 1] + m[1, 0]) / 2.0
-            strain_map.get_slice("theta").data[rx, ry] = (m[0, 1] - m[1, 0]) / 2.0
-
-            # Add finite rotation from ACOM orientation map.
-            # I am not sure about the relative signs here.
-            # Also, maybe I need to add in the mirror operator?
-            if orientation_map.mirror[rx, ry, 0]:
-                strain_map.get_slice("theta").data[rx, ry] += (
-                    orientation_map.angles[rx, ry, 0, 0]
-                    + orientation_map.angles[rx, ry, 0, 2]
-                )
-            else:
-                strain_map.get_slice("theta").data[rx, ry] -= (
-                    orientation_map.angles[rx, ry, 0, 0]
-                    + orientation_map.angles[rx, ry, 0, 2]
-                )
+                # Robust fitting
+                if robust:
+                    for a0 in range(5):
+                        # calculate new weights
+                        qxy_fit = qxy_ref @ m
+                        diff2 = np.sum((qxy_fit - qxy) ** 2, axis=1)
+
+                        weights = np.exp(
+                            diff2 / ((-2 * robust_thresh**2) * np.median(diff2))
+                        )[:, None]
+                        if intensity_weighting:
+                            weights *= np.sqrt(p.data["intensity"][keep, None])
+
+                        # calculate new fits
+                        m = lstsq(
+                            qxy_ref * weights,
+                            qxy * weights,
+                            rcond=None,
+                        )[0].T
+
+                # Set values into the infinitesimal strain matrix
+                strain_map.get_slice("e_xx").data[rx, ry] = 1 - m[0, 0]
+                strain_map.get_slice("e_yy").data[rx, ry] = 1 - m[1, 1]
+                strain_map.get_slice("e_xy").data[rx, ry] = -(m[0, 1] + m[1, 0]) / 2.0
+                strain_map.get_slice("theta").data[rx, ry] = (m[0, 1] - m[1, 0]) / 2.0
+
+                # Add finite rotation from ACOM orientation map.
+                # I am not sure about the relative signs here.
+                # Also, maybe I need to add in the mirror operator?
+                if orientation_map.mirror[rx, ry, 0]:
+                    strain_map.get_slice("theta").data[rx, ry] += (
+                        orientation_map.angles[rx, ry, 0, 0]
+                        + orientation_map.angles[rx, ry, 0, 2]
+                    )
+                else:
+                    strain_map.get_slice("theta").data[rx, ry] -= (
+                        orientation_map.angles[rx, ry, 0, 0]
+                        + orientation_map.angles[rx, ry, 0, 2]
+                    )
 
-        else:
-            strain_map.get_slice("mask").data[rx, ry] = 0.0
+            else:
+                strain_map.get_slice("mask").data[rx, ry] = 0.0
 
     if rotation_range is not None:
         strain_map.get_slice("theta").data[:] = np.mod(
diff --git a/py4DSTEM/process/utils/cluster.py b/py4DSTEM/process/utils/cluster.py
@@ -15,22 +15,49 @@ class Cluster:
     def __init__(
         self,
         datacube,
+        r_space_mask,
     ):
         """
         Args:
-            datacube (py4DSTEM.DataCube):            4D-STEM data
-
+            datacube (py4DSTEM.DataCube):         4D-STEM data
+            r_space_mask (np.ndarray):            Mask in real space to apply background thresholding on the similarity array.
 
         """
-
         self.datacube = datacube
+        self.r_space_mask = r_space_mask
+        self.similarity = None
+        self.similarity_raw = None
+
+    def bg_thresholding(self, r_space_mask,):
+        self.r_space_mask = np.asarray(r_space_mask)
+
+        # if similarity is already computed, apply the thresholding
+        if self.similarity_raw is not None:
+            self.similarity = self._apply_bg_mask(self.similarity_raw)
+
+    def _apply_bg_mask(self, similarity):
+        if self.r_space_mask is None:
+            return similarity
+        return similarity * self.r_space_mask[..., None]
+
 
     def find_similarity(
         self,
-        mask=None,  # by default
+        q_space_mask=None,  
         smooth_sigma = 0,
+        return_similarity = False
     ):
-        # Which neighbors to search
+        
+        """
+        Args:
+            q_space_mask : boolean mask in diffraction (q) space (e.g. an annulus) selecting the pixels of each diffraction pattern to compare
+            smooth_sigma : sigma for Gaussian smoothing of the diffraction patterns before calculating similarity
+            return_similarity : if True, return the similarity array
+        """
+        if self.r_space_mask is None:
+            self.set_mask(r_space_mask)
+        
+        # List of neighbors to search
         # (-1,-1) will be equivalent to (1,1)
         self.dxy = np.array(
             (
@@ -61,8 +88,8 @@ def find_similarity(
             if smooth_sigma > 0:
                 diff_ref = gaussian_filter(diff_ref,smooth_sigma)
             
-            if mask is not None:
-                diff_ref = diff_ref[mask]
+            if q_space_mask is not None:
+                diff_ref = diff_ref[q_space_mask]
 
             norm_diff_ref = np.sqrt(np.sum(diff_ref * diff_ref))
             # diff_ref_mean = np.mean(diff_ref)
@@ -83,18 +110,23 @@ def find_similarity(
                     if smooth_sigma > 0:
                         diff = gaussian_filter(diff,smooth_sigma)
                     
-                    if mask is not None:
-                        diff = diff[mask]
+                    if q_space_mask is not None:
+                        diff = diff[q_space_mask]
         
-                    # image self.similarity with normalized corr: cosine self.similarity?
+                    # image similarity via normalized correlation (cosine similarity)
                     self.similarity[rx, ry, ind] = (
                         np.sum(diff * diff_ref)
                         / np.sqrt(np.sum(diff * diff))
                         / norm_diff_ref
                     )
 
-                    # self.similarity[rx, ry, ind] = np.mean(np.abs(diff - diff_ref)) / diff_ref_mean
+                    
+        self.similarity_raw = self.similarity.copy()
+        self.similarity = self._apply_bg_mask(self.similarity)
 
+        if return_similarity:
+            return self.similarity
+        
     # Create a function to map cluster index to color
     def get_color(self, cluster_index):
         colors = [
@@ -114,33 +146,27 @@ def get_color(self, cluster_index):
     # Find the pixel with the highest self.similarity and start the clustering from there
     def indexing_clusters_all(
         self,
-        # mask,
         threshold,
     ):
-
-        # self.dxy = np.array(
-        #     (
-        #         (-1, -1),
-        #         (-1, 0),
-        #         (-1, 1),
-        #         (0, -1),
-        #         (1, 1),
-        #         (1, 0),
-        #         (1, -1),
-        #         (0, 1),
-        #     )
-        # )
-
+        """
+        Args:
+            threshold : minimum similarity for two neighboring pixels to be assigned to the same cluster
+        """
+        
         sim_averaged = np.mean(self.similarity, axis=2)
 
+        # Mark background pixels as already processed (-1) so they are never picked as cluster seeds
+        sim_averaged[~self.r_space_mask] = -1.0 
+
         # color the pixels with the cluster index
-        # map_cluster = np.zeros((sim_averaged.shape[0],sim_averaged.shape[1]))
         self.cluster_map = -1 * np.ones(
             (sim_averaged.shape[0], sim_averaged.shape[1]), dtype=np.float64
         )
         self.cluster_map_rgb = np.zeros(
             (sim_averaged.shape[0], sim_averaged.shape[1], 4), dtype=np.float64
         )
+       
+        self.cluster_map_rgb[..., 3] = 1.0   #start as opaque black
 
         # store arrays of cluster_indices in a list
         self.cluster_list = []
@@ -156,14 +182,18 @@ def indexing_clusters_all(
             # finding the pixel that has the highest self.similarity among the pixel that hasn't been clustered yet
             # this will be the 'starting pixel' of a new cluster
             rx0, ry0 = np.unravel_index(sim_averaged.argmax(), sim_averaged.shape)
-            # print(rx0, ry0)
+        
+            # Guard: skip this seed if it lies in the background
+            if self.r_space_mask is not None and not self.r_space_mask[rx0, ry0]:
+                sim_averaged[rx0, ry0] = -1  # mark processed so we don't pick it again
+                continue
+
 
             cluster_indices = np.empty((0, 2))
             cluster_indices = (np.append(cluster_indices, [[rx0, ry0]], axis=0)).astype(
                 np.int32
             )
 
-            # map_cluster[rx0, ry0] = cluster_count_ind+1
             self.cluster_map[rx0, ry0] = cluster_count_ind
 
             color = self.get_color(cluster_count_ind + 1)
@@ -182,7 +212,7 @@ def indexing_clusters_all(
                         # counter to check if pixel in the cluster are checked for NN
                         counting_added_pixel += 1
 
-                        # set to -1 as its NN will be checked
+                        # set to -1 since its nearest neighbors (NN) will now be checked
                         sim_averaged[rx0, ry0] = -1
 
                         for ind in range(self.dxy.shape[0]):
@@ -194,12 +224,13 @@ def indexing_clusters_all(
                                 x_ind < self.similarity.shape[0] - 2 and \
                                 y_ind < self.similarity.shape[1] - 2:
 
+                                r_ok = True if self.r_space_mask is None else bool(self.r_space_mask[x_ind, y_ind])
+
                                 # add if the neighbor is similar, but don't add if the neighbor is already in a cluster
                                 if self.similarity[rx0, ry0, ind] >= threshold \
-                                    and self.cluster_map[x_ind, y_ind] == -1:
+                                    and self.cluster_map[x_ind, y_ind] == -1 and r_ok:
 
-                                    # print(cluster_indices)
-                                    # print([[x_ind, y_ind]])
+                                    
                                     cluster_indices = np.append(
                                         cluster_indices, [[x_ind, y_ind]], axis=0
                                     )
@@ -217,9 +248,9 @@ def indexing_clusters_all(
                 if counting_added_pixel == 0:
                     break
 
-            # # single pixel cluster
+            # single pixel cluster
             # if cluster_indices.shape[0] == 1:
-            #     self.cluster_map[cluster_indices[0, 0], cluster_indices[0, 1]] = [
+            #     self.cluster_map_rgb[cluster_indices[0, 0], cluster_indices[0, 1]] = [
             #         0,
             #         0,
             #         0,
@@ -229,7 +260,7 @@ def indexing_clusters_all(
             self.cluster_list.append(cluster_indices)
             cluster_count_ind += 1
 
-        # return cluster_count_ind, self.cluster_list, map_cluster, sim_averaged
+        # return cluster_count_ind, self.cluster_list, self.cluster_map, self.cluster_map_rgb
 
     def create_cluster_cube(
         self,