Lucaslab-Berkeley
diff --git a/‎src/leopard_em/analysis/__init__.py‎
Lines changed: 4 additions & 1 deletion b/‎src/leopard_em/analysis/__init__.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/leopard_em/backend/core_match_template.py‎
Lines changed: 101 additions & 69 deletions b/‎src/leopard_em/backend/core_match_template.py‎
Lines changed: 101 additions & 69 deletions
diff --git a/‎src/leopard_em/backend/core_match_template_distributed.py‎
Lines changed: 20 additions & 16 deletions b/‎src/leopard_em/backend/core_match_template_distributed.py‎
Lines changed: 20 additions & 16 deletions
@@ -6,7 +6,10 @@
     match_template_peaks_to_dict,
 )
 from .pvalue_metric import extract_peaks_and_statistics_p_value
-from .zscore_metric import extract_peaks_and_statistics_zscore, gaussian_noise_zscore_cutoff
+from .zscore_metric import (
+    extract_peaks_and_statistics_zscore,
+    gaussian_noise_zscore_cutoff,
+)
 
 __all__ = [
     "MatchTemplatePeaks",
 
@@ -4,19 +4,21 @@
 # pylint: disable=E1102
 
 import time
+import traceback
 import warnings
 from functools import partial
 from multiprocessing import set_start_method
 from typing import Any, Union
 
 import roma
+import tensordict
 import torch
 import tqdm
 
 from leopard_em.backend.cross_correlation import (
     do_batched_orientation_cross_correlate,
-    do_streamed_orientation_cross_correlate,
     do_batched_orientation_cross_correlate_zipfft,
+    do_streamed_orientation_cross_correlate,
 )
 from leopard_em.backend.distributed import (
     MultiprocessWorkIndexQueue,
@@ -25,17 +27,20 @@
 from leopard_em.backend.process_results import (
     aggregate_distributed_results,
     decode_global_search_index,
+    process_correlation_table,
     scale_mip,
 )
-from leopard_em.backend.utils import do_iteration_statistics_updates_compiled
+from leopard_em.backend.utils import do_iteration_and_correlation_table_updates
 
 DEFAULT_STATISTIC_DTYPE = torch.float32
+CORRELATION_TABLE_THRESHOLD = 5.5
 
 # Turn off gradient calculations by default
 torch.set_grad_enabled(False)
 
 # Set multiprocessing start method to spawn
 set_start_method("spawn", force=True)
+torch.multiprocessing.set_sharing_strategy("file_system")
 
 
 def monitor_match_template_progress(
@@ -78,6 +83,7 @@ def monitor_match_template_progress(
             time.sleep(poll_interval)
     except Exception as e:
         print(f"Error occurred: {e}")
+        traceback.print_exc()
         queue.set_error_flag()
         raise e
     finally:
@@ -156,7 +162,7 @@ def core_match_template(
     orientation_batch_size: int = 1,
     num_cuda_streams: int = 1,
     backend: str = "streamed",
-) -> dict[str, torch.Tensor]:
+) -> dict[str, torch.Tensor | dict | int]:
     """Core function for performing the whole-orientation search.
 
     With the RFFT, the last dimension (fastest dimension) is half the width
@@ -213,7 +219,7 @@ def core_match_template(
 
     Returns
     -------
-    dict[str, torch.Tensor]
+    dict[str, torch.Tensor | dict | int]
         Dictionary containing the following key, value pairs:
 
             - "mip": Maximum intensity projection of the cross-correlation values across
@@ -223,10 +229,12 @@ def core_match_template(
             - "best_theta": Best theta angle for each pixel.
             - "best_psi": Best psi angle for each pixel.
             - "best_defocus": Best defocus value for each pixel.
-            - "best_pixel_size": Best pixel size value for each pixel.
-            - "correlation_sum": Sum of cross-correlation values for each pixel.
-            - "correlation_squared_sum": Sum of squared cross-correlation values for
+            - "correlation_mean": Mean of cross-correlation values for each pixel.
+            - "correlation_table": Processed correlation table with all points in search
+              space and image positions where correlation value exceeded a threshold.
+            - "correlation_variance": Variance of cross-correlation values for
               each pixel.
+            - "total_projections": Total number of cross-correlations computed.
             - "total_orientations": Total number of orientations searched.
             - "total_defocus": Total number of defocus values searched.
     """
@@ -328,7 +336,7 @@ def core_match_template(
     correlation_squared_sum = aggregated_results["correlation_squared_sum"]
 
     # Map from global search index to the best defocus & angles
-    best_phi, best_theta, best_psi, best_defocus = decode_global_search_index(
+    best_phi, best_theta, best_psi, best_defocus, _ = decode_global_search_index(
         best_global_index, pixel_values, defocus_values, euler_angles
     )
 
@@ -341,6 +349,14 @@ def core_match_template(
         total_correlation_positions=total_projections,
     )
 
+    # Process the correlation table into a more interpretable format
+    correlation_table = process_correlation_table(
+        aggregated_results["correlation_table"],
+        pixel_values,
+        defocus_values,
+        euler_angles,
+    )
+
     return {
         "mip": mip,
         "scaled_mip": mip_scaled,
@@ -350,6 +366,7 @@ def core_match_template(
         "best_defocus": best_defocus,
         "correlation_mean": correlation_mean,
         "correlation_variance": correlation_variance,
+        "correlation_table": correlation_table,
         "total_projections": total_projections,
         "total_orientations": euler_angles.shape[0],
         "total_defocus": defocus_values.shape[0],
@@ -372,7 +389,9 @@ def _core_match_template_single_gpu(
     num_cuda_streams: int,
     backend: str,
     device: torch.device,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+) -> tuple[
+    torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, tensordict.TensorDict
+]:
     """Single-GPU call for template matching.
 
     Parameters
@@ -422,11 +441,17 @@ def _core_match_template_single_gpu(
             - correlation_sum: Sum of cross-correlation values for each pixel.
             - correlation_squared_sum: Sum of squared cross-correlation values for
               each pixel.
+            - correlation_table: Table of search indices and image positions where
+              correlation values exceeded a threshold.
     """
     image_shape_real = (image_dft.shape[0], image_dft.shape[1] * 2 - 2)  # adj. for RFFT
-    cross_correlation_shape_valid = (
-        image_shape_real[0] - template_dft.shape[1] + 1,
-        image_shape_real[1] - (template_dft.shape[2] * 2 - 2) + 1,
+    projection_shape_real = (
+        template_dft.shape[1],
+        template_dft.shape[2] * 2 - 2,  # adj. for RFFT
+    )
+    valid_correlation_shape = (
+        image_shape_real[0] - projection_shape_real[0] + 1,
+        image_shape_real[1] - projection_shape_real[1] + 1,
     )
 
     # Create CUDA streams for parallel computation
@@ -459,52 +484,52 @@ def _core_match_template_single_gpu(
     ### Initialize the tracked output statistics ###
     ################################################
 
+    # Correlation table built from 'tensordict' library where any (x, y) positions
+    # in correlation map which surpass the threshold will be added to the table.
+    # Keys in table are:
+    #   - "threshold": float threshold value used for the table.
+    #   - "global_idx": int32 global search index.
+    #   - "pos_x": int32 x position in image where corr value surpassed threshold.
+    #   - "pos_y": int32 y position in image where corr value surpassed threshold.
+    #   - "corr_value": float32 correlation value at (pos_x, pos_y) for the given
+    #                   global index.
+    correlation_table = tensordict.TensorDict(
+        {
+            "threshold": CORRELATION_TABLE_THRESHOLD,
+            "global_idx": torch.tensor([], dtype=torch.int32, device=device),
+            "pos_x": torch.tensor([], dtype=torch.int32, device=device),
+            "pos_y": torch.tensor([], dtype=torch.int32, device=device),
+            "corr_value": torch.tensor([], dtype=torch.float32, device=device),
+        },
+        device=device,
+    )
+    mip = torch.full(
+        size=valid_correlation_shape,
+        fill_value=-float("inf"),
+        dtype=DEFAULT_STATISTIC_DTYPE,
+        device=device,
+    )
+    best_global_index = torch.full(
+        valid_correlation_shape,
+        fill_value=-1,
+        dtype=torch.int32,
+        device=device,
+    )
+    correlation_sum = torch.zeros(
+        size=valid_correlation_shape,
+        dtype=DEFAULT_STATISTIC_DTYPE,
+        device=device,
+    )
+    correlation_squared_sum = torch.zeros(
+        size=valid_correlation_shape,
+        dtype=DEFAULT_STATISTIC_DTYPE,
+        device=device,
+    )
     if backend == "zipfft":
-        mip = torch.full(
-            size=cross_correlation_shape_valid,
-            fill_value=-float("inf"),
-            dtype=DEFAULT_STATISTIC_DTYPE,
-            device=device,
-        )
-        best_global_index = torch.full(
-            cross_correlation_shape_valid,
-            fill_value=-1,
-            dtype=torch.int32,
-            device=device,
-        )
-        correlation_sum = torch.zeros(
-            size=cross_correlation_shape_valid,
-            dtype=DEFAULT_STATISTIC_DTYPE,
-            device=device,
-        )
-        correlation_squared_sum = torch.zeros(
-            size=cross_correlation_shape_valid,
-            dtype=DEFAULT_STATISTIC_DTYPE,
-            device=device,
-        )
         # NOTE: zipFFT expects a pre-transformed, pre-transposed input image FFT
         # Transpose the 'image_dft' along last two dimensions into contiguous layout
         # with shape (..., W // 2 + 1, H)
         image_dft = image_dft.transpose(-2, -1).contiguous()
-        # NOTE: zipFFT does not apply backwards FFT normalization, so we instead apply
-        # it to the input image (does not require addtl. multiplications in loop)
-        image_dft *= (image_shape_real[0] * image_shape_real[1])
-    else:
-        mip = torch.full(
-            size=image_shape_real,
-            fill_value=-float("inf"),
-            dtype=DEFAULT_STATISTIC_DTYPE,
-            device=device,
-        )
-        best_global_index = torch.full(
-            image_shape_real, fill_value=-1, dtype=torch.int32, device=device
-        )
-        correlation_sum = torch.zeros(
-            size=image_shape_real, dtype=DEFAULT_STATISTIC_DTYPE, device=device
-        )
-        correlation_squared_sum = torch.zeros(
-            size=image_shape_real, dtype=DEFAULT_STATISTIC_DTYPE, device=device
-        )
 
     ##################################
     ### Start the orientation loop ###
@@ -563,25 +588,21 @@ def _core_match_template_single_gpu(
                         projective_filters=projective_filters,
                     )
 
-                # Update the tracked statistics
-                do_iteration_statistics_updates_compiled(
+                # Update tracked statistics and correlation table
+                do_iteration_and_correlation_table_updates(
                     cross_correlation=cross_correlation,
                     current_indexes=batch_search_indices,
+                    correlation_table=correlation_table,
                     mip=mip,
                     best_global_index=best_global_index,
                     correlation_sum=correlation_sum,
                     correlation_squared_sum=correlation_squared_sum,
-                    img_h=(
-                        image_shape_real[0]
-                        if backend != "zipfft"
-                        else cross_correlation_shape_valid[0]
-                    ),
-                    img_w=(
-                        image_shape_real[1]
-                        if backend != "zipfft"
-                        else cross_correlation_shape_valid[1]
-                    ),
+                    threshold=CORRELATION_TABLE_THRESHOLD,
+                    valid_shape_h=valid_correlation_shape[0],
+                    valid_shape_w=valid_correlation_shape[1],
+                    needs_valid_cropping=(backend != "zipfft"),
                 )
+
         except Exception as e:
             index_queue.set_error_flag()
             print(f"Error occurred in process {rank}: {e}")
@@ -593,7 +614,13 @@ def _core_match_template_single_gpu(
 
     torch.cuda.synchronize(device)
 
-    return mip, best_global_index, correlation_sum, correlation_squared_sum
+    return (
+        mip,
+        best_global_index,
+        correlation_sum,
+        correlation_squared_sum,
+        correlation_table,
+    )
 
 
 def _core_match_template_multiprocess_wrapper(
@@ -607,9 +634,13 @@ def _core_match_template_multiprocess_wrapper(
 
     See the _core_match_template_single_gpu function for parameter descriptions.
     """
-    mip, best_global_index, correlation_sum, correlation_squared_sum = (
-        _core_match_template_single_gpu(rank, **kwargs)  # type: ignore[arg-type]
-    )
+    (
+        mip,
+        best_global_index,
+        correlation_sum,
+        correlation_squared_sum,
+        correlation_table,
+    ) = _core_match_template_single_gpu(rank, **kwargs)  # type: ignore[arg-type]
 
     # NOTE: Need to send all tensors back to the CPU as numpy arrays for the shared
     # process dictionary. This is a workaround for now
@@ -618,6 +649,7 @@ def _core_match_template_multiprocess_wrapper(
         "best_global_index": best_global_index.cpu().numpy(),
         "correlation_sum": correlation_sum.cpu().numpy(),
         "correlation_squared_sum": correlation_squared_sum.cpu().numpy(),
+        "correlation_table": correlation_table.cpu(),
     }
 
     # Place the results in the shared multi-process manager dictionary so accessible
 
@@ -454,21 +454,25 @@ def core_match_template_distributed(
     ###########################################################
 
     dist.barrier()
-    (mip, best_global_index, correlation_sum, correlation_squared_sum) = (
-        _core_match_template_single_gpu(
-            rank=rank,
-            index_queue=distributed_queue,  # type: ignore
-            image_dft=image_dft,
-            template_dft=template_dft,
-            euler_angles=euler_angles,
-            projective_filters=projective_filters,
-            defocus_values=defocus_values,
-            pixel_values=pixel_values,
-            orientation_batch_size=orientation_batch_size,
-            num_cuda_streams=num_cuda_streams,
-            backend=backend,
-            device=device,
-        )
+    (
+        mip,
+        best_global_index,
+        correlation_sum,
+        correlation_squared_sum,
+        _,  # TODO: include correlation_table in distributed version
+    ) = _core_match_template_single_gpu(
+        rank=rank,
+        index_queue=distributed_queue,  # type: ignore
+        image_dft=image_dft,
+        template_dft=template_dft,
+        euler_angles=euler_angles,
+        projective_filters=projective_filters,
+        defocus_values=defocus_values,
+        pixel_values=pixel_values,
+        orientation_batch_size=orientation_batch_size,
+        num_cuda_streams=num_cuda_streams,
+        backend=backend,
+        device=device,
     )
     dist.barrier()
 
@@ -534,7 +538,7 @@ def core_match_template_distributed(
 
     # Map from global search index to the best defocus & angles
     # pylint: disable=duplicate-code
-    best_phi, best_theta, best_psi, best_defocus = decode_global_search_index(
+    best_phi, best_theta, best_psi, best_defocus, _ = decode_global_search_index(
         best_global_index, pixel_values, defocus_values, euler_angles
     )