Merged
99 commits
43b191d
Single particle dedx diagnosis
Jan 14, 2025
51c4085
single particle reco all points
yeonjaej Jan 14, 2025
8646e14
removed ghost point for dedx calc
yeonjaej Jan 16, 2025
ce67fe4
Dedx Diagnosis
Jan 17, 2025
624039c
Clear pycache
Jan 18, 2025
5c4d398
Update version.py
francois-drielsma Jan 16, 2025
4d76c9e
Single particle dedx diagnosis
Jan 14, 2025
1cd4bd4
now returns number of voxels too
yeonjaej Jan 21, 2025
26f5717
Merge remote-tracking branch 'origin/develop' into dedx
yeonjaej Jan 21, 2025
b73ac87
Added startpoint logging to single shower diagnosis
Jan 23, 2025
6226161
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Jan 23, 2025
8b989d8
Added more diagnosis (startpoint) to shower_dedx_singlep
Jan 23, 2025
f0f729a
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Jan 23, 2025
046c692
get the best matched particle
yeonjaej Jan 24, 2025
f4c23dd
Shower and Interaction add attributes
Jan 28, 2025
d41807a
Temporary fix for duplicate (run, subrun, event) handling
Jan 28, 2025
82e6930
Merged with duplicate handling feature branch
Jan 31, 2025
9293fd0
dedx algorithm uses PCA
yeonjaej Jan 31, 2025
fb46439
Merge remote-tracking branch 'origin/develop' into dedx
yeonjaej Feb 3, 2025
38a304f
Merge conflict due to duplicate handling
Feb 5, 2025
db2bd23
Change hard-coded max_length to 150
Feb 10, 2025
c2be426
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Feb 10, 2025
405ae8d
Merge tag 'v0.2.3' into dedx
yeonjaej Feb 10, 2025
735e7b4
tiny fix in the dedx algo
yeonjaej Feb 12, 2025
2aec1cd
Add option to skip showers in containment cut
Feb 12, 2025
e3f4bab
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Feb 12, 2025
ffdcd73
Fix for flash-matching naming fix
Feb 17, 2025
35f74c2
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Feb 17, 2025
840defe
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Feb 17, 2025
128b9b8
eps = 0.59
yeonjaej Feb 18, 2025
3c06894
Added attributes to particle / interaction used for shower cuts
Feb 19, 2025
fc716fa
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Feb 19, 2025
c91c132
Merge with yjwa dedx
Feb 19, 2025
959eba8
Added additional variables for shower cut
Feb 20, 2025
155b0cc
Remove unused cut parameters
Feb 22, 2025
4b7fcaf
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Feb 22, 2025
25772f4
Option for skipping containment check for showers
Feb 22, 2025
c3f7719
Add new cut variables to particle and interaction objects
Feb 22, 2025
5fe11ba
Added new shower spread and modified processor to have inplace option
Feb 22, 2025
44fc4fd
Merge branch 'develop' of https://github.com/francois-drielsma/spine …
Feb 22, 2025
6a46c8d
Merge branch 'develop' of https://github.com/DeepLearnPhysics/spine i…
Feb 24, 2025
7d9885a
Add option for setting maximum michel ke for shape logic
Feb 25, 2025
3862b9d
Merge with local develop
Feb 25, 2025
c059c03
startpoint not in the voxels, taken care of
yeonjaej Feb 25, 2025
3c5f054
when used simple argument, return dedx value only
yeonjaej Feb 25, 2025
a658bac
added comments
yeonjaej Feb 25, 2025
1cb4ada
DBScan-PCA method tiny bug fix with dx estimation
yeonjaej Feb 26, 2025
3f3a5e0
Added various post-processors for computing cut variables for nue sel…
Feb 26, 2025
d7eef2e
Merge with yjwa/dedx
Feb 26, 2025
eefedd8
Added option to apply different ke thresholds in InteractionTopologyP…
Feb 27, 2025
7f3dd50
Fixed zero division error
Feb 27, 2025
6250f9d
Fixed shower spread processor crashing with tiny weights (zero division)
Feb 27, 2025
e2c73ad
Added weighted pseudovertex finder
Mar 6, 2025
ae7178d
Added alternative vertex finding algorithm processor
Mar 6, 2025
84f821a
Added additional cut variables and relaxed conversion distance
Mar 6, 2025
5b71cb3
Added sequential particle PID thresholding post processor
Mar 6, 2025
892cf80
Added additional attributes for nue diagnosis (temporary)
Mar 6, 2025
94d6558
Resolve merge conflict
Mar 6, 2025
d93da08
Diagnostic attributes (temporary)
Mar 14, 2025
1d3a2b1
Resolve merge conflict
Mar 14, 2025
9dab4a8
Added bragg peak detection
Mar 18, 2025
999c1c7
Resolve merge conflict
Mar 18, 2025
12b953b
Move assert to allow not to provide run ID from the chain in the tran…
francois-drielsma Mar 18, 2025
37adbd2
Merge branch 'develop' of https://github.com/francois-drielsma/spine …
francois-drielsma Mar 18, 2025
85b2438
Minor changes to shower quality checks
Mar 18, 2025
758445e
Added comments
Mar 19, 2025
27cf875
Changed pearsonr calculation to using start_dir, changed michel taggi…
Mar 21, 2025
a07cb2c
Clean vertex post-processing and added comments
Mar 21, 2025
10407cf
Added comments and finalized cut variables for nue analysis
Mar 21, 2025
f516851
Added comments to all shower post-processors
Mar 21, 2025
3ec8809
Clean unused attributes and remove duplicate handling
Mar 21, 2025
dea6737
Further cleaning of duplicate handling (removed)
Mar 21, 2025
5c80248
Completely removed duplicate handling
Mar 21, 2025
4d4fddd
Fix minor bugs and cleaning
Mar 21, 2025
156a31e
Logical error in configuration check of ParticleThresholdProcessor
francois-drielsma Mar 24, 2025
441f08a
Renamed post-processors, refactored reco interaction attributes
Mar 24, 2025
d022dc6
Moved vertexing utils to proper place and added option to use alterna…
Mar 24, 2025
5d93b87
Removed shower_dedx diagnosis uncleaned file
Mar 24, 2025
395ba75
Removed sequential pid thresholding (as the original is already seque…
Mar 24, 2025
7300693
Bug fix in set_visible_devices function
francois-drielsma Mar 25, 2025
0d784df
Remove bug in trunk straightness and remove inplace (that does nothin…
Mar 25, 2025
721165f
Removed cluster_dedx_legacy, which is redundant
Mar 25, 2025
fe2c6af
Changed default bogus values to have only one value
Mar 25, 2025
b44c150
Leading shower was not retrieving showers
Mar 25, 2025
83653fc
Merge branch 'DeepLearnPhysics:develop' into develop
francois-drielsma Mar 25, 2025
c701098
Merge pull request #9 from dkoh0207/develop
francois-drielsma Mar 25, 2025
bb3981f
Add check in transparency calibration module that a run ID is provide…
francois-drielsma Mar 25, 2025
762ee69
Merge branch 'develop' of https://github.com/francois-drielsma/spine …
francois-drielsma Mar 25, 2025
56bf751
First pass at cleaning up new NuE selection tools
francois-drielsma Mar 27, 2025
39c5f34
Fixed track/shower merging post-processor
francois-drielsma Mar 28, 2025
58af198
Reorganized post folder
francois-drielsma Mar 28, 2025
2099f0c
Bug fixes in particle merging (start point + calo KE)
francois-drielsma Mar 29, 2025
75fad59
Removed superfluous post-processor
francois-drielsma Mar 29, 2025
d3ab7ed
Add option to draw arrows/directions in visualization tools
francois-drielsma Mar 31, 2025
4866fef
Avoid in-place modifications of calibration configuration
francois-drielsma Mar 31, 2025
b993a63
Bug fixed when running interaction clustering metric ana script on th…
francois-drielsma Mar 31, 2025
64f95a9
Typo fix in truth fragment loader
francois-drielsma Apr 1, 2025
84d958a
Added basic logical checks for post-processor ordering
francois-drielsma Apr 1, 2025
e94337b
Got rid of derived attributes of reco interactions for cleanliness
francois-drielsma Apr 1, 2025
10 changes: 7 additions & 3 deletions spine/ana/metric/cluster.py
@@ -87,6 +87,7 @@ def __init__(self, obj_type=None, use_objects=False, per_object=True,
         self.label_key = label_key

         # Parse the label_col column, if necessary
+        self.label_col = None
         if label_col is not None:
             self.label_col = enum_factory('cluster', label_col)

@@ -106,7 +107,8 @@ def __init__(self, obj_type=None, use_objects=False, per_object=True,
             keys[label_key] = True
             for obj in self.obj_type:
                 keys[f'{obj}_clusts'] = True
-                keys[f'{obj}_shapes'] = True
+                if obj != 'interaction':
+                    keys[f'{obj}_shapes'] = True

         else:
             keys['points'] = True
@@ -150,7 +152,8 @@ def process(self, data):
             label_col = self.label_col or self.label_cols[obj_type]
             num_points = len(data[self.label_key])
             labels = data[self.label_key][:, label_col]
-            shapes = data[self.label_key][:, SHAPE_COL]
+            if obj_type != 'interaction':
+                shapes = data[self.label_key][:, SHAPE_COL]
             num_truth = len(np.unique(labels[labels > -1]))

         else:
@@ -170,7 +173,8 @@ def process(self, data):
             num_reco = len(data[f'{obj_type}_clusts'])
             for i, index in enumerate(data[f'{obj_type}_clusts']):
                 preds[index] = i
-                shapes[index] = data[f'{obj_type}_shapes'][i]
+                if obj_type != 'interaction':
+                    shapes[index] = data[f'{obj_type}_shapes'][i]

         else:
             # Use clusters from the object indexes
2 changes: 1 addition & 1 deletion spine/build/fragment.py
@@ -337,7 +337,7 @@ def load_truth(self, data):

     def _load_truth(self, truth_fragments, points_label, depositions_label,
                     depositions_q_label=None, points=None, depositions=None,
-                    points_g4=None, depositons_g4=None, sources_label=None,
+                    points_g4=None, depositions_g4=None, sources_label=None,
                     sources=None):
         """Load :class:`TruthFragment` objects from their stored versions.
17 changes: 16 additions & 1 deletion spine/data/out/interaction.py
@@ -6,7 +6,7 @@

 import numpy as np

-from spine.utils.globals import PID_LABELS, PID_TAGS
+from spine.utils.globals import SHOWR_SHP, PID_LABELS, PID_TAGS
 from spine.utils.decorators import inherit_docstring

 from spine.data.neutrino import Neutrino
@@ -308,6 +308,21 @@ def __str__(self):
         """
         return 'Reco' + super().__str__()

+    @property
+    def leading_shower(self):
+        """Leading primary shower of this interaction.
+
+        Returns
+        -------
+        RecoParticle
+            Primary shower with the highest kinetic energy
+        """
+        showers = [part for part in self.primary_particles if part.shape == SHOWR_SHP]
+        if len(showers) == 0:
+            return None
+
+        return max(showers, key=lambda x: x.ke)
+

 @dataclass(eq=False)
 @inherit_docstring(TruthBase, InteractionBase)
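The new `leading_shower` property reduces to a max-by-key selection over primary showers. A minimal standalone sketch of that logic, using a hypothetical `Part` dataclass in place of `RecoParticle` and an assumed value for the `SHOWR_SHP` shape code:

```python
from dataclasses import dataclass

SHOWR_SHP = 0  # assumed stand-in for the shower shape code


@dataclass
class Part:
    shape: int
    ke: float  # kinetic energy


def leading_shower(primaries):
    """Return the primary shower with the highest kinetic energy, or None."""
    showers = [p for p in primaries if p.shape == SHOWR_SHP]
    if not showers:
        return None
    return max(showers, key=lambda p: p.ke)


parts = [Part(SHOWR_SHP, 120.0), Part(1, 500.0), Part(SHOWR_SHP, 340.0)]
print(leading_shower(parts).ke)  # -> 340.0
```

Returning `None` rather than raising lets downstream selection cuts skip interactions that have no primary shower.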
104 changes: 73 additions & 31 deletions spine/data/out/particle.py
@@ -7,7 +7,7 @@
 from scipy.spatial.distance import cdist

 from spine.utils.globals import (
-    TRACK_SHP, SHAPE_LABELS, PID_LABELS, PID_MASSES, PID_TO_PDG)
+    SHOWR_SHP, TRACK_SHP, SHAPE_LABELS, PID_LABELS, PID_MASSES, PID_TO_PDG)
 from spine.utils.decorators import inherit_docstring

 from spine.data.particle import Particle
@@ -212,19 +212,28 @@ class RecoParticle(ParticleBase, RecoBase):
         (M) List of indexes of PPN points associated with this particle
     ppn_points : np.ndarray
         (M, 3) List of PPN points tagged to this particle
-    vertex_distance: float
+    vertex_distance : float
         Set-to-point distance between all particle points and the parent
-        interaction vertex. (untis of cm)
-    shower_split_angle: float
-        Estimate of the opening angle of the shower. If particle is not a
-        shower, then this is set to -1. (units of degrees)
+        interaction vertex position in cm
+    start_dedx : float
+        dE/dx around a user-defined neighborhood of the start point in MeV/cm
+    start_straightness : float
+        Explained variance ratio of the beginning of the particle
+    directional_spread : float
+        Estimate of the angular spread of the particle (cosine spread)
+    axial_spread : float
+        Pearson correlation coefficient of the axial profile of the particle
+        w.r.t. the distance from its start point
     """
     pid_scores: np.ndarray = None
     primary_scores: np.ndarray = None
     ppn_ids: np.ndarray = None
     ppn_points: np.ndarray = None
     vertex_distance: float = -1.
-    shower_split_angle: float = -1.
+    start_dedx: float = -1.
+    start_straightness: float = -1.
+    directional_spread: float = -1.
+    axial_spread: float = -np.inf

     # Fixed-length attributes
     _fixed_length_attrs = (
@@ -265,19 +274,34 @@ def __str__(self):
     def merge(self, other):
         """Merge another particle instance into this one.

-        This method can only merge two track objects with well defined start
-        and end points.
+        The merging strategy differs depending on the particle shapes
+        merged together. There are two categories:
+        - Track + track
+          - The start/end points are produced by finding the combination of points
+            which are farthest away from each other (one from each constituent)
+          - The primary scores/primary status match that of the constituent
+            particle with the highest primary score
+          - The PID scores/PID value match that of the constituent particle with
+            the highest primary score
+        - Shower + track
+          - The track is always merged into the shower, not the other way around
+          - The start point of the shower is updated to be the track end point
+            further away from the current shower start point
+          - The primary scores/primary status match that of the constituent
+            particle with the highest primary score
+          - The PID scores/PID value is kept unchanged (that of the shower)

         Parameters
         ----------
         other : RecoParticle
             Other reconstructed particle to merge into this one
         """
-        # Check that both particles being merged are tracks
-        assert self.shape == TRACK_SHP and other.shape == TRACK_SHP, (
-            "Can only merge two track particles.")
+        # Check that the particles being merged fit one of two categories
+        assert (self.shape in (SHOWR_SHP, TRACK_SHP) and
+                other.shape == TRACK_SHP), (
+            "Can only merge two track particles or a track into a shower.")

-        # Check that neither particle has yet been matches
+        # Check that neither particle has yet been matched
         assert not self.is_matched and not other.is_matched, (
             "Cannot merge particles that already have matches.")
@@ -287,27 +311,45 @@ def merge(self, other):
             setattr(self, attr, val)

         # Select end points and end directions appropriately
-        points_i = np.vstack([self.start_point, self.end_point])
-        points_j = np.vstack([other.start_point, other.end_point])
-        dirs_i = np.vstack([self.start_dir, self.end_dir])
-        dirs_j = np.vstack([other.start_dir, other.end_dir])
-
-        dists = cdist(points_i, points_j)
-        max_index = np.argmax(dists)
-        max_i, max_j = max_index//2, max_index%2
-
-        self.start_point = points_i[max_i]
-        self.end_point = points_j[max_j]
-        self.start_dir = dirs_i[max_i]
-        self.end_dir = dirs_j[max_j]
+        if self.shape == TRACK_SHP:
+            # If two tracks, pick points furthest apart
+            points_i = np.vstack([self.start_point, self.end_point])
+            points_j = np.vstack([other.start_point, other.end_point])
+            dirs_i = np.vstack([self.start_dir, self.end_dir])
+            dirs_j = np.vstack([other.start_dir, other.end_dir])
+
+            dists = cdist(points_i, points_j)
+            max_index = np.argmax(dists)
+            max_i, max_j = max_index//2, max_index%2
+
+            self.start_point = points_i[max_i]
+            self.end_point = points_j[max_j]
+            self.start_dir = dirs_i[max_i]
+            self.end_dir = dirs_j[max_j]
+
+        else:
+            # If a shower and a track, pick track point furthest from shower
+            points_i = self.start_point.reshape(-1, 3)
+            points_j = np.vstack([other.start_point, other.end_point])
+            dirs_j = np.vstack([other.start_dir, other.end_dir])
+
+            dists = cdist(points_i, points_j)
+            max_j = np.argmax(dists)
+
+            self.start_point = points_j[max_j]
+            self.start_dir = dirs_j[max_j]

-        # If one of the two particles is a primary, the new one is
+        # Match primary/PID to the most primary particle
         if other.primary_scores[-1] > self.primary_scores[-1]:
             self.primary_scores = other.primary_scores
             self.is_primary = other.is_primary
+            if self.shape == TRACK_SHP:
+                self.pid_scores = other.pid_scores
+                self.pid = other.pid

-        # For PID, pick the most confident prediction (could be better...)
-        if np.max(other.pid_scores) > np.max(self.pid_scores):
-            self.pid_scores = other.pid_scores
+        # If the calorimetric KEs have been computed, can safely sum
+        if other.calo_ke > 0.:
+            self.calo_ke += other.calo_ke

     @property
     def mass(self):
@@ -387,12 +429,12 @@ def momentum(self, momentum):
def reco_ke(self):
"""Alias for `ke`, to match nomenclature in truth."""
return self.ke

@property
def reco_momentum(self):
"""Alias for `momentum`, to match nomenclature in truth."""
return self.momentum

@property
def reco_length(self):
"""Alias for `length`, to match nomenclature in truth."""
@@ -402,7 +444,7 @@ def reco_length(self):
def reco_start_dir(self):
"""Alias for `start_dir`, to match nomenclature in truth."""
return self.start_dir

@property
def reco_end_dir(self):
"""Alias for `end_dir`, to match nomenclature in truth."""
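In the track+track branch of `merge`, the `argmax`-over-`cdist` index arithmetic (`max_index//2`, `max_index%2`) picks which endpoint of each constituent survives. A self-contained sketch of just that selection step (a hypothetical helper, not the full method):

```python
import numpy as np
from scipy.spatial.distance import cdist


def farthest_endpoints(start_i, end_i, start_j, end_j):
    """Pick one endpoint from each track such that the selected pair is
    maximally separated, mirroring the track+track merge logic."""
    points_i = np.vstack([start_i, end_i])   # (2, 3) endpoints of track i
    points_j = np.vstack([start_j, end_j])   # (2, 3) endpoints of track j
    dists = cdist(points_i, points_j)        # (2, 2) pairwise distances
    max_index = np.argmax(dists)             # flat index of the largest entry
    max_i, max_j = max_index // 2, max_index % 2
    return points_i[max_i], points_j[max_j]


a, b = farthest_endpoints([0., 0., 0.], [1., 0., 0.],
                          [5., 0., 0.], [9., 0., 0.])
print(a, b)  # -> [0. 0. 0.] [9. 0. 0.]
```

The merged track then runs between the two selected points, with the intermediate endpoints absorbed into its interior.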
14 changes: 12 additions & 2 deletions spine/driver.py
@@ -124,7 +124,8 @@ def __init__(self, cfg, rank=None):
             assert self.model is None or self.unwrap, (
                     "Must unwrap the model output to run post-processors.")
             self.watch.initialize('post')
-            self.post = PostManager(post, parent_path=self.parent_path)
+            self.post = PostManager(
+                    post, post_list=self.post_list, parent_path=self.parent_path)

         # Initialize the analysis scripts
         self.ana = None
@@ -354,12 +355,21 @@ def initialize_io(self, loader=None, reader=None, writer=None):
             self.watch.initialize('unwrap')
             self.unwrapper = Unwrapper(geometry=geo)

+            # If working from LArCV files, no post-processor was yet run
+            self.post_list = ()
+
         else:
             # Initialize the reader
             self.watch.initialize('read')
             self.reader = reader_factory(reader)
             self.iter_per_epoch = len(self.reader)

+            # Fetch the list of previously run post-processors
+            # TODO: this only works with two runs in a row, not 3 and above
+            self.post_list = None
+            if self.reader.cfg is not None:
+                self.post_list = tuple(self.reader.cfg['post'])
+
             # Fetch an appropriate common prefix for all input files
             self.log_prefix, self.output_prefix = self.get_prefixes(
                     self.reader.file_paths, self.split_output)
@@ -448,7 +458,7 @@ def get_prefixes(file_paths, split_output):
             log_prefix += f'--{suffix}'

         # Truncate file names that are too long
-        max_length = 230
+        max_length = 150
         if len(log_prefix) > max_length:
             log_prefix = log_prefix[:max_length-3] + '---'

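The `max_length` change (230 → 150) only tightens the truncation step that follows it. A sketch of that behavior, extracted into a hypothetical `truncate_prefix` helper:

```python
def truncate_prefix(log_prefix, max_length=150):
    """Truncate an over-long output prefix, marking the cut with '---'."""
    if len(log_prefix) > max_length:
        # Reserve three characters for the truncation marker
        log_prefix = log_prefix[:max_length - 3] + '---'
    return log_prefix


print(len(truncate_prefix('x' * 300)))   # -> 150
print(truncate_prefix('short_prefix'))   # -> short_prefix
```

Truncated names always end in `---`, so downstream tooling can tell a shortened prefix from a naturally short one.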
6 changes: 6 additions & 0 deletions spine/post/base.py
@@ -30,9 +30,15 @@ class PostBase(ABC):
     # Units in which the post-processor expects objects to be expressed in
     units = 'cm'

+    # Whether this post-processor needs to know where the configuration lives
+    need_parent_path = False
+
     # Set of data keys needed for this post-processor to operate
     _keys = ()

+    # Set of post-processors which must be run before this one is
+    _upstream = ()
+
     # List of recognized object types
     _obj_types = ('fragment', 'particle', 'interaction')
7 changes: 3 additions & 4 deletions spine/post/factories.py
@@ -2,11 +2,11 @@

 from spine.utils.factory import module_dict, instantiate

-from . import reco, metric, optical, crt, trigger
+from . import reco, truth, optical, crt, trigger

 # Build a dictionary of available calibration modules
 POST_DICT = {}
-for module in [reco, metric, optical, crt, trigger]:
+for module in [reco, truth, optical, crt, trigger]:
     POST_DICT.update(**module_dict(module))


@@ -29,8 +29,7 @@ def post_processor_factory(name, cfg, parent_path=None):
     cfg['name'] = name

     # Instantiate the post-processor module
-    # TODO: This is hacky, fix it
-    if name == 'flash_match':
+    if name in POST_DICT and POST_DICT[name].need_parent_path:
         return instantiate(POST_DICT, cfg, parent_path=parent_path)
     else:
         return instantiate(POST_DICT, cfg)
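This refactor replaces the hard-coded `flash_match` name check with a class-level `need_parent_path` flag, so any processor can opt in. A minimal sketch of the dispatch pattern with toy classes (not the actual spine modules):

```python
class PostBase:
    # Class-level flag consulted by the factory instead of a name check
    need_parent_path = False


class FlashMatch(PostBase):
    need_parent_path = True

    def __init__(self, parent_path=None):
        self.parent_path = parent_path


class VertexFinder(PostBase):
    pass


POST_DICT = {'flash_match': FlashMatch, 'vertex': VertexFinder}


def post_processor_factory(name, parent_path=None):
    """Instantiate a post-processor, forwarding parent_path only if needed."""
    cls = POST_DICT[name]
    if cls.need_parent_path:
        return cls(parent_path=parent_path)
    return cls()


proc = post_processor_factory('flash_match', parent_path='/path/to/cfg')
print(proc.parent_path)  # -> /path/to/cfg
```

Keeping the flag on the base class means new processors declare their own requirements, and the factory never needs editing again.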
26 changes: 18 additions & 8 deletions spine/post/manager.py
@@ -17,35 +17,45 @@ class PostManager:
     It loads all the post-processor objects once and feeds them data.
     """

-    def __init__(self, cfg, parent_path=None):
+    def __init__(self, cfg, post_list=None, parent_path=None):
         """Initialize the post-processing manager.

         Parameters
         ----------
         cfg : dict
             Post-processor configurations
+        post_list : List[str], optional
+            List of post-processors which have already been run
         parent_path : str, optional
             Path to the analysis tools configuration file
         """
         # Loop over the post-processor modules and get their priorities
         cfg = deepcopy(cfg)
         keys = np.array(list(cfg.keys()))
         priorities = -np.ones(len(keys), dtype=np.int32)
-        for i, k in enumerate(keys):
-            if 'priority' in cfg[k]:
-                priorities[i] = cfg[k].pop('priority')
+        for i, key in enumerate(keys):
+            if 'priority' in cfg[key]:
+                priorities[i] = cfg[key].pop('priority')

         # Add the modules to a processor list in decreasing order of priority
         self.watch = StopwatchManager()
         self.modules = OrderedDict()
         keys = keys[np.argsort(-priorities)]
-        for k in keys:
+        for key in keys:
             # Profile the module
-            self.watch.initialize(k)
+            self.watch.initialize(key)

             # Append
-            self.modules[k] = post_processor_factory(
-                    k, cfg[k], parent_path=parent_path)
+            self.modules[key] = post_processor_factory(
+                    key, cfg[key], parent_path=parent_path)
+
+            # Check dependencies
+            if post_list is not None:
+                ups_post = tuple(self.modules)
+                for post in self.modules[key]._upstream:
+                    assert post in (post_list + ups_post), (
+                            f"Post-processor `{key}` is missing an essential "
+                            f"upstream post-processor: `{post}`.")

     def __call__(self, data):
         """Pass one batch of data through the post-processors.
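The priority ordering in `PostManager.__init__` is a descending `argsort` with a default of -1 for unprioritized modules. A standalone sketch of just that ordering step:

```python
import numpy as np


def order_by_priority(cfg):
    """Return module names in decreasing priority; modules without an
    explicit 'priority' entry default to -1 and therefore run last."""
    keys = np.array(list(cfg.keys()))
    priorities = -np.ones(len(keys), dtype=np.int32)
    for i, key in enumerate(keys):
        if 'priority' in cfg[key]:
            priorities[i] = cfg[key]['priority']
    # Negate so argsort yields decreasing priority
    return [str(k) for k in keys[np.argsort(-priorities)]]


cfg = {'calo': {}, 'vertex': {'priority': 2}, 'direction': {'priority': 5}}
print(order_by_priority(cfg))  # -> ['direction', 'vertex', 'calo']
```

The new `_upstream` dependency check then runs in this order: each module's prerequisites must appear either in `post_list` (processors run in a previous pass) or earlier in the ordered `modules` dict.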
3 changes: 3 additions & 0 deletions spine/post/optical/flash_matching.py
@@ -25,6 +25,9 @@ class FlashMatchProcessor(PostBase):
     # Alternative allowed names of the post-processor
     aliases = ('run_flash_matching',)

+    # Whether this post-processor needs to know where the configuration lives
+    need_parent_path = True
+
     def __init__(self, flash_key, volume, ref_volume_id=None,
                  method='likelihood', detector=None, geometry_file=None,
                  run_mode='reco', truth_point_mode='points',
2 changes: 1 addition & 1 deletion spine/post/reco/__init__.py
@@ -11,5 +11,5 @@
 from .calo import *
 from .pid import *
 from .kinematics import *
-from .label import *
 from .shower import *
+from .topology import *