diff --git a/spine/ana/metric/cluster.py b/spine/ana/metric/cluster.py index fa37d45c..d15fd06a 100644 --- a/spine/ana/metric/cluster.py +++ b/spine/ana/metric/cluster.py @@ -87,6 +87,7 @@ def __init__(self, obj_type=None, use_objects=False, per_object=True, self.label_key = label_key # Parse the label_col column, if necessary + self.label_col = None if label_col is not None: self.label_col = enum_factory('cluster', label_col) @@ -106,7 +107,8 @@ def __init__(self, obj_type=None, use_objects=False, per_object=True, keys[label_key] = True for obj in self.obj_type: keys[f'{obj}_clusts'] = True - keys[f'{obj}_shapes'] = True + if obj != 'interaction': + keys[f'{obj}_shapes'] = True else: keys['points'] = True @@ -150,7 +152,8 @@ def process(self, data): label_col = self.label_col or self.label_cols[obj_type] num_points = len(data[self.label_key]) labels = data[self.label_key][:, label_col] - shapes = data[self.label_key][:, SHAPE_COL] + if obj_type != 'interaction': + shapes = data[self.label_key][:, SHAPE_COL] num_truth = len(np.unique(labels[labels > -1])) else: @@ -170,7 +173,8 @@ def process(self, data): num_reco = len(data[f'{obj_type}_clusts']) for i, index in enumerate(data[f'{obj_type}_clusts']): preds[index] = i - shapes[index] = data[f'{obj_type}_shapes'][i] + if obj_type != 'interaction': + shapes[index] = data[f'{obj_type}_shapes'][i] else: # Use clusters from the object indexes diff --git a/spine/build/fragment.py b/spine/build/fragment.py index 3bfe13dc..93b2c1ad 100644 --- a/spine/build/fragment.py +++ b/spine/build/fragment.py @@ -337,7 +337,7 @@ def load_truth(self, data): def _load_truth(self, truth_fragments, points_label, depositions_label, depositions_q_label=None, points=None, depositions=None, - points_g4=None, depositons_g4=None, sources_label=None, + points_g4=None, depositions_g4=None, sources_label=None, sources=None): """Load :class:`TruthFragment` objects from their stored versions. 
diff --git a/spine/data/out/interaction.py b/spine/data/out/interaction.py index 212b30fc..d498e91a 100644 --- a/spine/data/out/interaction.py +++ b/spine/data/out/interaction.py @@ -6,7 +6,7 @@ import numpy as np -from spine.utils.globals import PID_LABELS, PID_TAGS +from spine.utils.globals import SHOWR_SHP, PID_LABELS, PID_TAGS from spine.utils.decorators import inherit_docstring from spine.data.neutrino import Neutrino @@ -308,6 +308,21 @@ def __str__(self): """ return 'Reco' + super().__str__() + @property + def leading_shower(self): + """Leading primary shower of this interaction. + + Returns + ------- + RecoParticle + Primary shower with the highest kinetic energy + """ + showers = [part for part in self.primary_particles if part.shape == SHOWR_SHP] + if len(showers) == 0: + return None + + return max(showers, key=lambda x: x.ke) + @dataclass(eq=False) @inherit_docstring(TruthBase, InteractionBase) diff --git a/spine/data/out/particle.py b/spine/data/out/particle.py index b3af6d9a..3abddb27 100644 --- a/spine/data/out/particle.py +++ b/spine/data/out/particle.py @@ -7,7 +7,7 @@ from scipy.spatial.distance import cdist from spine.utils.globals import ( - TRACK_SHP, SHAPE_LABELS, PID_LABELS, PID_MASSES, PID_TO_PDG) + SHOWR_SHP, TRACK_SHP, SHAPE_LABELS, PID_LABELS, PID_MASSES, PID_TO_PDG) from spine.utils.decorators import inherit_docstring from spine.data.particle import Particle @@ -212,19 +212,28 @@ class RecoParticle(ParticleBase, RecoBase): (M) List of indexes of PPN points associated with this particle ppn_points : np.ndarray (M, 3) List of PPN points tagged to this particle - vertex_distance: float + vertex_distance : float Set-to-point distance between all particle points and the parent - interaction vertex. (untis of cm) - shower_split_angle: float - Estimate of the opening angle of the shower. If particle is not a - shower, then this is set to -1. 
(units of degrees) + interaction vertex position in cm + start_dedx : float + dE/dx around a user-defined neighborhood of the start point in MeV/cm + start_straightness : float + Explained variance ratio of the beginning of the particle + directional_spread : float + Estimate of the angular spread of the particle (cosine spread) + axial_spread : float + Pearson correlation coefficient of the axial profile of the particle + w.r.t. to the distance from its start point """ pid_scores: np.ndarray = None primary_scores: np.ndarray = None ppn_ids: np.ndarray = None ppn_points: np.ndarray = None vertex_distance: float = -1. - shower_split_angle: float = -1. + start_dedx: float = -1. + start_straightness: float = -1. + directional_spread: float = -1. + axial_spread: float = -np.inf # Fixed-length attributes _fixed_length_attrs = ( @@ -265,19 +274,34 @@ def __str__(self): def merge(self, other): """Merge another particle instance into this one. - This method can only merge two track objects with well defined start - and end points. + The merging strategy differs depending on the the particle shapes + merged together. 
There are two categories: + - Track + track + - The start/end points are produced by finding the combination of points + which are farthest away from each other (one from each constituent) + - The primary scores/primary status match that of the constituent + particle with the highest primary score + - The PID scores/PID value match that of the constituent particle with + the highest primary score + - Shower + Track + - The track is always merged into the shower, not the other way around + - The start point of the shower is updated to be the track end point + further away from the current shower start point + - The primary scores/primary status match that of the constituent + particle with the highest primary score + - The PID scores/PID value is kept unchanged (that of the shower) Parameters ---------- other : RecoParticle Other reconstructed particle to merge into this one """ - # Check that both particles being merged are tracks - assert self.shape == TRACK_SHP and other.shape == TRACK_SHP, ( - "Can only merge two track particles.") + # Check that the particles being merged fit one of two categories + assert (self.shape in (SHOWR_SHP, TRACK_SHP) and + other.shape == TRACK_SHP), ( + "Can only merge two track particles or a track into a shower.") - # Check that neither particle has yet been matches + # Check that neither particle has yet been matched assert not self.is_matched and not other.is_matched, ( "Cannot merge particles that already have matches.") @@ -287,27 +311,45 @@ def merge(self, other): setattr(self, attr, val) # Select end points and end directions appropriately - points_i = np.vstack([self.start_point, self.end_point]) - points_j = np.vstack([other.start_point, other.end_point]) - dirs_i = np.vstack([self.start_dir, self.end_dir]) - dirs_j = np.vstack([other.start_dir, other.end_dir]) + if self.shape == TRACK_SHP: + # If two tracks, pick points furthest apart + points_i = np.vstack([self.start_point, self.end_point]) + points_j = 
np.vstack([other.start_point, other.end_point]) + dirs_i = np.vstack([self.start_dir, self.end_dir]) + dirs_j = np.vstack([other.start_dir, other.end_dir]) + + dists = cdist(points_i, points_j) + max_index = np.argmax(dists) + max_i, max_j = max_index//2, max_index%2 + + self.start_point = points_i[max_i] + self.end_point = points_j[max_j] + self.start_dir = dirs_i[max_i] + self.end_dir = dirs_j[max_j] - dists = cdist(points_i, points_j) - max_index = np.argmax(dists) - max_i, max_j = max_index//2, max_index%2 + else: + # If a shower and a track, pick track point furthest from shower + points_i = self.start_point.reshape(-1, 3) + points_j = np.vstack([other.start_point, other.end_point]) + dirs_j = np.vstack([other.start_dir, other.end_dir]) + + dists = cdist(points_i, points_j) + max_j = np.argmax(dists) - self.start_point = points_i[max_i] - self.end_point = points_j[max_j] - self.start_dir = dirs_i[max_i] - self.end_dir = dirs_j[max_j] + self.start_point = points_j[max_j] + self.start_dir = dirs_j[max_j] - # If one of the two particles is a primary, the new one is + # Match primary/PID to the most primary particle if other.primary_scores[-1] > self.primary_scores[-1]: self.primary_scores = other.primary_scores + self.is_primary = other.is_primary + if self.shape == TRACK_SHP: + self.pid_scores = other.pid_scores + self.pid = other.pid - # For PID, pick the most confident prediction (could be better...) 
- if np.max(other.pid_scores) > np.max(self.pid_scores): - self.pid_scores = other.pid_scores + # If the calorimetric KEs have been computed, can safely sum + if other.calo_ke > 0.: + self.calo_ke += other.calo_ke @property def mass(self): @@ -387,12 +429,12 @@ def momentum(self, momentum): def reco_ke(self): """Alias for `ke`, to match nomenclature in truth.""" return self.ke - + @property def reco_momentum(self): """Alias for `momentum`, to match nomenclature in truth.""" return self.momentum - + @property def reco_length(self): """Alias for `length`, to match nomenclature in truth.""" @@ -402,7 +444,7 @@ def reco_length(self): def reco_start_dir(self): """Alias for `start_dir`, to match nomenclature in truth.""" return self.start_dir - + @property def reco_end_dir(self): """Alias for `end_dir`, to match nomenclature in truth.""" diff --git a/spine/driver.py b/spine/driver.py index 10a06cf3..2ff474b8 100644 --- a/spine/driver.py +++ b/spine/driver.py @@ -124,7 +124,8 @@ def __init__(self, cfg, rank=None): assert self.model is None or self.unwrap, ( "Must unwrap the model output to run post-processors.") self.watch.initialize('post') - self.post = PostManager(post, parent_path=self.parent_path) + self.post = PostManager( + post, post_list=self.post_list, parent_path=self.parent_path) # Initialize the analysis scripts self.ana = None @@ -354,12 +355,21 @@ def initialize_io(self, loader=None, reader=None, writer=None): self.watch.initialize('unwrap') self.unwrapper = Unwrapper(geometry=geo) + # If working from LArCV files, no post-processor was yet run + self.post_list = () + else: # Initialize the reader self.watch.initialize('read') self.reader = reader_factory(reader) self.iter_per_epoch = len(self.reader) + # Fetch the list of previously run post-processors + # TODO: this only works with two runs in a row, not 3 and above + self.post_list = None + if self.reader.cfg is not None: + self.post_list = tuple(self.reader.cfg['post']) + # Fetch an appropriate common 
prefix for all input files self.log_prefix, self.output_prefix = self.get_prefixes( self.reader.file_paths, self.split_output) @@ -448,7 +458,7 @@ def get_prefixes(file_paths, split_output): log_prefix += f'--{suffix}' # Truncate file names that are too long - max_length = 230 + max_length = 150 if len(log_prefix) > max_length: log_prefix = log_prefix[:max_length-3] + '---' diff --git a/spine/post/base.py b/spine/post/base.py index e4d6214c..5b316190 100644 --- a/spine/post/base.py +++ b/spine/post/base.py @@ -30,9 +30,15 @@ class PostBase(ABC): # Units in which the post-processor expects objects to be expressed in units = 'cm' + # Whether this post-processor needs to know where the configuration lives + need_parent_path = False + # Set of data keys needed for this post-processor to operate _keys = () + # Set of post-processors which must be run before this one is + _upstream = () + # List of recognized object types _obj_types = ('fragment', 'particle', 'interaction') diff --git a/spine/post/factories.py b/spine/post/factories.py index 1d8acb8c..5746032f 100644 --- a/spine/post/factories.py +++ b/spine/post/factories.py @@ -2,11 +2,11 @@ from spine.utils.factory import module_dict, instantiate -from . import reco, metric, optical, crt, trigger +from . 
import reco, truth, optical, crt, trigger # Build a dictionary of available calibration modules POST_DICT = {} -for module in [reco, metric, optical, crt, trigger]: +for module in [reco, truth, optical, crt, trigger]: POST_DICT.update(**module_dict(module)) @@ -29,8 +29,7 @@ def post_processor_factory(name, cfg, parent_path=None): cfg['name'] = name # Instantiate the post-processor module - # TODO: This is hacky, fix it - if name == 'flash_match': + if name in POST_DICT and POST_DICT[name].need_parent_path: return instantiate(POST_DICT, cfg, parent_path=parent_path) else: return instantiate(POST_DICT, cfg) diff --git a/spine/post/manager.py b/spine/post/manager.py index d28cbf8d..d3e92cab 100644 --- a/spine/post/manager.py +++ b/spine/post/manager.py @@ -17,13 +17,15 @@ class PostManager: It loads all the post-processor objects once and feeds them data. """ - def __init__(self, cfg, parent_path=None): + def __init__(self, cfg, post_list=None, parent_path=None): """Initialize the post-processing manager. 
Parameters ---------- cfg : dict Post-processor configurations + post_list : List[str], optional + List of post-processors which have already been run parent_path : str, optional Path to the analysis tools configuration file """ @@ -31,21 +33,29 @@ def __init__(self, cfg, parent_path=None): cfg = deepcopy(cfg) keys = np.array(list(cfg.keys())) priorities = -np.ones(len(keys), dtype=np.int32) - for i, k in enumerate(keys): - if 'priority' in cfg[k]: - priorities[i] = cfg[k].pop('priority') + for i, key in enumerate(keys): + if 'priority' in cfg[key]: + priorities[i] = cfg[key].pop('priority') # Add the modules to a processor list in decreasing order of priority self.watch = StopwatchManager() self.modules = OrderedDict() keys = keys[np.argsort(-priorities)] - for k in keys: + for key in keys: # Profile the module - self.watch.initialize(k) + self.watch.initialize(key) # Append - self.modules[k] = post_processor_factory( - k, cfg[k], parent_path=parent_path) + self.modules[key] = post_processor_factory( + key, cfg[key], parent_path=parent_path) + + # Check dependencies + if post_list is not None: + ups_post = tuple(self.modules) + for post in self.modules[key]._upstream: + assert post in (post_list + ups_post), ( + f"Post-processor `{key}` is missing an essential " + f"upstream post-processor: `{post}`.") def __call__(self, data): """Pass one batch of data through the post-processors. 
diff --git a/spine/post/optical/flash_matching.py b/spine/post/optical/flash_matching.py index 9d9e829d..ed12a491 100644 --- a/spine/post/optical/flash_matching.py +++ b/spine/post/optical/flash_matching.py @@ -25,6 +25,9 @@ class FlashMatchProcessor(PostBase): # Alternative allowed names of the post-processor aliases = ('run_flash_matching',) + # Whether this post-processor needs to know where the configuration lives + need_parent_path = True + def __init__(self, flash_key, volume, ref_volume_id=None, method='likelihood', detector=None, geometry_file=None, run_mode='reco', truth_point_mode='points', diff --git a/spine/post/reco/__init__.py b/spine/post/reco/__init__.py index e876e524..0c7b07eb 100644 --- a/spine/post/reco/__init__.py +++ b/spine/post/reco/__init__.py @@ -11,5 +11,5 @@ from .calo import * from .pid import * from .kinematics import * -from .label import * from .shower import * +from .topology import * diff --git a/spine/post/reco/cathode_cross.py b/spine/post/reco/cathode_cross.py index 0c628475..c375a482 100644 --- a/spine/post/reco/cathode_cross.py +++ b/spine/post/reco/cathode_cross.py @@ -30,6 +30,9 @@ class CathodeCrosserProcessor(PostBase): # Alternative allowed names of the post-processor aliases = ('find_cathode_crossers',) + # Set of post-processors which must be run before this one is + _upstream = ('direction',) + def __init__(self, crossing_point_tolerance, offset_tolerance, angle_tolerance, adjust_crossers=True, merge_crossers=True, detector=None, geometry_file=None, run_mode='reco', diff --git a/spine/post/reco/direction.py b/spine/post/reco/direction.py index 5676f0b2..f9e18fc0 100644 --- a/spine/post/reco/direction.py +++ b/spine/post/reco/direction.py @@ -23,23 +23,22 @@ class DirectionProcessor(PostBase): # Alternative allowed names of the post-processor aliases = ('reconstruct_directions',) - def __init__(self, neighborhood_radius=-1, optimize=True, - obj_type='particle', truth_point_mode='points', - run_mode='both'): - """Store 
the particle direction recosntruction parameters. + def __init__(self, radius=-1, optimize=True, obj_type='particle', + truth_point_mode='points', run_mode='both'): + """Store the particle direction reconstruction parameters. Parameters ---------- - neighborhood_radius : float, default 5 - Max distance between start voxel and other voxels + radius : float, default -1 + Radius around the start voxel to include in the direction estimate optimize : bool, default True - Optimizes the number of points involved in the estimate + Optimize the number of points involved in the direction estimate """ # Initialize the parent class super().__init__(obj_type, run_mode, truth_point_mode) # Store the direction reconstruction parameters - self.neighborhood_radius = neighborhood_radius + self.radius = radius self.optimize = optimize # Make sure the voxel coordinates are provided as a tensor @@ -88,7 +87,7 @@ def process(self, data): # Compute the direction vectors dirs = get_cluster_directions( data[points_key], np.vstack(ref_points), clusts, - self.neighborhood_radius, self.optimize) + max_dist=self.radius, optimize=self.optimize) # Assign directions to the appropriate particles for i, part_id in enumerate(part_ids): diff --git a/spine/post/reco/geometry.py b/spine/post/reco/geometry.py index 89b3fca4..c034e3b8 100644 --- a/spine/post/reco/geometry.py +++ b/spine/post/reco/geometry.py @@ -25,8 +25,8 @@ class ContainmentProcessor(PostBase): aliases = ('check_containment',) def __init__(self, margin, cathode_margin=None, detector=None, - geometry_file=None, mode='module', - allow_multi_module=False, min_particle_sizes=0, + geometry_file=None, mode='module', allow_multi_module=False, + exclude_pids=None, min_particle_sizes=0, obj_type=('particle', 'interaction'), truth_point_mode='points', run_mode='both'): """Initialize the containment conditions. 
@@ -63,6 +63,9 @@ def __init__(self, margin, cathode_margin=None, detector=None, Note that this does not guarantee containment within the detector. allow_multi_module : bool, default False Whether to allow particles/interactions to span multiple modules + exclude_pids : List[int], optional + When checking interaction containment, ignore particles which + have a species specified by this list min_particle_sizes : Union[int, dict], default 0 When checking interaction containment, ignore particles below the size (in voxel count) specified by this parameter. If specified @@ -87,6 +90,7 @@ def __init__(self, margin, cathode_margin=None, detector=None, # Store parameters self.allow_multi_module = allow_multi_module + self.exclude_pids = exclude_pids # Store the particle size thresholds in a dictionary if np.isscalar(min_particle_sizes): @@ -141,6 +145,11 @@ def process(self, data): inter.is_contained = True for part in inter.particles: if not part.is_contained: + # Do not check for particles with excluded PID + if (self.exclude_pids is not None and + part.pid in self.exclude_pids): + continue + # Do not account for particles below a certain size if (part.pid > -1 and part.size < self.min_particle_sizes[part.pid]): @@ -163,6 +172,12 @@ class FiducialProcessor(PostBase): # Alternative allowed names of the post-processor aliases = ('check_fiducial',) + # Set of post-processors which must be run before this one is + _upstream = ('vertex',) + + # List of valid ways to fetch a true vertex + _truth_vertex_modes = ('vertex', 'reco_vertex') + def __init__(self, margin, cathode_margin=None, detector=None, geometry_file=None, mode='module', run_mode='both', truth_vertex_mode='vertex'): @@ -212,9 +227,9 @@ def __init__(self, margin, cathode_margin=None, detector=None, self.margin = margin # Store the true vertex source - assert truth_vertex_mode in ['vertex', 'reco_vertex'], ( + assert truth_vertex_mode in self._truth_vertex_modes, ( f"`truth_vertex_mode not recognized: 
`{truth_vertex_mode}`. "
-            "Must be one one of `vertex` or `reco_vertex`.")
+            f"Must be one of {self._truth_vertex_modes}.")
         self.truth_vertex_mode = truth_vertex_mode
 
     def process(self, data):
@@ -236,11 +251,7 @@ def process(self, data):
             self.check_units(inter)
 
             # Get point coordinates
-            if not inter.is_truth:
-                vertex = inter.vertex
-            else:
-                vertex = getattr(inter, self.truth_vertex_mode)
-            vertex = vertex.reshape(-1,3)
+            vertex = self.get_vertex(inter).reshape(-1, 3)
 
             # Check containment
             if not self.use_meta:
@@ -249,3 +260,24 @@ def process(self, data):
                 inter.is_fiducial = (
                         (vertex > (meta.lower + self.margin)).all() and
                         (vertex < (meta.upper - self.margin)).all())
+
+    def get_vertex(self, inter):
+        """Get a certain pre-defined vertex attribute of an interaction.
+
+        The :class:`TruthInteraction` vertexes are obtained using the
+        `truth_vertex_mode` attribute of the class.
+
+        Parameters
+        ----------
+        inter : InteractionBase
+            Interaction object
+
+        Returns
+        -------
+        np.ndarray
+            (N) Interaction vertex
+        """
+        if not inter.is_truth:
+            return inter.vertex
+        else:
+            return getattr(inter, self.truth_vertex_mode)
diff --git a/spine/post/reco/kinematics.py b/spine/post/reco/kinematics.py
index 8c14b7ca..cad617b5 100644
--- a/spine/post/reco/kinematics.py
+++ b/spine/post/reco/kinematics.py
@@ -1,7 +1,10 @@
+"""Kinematics update module."""
+
 import numpy as np
 
 from spine.utils.globals import (
-    TRACK_SHP, MUON_PID, PION_PID, PID_MASSES, SHP_TO_PID, SHP_TO_PRIMARY)
+    SHOWR_SHP, TRACK_SHP, MICHL_SHP, MUON_PID, PION_PID, PID_MASSES,
+    SHP_TO_PID, SHP_TO_PRIMARY)
 
 from spine.post.base import PostBase
 
@@ -17,6 +20,9 @@ class ParticleShapeLogicProcessor(PostBase):
     - If a particle has shower shape, it can only have a shower PID
     - If a particle has track shape, it can only have a track PID
     - If a particle has delta/michel shape, it can only be a secondary electron
+
+    Optionally:
+    - If it has a calorimetric KE above some threshold, it cannot be a Michel
     """
 
     # Name of 
the post-processor (as specified in the configuration) @@ -25,7 +31,8 @@ class ParticleShapeLogicProcessor(PostBase): # Alternative allowed names of the post-processor aliases = ('enforce_particle_semantics',) - def __init__(self, enforce_pid=True, enforce_primary=True): + def __init__(self, enforce_pid=True, enforce_primary=True, + maximum_michel_ke=None): """Store information about which particle properties should or should not be updated. @@ -35,6 +42,9 @@ def __init__(self, enforce_pid=True, enforce_primary=True): Enforce the PID prediction based on the semantic type enforce_primary : bool, default True Enforce the primary prediction based on the semantic type + maximum_michel_ke : float, optional + If provided, the processor will not enforce secondary status + for reconstructed Michel electrons above a certain kinetic energy """ # Intialize the parent class super().__init__('particle', 'reco') @@ -42,6 +52,11 @@ def __init__(self, enforce_pid=True, enforce_primary=True): # Store parameters self.enforce_pid = enforce_pid self.enforce_primary = enforce_primary + self.maximum_michel_ke = maximum_michel_ke + + # If the Michel KE is to be checked, must run the calorimetric KE PP + if self.maximum_michel_ke is not None: + self._upstream = ('calo_ke',) def process(self, data): """Update PID and primary predictions of each particle in one entry @@ -53,7 +68,12 @@ def process(self, data): """ # Loop over the particle objects for part in data['reco_particles']: - # Reset the PID scores + # If the particle is a Michel with too high a KE, override to shower + if (self.maximum_michel_ke is not None and + part.shape == MICHL_SHP and part.ke > self.maximum_michel_ke): + part.shape = SHOWR_SHP + + # Reset the PID scores based on shape if self.enforce_pid: pid_range = SHP_TO_PID[part.shape] pid_range = pid_range[pid_range < len(part.pid_scores)] @@ -65,7 +85,7 @@ def process(self, data): part.pid_scores = pid_scores part.pid = np.argmax(pid_scores) - # Reset the primary scores 
+ # Reset the primary scores based on shape if self.enforce_primary: primary_range = SHP_TO_PRIMARY[part.shape] @@ -109,11 +129,12 @@ def __init__(self, shower_pid_thresholds=None, track_pid_thresholds=None, super().__init__('particle', 'reco') # Check that there is something to do, throw otherwise - if (shower_pid_thresholds is not None and - track_pid_thresholds is not None and primary_threshold is None): + if ((shower_pid_thresholds is None) and + (track_pid_thresholds is None) and + (primary_threshold is None)): raise ValueError( "Specify one of `shower_pid_thresholds`, `track_pid_thresholds` " - "or `primary_threshold` for this function to do anything.") + "or `primary_threshold` for this class to do anything.") # Store the thresholds self.shower_pid_thresholds = shower_pid_thresholds @@ -258,14 +279,17 @@ class InteractionTopologyProcessor(PostBase): # Alternative allowed names of the post-processor aliases = ('adjust_interaction_topology',) - def __init__(self, ke_thresholds, reco_ke_mode='ke', + # Set of post-processors which must be run before this one is + _upstream = ('calo_ke', 'csda_ke', 'mcs_ke') + + def __init__(self, ke_thresholds=None, reco_ke_mode='ke', truth_ke_mode='energy_deposit', run_mode='both'): """Store the new thresholds to be used to update interaction topologies. Parameters ---------- ke_thresholds : Union[float, dict] - If a scalr, it specifies a blanket KE cut to apply to all + If a scalar, it specifies a blanket KE cut to apply to all particles. If it is a dictionary, it maps an PID to a KE threshold. If a 'default' key is provided, it is used for all particles, unless a number is provided for a specific PID. 
diff --git a/spine/post/reco/pid.py b/spine/post/reco/pid.py index 0f7395e4..b0969dbf 100644 --- a/spine/post/reco/pid.py +++ b/spine/post/reco/pid.py @@ -18,7 +18,7 @@ class PIDTemplateProcessor(PostBase): # Name of the post-processor (as specified in the configuration) name = 'pid_template' - def __init__(self, fill_per_pid=True, obj_type='particle', run_mode='reco', + def __init__(self, fill_per_pid=False, obj_type='particle', run_mode='reco', truth_point_mode='points', truth_dep_mode='depositions', **identifier): """Store the necessary attributes to do template-based PID prediction. diff --git a/spine/post/reco/shower.py b/spine/post/reco/shower.py index 7cf8b045..1e1852b0 100644 --- a/spine/post/reco/shower.py +++ b/spine/post/reco/shower.py @@ -1,321 +1,339 @@ +"""Shower reconstruction module.""" + import numpy as np +from scipy.stats import pearsonr + +from spine.utils.globals import SHOWR_SHP, TRACK_SHP, PROT_PID, PION_PID +from spine.utils.numba_local import cdist +from spine.utils.gnn.cluster import cluster_direction, cluster_dedx -from spine.utils.globals import (PHOT_PID, PROT_PID, PION_PID, ELEC_PID, - SHOWR_SHP, TRACK_SHP) +from spine.data import ObjectList, RecoParticle from spine.post.base import PostBase -from scipy.spatial.distance import cdist -from sklearn.cluster import DBSCAN -from sklearn.metrics.pairwise import cosine_similarity -__all__ = ['ConversionDistanceProcessor', 'ShowerMultiArmCheck', - 'ShowerStartpointCorrectionProcessor'] +__all__ = ['ShowerConversionDistanceProcessor', 'ShowerStartMergeProcessor', + 'ShowerStartCorrectionProcessor'] -class ConversionDistanceProcessor(PostBase): - """Enforce additional constraint on valid primary electron showers - using vertex-to-shower separation distance. - - NOTE: This processor can only change reco electron shower pid to - photon pid depending on the distance threshold. 
+class ShowerConversionDistanceProcessor(PostBase):
+    """Evaluates the distance between shower start and the position
+    of the vertex of the interaction they belong to.
     """
 
     # Name of the post-processor (as specified in the configuration)
     name = 'shower_conversion_distance'
 
-    # Alternative allowed names of the post-processor
-    aliases = ('shower_separation_processor',)
-
-    def __init__(self, threshold=-1.0, vertex_mode='vertex'):
-        """Specify the EM shower conversion distance threshold and
-        the type of vertex to use for the distance calculation.
+    # List of recognized ways to compute the vertex-to-shower distance
+    _modes = ('vertex_to_start', 'vertex_to_points', 'protons_to_points')
+
+    def __init__(self, mode='vertex_to_points', include_secondary=False):
+        """Store the conversion distance reconstruction parameters.
 
         Parameters
         ----------
-        threshold : float, default -1.0
-            If EM shower has a conversion distance greater than this,
-            its PID will be changed to PHOT_PID.
-        vertex_mode : str, default 'vertex'
-            The type of vertex to use for the distance calculation.
-            'protons': Distance between the shower startpoint and the
-            closest proton/pion point.
-            'vertex_points': Distance between the vertex and all shower points.
-            'vertex_startpoint': Distance between the vertex and the predicted
-            shower startpoint.
+        mode : str, default 'vertex_to_points'
+            Method used to compute the conversion distance:
+            - 'vertex_to_start': Distance between the vertex and the predicted
+              shower start point.
+            - 'vertex_to_points': Shortest distance between the vertex and any
+              shower point.
+            - 'protons_to_points': Distance between any proton/pion point and
+              any shower point. 
+        include_secondary : bool, default False
+            If `True`, computes the conversion distance for secondary particles
         """
+        # Initialize the parent class
         super().__init__('interaction', 'reco')
-        
-        self.threshold = threshold
-        self.vertex_mode = vertex_mode
-        
+
+        # Check that the conversion distance mode is valid, store
+        assert mode in self._modes, (
+            f"Conversion distance computation mode not recognized: {mode}. "
+            f"Should be one of {self._modes}")
+        self.mode = mode
+
+        # Store the vertex distance computation parameters
+        self.include_secondary = include_secondary
+
+        # If the method involves the vertex, must run the vertex PP
+        if 'vertex' in self.mode:
+            self._upstream = ('vertex',)
+
     def process(self, data):
-        """Update reco interaction topologies using the conversion
-        distance cut.
+        """Compute the conversion distance of showers in each interaction.
 
         Parameters
         ----------
         data : dict
             Dictionaries of data products
-
-        Raises
-        ------
-        ValueError
-            If provided vertex mode is invalid.
         """
         # Loop over the reco interactions
-        for ia in data['reco_interactions']:
-            criterion = -np.inf
-            for p in ia.particles:
-                if (p.shape == 0 and p.pid == 1 and p.is_primary):
-                    if self.vertex_mode == 'protons':
-                        criterion = self.convdist_protons(ia, p)
-                    elif self.vertex_mode == 'vertex_points':
-                        criterion = self.convdist_vertex_points(ia, p)
-                    elif self.vertex_mode == 'vertex_startpoint':
-                        criterion = self.convdist_vertex_startpoint(ia, p)
-                    else:
-                        raise ValueError('Invalid point mode')
-                    p.vertex_distance = criterion
-                    if criterion >= self.threshold:
-                        p.pid = PHOT_PID
-
-    @staticmethod
-    def convdist_protons(ia, shower_p):
-        """Helper function to compute the distance between the shower
-        startpoint and the closest proton point. 
+        for inter in data['reco_interactions']:
+            # Loop over the particles that make up the interaction
+            for part in inter.particles:
+                # If the particle is a secondary and they are to be skipped, do
+                if not self.include_secondary and not part.is_primary:
+                    continue
+
+                # If the particle PID is not a shower, skip
+                if part.shape != SHOWR_SHP:
+                    continue
+
+                # Compute the vertex distance
+                if self.mode == 'vertex_to_start':
+                    dist = self.get_vertex_to_start(inter, part)
+                elif self.mode == 'vertex_to_points':
+                    dist = self.get_vertex_to_points(inter, part)
+                elif self.mode == 'protons_to_points':
+                    dist = self.get_protons_to_points(inter, part)
+
+                part.vertex_distance = dist
+
+    @staticmethod
+    def get_vertex_to_start(inter, shower):
+        """Helper function to compute the closest distance between the vertex
+        and the predicted shower startpoint.
 
         Parameters
         ----------
-        ia : RecoInteraction
-            Reco interaction to apply the conversion distance cut.
-        shower_p : RecoParticle
-            Member particle of the interaction, assumed to be the primary
-            electron/gamma shower.
+        inter : RecoInteraction
+            Reco interaction providing the vertex
+        shower : RecoParticle
+            Shower particle in the interaction
 
         Returns
         -------
-        start_to_closest_proton : float
-            Closest distance between the shower startpoint
-            and proton/pion points. 
+        float
+            Distance between the vertex and the shower start point
         """
-        start_to_closest_proton = -np.inf
-        for p2 in ia.particles:
-            if (p2.pid == PROT_PID or p2.pid == PION_PID) and p2.is_primary:
-                proton_points.append(p2.start_point)
-        if len(proton_points) > 0:
-            proton_points = np.vstack(proton_points)
-            start_to_closest_proton = cdist(proton_points, shower_p.points)
-            start_to_closest_proton = start_to_closest_proton.min()
-        else:
-            start_to_closest_proton = -np.inf
-        return start_to_closest_proton
+        # Compute distance from the vertex to the shower start point
+        dist = np.linalg.norm(inter.vertex - shower.start_point)
+
+        return dist

     @staticmethod
-    def convdist_vertex_points(ia, shower_p):
-        """Helper function to compute the closest distance
-        between the vertex and all shower points.
+    def get_vertex_to_points(inter, shower):
+        """Helper function to compute the closest distance between the vertex
+        and all shower points.

         Parameters
         ----------
-        ia : RecoInteraction
-            Reco interaction to apply the conversion distance cut.
-        shower_p : RecoParticle
-            Member particle of the interaction, assumed to be the primary
-            electron/gamma shower.
+        inter : RecoInteraction
+            Reco interaction providing the vertex
+        shower : RecoParticle
+            Shower particle in the interaction

         Returns
         -------
-        start_to_closest_proton : float
-            Closest distance between the shower startpoint and proton points.
+        float
+            Shortest distance between the vertex and any shower point
         """
-        vertex_dist = -np.inf
-        vertex = ia.vertex
-        vertex_dist = cdist(vertex.reshape(1, -1), shower_p.points)
-        vertex_dist = vertex_dist.min()
-        return vertex_dist
-
+        # Compute distances from the vertex to all shower points, find minimum
+        dists = cdist(inter.vertex.reshape(-1, 3), shower.points)
+
+        return dists.min()
+
     @staticmethod
-    def convdist_vertex_startpoint(ia, shower_p):
-        """Helper function to compute the closest distance
-        between the vertex and predicted shower startpoint.
+ def get_protons_to_points(inter, shower): + """Helper function to compute the closest distance between any + proton/pion point and any shower point. Parameters ---------- - ia : RecoInteraction - Reco interaction to apply the conversion distance cut. - shower_p : RecoParticle - Member particle of the interaction, assumed to be the primary - electron/gamma shower. + inter : RecoInteraction + Reco interaction providing the vertex + shower : RecoParticle + Shower particle in the interaction Returns ------- - start_to_closest_proton : float - Closest distance between the shower startpoint and proton points. + float + Shortest distance between the shower and proton/pion points """ - vertex_dist = -np.inf - vertex = ia.vertex - vertex_dist = np.linalg.norm(vertex - shower_p.start_point) - return vertex_dist - - -class ShowerMultiArmCheck(PostBase): - """Check whether given primary electron candidate is likely - to be a merged multi-particle shower. - - This processor computes direction vectors of the shower points - from the shower start and performs DBSCAN clustering on the unit sphere - using the cosine distance metric. If there are more than one cluster that - has a mean direction vector outside a certain angular threshold, the - shower is considered to be a multi-arm shower and is rejected as - a valid primary electron candidate. - - NOTE: This processor can only change reco electron shower pid to - photon pid depending on the angle threshold. 
+        # Fetch the list of points associated with primary pions or protons
+        proton_points = []
+        for part in inter.particles:
+            if part.pid in (PION_PID, PROT_PID) and part.is_primary:
+                proton_points.append(part.points)
+
+        # If there is no pion/proton point, return dummy value
+        if len(proton_points) == 0:
+            return -1
+
+        # Compute conversion distance
+        proton_points = np.vstack(proton_points)
+        dists = cdist(proton_points, shower.points)
+
+        return dists.min()
+
+
+class ShowerStartMergeProcessor(PostBase):
+    """Merge shower start if it was classified as track points.
+
+    This post-processor is used to patch upstream semantic segmentation mistakes
+    where part of the shower trunk is assigned to track points and subsequently
+    classified as a separate particle.
     """

     # Name of the post-processor (as specified in the configuration)
-    name = 'shower_multi_arm_check'
+    name = 'shower_start_merge'
+
+    # Set of post-processors which must be run before this one is
+    _upstream = ('direction',)

-    # Alternative allowed names of the post-processor
-    aliases = ('shower_multi_arm',)
-
-    def __init__(self, threshold=70, min_samples=20, eps=0.02):
-        """Specify the threshold for the number of arms of showers.
+    def __init__(self, angle_threshold=0.175, distance_threshold=1.0,
+                 track_length_limit=50, track_dedx_limit=None, **kwargs):
+        """Store the shower start merging parameters.

         Parameters
         ----------
-        threshold : float, default 70 (deg)
-            If the electron shower's leading and subleading angle are
-            separated by more than this, the shower is considered to be
-            invalid and its PID will be changed to PHOT_PID.
-        min_samples : int, default 20
-            The number of samples (or total weight) in a neighborhood
-            for a point to be considered as a core point (DBSCAN).
-        eps : float, default 0.02
-            Maximum distance between two samples for one to be considered
-            as in the neighborhood of the other (DBSCAN).
+ angle_threshold : float, default 0.175 + Track and shower angular agreement threshold to be merged (radians) + distance_threshold : float, default 1.0 + Track and shower distance threshold to be merged (cm) + track_length_limit : int, default 50 + Maximum track length allowed to mitigate merging legitimate tracks + track_dedx_limit : float, optional + Maximum track dE/dx allowed to mitigate proton merging (MeV/cm) """ + # Initialize the parent class super().__init__('interaction', 'reco') - - self.threshold = threshold - self.min_samples = min_samples - self.eps = eps - + + # Store the shower start merging parameters + self.angle_threshold = abs(np.cos(angle_threshold)) + self.distance_threshold = distance_threshold + self.track_length_limit = track_length_limit + self.track_dedx_limit = track_dedx_limit + def process(self, data): - """Update reco interaction topologies using the shower multi-arm check. + """Merge split shower starts for all particles in one entry. Parameters ---------- data : dict - Dictionaries of data products + Dictionary of data products + + Returns + ------- + List[RecoParticle] + Updated set of particles (interactions modified in place) """ # Loop over the reco interactions - for ia in data['reco_interactions']: - # Loop over particles, select the ones that pass a threshold - for p in ia.particles: - if p.pid == ELEC_PID and p.is_primary and (p.shape == 0): - angle = self.compute_angular_criterion(p, ia.vertex, - eps=self.eps, - min_samples=self.min_samples) - p.shower_split_angle = angle - if angle > self.threshold: - p.pid = PHOT_PID - - @staticmethod - def compute_angular_criterion(p, vertex, eps, min_samples): - """Compute the angular criterion for the given primary electron shower. 
+ particles = ObjectList([], default=RecoParticle()) + offset = 0 + for inter in data['reco_interactions']: + # Fetch the leading shower in the interaction, skip if None + shower = inter.leading_shower + if shower is None: + for part in inter.particles: + part.id = offset + offset += 1 + + particles.extend(inter.particles) + inter.particle_ids = np.array( + [part.id for part in inter.particles]) + continue + + # Merge tracks which fit the criteria into the leading shower + merge_ids = [] + for part in inter.particles: + # Only check mergeability of primary tracks + if part.shape == TRACK_SHP and part.is_primary: + if self.check_merge(shower, part): + shower.merge(part) + merge_ids.append(part.id) + + # Update the particle list of the interaction + new_particles = [] + for part in inter.particles: + if part.id not in merge_ids: + part.id = offset + new_particles.append(part) + offset += 1 + + particles.extend(new_particles) + inter.particles = new_particles + inter.particle_ids = np.array( + [part.id for part in new_particles]) + + return {'reco_particles': particles} + + def check_merge(self, shower, track): + """Check if a shower and a track can be merged. Parameters ---------- - p : RecoParticle - Primary electron shower to check for multi-arm. - vertex : np.ndarray - Vertex of the interaction with shape (3, ) - eps : float - Maximum distance between two samples for one to be considered - as in the neighborhood of the other (DBSCAN). - min_samples : int - The number of samples (or total weight) in a neighborhood - for a point to be considered as a core point (DBSCAN). 
+ shower : RecoParticle + Shower particle to merge the track into + track : RecoParticle + Track particle that may be merged into the shower Returns ------- - max_angle : float - Maximum angle between the mean cluster direction vectors - of the shower points (degrees) + bool + Whether the track can be merged into the shower or not """ - points = p.points - depositions = p.depositions - - # Draw vector from startpoint to all - v = points - vertex - v_norm = np.linalg.norm(v, axis=1) - # If all vectors are zero, return 0 - if (v_norm > 0).sum() == 0: - return 0 - # Normalize the vectors - directions = v[v_norm > 0] / v_norm[v_norm > 0].reshape(-1, 1) - - # Filter out points that give zero vectors - points = points[v_norm > 0] - depositions = depositions[v_norm > 0] - - # If there are no valid directions, return 0 - if directions.shape[0] < 1: - return 0 - - # Run DBSCAN clustering on the unit sphere - model = DBSCAN(eps=eps, - min_samples=min_samples, - metric='cosine').fit(directions) - clusts, counts = np.unique(model.labels_, return_counts=True) - perm = np.argsort(counts)[::-1] - clusts, counts = clusts[perm], counts[perm] - - vecs = [] - for i, c in enumerate(clusts): - # Skip noise points that have cluster label -1 - if c == -1: continue - # Compute the mean direction vector of the cluster - v = directions[model.labels_ == c].mean(axis=0) - vecs.append(v / np.linalg.norm(v)) - if len(vecs) == 0: - return 0 - vecs = np.vstack(vecs) - cos_dist = cosine_similarity(vecs) - # max_angle ranges from 0 (parallel) to 2 (antiparallel) - max_angle = np.clip((1.0 - cos_dist).max(), a_min=0, a_max=2) - max_angle_deg = np.rad2deg(np.arccos(1 - max_angle)) - # counts = counts[1:] - return max_angle_deg - - -class ShowerStartpointCorrectionProcessor(PostBase): - """Correct the startpoint of the primary EM shower by - finding the closest point to the vertex. 
+ # Check if the track is not too long to be merged + if track.length > self.track_length_limit: + return False + + # Check that the track dE/dx is not too large to be merged + if self.track_dedx_limit is not None: + dedx = cluster_dedx( + track.points, track.depositions, track.start_point) + if dedx > self.track_dedx_limit: + return False + + # Check that the angular agreement between particles is sufficient + angular_sep = abs(np.dot(track.start_dir, shower.start_dir)) + if angular_sep < self.angle_threshold: + return False + + # Check that the distance between particles is not too large + dist = cdist(shower.points, track.points).min() + if dist > self.distance_threshold: + return False + + # If all checks passed, return True + return True + + +class ShowerStartCorrectionProcessor(PostBase): + """Correct the start point of primary EM showers by finding the closest + point to the vertex. + + This post-processor is used to patch upstream point proposal mistakes + where the shower start point is not placed correctly. """ # Name of the post-processor (as specified in the configuration) - name = 'showerstart_correction_processor' + name = 'shower_start_correction' - # Alternative allowed names of the post-processor - aliases = ('reco_shower_startpoint_correction',) - - def __init__(self, threshold=1.0): - """Specify the EM shower conversion distance threshold and - the type of vertex to use for the distance calculation. + # Set of post-processors which must be run before this one is + _upstream = ('direction',) + + def __init__(self, update_directions=True, radius=-1, optimize=True): + """Store the shower start corrector parameters. Parameters ---------- - threshold : float, default -1.0 - If EM shower has a conversion distance greater than this, - its PID will be changed to PHOT_PID. 
+ update_directions : bool, default True + If `True`, the direction of showers for which the start point + has been updated are recomputed with the updated start point + radius : float, default -1 + Radius around the start voxel to include in the direction estimate + optimize : bool, default True + Optimize the number of points involved in the direction estimate """ + # Initialize the parent class super().__init__('interaction', 'reco') - self.threshold = threshold - + + # Store start corrector parameters + self.update_directions = update_directions + self.radius = radius + self.optimize = optimize + def process(self, data): - """Update the shower startpoint using the closest point to the vertex. + """Update the shower start point using the closest point to the vertex. Parameters ---------- @@ -323,40 +341,58 @@ def process(self, data): Dictionaries of data products """ # Loop over the reco interactions - for ia in data['reco_interactions']: - vertex = ia.vertex - for p in ia.particles: - if (p.shape == SHOWR_SHP) and (p.is_primary): - new_point = self.correct_shower_startpoint(p, ia) - p.start_point = new_point - - - @staticmethod - def correct_shower_startpoint(shower_p, ia): - """Function to correct the shower startpoint by finding the closest - point to the vertex. 
+ for inter in data['reco_interactions']: + # Loop over the particles that make up the interaction + for part in inter.particles: + # If the particle PID is not a shower or not primary, skip + if part.shape != SHOWR_SHP or not part.is_primary: + continue + + # Find the new start point of a shower + new_start = self.correct_shower_start(inter, part) + + # If requested and needed, update the shower direction + if (self.update_directions and + (new_start != part.start_point).any()): + part.start_dir = cluster_direction( + part.points, new_start, max_dist=self.radius, + optimize=self.optimize) + + # Store the new start position + part.start_point = new_start + + @staticmethod + def correct_shower_start(inter, shower): + """Function to correct the shower start point by finding the closest + point to any primary track in the image. Parameters ---------- - shower_p : RecoParticle - Primary EM shower to correct the startpoint. - ia : RecoInteraction - Reco interaction to use as the vertex estimate. + inter : RecoInteraction + Reco interaction to provide track points + shower : RecoParticle + Primary EM shower to find the shower start point for Returns ------- guess : np.ndarray - (3, ) array of the corrected shower startpoint. + (3, ) array of the corrected shower start point. 
         """
-        track_points = [p.points for p in ia.particles if p.shape == TRACK_SHP and p.is_primary]
-        if track_points == []:
-            return shower_p.start_point
-
+        # Get the set of primary track points
+        track_points = []
+        for part in inter.particles:
+            if part.shape == TRACK_SHP and part.is_primary:
+                track_points.append(part.points)
+
+        # If there are no track points in the interaction, keep existing
+        if len(track_points) == 0:
+            return shower.start_point
+
+        # Update the shower start point by finding the constituent point
+        # closest to any of the track points
         track_points = np.vstack(track_points)
-        dist = cdist(shower_p.points.reshape(-1, 3), track_points.reshape(-1, 3))
-        min_dist = dist.min()
-        closest_idx, _ = np.where(dist == min_dist)
-        if len(closest_idx) == 0:
-            return shower_p.start_point
-        guess = shower_p.points[closest_idx[0]]
-        return guess
+        dists = np.min(cdist(shower.points, track_points), axis=1)
+        closest_idx = np.argmin(dists)
+        start_point = shower.points[closest_idx]
+
+        return start_point
diff --git a/spine/post/reco/topology.py b/spine/post/reco/topology.py
new file mode 100644
index 00000000..31cf2947
--- /dev/null
+++ b/spine/post/reco/topology.py
@@ -0,0 +1,349 @@
+"""Particle topology module."""
+
+import numpy as np
+from scipy.stats import pearsonr
+from sklearn.decomposition import PCA
+
+from spine.utils.globals import PHOT_PID, ELEC_PID
+from spine.utils.gnn.cluster import cluster_dedx, cluster_dedx_dir
+
+from spine.post.base import PostBase
+
+__all__ = ['ParticleStartDEDXProcessor', 'ParticleStartStraightnessProcessor',
+           'ParticleSpreadProcessor']
+
+
+class ParticleStartDEDXProcessor(PostBase):
+    """Compute the dE/dx of the particle start by summing the energy depositions
+    along the particle start and dividing by the total length of the start.
+ """ + + # Name of the post-processor (as specified in the configuration) + name = 'start_dedx' + + # List of recognized dE/dx computation modes + _modes = ('default', 'direction') + + def __init__(self, radius=3.0, anchor=False, mode='direction', + include_pids=(PHOT_PID, ELEC_PID), include_secondary=False): + """Store the particle start dE/dx reconstruction parameters. + + Parameters + ---------- + radius : float, default 3.0 + Radius around the start point to include in the dE/dx calculation. + anchor : bool, default False + If True, anchor the start point to the particle cluster. This + is irrelevant for reconstruction particles (always anchored) + mode : str, default 'direction' + Method to use for dE/dx calculation. + include_pids : list, default [0, 1] + Particle species to compute the start dE/dx for + include_secondary : bool, default False + If `True`, computes the start dE/dx for secondary particles + """ + # Initialize the parent class + super().__init__('particle', 'reco') + + # Check that the dE/dx mode is valid, store + assert mode in self._modes, ( + f"dE/dx computation mode not recognized: {mode}. Should be " + f"one of {self._modes}") + self.mode = mode + + # Store the dE/dx computation parameters + self.radius = radius + self.anchor = anchor + self.include_pids = include_pids + self.include_secondary = include_secondary + + # If the method involves the direction, must run the direction PP + if mode == 'direction': + self._upstream = ('direction',) + + def process(self, data): + """Compute the start dE/dx for all particles in one entry. 
+ + Parameters + ---------- + data : dict + Dictionaries of data products + """ + # Loop over the reco particles + for part in data['reco_particles']: + # If the particle is a secondary and they are to be skipped, do + if not self.include_secondary and not part.is_primary: + continue + + # If the particle PID is not in the required list, skip + if part.pid not in self.include_pids: + continue + + # Compute the particle start dE/dx + if self.mode == 'default': + # Use all depositions within a radius of the particle start + part.start_dedx = cluster_dedx( + part.points, part.depositions, part.start_point, + max_dist=self.radius, anchor=self.anchor) + + else: + # Use the particle direction estimate as a guide + part.start_dedx = cluster_dedx_dir( + part.points, part.depositions, part.start_point, + part.start_dir, max_dist=self.radius, + anchor=self.anchor)[0] + + +class ParticleStartStraightnessProcessor(PostBase): + """Compute the relative straightness of the particle start. + + The start straightness is evaluated by computing the covariance matrix of + the points in a neighborhood around the start of a particle and by + estimating which fraction of that variance is explained by the first + principal component of the decomposition. + """ + + # Name of the post-processor (as specified in the configuration) + name = 'start_straightness' + + def __init__(self, radius=3.0, n_components=3, + include_pids=(PHOT_PID, ELEC_PID), include_secondary=False): + """Store the particle start straightness reconstruction parameters. + + Parameters + ---------- + radius : float, default 3.0 + Radius around the start point to include in the straightness calculation. 
+ n_components : int, default 3 + Number of components to compute the PCA for + include_pids : list, default [0, 1] + Particle species to compute the start dE/dx for + include_secondary : bool, default False + If `True`, computes the start dE/dx for secondary particles + """ + # Initialize the parent class + super().__init__('particle', 'reco') + + # Check that the number of components make sense, initialize PCA + assert n_components > 0 and n_components <= 3, ( + "The number of PCA components should be at least 1 and at most " + "3 (the dimensionality of 3D points).") + self.n_components = n_components + self.pca = PCA(n_components=n_components) + + # Store the dE/dx computation parameters + self.radius = radius + self.include_pids = include_pids + self.include_secondary = include_secondary + + def process(self, data): + """Compute the start straightness for all particles in one entry. + + Parameters + ---------- + data : dict + Dictionaries of data products + """ + # Loop over the reco particles + for part in data['reco_particles']: + # If the particle is a secondary and they are to be skipped, do + if not self.include_secondary and not part.is_primary: + continue + + # If the particle PID is not in the required list, skip + if part.pid not in self.include_pids: + continue + + # Compute the particle start straightness + part.start_straightness = self.get_start_straightness(part) + + def get_start_straightness(self, particle): + """Evaluates the straightness of the start of a particle by computing + the PCA principal explained variance ratio. 
+ + Parameters + ---------- + particle : RecoParticle + Reconstructed particle instance + + Returns + ------- + float + The first explained variance ratio of the PCA of the shower start + """ + # Restrict the points to those around the start of the particle + dists = np.linalg.norm(particle.points - particle.start_point, axis=1) + points = particle.points[dists < self.radius] + + # Compute the explained variance ratio of the principal component + if len(points) < self.n_components: + return -1. + else: + return self.pca.fit(points).explained_variance_ratio_[0] + + +class ParticleSpreadProcessor(PostBase): + """Compute the directional and axial spreads of a particle. + + The directional spread is obtained by computing the mean direction and the + weighted average cosine distance with respect to the mean direction. + + The axial spread is obtained by computing the pearson R correlation + coefficient between the distance of the shower points from the startpoint + along the shower axis and the perpendicular distance from the shower axis. + """ + + # Name of the post-processor (as specified in the configuration) + name = 'particle_spread' + + # List of recognized start reference modes for spread evaluation + _start_modes = ('vertex', 'start_point') + + # Set of post-processors which must be run before this one is + _upstream = ('direction',) + + def __init__(self, start_mode='vertex', length_scale=14.0, use_start_dir=False, + include_pids=(PHOT_PID, ELEC_PID), include_secondary=False): + """Store the particle spread reconstruction parameters. + + Parameters + ---------- + start_mode : str, default 'vertex' + Point used as a start point to evaluate the spread (can be one + of 'vertex' or 'start_point') + length_scale : float, default 14.0 + Length scale used to weight the directional spread estimator + use_start_dir : bool, default False + If `True`, use the direction estimate of the particle to compute + the axial spread. 
If `False`, the direction is estimated using PCA
+        include_pids : list, default [0, 1]
+            Particle species to compute the start dE/dx for
+        include_secondary : bool, default False
+            If `True`, computes the start dE/dx for secondary particles
+        """
+        # Initialize the parent class
+        super().__init__('interaction', 'reco')
+
+        # Check that the start point mode is valid, store
+        assert start_mode in self._start_modes, (
+            f"Spread start computation mode not recognized: {start_mode}. "
+            f"Should be one of {self._start_modes}")
+        self.start_mode = start_mode
+
+        # Store the spread computation parameters
+        self.length_scale = length_scale
+        self.use_start_dir = use_start_dir
+        self.include_pids = include_pids
+        self.include_secondary = include_secondary
+
+        # If needed, initialize PCA
+        if not use_start_dir:
+            self.pca = PCA(n_components=3)
+
+    def process(self, data):
+        """Compute the shower spread for all particles in one entry.
+
+        Parameters
+        ----------
+        data : dict
+            Dictionaries of data products
+        """
+        # Loop over the reco interactions
+        for inter in data['reco_interactions']:
+            # Loop over the particles that make up the interaction
+            for part in inter.particles:
+                # If the particle is a secondary and they are to be skipped, do
+                if not self.include_secondary and not part.is_primary:
+                    continue
+
+                # If the particle PID is not in the required list, skip
+                if part.pid not in self.include_pids:
+                    continue
+
+                # Fetch the appropriate reference point
+                if self.start_mode == 'vertex':
+                    ref_point = inter.vertex
+                else:
+                    ref_point = part.start_point
+
+                # Compute the spreads
+                part.directional_spread = self.get_dir_spread(
+                        part.points, ref_point)
+                part.axial_spread = self.get_axial_spread(
+                        part.points, part.start_dir, ref_point)
+
+    def get_dir_spread(self, points, ref_point):
+        """Compute the directional spread of the particle by computing the mean
+        direction and the weighted average cosine distance with respect to it.
+ + Parameters + ---------- + points : np.ndarray + (N, 3) Particle point coordinates + ref_point : np.ndarray + (3) Reference point + + Returns + ------- + float + Directional spread of the particle + """ + # Compute the distance from the reference point to all particle points + dists = np.linalg.norm(points - ref_point, axis=1) + + # Restrict points to those that do not coincide with the reference point + index = np.where(dists > 0)[0] + if len(index) == 0: + return -1. + points, dists = points[index], dists[index] + + # Compute the exponential-weighted directional spread + directions = (points - ref_point)/dists.reshape(-1, 1) + weights = np.clip(np.exp(-dists/self.length_scale), min=1e-6) + mean_direction = np.average(directions, weights=weights, axis=0) + mean_direction /= np.linalg.norm(mean_direction) + cosine = 1 - np.dot(directions, mean_direction) + spread = np.average(cosine, weights=weights) + + return spread + + def get_axial_spread(self, points, start_dir, ref_point): + """Compute the axial spread of a particle by evaluating the pearson R + correlation coefficient between the distance of the particle points from + the start point along the particle axis and the perpendicular distance + from the particle axis. 
+ + Parameters + ---------- + points : np.ndarray + (N, 3) Particle point coordinates + start_dir : np.ndarray + (3) Start direction of the particle + ref_point : np.ndarray + (3) Reference point + + Returns + ------- + float + Axial spread of the particle + """ + # If there are less than 3 points, one cannot compute this quantity + if len(points) < 3: + return -np.inf + + # Fetch the particle direction + if self.use_start_dir: + v0 = start_dir + else: + v0 = self.pca.fit(points).components_[0] + if np.dot(start_dir, v0) < 0.: + v0 *= -1 + + # Compute the axial spread + dists = np.linalg.norm(points - ref_point, axis=1) + v = ((ref_point - points) + - (np.sum((ref_point - points) * v0, axis=1, keepdims=True) + * np.broadcast_to(v0, points.shape))) + perps = np.linalg.norm(v, axis=1) + + return pearsonr(dists, perps)[0] diff --git a/spine/post/reco/tracking.py b/spine/post/reco/tracking.py index 7d62ddbe..fe59e14d 100644 --- a/spine/post/reco/tracking.py +++ b/spine/post/reco/tracking.py @@ -7,12 +7,10 @@ from spine.utils.globals import ( SHOWR_SHP, TRACK_SHP, MUON_PID, PION_PID, PROT_PID, KAON_PID) from spine.utils.energy_loss import csda_table_spline -from spine.utils.gnn.cluster import cluster_dedx from spine.utils.tracking import get_track_length from spine.post.base import PostBase -__all__ = ['CSDAEnergyProcessor', 'TrackValidityProcessor', - 'TrackShowerMergerProcessor'] +__all__ = ['CSDAEnergyProcessor'] class CSDAEnergyProcessor(PostBase): @@ -104,227 +102,3 @@ def process(self, data): if self.fill_per_pid: for pid in self.include_pids: obj.csda_ke_per_pid[pid] = 0. - - -class TrackValidityProcessor(PostBase): - """Check if track is valid based on the proximity to reconstructed vertex. - Can also reject small tracks that are close to the vertex (optional). 
- """ - - # Name of the post-processor (as specified in the configuration) - name = 'track_validity' - - # Alternative allowed names of the post-processor - aliases = ('track_validity_processor',) - - def __init__(self, threshold=3., ke_threshold=50., - check_small_track=False, **kwargs): - """Initialize the track validity post-processor. - - Parameters - ---------- - threshold : float, default 3.0 - Vertex distance above which a track is not considered a primary - ke_theshold : float, default 50.0 - Kinetic energy threshold below which a track close to the vertex - is deemed not primary/not valid - check_small_track : bool, default False - Whether or not to apply the small track KE cut - """ - # Initialize the parent class - super().__init__('interaction', 'reco') - - self.threshold = threshold - self.ke_threshold = ke_threshold - self.check_small_track = check_small_track - - def process(self, data): - """Loop through reco interactions and modify reco particle's - primary label based on the proximity to reconstructed vertex. - - Parameters - ---------- - data : dict - Dictionary of data products - """ - # Loop over particle objects - for ia in data['reco_interactions']: - for p in ia.particles: - if p.shape == TRACK_SHP and p.is_primary: - # Check vertex attachment - dist = np.linalg.norm(p.points - ia.vertex, axis=1) - p.vertex_distance = dist.min() - if p.vertex_distance > self.threshold: - p.is_primary = False - # Check if track is small and within radius from vertex - if self.check_small_track: - if ((dist < self.threshold).all() - and p.ke < self.ke_threshold): - p.is_valid = False - p.is_primary = False - - -class TrackShowerMergerProcessor(PostBase): - """Merge tracks into showers based on a set of selection criteria. 
- """ - - # Name of the post-processor (as specified in the configuration) - name = 'merge_track_to_shower' - - # Alternative allowed names of the post-processor - aliases = ('track_shower_merger',) - - def __init__(self, angle_threshold=10, adjacency_threshold=0.5, - dedx_threshold=-1, track_length_limit=50, **kwargs): - """Post-processor to merge tracks into showers. - - Parameters - ---------- - angle_threshold : float, default 0.95 - Check if track and shower cosine similarity is greater than this value. - adjacency_threshold : float, default 0.5 - Check if track and shower is within this threshold distance. - dedx_limit : int, default -1 - Check if the track dedx is below this value, - to avoid merging protons. - track_length_limit : int, default 40 - Check if track length is below this value, - to avoid merging long tracks. - """ - # Initialize the parent class - super().__init__('interaction', 'reco') - - self.angle_threshold = abs(np.cos(np.radians(angle_threshold))) - self.adjacency_threshold = adjacency_threshold - self.dedx_threshold = dedx_threshold - self.track_length_limit = track_length_limit - - def process(self, data): - """Loop over the reco interactions and merge tracks into showers, - if they pass the selection criteria. 
- - Parameters - ---------- - data : dict - Dictionary of data products - """ - # Loop over the reco interactions - interactions = [] - for ia in data['reco_interactions']: - # Leading shower and its ke - shower_p = None - shower_p_max_ke = 0 - # Loop over particles, select the ones that pass a threshold - for p in ia.particles: - if p.is_primary and p.shape == SHOWR_SHP: - if p.ke > shower_p_max_ke: - shower_p = p - shower_p_max_ke = p.ke - if shower_p is None: - interactions.append(ia) - continue - new_particles = [] - for p in ia.particles: - if p.shape == TRACK_SHP and p.is_primary: - should_merge = check_merge(p, shower_p, - angle_threshold=self.angle_threshold, - adjacency_threshold=self.adjacency_threshold, - dedx_limit=self.dedx_threshold, - track_length_limit=self.track_length_limit) - if should_merge: - merge_track_to_shower(shower_p, p) - p.is_valid = False - else: - new_particles.append(p) - else: - new_particles.append(p) - if len(ia.particles) != len(new_particles): - ia.particles = new_particles - interactions.append(ia) - - data['reco_interactions'] = interactions - - -def merge_track_to_shower(p1, p2): - """Merge a track p2 into shower p1. - - Parameters - ---------- - p1 : RecoParticle - Shower particle to merge p1 into. - p2 : RecoParticle - Track particle p2 that will be merged into p1. 
- """ - # Sanity checks - assert p1.shape == SHOWR_SHP - assert p2.shape == TRACK_SHP - - # Stack the two particle array attributes together - for attr in ['index', 'depositions']: - val = np.concatenate([getattr(p1, attr), getattr(p2, attr)]) - setattr(p1, attr, val) - for attr in ['points', 'sources']: - val = np.vstack([getattr(p1, attr), getattr(p2, attr)]) - setattr(p1, attr, val) - - # Select track startpoint as new startpoint - p1.start_point = np.copy(p2.start_point) - p1.calo_ke = p1.calo_ke + p2.ke - - # If one of the two particles is a primary, the new one is - p1.is_primary = max(p1.is_primary, p2.is_primary) - if p2.primary_scores[-1] > p1.primary_scores[-1]: - p1.primary_scores = p2.primary_scores - - -def check_merge(p_track, p_shower, angle_threshold=0.95, - adjacency_threshold=0.5, dedx_limit=-1, track_length_limit=40): - """Check if a track and a shower can be merged. - - Parameters - ---------- - p_track : RecoParticle - Track particle that will be merged into the shower. - p_shower : RecoParticle - Shower particle to merge the track into. - angle_threshold : float, default 0.95 - Check if track and shower cosine distance is greater than this value. - adjacency_threshold : float, default 0.5 - Check if track and shower is within threshold distance. - dedx_limit : int, default -1 - Check if the track dedx is below this value, - to avoid merging protons. - track_length_limit : int, default 40 - Check if track length is below this value, - to avoid merging long tracks. - - Returns - ------- - result : bool - True if the track and shower can be merged, False otherwise. 
- """ - - check_direction = False - check_adjacency = False - check_dedx = True - check_track_energy = False - - angular_sep = abs(np.sum(p_track.start_dir * p_shower.start_dir)) - - if angular_sep > angle_threshold: - check_direction = True - - if cdist(p_shower.points.reshape(-1, 3), - p_track.points.reshape(-1, 3)).min() < adjacency_threshold: - check_adjacency = True - - dedx = cluster_dedx(p_track.points, p_track.depositions, p_track.start_point) - if dedx > dedx_limit: - check_dedx = False - if p_track.length < track_length_limit: - check_track_energy = True - - result = (check_dedx and check_direction and - check_adjacency and check_track_energy) - - return result diff --git a/spine/post/reco/vertex.py b/spine/post/reco/vertex.py index 84fb173e..1929efdd 100644 --- a/spine/post/reco/vertex.py +++ b/spine/post/reco/vertex.py @@ -19,6 +19,9 @@ class VertexProcessor(PostBase): # Alternative allowed names of the post-processor aliases = ('reconstruct_vertex',) + # Set of post-processors which must be run before this one is + _upstream = ('direction',) + def __init__(self, include_shapes=(SHOWR_SHP, TRACK_SHP), use_primaries=True, update_primaries=False, anchor_vertex=True, touching_threshold=2.0, @@ -37,8 +40,8 @@ def __init__(self, include_shapes=(SHOWR_SHP, TRACK_SHP), anchor_vertex : bool, default True If true, anchor the candidate vertex to particle objects, with the expection of interactions only composed of showers - touching_threshold : float, default 2 cm - Maximum distance for two track points to be considered touching + touching_threshold : float, default 2.0 + Maximum distance for two track points to be considered touching (cm) angle_threshold : float, default 0.3 radians Maximum angle between the vertex-to-start-point vector and a shower direction to consider that a shower originated from the vertex @@ -117,6 +120,6 @@ def reconstruct_vertex_single(self, inter): part.is_primary = True elif part.shape == SHOWR_SHP: vec = part.start_point - 
inter.vertex - cos = np.dot(vec/np.linalg.norm(vec), part.start_dir) - if cos < self.angle_threshold: + cosang = np.dot(vec/np.linalg.norm(vec), part.start_dir) + if np.arccos(cosang) < self.angle_threshold: part.is_primary = True diff --git a/spine/post/metric/__init__.py b/spine/post/truth/__init__.py similarity index 76% rename from spine/post/metric/__init__.py rename to spine/post/truth/__init__.py index 3690b546..fe860c1f 100644 --- a/spine/post/metric/__init__.py +++ b/spine/post/truth/__init__.py @@ -1,3 +1,4 @@ """Reconstruction quality evaluation module.""" +from .label import * from .match import * diff --git a/spine/post/reco/label.py b/spine/post/truth/label.py similarity index 100% rename from spine/post/reco/label.py rename to spine/post/truth/label.py diff --git a/spine/post/metric/match.py b/spine/post/truth/match.py similarity index 100% rename from spine/post/metric/match.py rename to spine/post/truth/match.py diff --git a/spine/utils/calib/manager.py b/spine/utils/calib/manager.py index c6cf6a01..c49a8549 100644 --- a/spine/utils/calib/manager.py +++ b/spine/utils/calib/manager.py @@ -1,6 +1,8 @@ """Loads all requested calibration modules and executes them in the appropriate sequence.""" +from copy import deepcopy + import numpy as np from spine.utils.geo import Geometry @@ -41,6 +43,7 @@ def __init__(self, geometry, gain_applied=False, **cfg): self.watch.initialize(key) # Add necessary geometry information + value = deepcopy(value) if key != 'recombination': value['num_tpcs'] = self.geo.tpc.num_chambers else: @@ -112,8 +115,6 @@ def __call__(self, points, values, sources=None, run_id=None, track=None, # Apply the transparency correction if 'transparency' in self.modules: - assert run_id is not None, ( - "Must provide a run ID to get the transparency map.") self.watch.start('transparency') tpc_values = self.modules['transparency'].process( tpc_points, tpc_values, t, run_id) # ADC diff --git a/spine/utils/calib/transparency.py 
b/spine/utils/calib/transparency.py index ed20dc74..b029656f 100644 --- a/spine/utils/calib/transparency.py +++ b/spine/utils/calib/transparency.py @@ -62,6 +62,9 @@ def process(self, points, values, tpc_id, run_id): if self.run_id is not None: run_id = self.run_id + assert run_id is not None, ( + "Must provide a run ID to get the transparency map.") + # Get the appropriate transparency map for this run transparency_lut = self.transparency[run_id] diff --git a/spine/utils/cuda.py b/spine/utils/cuda.py index 2eca9c72..21aab127 100644 --- a/spine/utils/cuda.py +++ b/spine/utils/cuda.py @@ -32,8 +32,8 @@ def set_visible_devices(gpus=None, world_size=None, **kwargs): len(gpus) == world_size), ( f"The world size ({world_size}) does not match the " f"number of exposed GPUs ({len(gpus)}).") - world_size = world_size or len(gpus) - gpus = gpus or list(range(world_size)) + world_size = world_size if world_size is not None else len(gpus) + gpus = gpus if gpus is not None else list(range(world_size)) # Set the visible CUDA devices if not os.environ.get('CUDA_VISIBLE_DEVICES', None) and gpus is not None: diff --git a/spine/utils/gnn/cluster.py b/spine/utils/gnn/cluster.py index f2debfad..08de61a5 100644 --- a/spine/utils/gnn/cluster.py +++ b/spine/utils/gnn/cluster.py @@ -1052,7 +1052,7 @@ def cluster_direction(voxels: nb.float64[:,:], @numbafy(cast_args=['data', 'starts'], list_args=['clusts'], keep_torch=True, ref_arg='data') -def get_cluster_dedxs(data, starts, clusts, max_dist=-1): +def get_cluster_dedxs(data, starts, clusts, max_dist=-1, anchor=False): """Computes the initial local dE/dxs of each cluster. 
Parameters @@ -1065,6 +1065,8 @@ def get_cluster_dedxs(data, starts, clusts, max_dist=-1): (C) List of cluster indexes max_dist : float, default -1 Neighborhood radius around the point used to compute the dE/dx + anchor : bool, default False + If true, anchor the start point to the closest cluster point Returns ------- @@ -1075,21 +1077,23 @@ def get_cluster_dedxs(data, starts, clusts, max_dist=-1): return np.empty(0, dtype=data.dtype) return _get_cluster_dedxs( - data[:, COORD_COLS], data[:, VALUE_COL], starts, clusts, max_dist) + data[:, COORD_COLS], data[:, VALUE_COL], + starts, clusts, max_dist, anchor) @nb.njit(parallel=True, cache=True) def _get_cluster_dedxs(voxels: nb.float64[:,:], values: nb.float64[:], starts: nb.float64[:,:], clusts: nb.types.List(nb.int64[:]), - max_dist: nb.float64 = -1) -> nb.float64[:,:]: + max_dist: nb.float64 = -1, + anchor: nb.boolean = False) -> nb.float64[:,:]: dedxs = np.empty(len(clusts), voxels.dtype) ids = np.arange(len(clusts)).astype(np.int64) for k in nb.prange(len(clusts)): dedxs[k] = cluster_dedx( voxels[clusts[ids[k]]], values[clusts[ids[k]]], - starts[k].astype(np.float64), max_dist) + starts[k].astype(np.float64), max_dist, anchor) return dedxs @@ -1098,7 +1102,8 @@ def _get_cluster_dedxs(voxels: nb.float64[:,:], def cluster_dedx(voxels: nb.float64[:,:], values: nb.float64[:], start: nb.float64[:], - max_dist: nb.float64=5.0) -> nb.float64[:]: + max_dist: nb.float64=5.0, + anchor: nb.boolean=False) -> nb.float64[:]: """Computes the initial local dE/dx of a cluster. Parameters @@ -1107,33 +1112,120 @@ def cluster_dedx(voxels: nb.float64[:,:], (N, 3) Voxel coordinates values : np.ndarray (N) Voxel values - starts : np.ndarray + start : np.ndarray (3) Start point w.r.t. 
which to compute the local dE/dx max_dist : float, default 5.0 Neighborhood radius around the point used to compute the dE/dx + anchor : bool, default False + If true, anchor the start point to the closest cluster point Returns ------- float Local dE/dx value around the start point """ - # If max_dist is set, limit the set of voxels to those within a sphere of radius max_dist + # Sanity check assert voxels.shape[1] == 3, ( "The shape of the input is not compatible with voxel coordinates.") - dist_mat = nbl.cdist(start.reshape(1,-1), voxels).flatten() - if max_dist > 0: - voxels = voxels[dist_mat <= max_dist] - if len(voxels) < 2: + # If necessary, anchor start point to the closest cluster point + if anchor: + dists = nbl.cdist(start.reshape(1, -1), voxels).flatten() + start = voxels[np.argmin(dists)].astype(start.dtype) # Dirty + + # If max_dist is set, limit the set of voxels to those within a sphere of + # radius max_dist around the start point + dists = nbl.cdist(start.reshape(1, -1), voxels).flatten() + if max_dist > 0.: + index = np.where(dists <= max_dist)[0] + if len(index) < 2: return 0. - values = values[dist_mat <= max_dist] - dist_mat = dist_mat[dist_mat <= max_dist] + + values, dists = values[index], dists[index] # Compute the total energy in the neighborhood and the max distance, return ratio - if np.max(dist_mat) == 0.: + if np.max(dists) == 0.: return 0. - return np.sum(values)/np.max(dist_mat) + return np.sum(values)/np.max(dists) + + +@nb.njit(cache=True) +def cluster_dedx_dir(voxels: nb.float64[:,:], + values: nb.float64[:], + start: nb.float64[:], + start_dir: nb.float64[:], + max_dist: nb.float64=3.0, + anchor: nb.boolean=False) -> nb.float64[:]: + """Computes the initial local dE/dx of a cluster by leveraging an + existing cluster direction estimate. + + Parameters + ---------- + voxels : np.ndarray + (N, 3) Voxel coordinates + values : np.ndarray + (N) Voxel values + start : np.ndarray + (3) Start point w.r.t. 
which to compute the local dE/dx + start_dir : np.ndarray + (3) Start direction of the cluster + max_dist : float, default 3.0 + Neighborhood radius around the point used to compute the dE/dx + + Returns + ------- + float + Local dE/dx value around the start point + float + Energy deposited around the start point (dE) + float + Length around the start point (dx) + float + Relative spread around the cluster direction (quality metric) + int + Number of voxels in the neighborhood around the start point + """ + # Sanity check + assert voxels.shape[1] == 3, ( + "The shape of the input is not compatible with voxel coordinates.") + + # If necessary, anchor start point to the closest cluster point + if anchor: + dists = nbl.cdist(start.reshape(1, -1), voxels).flatten() + start = voxels[np.argmin(dists)].astype(start.dtype) # Dirty + + # If max_dist is set, limit the set of voxels to those within a sphere of + # radius max_dist around the start point + dists = nbl.cdist(start.reshape(1, -1), voxels).flatten() + if max_dist > 0.: + index = np.where(dists <= max_dist)[0] + if len(index) < 2: + return 0., 0., 0., -1., len(index) + + voxels, values, dists = voxels[index], values[index], dists[index] + + # Project the voxels within the sphere onto the reco direction + voxels_proj = np.dot(voxels - start, start_dir) + + # Mask the voxels to only include the top hemisphere + index = np.where((voxels_proj >= -1e-3) & (voxels_proj <= max_dist))[0] + if len(index) < 2: + return 0., 0., 0., -1., len(index) + + voxels, voxels_proj, values = voxels[index], voxels_proj[index], values[index] + + # Compute length as the length along the start direction + dE = np.sum(values) + dx = np.max(voxels_proj) - np.min(voxels_proj) + + # Calculate the mean spread from the reco direction (quality metric) + voxels_sp = voxels - start + vectors_to_axis = voxels_sp - np.outer(voxels_proj, start_dir) + spreads = np.sqrt(np.sum(vectors_to_axis**2, axis=1)) + spread = np.sum(spreads)/len(index) + + return 
dE/dx, dE, dx, spread, len(index) @numbafy(cast_args=['data'], list_args=['clusts'], diff --git a/spine/utils/vertex.py b/spine/utils/vertex.py index 156ca47a..2524513f 100644 --- a/spine/utils/vertex.py +++ b/spine/utils/vertex.py @@ -247,3 +247,39 @@ def get_pseudovertex(start_points: nb.float32[:,:], pseudovtx = np.linalg.pinv(S) @ C return pseudovtx + + +@nb.njit(cache=True) +def get_weighted_pseudovertex(start_points: nb.float32[:,:], + directions: nb.float32[:,:], + weights: nb.float32[:], + dim: int = 3) -> nb.float32[:]: + """Finds the vertex which minimizes the total distance from itself to all + the lines defined by the start points of particles and their directions. + + Parameters + ---------- + start_points : np.ndarray + (P, 3) Particle start points + directions : np.ndarray + (P, 3) Particle directions + dim : int + Number of dimensions + """ + assert len(start_points), ( + "Cannot reconstruct pseudovertex without points.") + + if len(start_points) == 1: + return start_points[0] + + pseudovtx = np.zeros((dim, ), dtype=start_points.dtype) + S = np.zeros((dim, dim), dtype=start_points.dtype) + C = np.zeros((dim, ), dtype=start_points.dtype) + + for i, (p, d) in enumerate(zip(start_points, directions)): + S += weights[i] * (np.outer(d, d) - np.eye(dim, dtype=start_points.dtype)) + C += weights[i] * (np.outer(d, d) - np.eye(dim, dtype=start_points.dtype)) @ np.ascontiguousarray(p) + + pseudovtx = np.linalg.pinv(S) @ C + + return pseudovtx diff --git a/spine/vis/__init__.py b/spine/vis/__init__.py index 17c7baea..fc6bb735 100644 --- a/spine/vis/__init__.py +++ b/spine/vis/__init__.py @@ -4,6 +4,7 @@ from .geo import GeoDrawer from .train import TrainDrawer from .point import scatter_points +from .arrow import scatter_arrows from .cluster import scatter_clusters from .box import scatter_boxes from .particle import scatter_particles diff --git a/spine/vis/arrow.py b/spine/vis/arrow.py new file mode 100644 index 00000000..52fe63d3 --- /dev/null +++ 
b/spine/vis/arrow.py @@ -0,0 +1,89 @@ +"""Module to draw 3D arrows.""" + +import time + +import numpy as np +from plotly import graph_objs as go + +from .point import scatter_points + + +def scatter_arrows(points, directions, length=10.0, tip_ratio=0.25, color=None, + hovertext=None, line=None, linewidth=5, name=None): + """Converts a list of points and directions into a set of arrows. + + Parameters + ---------- + points : np.ndarray + (N, 3) Array of point coordinates + directions : np.ndarray + (N, 3) Array of arrow direction vectors + length : float, default 10.0 + Length of the arrows + tip_ratio : float, default 0.25 + Relative arrow tip size w.r.t. its full length + color : Union[str, float], optional + Color of the arrow + hovertext : Union[int, str, np.ndarray], optional + Text associated with the arrow + line : dict, optional + Arrow trunk line property dictionary + linewidth : float, default 5 + Width of the arrow trunk lines + name : str + Name of the traces + """ + # Process color and hovertext information for the arrows + color_trunks, hovertext_trunks = color, hovertext + if color is not None and not np.isscalar(color): + color_trunks = np.repeat(color, 3) + + hovertext_arrows = [] + for i in range(len(directions)): + vx, vy, vz = directions[i] + ht = f'vx: {vx:0.3f}
vy: {vy:0.3f}
vz: {vz:0.3f}' + if hovertext is not None: + if np.isscalar(hovertext): + ht += f'
{hovertext}' + else: + ht += f'
{hovertext[i]}' + + hovertext_arrows.append(ht) + + hovertext_trunks = np.repeat(hovertext_arrows, 3) + + legendgroup = 'group_' + str(time.time()) + + # Initialize the arrow trunks + vertices = np.empty((0, 3), dtype=points.dtype) + if len(points) > 0: + vertices = [] + for point, direction in zip(points, directions): + vertices.extend([point, point + length*direction, [None, None, None]]) + + vertices = np.vstack(vertices) + + traces = scatter_points( + vertices, color=color_trunks, hovertext=hovertext_trunks, + line=line, linewidth=linewidth, mode='lines', + hovertemplate='%{text}', name=name, legendgroup=legendgroup) + + # Process color information for the arrow tips + colorscale = None + if color is not None and isinstance(color, str): + colorscale = [(0, color), (1, color)] + else: + colorscale = [(0, 'black'), (1, 'black')] + + # Initialize the arrow tips + ends = points + (1 - tip_ratio/2)*length*directions + directions = tip_ratio*length*directions + traces += [go.Cone( + x=ends[:, 0], y=ends[:, 1], z=ends[:, 2], + u=directions[:, 0], v=directions[:, 1], w=directions[:, 2], + showscale=False, showlegend=False, sizemode='raw', + colorscale=colorscale, hovertext=hovertext_arrows, + hovertemplate='%{hovertext}', name=name, legendgroup=legendgroup)] + + # Return + return traces diff --git a/spine/vis/out.py b/spine/vis/out.py index 3ac60e31..bdb276a7 100644 --- a/spine/vis/out.py +++ b/spine/vis/out.py @@ -10,6 +10,7 @@ from .geo import GeoDrawer from .point import scatter_points +from .arrow import scatter_arrows from .cluster import scatter_clusters from .layout import ( layout3d, dual_figure3d, PLOTLY_COLORS_WGRAY, HIGH_CONTRAST_COLORS) @@ -157,9 +158,9 @@ def get_index(self, obj): return getattr(obj, self.truth_index_mode) def get(self, obj_type, attr=None, color_attr=None, draw_raw=False, - draw_end_points=False, draw_vertices=False, draw_flashes=False, - synchronize=False, titles=None, split_traces=False, - matched_flash_only=True): + 
draw_end_points=False, draw_directions=False, draw_vertices=False, + draw_flashes=False, synchronize=False, titles=None, + split_traces=False, matched_flash_only=True): """Draw the requested object type with the requested mode. Parameters @@ -175,6 +176,8 @@ def get(self, obj_type, attr=None, color_attr=None, draw_raw=False, If `True`, add a trace which corresponds to the raw depositions draw_end_points : bool, default False If `True`, draw the fragment or particle end points + draw_directions : bool, default False + If `True`, draw the fragment or particle start directions draw_vertices : bool, default False If `True`, draw the interaction vertices draw_flashes : bool, default False @@ -222,6 +225,14 @@ def get(self, obj_type, attr=None, color_attr=None, draw_raw=False, traces[prefix] += self._start_point_trace(obj_name) traces[prefix] += self._end_point_trace(obj_name) + # Fetch the directions, if requested + if draw_directions: + assert obj_name != 'interactions', ( + "Interactions do not have direction attributes.") + for prefix in self.prefixes: + obj_name = f'{prefix}_{obj_type}' + traces[prefix] += self._direction_trace(obj_name) + # Fetch the vertices, if requested if draw_vertices: for prefix in self.prefixes: @@ -465,9 +476,9 @@ def _start_point_trace(self, obj_name, color='black', markersize=7, ---------- obj_name : str Name of the object to draw - color : Union[str, np.ndarray], optional + color : Union[str, np.ndarray], default 'black' Color of markers/lines or (N) list of color of markers/lines - markersize : float, default 5 + markersize : float, default 7 Marker size marker_symbol : float, default 'circle' Marker style @@ -491,9 +502,9 @@ def _end_point_trace(self, obj_name, color='black', markersize=7, ---------- obj_name : str Name of the object to draw - color : Union[str, np.ndarray], optional + color : Union[str, np.ndarray], default 'black' Color of markers/lines or (N) list of color of markers/lines - markersize : float, default 5 + 
markersize : float, default 7 Marker size marker_symbol : float, default 'circle-open' Marker style @@ -517,7 +528,7 @@ def _vertex_trace(self, obj_name, vertex_attr='vertex', color='green', ---------- obj_name : str Name of the object to draw - color : Union[str, np.ndarray], optional + color : Union[str, np.ndarray], default 'green' Color of markers/lines or (N) list of color of markers/lines markersize : float, default 10 Marker size @@ -579,6 +590,48 @@ def _point_trace(self, obj_name, point_attr, **kwargs): return scatter_points( points, hovertext=np.array(hovertext), name=name, **kwargs) + def _direction_trace(self, obj_name, color='black', **kwargs): + """Draws a start-direction arrow per object instance. + + Parameters + ---------- + obj_name : str + Name of the object to draw + color : Union[str, np.ndarray], default 'black' + Color of markers/lines or (N) list of color of markers/lines + **kwargs : dict, optional + List of additional arguments to pass to :func:`scatter_arrows` + + Returns + ------- + list + List of arrow traces + """ + # Define the name of the trace + name = ' '.join(obj_name.split('_')).capitalize()[:-1] + ' directions' + + # Fetch the direction of each object + obj_type = obj_name.split('_')[-1][:-1].capitalize() + point_list, dir_list, hovertext = [], [], [] + for i, obj in enumerate(self.data[obj_name]): + # Skip empty true objects + if obj.is_truth and not len(getattr(obj, self.truth_index_mode)): + continue + + # Append the direction of this object and the label + point_list.append(obj.start_point) + dir_list.append(obj.start_dir) + hovertext.append(f'{obj_type} {i} direction') + + points, dirs = np.empty((0, 3)), np.empty((0, 3)) + if len(point_list): + points = np.vstack(point_list) + dirs = np.vstack(dir_list) + + return scatter_arrows( + points, dirs, hovertext=np.array(hovertext), name=name, + color=color, **kwargs) + def _flash_trace(self, obj_name, matched_only, **kwargs): """Draw the cumulative PEs of flashes that have
been matched to interactions specified by `obj_name`.