From 6944dc097c61b211dee4dfc591cd7cd13f9d8a4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nahuel=20Unai=20Rosell=C3=B3=20Beneitez?= Date: Fri, 20 Jun 2025 02:43:03 -0400 Subject: [PATCH 1/3] Remove (0, 200) X axis boundaries This was probably enforced to deal with huge outliers, but they can't happen anymore (depending on the model) because the user has a choice to deal with the alignments that haven't finished. The plot is useless when dealing with values outside of that range, for instance when aligning with NNs (negative alignment score) --- corpus/filter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/corpus/filter.py b/corpus/filter.py index d1e4b0e3..685680c1 100644 --- a/corpus/filter.py +++ b/corpus/filter.py @@ -285,8 +285,7 @@ def _plot(self, recording_dict: Dict[str, List[Tuple[str, float]]]): score_np = self._get_alignment_scores_array(recording_dict) # Before filtering. - np.clip(score_np, 0, 200, out=score_np) - plt.hist(score_np, bins=100, range=(0, 200)) + plt.hist(score_np, bins=100) plt.xlabel("Average Maximum-Likelihood Score") plt.ylabel("Number of Segments") plt.title("Histogram of Alignment Scores") From b2b8c75347328a81f71f477e870634e73133e64d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nahuel=20Unai=20Rosell=C3=B3=20Beneitez?= Date: Fri, 20 Jun 2025 02:43:14 -0400 Subject: [PATCH 2/3] Improve docstring --- corpus/filter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/corpus/filter.py b/corpus/filter.py index 685680c1..185fcc41 100644 --- a/corpus/filter.py +++ b/corpus/filter.py @@ -272,10 +272,9 @@ def _write_output_segment_files(self, filtered_segments: List[str]): def _plot(self, recording_dict: Dict[str, List[Tuple[str, float]]]): """ - Plots an alignment score. + Plots the individual segment alignment scores. - Note: the plot only takes into account strictly positive values. - For more customizable plotting, it's suggested to use :class:`i6_core.mm.alignment.PlotAlignmentJob` instead. + :param recording_dict: Dictionary of recording full names to list of (segment full name, alignment score). """ import matplotlib import matplotlib.pyplot as plt From 8a61beb6da1d2ef30fcc14b21fabb7410f551fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nahuel=20Unai=20Rosell=C3=B3=20Beneitez?= Date: Fri, 20 Jun 2025 02:43:51 -0400 Subject: [PATCH 3/3] Add child plot for recording plot with overwritten behavior --- corpus/filter.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/corpus/filter.py b/corpus/filter.py index 185fcc41..defad19f 100644 --- a/corpus/filter.py +++ b/corpus/filter.py @@ -386,6 +386,26 @@ def _filter_segments( return filtered_segments + def _plot(self, recording_dict: Dict[str, List[Tuple[str, float]]]): + """ + Plots the average recording alignment scores. + + :param recording_dict: Dictionary of recording full names to list of (segment full name, alignment score). + """ + import matplotlib + import matplotlib.pyplot as plt + + matplotlib.use("Agg") + + score_np = self._get_alignment_scores_array(recording_dict) + + # Before filtering. + plt.hist(score_np, bins=100) + plt.xlabel("Average Maximum-Likelihood Score") + plt.ylabel("Number of Recordings") + plt.title("Histogram of Alignment Scores") + plt.savefig(fname=self.out_plot_avg.get_path()) + def run(self): # Alignments that haven't reached a final state can bias the mean computation, so they're removed. recording_dict = self._parse_alignment_logs(self.alignment_logs, remove_dnf_alignments=True)