Merge pull request #39 from michellab/bugfix-close-plots

Roy-Haolin-Du · web-flow · commit 863a03ceb2f5 · 2025-02-28T14:27:01.000Z
Ensure all matplotlib resources get closed after analysis
diff --git a/a3fe/analyse/plot.py b/a3fe/analyse/plot.py
@@ -544,6 +544,8 @@ def plot_equilibration_time(lam_windows: _List["LamWindows"], output_dir: str) -
         transparent=False,
     )
 
+    _plt.close(fig)
+
 
 def plot_overlap_mat(
     ax: _plt.Axes,
@@ -735,6 +737,8 @@ def plot_overlap_mats(
     )
     fig.savefig(name)
 
+    _plt.close(fig)
+
 
 def plot_convergence(
     fracts: _np.ndarray,
diff --git a/a3fe/run/stage.py b/a3fe/run/stage.py
@@ -9,6 +9,7 @@
 import threading as _threading
 from copy import deepcopy as _deepcopy
 from math import ceil as _ceil
+import matplotlib.pyplot as _plt
 from multiprocessing import get_context as _get_context
 from time import sleep as _sleep
 from typing import Any as _Any
@@ -771,171 +772,175 @@ def analyse(
                     "Despite equilibration being detected, no equilibration time was found."
                 )
 
-        if get_frnrg:
-            self._logger.info(
-                f"Computing free energy changes using the MBAR for runs {run_nos}"
-            )
+        try:  # Conduct analysis
+            if get_frnrg:
+                self._logger.info(
+                    f"Computing free energy changes using the MBAR for runs {run_nos}"
+                )
 
-            # Remove unequilibrated data from the equilibrated output directory
-            for win in self.lam_windows:
-                win._write_equilibrated_simfiles()
+                # Remove unequilibrated data from the equilibrated output directory
+                for win in self.lam_windows:
+                    win._write_equilibrated_simfiles()
+
+                # Run MBAR and compute mean and 95 % C.I. of free energy
+                if not slurm:
+                    free_energies, errors, mbar_outfiles, _ = _run_mbar(
+                        run_nos=run_nos,
+                        output_dir=self.output_dir,
+                        percentage_end=fraction * 100,
+                        percentage_start=0,
+                        subsampling=subsampling,
+                        equilibrated=True,
+                    )
+                else:
+                    jobs, mbar_outfiles, tmp_simfiles = _submit_mbar_slurm(
+                        output_dir=self.output_dir,
+                        virtual_queue=self.virtual_queue,
+                        run_nos=run_nos,
+                        run_somd_dir=self.input_dir,
+                        percentage_end=fraction * 100,
+                        percentage_start=0,
+                        subsampling=subsampling,
+                        equilibrated=True,
+                    )
 
-            # Run MBAR and compute mean and 95 % C.I. of free energy
-            if not slurm:
-                free_energies, errors, mbar_outfiles, _ = _run_mbar(
-                    run_nos=run_nos,
+                    free_energies, errors, *_ = _collect_mbar_slurm(
+                        output_dir=self.output_dir,
+                        run_nos=run_nos,
+                        jobs=jobs,
+                        mbar_out_files=mbar_outfiles,
+                        virtual_queue=self.virtual_queue,
+                        tmp_simfiles=tmp_simfiles,
+                    )
+
+                mean_free_energy = _np.mean(free_energies)
+                # Gaussian 95 % C.I.
+                conf_int = (
+                    _stats.t.interval(
+                        0.95,
+                        len(free_energies) - 1,
+                        mean_free_energy,
+                        scale=_stats.sem(free_energies),
+                    )[1]
+                    - mean_free_energy
+                )  # 95 % C.I.
+
+                # Write overall MBAR stats to file
+                with open(f"{self.output_dir}/overall_stats.dat", "a") as ofile:
+                    if get_frnrg:
+                        ofile.write(
+                            "###################################### Free Energies ########################################\n"
+                        )
+                        ofile.write(
+                            f"Mean free energy: {mean_free_energy: .3f} + /- {conf_int:.3f} kcal/mol\n"
+                        )
+                        for i in range(len(free_energies)):
+                            ofile.write(
+                                f"Free energy from run {i + 1}: {free_energies[i]: .3f} +/- {errors[i]:.3f} kcal/mol\n"
+                            )
+                        ofile.write(
+                            "Errors are 95 % C.I.s based on the assumption of a Gaussian distribution of free energies\n"
+                        )
+                        ofile.write(f"Runs analysed: {run_nos}\n")
+
+                # Plot overlap matrices and PMFs
+                _plot_overlap_mats(
                     output_dir=self.output_dir,
-                    percentage_end=fraction * 100,
-                    percentage_start=0,
-                    subsampling=subsampling,
-                    equilibrated=True,
+                    nlam=len(self.lam_windows),
+                    mbar_outfiles=mbar_outfiles,
                 )
-            else:
-                jobs, mbar_outfiles, tmp_simfiles = _submit_mbar_slurm(
+                _plot_mbar_pmf(mbar_outfiles, self.output_dir)
+                equilibrated_gradient_data = _GradientData(
+                    lam_winds=self.lam_windows, equilibrated=True
+                )
+                _plot_overlap_mats(
                     output_dir=self.output_dir,
-                    virtual_queue=self.virtual_queue,
-                    run_nos=run_nos,
-                    run_somd_dir=self.input_dir,
-                    percentage_end=fraction * 100,
-                    percentage_start=0,
-                    subsampling=subsampling,
-                    equilibrated=True,
+                    nlam=len(self.lam_windows),
+                    predicted=True,
+                    gradient_data=equilibrated_gradient_data,
                 )
 
-                free_energies, errors, *_ = _collect_mbar_slurm(
+            # Plot RMSDs
+            if plot_rmsds:
+                self._logger.info("Plotting RMSDs")
+                _plot_rmsds(
+                    lam_windows=self.lam_windows,
                     output_dir=self.output_dir,
-                    run_nos=run_nos,
-                    jobs=jobs,
-                    mbar_out_files=mbar_outfiles,
-                    virtual_queue=self.virtual_queue,
-                    tmp_simfiles=tmp_simfiles,
+                    selection="resname LIG and (not name H*)",
                 )
 
-            mean_free_energy = _np.mean(free_energies)
-            # Gaussian 95 % C.I.
-            conf_int = (
-                _stats.t.interval(
-                    0.95,
-                    len(free_energies) - 1,
-                    mean_free_energy,
-                    scale=_stats.sem(free_energies),
-                )[1]
-                - mean_free_energy
-            )  # 95 % C.I.
-
-            # Write overall MBAR stats to file
-            with open(f"{self.output_dir}/overall_stats.dat", "a") as ofile:
-                if get_frnrg:
-                    ofile.write(
-                        "###################################### Free Energies ########################################\n"
-                    )
-                    ofile.write(
-                        f"Mean free energy: {mean_free_energy: .3f} + /- {conf_int:.3f} kcal/mol\n"
-                    )
-                    for i in range(len(free_energies)):
-                        ofile.write(
-                            f"Free energy from run {i + 1}: {free_energies[i]: .3f} +/- {errors[i]:.3f} kcal/mol\n"
-                        )
-                    ofile.write(
-                        "Errors are 95 % C.I.s based on the assumption of a Gaussian distribution of free energies\n"
-                    )
-                    ofile.write(f"Runs analysed: {run_nos}\n")
-
-            # Plot overlap matrices and PMFs
-            _plot_overlap_mats(
-                output_dir=self.output_dir,
-                nlam=len(self.lam_windows),
-                mbar_outfiles=mbar_outfiles,
-            )
-            _plot_mbar_pmf(mbar_outfiles, self.output_dir)
+            # Analyse the gradient data and make plots
+            self._logger.info("Plotting gradients data")
             equilibrated_gradient_data = _GradientData(
-                lam_winds=self.lam_windows, equilibrated=True
+                lam_winds=self.lam_windows, equilibrated=True, run_nos=run_nos
             )
-            _plot_overlap_mats(
+            for plot_type in [
+                "mean",
+                "stat_ineff",
+                "integrated_sem",
+                "integrated_var",
+                "pred_best_simtime",
+            ]:
+                _plot_gradient_stats(
+                    gradients_data=equilibrated_gradient_data,
+                    output_dir=self.output_dir,
+                    plot_type=plot_type,
+                )
+            _plot_gradient_hists(
+                gradients_data=equilibrated_gradient_data,
                 output_dir=self.output_dir,
-                nlam=len(self.lam_windows),
-                predicted=True,
-                gradient_data=equilibrated_gradient_data,
+                run_nos=run_nos,
             )
-
-        # Plot RMSDS
-        if plot_rmsds:
-            self._logger.info("Plotting RMSDs")
-            _plot_rmsds(
-                lam_windows=self.lam_windows,
+            _plot_gradient_timeseries(
+                gradients_data=equilibrated_gradient_data,
                 output_dir=self.output_dir,
-                selection="resname LIG and (not name H*)",
+                run_nos=run_nos,
             )
 
-        # Analyse the gradient data and make plots
-        self._logger.info("Plotting gradients data")
-        equilibrated_gradient_data = _GradientData(
-            lam_winds=self.lam_windows, equilibrated=True, run_nos=run_nos
-        )
-        for plot_type in [
-            "mean",
-            "stat_ineff",
-            "integrated_sem",
-            "integrated_var",
-            "pred_best_simtime",
-        ]:
-            _plot_gradient_stats(
-                gradients_data=equilibrated_gradient_data,
-                output_dir=self.output_dir,
-                plot_type=plot_type,
+            # Make plots of equilibration time
+            self._logger.info("Plotting equilibration times")
+            _plot_equilibration_time(
+                lam_windows=self.lam_windows, output_dir=self.output_dir
             )
-        _plot_gradient_hists(
-            gradients_data=equilibrated_gradient_data,
-            output_dir=self.output_dir,
-            run_nos=run_nos,
-        )
-        _plot_gradient_timeseries(
-            gradients_data=equilibrated_gradient_data,
-            output_dir=self.output_dir,
-            run_nos=run_nos,
-        )
 
-        # Make plots of equilibration time
-        self._logger.info("Plotting equilibration times")
-        _plot_equilibration_time(
-            lam_windows=self.lam_windows, output_dir=self.output_dir
-        )
+            # Check and plot the Gelman-Rubin stat
+            rhat_dict = _check_equil_multiwindow_gelman_rubin(
+                lambda_windows=self.lam_windows, output_dir=self.output_dir
+            )
+            rhat_equil = {lam: rhat < 1.1 for lam, rhat in rhat_dict.items()}
+            for lam, equil in rhat_equil.items():
+                if not equil:
+                    self._logger.warning(
+                        f"The Gelman-Rubin statistic for lambda = {lam} is greater than 1.1. "
+                        "This suggests that the repeat simulations have not converged to the "
+                        "same distirbution and there is a sampling issue."
+                    )
 
-        # Check and plot the Gelman-Rubin stat
-        rhat_dict = _check_equil_multiwindow_gelman_rubin(
-            lambda_windows=self.lam_windows, output_dir=self.output_dir
-        )
-        rhat_equil = {lam: rhat < 1.1 for lam, rhat in rhat_dict.items()}
-        for lam, equil in rhat_equil.items():
-            if not equil:
-                self._logger.warning(
-                    f"The Gelman-Rubin statistic for lambda = {lam} is greater than 1.1. "
-                    "This suggests that the repeat simulations have not converged to the "
-                    "same distirbution and there is a sampling issue."
-                )
+            # Write out stats
+            with open(f"{self.output_dir}/overall_stats.dat", "a") as ofile:
+                for win in self.lam_windows:
+                    ofile.write(
+                        f"Equilibration time for lambda = {win.lam}: {win.equil_time:.3f} ns per simulation\n"
+                    )
+                    ofile.write(
+                        f"Total time simulated for lambda = {win.lam}: {win.sims[0].tot_simtime:.3f} ns per simulation\n"
+                    )
 
-        # Write out stats
-        with open(f"{self.output_dir}/overall_stats.dat", "a") as ofile:
-            for win in self.lam_windows:
-                ofile.write(
-                    f"Equilibration time for lambda = {win.lam}: {win.equil_time:.3f} ns per simulation\n"
-                )
-                ofile.write(
-                    f"Total time simulated for lambda = {win.lam}: {win.sims[0].tot_simtime:.3f} ns per simulation\n"
-                )
+            if get_frnrg:
+                self._logger.info(
+                    f"Overall free energy changes: {free_energies} kcal mol-1"
+                )  # type: ignore
+                self._logger.info(f"Overall errors: {errors} kcal mol-1")  # type: ignore
+                self._logger.info(f"Analysed runs: {run_nos}")
+                # Update the internally-stored results
+                self._delta_g = free_energies
+                self._delta_g_er = errors
+                return free_energies, errors  # type: ignore
+            else:
+                return None, None
 
-        if get_frnrg:
-            self._logger.info(
-                f"Overall free energy changes: {free_energies} kcal mol-1"
-            )  # type: ignore
-            self._logger.info(f"Overall errors: {errors} kcal mol-1")  # type: ignore
-            self._logger.info(f"Analysed runs: {run_nos}")
-            # Update the interally-stored results
-            self._delta_g = free_energies
-            self._delta_g_er = errors
-            return free_energies, errors  # type: ignore
-        else:
-            return None, None
+        finally:  # Ensure that all plotting resources are closed
+            _plt.close("all")
 
     def get_results_df(self, save_csv: bool = True) -> _pd.DataFrame:
         """
diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst
@@ -6,6 +6,7 @@ Change Log
 ====================
 - Fix bug which caused somd.rst7 files in the ensemble equilibration directories to be incorrectly numbered in some cases.
 - Fix bug which caused the output directory to be incorrectly replaced with "output" in some cases.
+- Ensure that all matplotlib figures are closed after analysis to avoid continually increasing memory usage.
 
 0.3.1
 ====================

Original file line number	Diff line number	Diff line change
`@@ -544,6 +544,8 @@ def plot_equilibration_time(lam_windows: _List["LamWindows"], output_dir: str) -`
`544`	`544`	`transparent=False,`
`545`	`545`	`)`
`546`	`546`
	`547`	`+ _plt.close(fig)`
	`548`	`+`
`547`	`549`
`548`	`550`	`def plot_overlap_mat(`
`549`	`551`	`ax: _plt.Axes,`
`@@ -735,6 +737,8 @@ def plot_overlap_mats(`
`735`	`737`	`)`
`736`	`738`	`fig.savefig(name)`
`737`	`739`
	`740`	`+ _plt.close(fig)`
	`741`	`+`
`738`	`742`
`739`	`743`	`def plot_convergence(`
`740`	`744`	`fracts: _np.ndarray,`