Merge pull request #41 from ECCCO-mission/jmbhughes-patch

jmbhughes · web-flow · commit a4f29ff42087 · 2024-03-28T01:03:55.000-06:00
Add tqdm and row inversion mode
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,8 +1,22 @@
 # Changelog
 
+Follows [Keep A Changelog format](https://keepachangelog.com/en/1.1.0/)
+
+## 0.0.3
+
+### Added
+
+- Uses tqdm for progress bar tracking
+- Adds a dedicated row mode instead of starting in chunked mode and immediately switching to row mode
+
+### Changed
+
+- Prints elapsed seconds as integer instead of float
+- Expands documentation for missing parameters
+
 ## 0.0.2
 
-### New features
+### Added
 
 - Writes scores to a text file
 
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ for examples MaGIXS, CubIXSS, or ECCCO observations.
 
 ## Install
 
-`pip install overlappogram` or clone the repository and install manually
+`pip install overlappogram` or clone the repository and install manually.
 
 ## How to Use
 
@@ -25,7 +25,7 @@ for examples MaGIXS, CubIXSS, or ECCCO observations.
 `unfold ./path/to/config.toml`
 
 The `config.toml` file should be structured similar to the [example_config.toml](example_config.toml).
-We provide more description of the config file [in the documentation](https://eccco-mission.github.io/overlappogram/configuration.html). 
+We provide more description of the config file [in the documentation](https://eccco-mission.github.io/overlappogram/configuration.html).
 
 ## Getting Help
 
diff --git a/docs/conf.py b/docs/conf.py
@@ -9,7 +9,7 @@
 project = "overlappogram"
 copyright = "2024, J. Marcus Hughes, Dyana Beabout"
 author = "J. Marcus Hughes, Dyana Beabout"
-release = "0.0.2"
+release = "0.0.3"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -3,7 +3,7 @@
 Configuration
 ==============
 
-Each run of `unfold` requires a configuration file in the `TOML format <https://toml.io/en/>`. The configuration file
+Each run of `unfold` requires a configuration file in the `TOML format <https://toml.io/en/>`_. The configuration file
 is divided into sections:
 
 - **paths**: provides the input file paths.
@@ -15,7 +15,7 @@ is divided into sections:
 .. note::
     All sections and parameters are expected in the configuration file. There are no optional parameters with defaults.
 
-`We provide an example configuration file here. <https://github.com/ECCCO-mission/overlappogram/blob/main/example_config.toml>`
+`We provide an example configuration file here. <https://github.com/ECCCO-mission/overlappogram/blob/main/example_config.toml>`_
 
 **paths** section
 ------------------
@@ -47,12 +47,12 @@ There are six configurables for this section:
 - *field_angle_range*: a list of two integers defining the range of field angles to use in inversion. Units are arc seconds.
 - *response_dependency_name*: for now, only "logt" is supported.
 - *response_dependency_list*: a list of floats defining the logarithm of the temperature used in the response dependency.
-- *smooth_over*: ?
+- *smooth_over*: the method of smoothing; currently only "dependence" is supported.
 
 **model** section
 -------------------
 
-This section defines the parameters used by `Scikit-Learn's ElasticNet <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html>`.
+This section defines the parameters used by `Scikit-Learn's ElasticNet <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html>`_.
 You can find more exhaustive descriptions of the parameters at that link in the `sklearn` documentation.
 
 There are six configurables for this section:
diff --git a/docs/overview.rst b/docs/overview.rst
@@ -35,10 +35,15 @@ Optimization modes
 
 There are three possible optimization modes: "row,", "chunked," and "hybrid."
 
+Row mode
++++++++++
 "row" is the simplest optimization mode and is recommended for newcomers. In this optimization mode, the overlappogram
 is divided into rows. Each row gets its own ElasticNet model to use when carrying out the inversion. Thus, each row is
 inverted independently.
 
+
+Chunked mode
++++++++++++++
 "chunked" is the next simplest optimization row. The image is divided into a number of *chunks*
 or sets of contiguous, non-overlapping rows.
 The number of chunks is set by the *num_threads* parameter in the **execution** section of the configuration file.
@@ -53,6 +58,8 @@ parameter in the **model** section of the configuration to true when used the "c
     We do not yet understand when this happens and doesn't happen.
     Thus, it is recommended to avoid chunked optimization unless you are confident.
 
+Hybrid mode
+++++++++++++
 "hybrid" is a combination of the "chunked" and "row" optimization modes. The optimization begins in chunked mode but
 switches to the row mode to optimize CPU performance. When inverting an overlappogram, some rows are harder to invert
 than others and thus take more time. These rows tend to be adjacent and thus in the same chunk. Consequently, we noticed
diff --git a/example_config.toml b/example_config.toml
@@ -12,14 +12,14 @@ overwrite = true
 
 [inversion]
 solution_fov_width = 2
-detector_row_range = [300, 400] #[0, 792]
+detector_row_range = [0, 50]
 field_angle_range = [-1227, 1227]
 response_dependency_name = "logt"
 response_dependency_list = [5.7, 5.8, 5.9, 6.0 , 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8]
 smooth_over = 'dependence'
 
 [model]
-alphas = [3E-5] #[0.2, 0.1, 0.01, 0.005]
+alphas = [3E-5, 4E-5, 0.1] #[0.2, 0.1, 0.01, 0.005]
 rhos = [0.1]
 warm_start = false
 tol = 1E-2
diff --git a/overlappogram/cli.py b/overlappogram/cli.py
@@ -21,7 +21,10 @@
 @click.command()
 @click.argument("config")
 def unfold(config):
-    """Unfold an overlappogram given a configuration toml file."""  # TODO improve message
+    """Unfold an overlappogram given a configuration toml file.
+
+    See https://eccco-mission.github.io/overlappogram/configuration.html for the configuration file format.
+    """
 
     with open(config) as f:
         config = toml.load(f)
@@ -42,6 +45,8 @@ def unfold(config):
 
     for alpha in config["model"]["alphas"]:
         for rho in config["model"]["rhos"]:
+            print(80*"-")
+            print(f"Beginning inversion for alpha={alpha}, rho={rho}.")
             start = time.time()
             em_cube, prediction, scores, unconverged_rows = inversion.invert(
                 overlappogram,
@@ -54,9 +59,9 @@ def unfold(config):
             )
             end = time.time()
             print(
-                f"Inversion Time for alpha={alpha}, rho={rho}:",
-                end - start,
-                f"; {len(unconverged_rows)} unconverged rows",
+                f"Inversion for alpha={alpha}, rho={rho} took",
+                int(end - start),
+                f"seconds; {len(unconverged_rows)} unconverged rows",
             )
 
             postfix = (
diff --git a/overlappogram/inversion.py b/overlappogram/inversion.py
@@ -9,6 +9,7 @@
 from ndcube import NDCube
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import ElasticNet
+from tqdm import tqdm
 
 from overlappogram.response import prepare_response_function
 
@@ -60,6 +61,8 @@ def __init__(
             response_dependency_list=response_dependency_list,
         )
 
+        self._progress_bar = None  # initialized in invert call
+
     @property
     def is_inverted(self) -> bool:
         return not any(
@@ -107,7 +110,8 @@ def _progress_indicator(self, future):
         with self._thread_count_lock:
             if not future.cancelled():
                 self._completed_row_count += 1
-                print(f"{self._completed_row_count / self.total_row_count * 100:3.0f}% complete", end="\r")
+                #print(f"{self._completed_row_count / self.total_row_count * 100:3.0f}% complete", end="\r")
+                self._progress_bar.update(1)
 
     def _switch_to_row_inversion(self, model_config, alpha, rho, num_row_threads=50):
         self._mode = InversionMode.ROW
@@ -161,6 +165,29 @@ def _collect_results(self, mode_switch_thread_count, model_config, alpha, rho):
                 self._switch_to_row_inversion(model_config, alpha, rho)
                 break
 
+    def _start_row_inversion(self, model_config, alpha, rho, num_threads):
+        self.executors = [concurrent.futures.ThreadPoolExecutor(max_workers=num_threads)]
+
+        self.futures = {}
+        self._models = []
+        for i, row_index in enumerate(range(self._detector_row_range[0], self._detector_row_range[1])):
+            enet_model = ElasticNet(
+                alpha=alpha,
+                l1_ratio=rho,
+                tol=model_config["tol"],
+                max_iter=model_config["max_iter"],
+                precompute=False,  # setting this to true slows down performance dramatically
+                positive=True,
+                copy_X=False,
+                fit_intercept=False,
+                selection=model_config["selection"],
+                warm_start=False,  # warm start doesn't make sense in the row version
+            )
+            self._models.append(enet_model)
+            future = self.executors[-1].submit(self._invert_image_row, row_index, i)
+            future.add_done_callback(self._progress_indicator)
+            self.futures[future] = (row_index, i)
+
     def _start_chunk_inversion(self, model_config, alpha, rho, num_threads):
         starts = np.arange(
             self._detector_row_range[0],
@@ -255,6 +282,8 @@ def invert(
 
         self._mode = mode
 
+        self._progress_bar = tqdm(total=self.total_row_count, unit='row', delay=1, leave=False)
+
         self._models = []
         self._completed_row_count = 0
 
@@ -269,16 +298,16 @@ def invert(
             # mode never switches since count=0
             self._collect_results(0, model_config, alpha, rho)
         elif self._mode == InversionMode.ROW:
-            self._start_chunk_inversion(model_config, alpha, rho, num_threads)
-            # TODO: it would be better to have a mode to start in row but right now we fake it with a fast mode switch
-            self._collect_results(np.inf, model_config, alpha, rho)  # immediately switch mode
+            self._start_row_inversion(model_config, alpha, rho, num_threads)
             self._collect_results(np.inf, model_config, alpha, rho)
         else:
             raise ValueError("Invalid InversionMode.")
 
         for executor in self.executors:
             executor.shutdown()
 
+        self._progress_bar.close()
+
         return (
             np.transpose(self._em_data, (2, 0, 1)),
             self._inversion_prediction,
diff --git a/overlappogram/spectral.py b/overlappogram/spectral.py
@@ -47,12 +47,8 @@ def create_spectrally_pure_images(image_list: list, gnt_path: str, rsp_dep_list:
             for index in range(len(image_list)):
                 # Create spectrally pure data cube.
                 for em_data in image_list:
-                    # with fits.open(image_list[index]) as em_hdul:
-                    # em_data_cube = em_hdul[0].data.astype(np.float64)
                     em_data_cube = em_data.astype(np.float64)
                     em_data_cube = np.transpose(em_data_cube, axes=(1, 2, 0))
-                    # em_dep_list = em_hdul[1].data['logt']
-                    # print(em_dep_list)
                     if index == 0:
                         image_height, num_slits, num_logts = np.shape(em_data_cube)
                         gnt_data_cube = np.zeros((image_height, num_slits, num_gnts), dtype=np.float64)
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ requires = ["setuptools",
 
 [project]
 name = "overlappogram"
-version = "0.0.2"
+version = "0.0.3"
 dependencies = ["numpy",
     "astropy",
     "scikit-learn",
@@ -16,7 +16,8 @@ dependencies = ["numpy",
     "scipy",
     "ndcube",
     "toml",
-    "click"
+    "click",
+    "tqdm"
 ]
 requires-python = ">=3.9"
 authors = [