Clustering method as callable.

klapo · mtezzele · commit 4d896cc2e0cb · 2024-03-07T14:29:53.000-06:00
- Default is MiniBatchKMeans in both mrCOSTS/COSTS
- Adopted review suggestions
- Added doc string
- Cached class indexing for reconstruction.
diff --git a/pydmd/costs.py b/pydmd/costs.py
@@ -2,7 +2,7 @@
 from pydmd.bopdmd import BOPDMD
 from .utils import compute_rank, compute_svd
 import copy
-from sklearn.cluster import KMeans
+from sklearn.cluster import MiniBatchKMeans
 from sklearn.metrics import silhouette_score
 import matplotlib.pyplot as plt
 import xarray as xr
@@ -258,7 +258,6 @@ def build_windows(data, window_length, step_size, integer_windows=False):
         :type integer_windows: bool
         :return:
         """
-
         if integer_windows:
             n_split = np.floor(data.shape[1] / window_length).astype(int)
         else:
@@ -329,11 +328,9 @@ def _build_initizialization(self):
         :return: First guess of eigenvalues
         :rtype: numpy.ndarray or None
         """
-        # If not initial values are provided return None by default.
-        init_alpha = None
         # User provided initial eigenvalues.
         if self._initialize_artificially and self._init_alpha is not None:
-            init_alpha = self._init_alpha
+            return self._init_alpha
         # Initial eigenvalue guesses from kmeans clustering.
         elif (
             self._initialize_artificially
@@ -347,6 +344,7 @@ def _build_initizialization(self):
             init_alpha = init_alpha * np.tile(
                 [1, -1], int(self._svd_rank / self._n_components)
             )
+            return init_alpha
         # The user accidentally provided both methods of initializing the eigenvalues.
         elif (
             self._initialize_artificially
@@ -356,8 +354,9 @@ def _build_initizialization(self):
             raise ValueError(
                 "Only one of `init_alpha` and `cluster_centroids` can be provided"
             )
-
-        return init_alpha
+        # If no initial values are provided, return None by default.
+        else:
+            return None
 
     def fit(
         self,
@@ -516,7 +515,9 @@ def fit(
             self._amplitudes_array[
                 k, : optdmd.eigs.shape[0]
             ] = optdmd.amplitudes
-            self._window_means_array[k] = c.flatten()
+            self._window_means_array[k] = np.mean(
+                data_window, 1, keepdims=True
+            ).flatten()
             self._time_array[k] = original_time_window
 
             # Reset optdmd between iterations
@@ -549,16 +550,14 @@ def get_window_indices(self, k):
         if k == self._n_slides - 1 and self._non_integer_n_slide:
             return slice(-self._window_length, None)
         else:
-            return slice(
-                sample_start, sample_start + self._window_length
-            )
+            return slice(sample_start, sample_start + self._window_length)
 
     def cluster_omega(
         self,
         n_components,
         kmeans_kwargs=None,
         transform_method=None,
-        method="KMeans",
+        method=MiniBatchKMeans,
     ):
         """Clusters fitted eigenvalues into frequency bands by the imaginary component.
 
@@ -567,35 +566,33 @@ def cluster_omega(
         :type method: str
         :param n_components: Hyperparameter for k-means clustering, number of clusters.
         :type n_components: int
-        :param kmeans_kwargs: Arguments for KMeans clustering.
+        :param kmeans_kwargs: Arguments for KMeans clustering. The default is
+            random_state = 0.
         :type kmeans_kwargs: dict
         :param transform_method: How to transform omega. See docstring for valid options.
         :type transform_method: str or NoneType
         :return:
         """
         # Reshape the omega array into a 1d array
-        omega_array = self.omega_array
-        n_slides = omega_array.shape[0]
-        svd_rank = omega_array.shape[1]
-        omega_rshp = omega_array.reshape(n_slides * svd_rank)
+        n_slides = self.omega_array.shape[0]
+        svd_rank = self.omega_array.shape[1]
+        omega_rshp = self.omega_array.reshape(n_slides * svd_rank)
         omega_transform = self.transform_omega(
             omega_rshp, transform_method=transform_method
         )
 
         if kmeans_kwargs is None:
+            kmeans_kwargs = {}
             random_state = 0
-            kmeans_kwargs = {
-                "random_state": random_state,
-            }
-        if method == "KMeans":
-            clustering = KMeans(n_clusters=n_components, **kmeans_kwargs)
-        elif method == "KMediods":
-            from sklearn_extra.cluster import KMedoids
-
-            clustering = KMedoids(n_clusters=n_components, **kmeans_kwargs)
-        else:
+            kmeans_kwargs["random_state"] = kmeans_kwargs.get(
+                "random_state", random_state
+            )
+        clustering = method(n_clusters=n_components, **kmeans_kwargs)
+        if not hasattr(clustering, "fit_predict") and callable(
+            getattr(clustering, "fit_predict")
+        ):
             raise ValueError(
-                "Unrecognized clustering method {}.".format(method)
+                "Clustering method must have `fit_predict()` method."
             )
 
         omega_classes = clustering.fit_predict(np.atleast_2d(omega_transform).T)
@@ -615,9 +612,7 @@ def cluster_omega(
         self._transform_method = transform_method
         self._n_components = n_components
 
-        return self
-
-    def transform_omega(self, omega_array, transform_method=None):
+    def transform_omega(self, omega_array, transform_method="absolute"):
         """Transform omega, primarily for clustering.
         Options for transforming omega are:
             "period": :math:`\\frac{1}{\\omega}`
@@ -633,18 +628,14 @@ def transform_omega(self, omega_array, transform_method=None):
         :rtype: numpy.ndarray
         """
         # Apply a transformation to omega to improve frequency band separation
-        if transform_method is None or transform_method == "absolute":
+        if transform_method == "absolute":
             omega_transform = np.abs(omega_array.imag.astype("float"))
             self._omega_label = r"$|\omega|$"
             self._hist_kwargs = {"bins": 64}
         # Outstanding question: should this be the complex conjugate or
         # the imaginary component squared?
         elif transform_method == "square_frequencies":
-            # omega_transform = (np.conj(omega_array) * omega_array).real.astype(
-            #     "float"
-            # )
             omega_transform = (omega_array.imag**2).real.astype("float")
-
             self._omega_label = r"$|\omega|^{2}$"
             self._hist_kwargs = {"bins": 64}
         elif transform_method == "log10":
@@ -654,17 +645,9 @@ def transform_omega(self, omega_array, transform_method=None):
             omega_transform[~np.isfinite(omega_transform)] = zero_imputer
             self._omega_label = r"$log_{10}(|\omega|)$"
             self._hist_kwargs = {"bins": 64}
-            #     {
-            #     "bins": np.linspace(
-            #         np.min(np.log10(omega_transform[omega_transform > 0])),
-            #         np.max(np.log10(omega_transform[omega_transform > 0])),
-            #         64,
-            #     )
-            # }
         elif transform_method == "period":
             omega_transform = 1 / np.abs(omega_array.imag.astype("float"))
             self._omega_label = "Period"
-            # @ToDo: Specify bins like in log10 transform
             self._hist_kwargs = {"bins": 64}
         else:
             raise ValueError(
@@ -716,7 +699,7 @@ def cluster_hyperparameter_sweep(
         )
 
         for nind, n in enumerate(n_components_range):
-            _ = self.cluster_omega(
+            self.cluster_omega(
                 n_components=n, transform_method=transform_method
             )
 
@@ -872,11 +855,12 @@ def scale_reconstruction(
                 (self._n_components, self._n_data_vars, self._window_length)
             )
             for j in np.unique(self._omega_classes):
+                class_index = classification == j
                 xr_sep_window[j] = np.linalg.multi_dot(
                     [
-                        w[:, classification == j],
-                        np.diag(b[classification == j]),
-                        np.exp(omega[classification == j] * t),
+                        w[:, class_index],
+                        np.diag(b[class_index]),
+                        np.exp(omega[class_index] * t),
                     ]
                 ).real
 
@@ -1222,7 +1206,6 @@ def plot_time_series(
             scale_reconstruction_kwargs = {}
         xr_sep = self.scale_reconstruction(**scale_reconstruction_kwargs)
 
-        # ToDo: Make these kwargs adjustable inputs.
         fig, axes = plt.subplots(
             nrows=self.n_components + 2,
             sharex=True,
diff --git a/pydmd/mrcosts.py b/pydmd/mrcosts.py
@@ -178,9 +178,7 @@ def n_components_global(self):
     # @ToDo: Use the class variable instead of passing it around
     @property
     def omega_classes_interpolated(self):
-        """
-
-        Note, this returns the multi-resolution interpolation of omega classes.
+        """Returns the multi-resolution interpolation of omega classes
 
         :return: Ints for each omega value indicating which cluster it belongs to.
         :rtype: list of numpy.ndarray
@@ -191,11 +189,9 @@ def omega_classes_interpolated(self):
 
     @property
     def ragged_omega_classes(self):
-        """
+        """Omega classes for each decomposition level after global clustering.
 
-        Note, this returns a list of ragged numpy arrays.
-
-        :return: Ints for each omega value indicating which cluster it belongs to.
+        :return: list of classes for each omega value for each decomposition level.
         :rtype: list of numpy.ndarray
         """
         if self._omega_classes is None:
@@ -204,7 +200,8 @@ def ragged_omega_classes(self):
 
     @property
     def ragged_omega_array(self):
-        """
+        """Omega values for each decomposition level.
+
         :return: list of omega arrays for each decomposition level.
         :rtype: list of numpy.ndarray
         """
@@ -216,7 +213,8 @@ def ragged_omega_array(self):
 
     @property
     def ragged_modes_array(self):
-        """
+        """Modes for each decomposition level.
+
         :return: list of modes arrays for each decomposition level.
         :rtype: list of numpy.ndarray
         """
@@ -228,7 +226,8 @@ def ragged_modes_array(self):
 
     @property
     def ragged_amplitudes_array(self):
-        """
+        """Amplitudes for each decomposition level.
+
         :return: list of amplitudes arrays for each decomposition level.
         :rtype: list of numpy.ndarray
         """
@@ -240,7 +239,8 @@ def ragged_amplitudes_array(self):
 
     @staticmethod
     def _data_shape(data):
-        """
+        """Give the data shape.
+
         :return: Shape of the data for fitting.
         :rtype: Tuple of ints
         """
@@ -695,7 +695,6 @@ def plot_local_time_series(
         fig, axes = self.costs_array[level].plot_time_series(
             space_index,
             x_iter,
-            # plot_kwargs=plot_kwargs,
             scale_reconstruction_kwargs=scale_reconstruction_kwargs,
         )
 
@@ -707,7 +706,8 @@ def global_cluster_hyperparameter_sweep(
         transform_method=None,
         score_method=None,
         verbose=True,
-        method=None,
+        method=MiniBatchKMeans,
+        kmeans_kwargs=None,
     ):
         """
         Hyperparameter search for n_components for kmeans clustering.
@@ -741,11 +741,9 @@ def global_cluster_hyperparameter_sweep(
                 n_components=n,
                 transform_method=transform_method,
                 method=method,
+                kmeans_kwargs=kmeans_kwargs,
             )
 
-            if verbose:
-                print("scoring")
-                print(np.unique(omega_classes.reshape(-1, 1)))
             if score_method is None or score_method == "silhouette":
                 score[nind] = silhouette_score(
                     omega.reshape(-1, 1),
@@ -770,7 +768,7 @@ def global_cluster_omega(
         n_components=None,
         transform_method=None,
         kmeans_kwargs=None,
-        method="KMeans",
+        method=MiniBatchKMeans,
     ):
         """Performs frequency band clustering on the global distribution of omega.
 
@@ -785,6 +783,8 @@ def global_cluster_omega(
         Default value is "absolute". All transformations and clustering are performed on
         the imaginary portion of omega.
 
+        :param method: Clustering method following the sklearn pattern (has `fit_predict`)
+            and `n_clusters` keyword.
         :param n_components: The number of clusters to find.
         :type n_components: int
         :param transform_method: How to transform omega. See docstring for valid options.
@@ -820,21 +820,17 @@ def global_cluster_omega(
         )
 
         if kmeans_kwargs is None:
+            kmeans_kwargs = {}
             random_state = 0
-            kmeans_kwargs = {
-                "random_state": random_state,
-            }
-        if method == "KMeans":
-            clustering = MiniBatchKMeans(
-                n_clusters=n_components, **kmeans_kwargs
+            kmeans_kwargs["random_state"] = kmeans_kwargs.get(
+                "random_state", random_state
             )
-        elif method == "KMediods":
-            from sklearn_extra.cluster import KMedoids
-
-            clustering = KMedoids(n_clusters=n_components, **kmeans_kwargs)
-        else:
+        clustering = method(n_clusters=n_components, **kmeans_kwargs)
+        if not hasattr(clustering, "fit_predict") and callable(
+            getattr(clustering, "fit_predict")
+        ):
             raise ValueError(
-                "Unrecognized clustering method {}.".format(method)
+                "Clustering method must have `fit_predict()` method."
             )
 
         omega_classes = clustering.fit_predict(np.atleast_2d(omega_array).T)
@@ -874,7 +870,6 @@ def transform_omega(omega_array, transform_method=None):
         if transform_method is None or transform_method == "absolute":
             omega_array = np.abs(omega_array.imag.astype("float"))
         elif transform_method == "square_frequencies":
-            # omega_array = (np.conj(omega_array) * omega_array).astype("float")
             omega_array = (omega_array.imag**2).real.astype("float")
         elif transform_method == "period":
             omega_array = 1 / np.abs(omega_array.imag.astype("float"))
@@ -963,11 +958,12 @@ def global_scale_reconstruction(
                     )
                 )
                 for j in np.arange(0, self._n_components_global):
+                    class_ind = classification == j
                     xr_sep_window[j, :, :] = np.linalg.multi_dot(
                         [
-                            w[:, classification == j],
-                            np.diag(b[classification == j]),
-                            np.exp(omega[classification == j] * t),
+                            w[:, class_ind],
+                            np.diag(b[class_ind]),
+                            np.exp(omega[class_ind] * t),
                         ]
                     )
 
diff --git a/tests/test_costs.py b/tests/test_costs.py
@@ -8,11 +8,11 @@
 
 def overlapping_oscillators():
     """
-    Given a time vector t_eval = t1, t2, ..., evaluates and returns
-    the snapshots z(t1), z(t2), ... as columns of the matrix Z.
-    Simulates data z given by the system of ODEs
-        z' = Az
-    where A = [1 -2; 1 -1] and z_0 = [1, 0.1].
+    Simulates a system with two oscillators with occasionally overlapping
+    frequencies. This example was adapted from Dylewsky et al., 2019.
+
+    Oscillator #1: FitzHugh-Nagumo Model
+    Oscillator #2: Unforced Duffing Oscillator
     """
 
     def rhs_FNM(t, x, tau, a, b, Iext):
diff --git a/tests/test_mrcosts.py b/tests/test_mrcosts.py