Merge pull request #326 from int-brain-lab/glm_improvements

oliche · web-flow · commit cfcb2583d862 · 2021-09-14T12:05:12.000+01:00
Bugfix to Sequential Selection of (G)LM features
diff --git a/brainbox/io/one.py b/brainbox/io/one.py
@@ -70,11 +70,11 @@ def load_channel_locations(eid, one=None, probe=None, aligned=False):
             counts = [0]
         else:
             tracing = [(insertions.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).
-                       get('tracing_exists', False))]
+                        get('tracing_exists', False))]
             resolved = [(insertions.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).
-                        get('alignment_resolved', False))]
+                         get('alignment_resolved', False))]
             counts = [(insertions.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).
-                      get('alignment_count', 0))]
+                       get('alignment_count', 0))]
         probe_id = [insertions['id']]
     # No specific probe specified, load any that is available
     # Need to catch for the case where we have two of the same probe insertions
@@ -420,7 +420,7 @@ def load_wheel_reaction_times(eid, one=None):
 
 
 def load_trials_df(eid, one=None, maxlen=None, t_before=0., t_after=0., ret_wheel=False,
-                   ret_abswheel=False, wheel_binsize=0.02, addtl_types=()):
+                   ret_abswheel=False, ext_DLC=False, wheel_binsize=0.02, addtl_types=[]):
     """
     TODO Test this with new ONE
     Generate a pandas dataframe of per-trial timing information about a given session.
@@ -451,6 +451,8 @@ def load_trials_df(eid, one=None, maxlen=None, t_before=0., t_after=0., ret_whee
         Whether to return the time-resampled wheel velocity trace, by default False
     ret_abswheel : bool, optional
         Whether to return the time-resampled absolute wheel velocity trace, by default False
+    ext_DLC : bool, optional
+        Whether to extract DLC data, by default False
     wheel_binsize : float, optional
         Time bins to resample wheel velocity to, by default 0.02
     addtl_types : list, optional
diff --git a/brainbox/modeling/linear.py b/brainbox/modeling/linear.py
@@ -59,16 +59,38 @@ def __init__(self, design_matrix, spk_times, spk_clu,
     def _fit(self, dm, binned, cells=None):
         """
         Fitting primitive that brainbox.NeuralModel.fit method will call
+
+        Parameters
+        ----------
+        dm : np.ndarray
+            Design matrix to use for fitting
+        binned : np.ndarray
+            Array of binned spike times. Must share first dimension with dm
+        cells : iterable with .shape attribute, optional
+            List of cells which are being fit. Used to generate index for output
+            coefficients and intercepts, must share shape with second dimension
+            of binned. When None will default to a list of all cells in the model object,
+            by default None
+
+        Returns
+        -------
+        coefs : pd.Series
+            Series containing fit coefficients for cells
+        intercepts : pd.Series
+            Series containing intercepts for fits.
         """
         if cells is None:
             cells = self.clu_ids.flatten()
+        if cells.shape[0] != binned.shape[1]:
+            raise ValueError('Length of cells does not match shape of binned')
+
         coefs = pd.Series(index=cells, name='coefficients', dtype=object)
         intercepts = pd.Series(index=cells, name='intercepts')
 
         lm = self.estimator.fit(dm, binned)
         weight, intercept = lm.coef_, lm.intercept_
         for cell in cells:
-            cell_idx = np.argwhere(self.clu_ids == cell)[0, 0]
+            cell_idx = np.argwhere(cells == cell)[0, 0]
             coefs.at[cell] = weight[cell_idx, :]
             intercepts.at[cell] = intercept[cell_idx]
         return coefs, intercepts
@@ -84,7 +106,6 @@ def score(self):
         """
         if not hasattr(self, 'coefs'):
             raise AttributeError('Model has not been fit yet.')
-
         testmask = np.isin(self.design.trlabels, self.testinds).flatten()
         dm, binned = self.design[testmask, :], self.binnedspikes[testmask]
 
diff --git a/brainbox/modeling/neural_model.py b/brainbox/modeling/neural_model.py
@@ -103,7 +103,7 @@ def __init__(self, design_matrix, spk_times, spk_clu,
         self.design = design_matrix
         self.spikes = spks
         self.clu = clu
-        self.clu_ids = np.argwhere(np.sum(trialspiking, axis=0) > mintrials)
+        self.clu_ids = np.argwhere(np.sum(trialspiking, axis=0) > mintrials).flatten()
         self.traininds = traininds
         self.testinds = testinds
         self.stepwise = stepwise
diff --git a/brainbox/modeling/poisson.py b/brainbox/modeling/poisson.py
@@ -38,16 +38,19 @@ def _fit(self, dm, binned, cells=None, noncovwarn=False):
         alpha : float
             Regularization strength, applied as multiplicative constant on ridge regularization.
         cells : list
-            List of cells which should be fit. If None is passed, will default to fitting all cells
-            in clu_ids
+            List of cell labels for columns in binned. Will default to all cells in model if None
+            is passed. Must be of the same length as columns in binned. By default None.
         """
         if cells is None:
             cells = self.clu_ids.flatten()
+        if cells.shape[0] != binned.shape[1]:
+            raise ValueError('Length of cells does not match shape of binned')
+
         coefs = pd.Series(index=cells, name='coefficients', dtype=object)
         intercepts = pd.Series(index=cells, name='intercepts')
         nonconverged = []
         for cell in tqdm(cells, 'Fitting units:', leave=False):
-            cell_idx = np.argwhere(self.clu_ids == cell)[0, 0]
+            cell_idx = np.argwhere(cells == cell)[0, 0]
             cellbinned = binned[:, cell_idx]
             with catch_warnings(record=True) as w:
                 fitobj = PoissonRegressor(alpha=self.alpha,
diff --git a/brainbox/modeling/utils.py b/brainbox/modeling/utils.py
@@ -101,7 +101,7 @@ def fit(self, progress=False):
                 new_feature_idx, nf_score = self._get_best_new_feature(current_mask, cells)
                 for cell in cells:
                     maskdf.at[cell, self.features[new_feature_idx.loc[cell]]] = True
-                    seqdf.loc[cell, i] = self.features[new_feature_idx]
+                    seqdf.loc[cell, i] = self.features[new_feature_idx.loc[cell]]
                     scoredf.loc[cell, i] = nf_score.loc[cell]
         self.support_ = maskdf
         self.sequences_ = seqdf
@@ -110,7 +110,8 @@ def fit(self, progress=False):
     def _get_best_new_feature(self, mask, cells):
         mask = np.array(mask)
         candidate_features = np.flatnonzero(~mask)
-        my = self.model.binnedspikes[self.train]
+        cell_idxs = np.argwhere(np.isin(self.model.clu_ids, cells)).flatten()
+        my = self.model.binnedspikes[np.ix_(self.train, cell_idxs)]
         scores = pd.DataFrame(index=cells, columns=candidate_features, dtype=float)
         for feature_idx in candidate_features:
             candidate_mask = mask.copy()