@@ -171,162 +171,4 @@ def partial_transform(self, X):
def fit_transform(self, sequences, y=None):
    """Alias for ``fit_predict``.

    Forwards ``sequences`` and ``y`` to ``self.fit_predict`` and returns
    whatever that call returns.
    """
    labels = self.fit_predict(sequences, y)
    return labels
174-
175- # Add sample_weight for methods
class MultiSequenceClusterMixin_update(object):
    """Adaptor that lets a scikit-learn style clusterer take a list of sequences.

    The API for the scikit-learn Cluster object is, in ``fit()``, that it
    takes a single 2D array of shape (n_data_points, n_features).

    For clustering a collection of timeseries, we need to preserve the
    structure of which data point came from which sequence. If we simply
    concatenate the sequences together, we lose that information.

    This mixin changes ``fit()`` so that it accepts a list of sequences. Its
    implementation concatenates the sequences, calls the superclass
    ``fit()``, and then splits ``labels_`` back into the sequenced form.

    The methods additionally accept ``sample_weight``. It is only forwarded
    to the superclass when it is not ``None``, so superclasses whose methods
    do not take ``sample_weight`` keep working for callers that omit it.
    """

    # Passed to ``check_iter_of_sequences`` as ``allow_trajectory``.
    _allow_trajectory = False

    def fit(self, sequences, y=None, sample_weight=None):
        """Fit the clustering on the data

        Parameters
        ----------
        sequences : list of array-like, each of shape [sequence_length, n_features]
            A list of multivariate timeseries. Each sequence may have
            a different length, but they all must have the same number
            of features.
        y : ignored
            Accepted for API compatibility; not passed to the superclass.
        sample_weight : optional
            When not ``None`` it is handed unchanged to the superclass
            ``fit`` as the ``sample_weight`` keyword, next to the
            concatenated data. NOTE(review): presumably it must therefore
            hold one weight per row of the concatenated data -- confirm
            against the superclass being mixed with.

        Returns
        -------
        self
        """
        check_iter_of_sequences(sequences, allow_trajectory=self._allow_trajectory)
        concat = self._concat(sequences)
        parent = super(MultiSequenceClusterMixin_update, self)
        if sample_weight is None:
            parent.fit(concat)
        else:
            # Previously the weights were accepted and silently dropped.
            parent.fit(concat, sample_weight=sample_weight)

        # Not every superclass sets ``labels_``; re-split it only if present.
        if hasattr(self, 'labels_'):
            self.labels_ = self._split(self.labels_)

        return self

    def _concat(self, sequences):
        """Join ``sequences`` into one object and remember their lengths.

        The per-sequence lengths are stored on the instance so that
        ``_split`` and ``_split_indices`` can later undo the concatenation.

        Raises
        ------
        TypeError
            If ``sequences`` is empty, or its first element is neither a
            numpy array nor an ``md.Trajectory``.
        """
        self.__lengths = [len(s) for s in sequences]
        # Only the first element is inspected to decide how to concatenate.
        # Both branches guard on emptiness so that an empty input reaches
        # the TypeError below instead of failing with an IndexError.
        if len(sequences) > 0 and isinstance(sequences[0], np.ndarray):
            concat = np.ascontiguousarray(np.concatenate(sequences))
        elif len(sequences) > 0 and isinstance(sequences[0], md.Trajectory):
            # if the input sequences are not numpy arrays, we need to guess
            # how to concatenate them. this operation below works for mdtraj
            # trajectories (which is the use case that I want to be sure to
            # support), but in general the python container protocol doesn't
            # give us a generic way to make sure we merged sequences
            concat = sequences[0]
            if len(sequences) > 1:
                concat = concat.join(sequences[1:])
            # With a single trajectory ``concat`` is the caller's own object,
            # so this call is applied to that object directly.
            concat.center_coordinates()
        else:
            raise TypeError('sequences must be a list of numpy arrays '
                            'or ``md.Trajectory``s')

        assert sum(self.__lengths) == len(concat)
        return concat

    def _split(self, concat):
        """Cut ``concat`` back into one slice per sequence.

        Uses the lengths recorded by the most recent ``_concat`` call.
        """
        ends = np.cumsum(self.__lengths)
        return [concat[end - length: end]
                for (end, length) in zip(ends, self.__lengths)]

    def _split_indices(self, concat_inds):
        """Take indices in 'concatenated space' and return as pairs
        of (traj_i, frame_i)
        """
        clengths = np.append([0], np.cumsum(self.__lengths))
        # Row k of ``mapping`` holds (sequence index, index within sequence)
        # for position k of the concatenated data.
        mapping = np.zeros((clengths[-1], 2), dtype=int)
        for traj_i, (start, end) in enumerate(zip(clengths[:-1], clengths[1:])):
            mapping[start:end, 0] = traj_i
            mapping[start:end, 1] = np.arange(end - start)
        return mapping[concat_inds]

    def predict(self, sequences, y=None, sample_weight=None):
        """Predict the closest cluster each sample in each sequence in
        sequences belongs to.

        In the vector quantization literature, `cluster_centers_` is called
        the code book and each value returned by `predict` is the index of
        the closest code in the code book.

        Parameters
        ----------
        sequences : list of array-like, each of shape [sequence_length, n_features]
            A list of multivariate timeseries. Each sequence may have
            a different length, but they all must have the same number
            of features.
        y : ignored
        sample_weight : optional
            Passed by keyword to ``partial_predict`` for every sequence.

        Returns
        -------
        Y : list of arrays, each of shape [sequence_length,]
            Index of the closest center each sample belongs to.
        """
        check_iter_of_sequences(sequences, allow_trajectory=self._allow_trajectory)
        # Pass by keyword: the second positional slot of ``partial_predict``
        # is ``y``, not ``sample_weight``.
        return [self.partial_predict(X, sample_weight=sample_weight)
                for X in sequences]

    def partial_predict(self, X, y=None, sample_weight=None):
        """Predict the closest cluster each sample in X belongs to.

        In the vector quantization literature, `cluster_centers_` is called
        the code book and each value returned by `predict` is the index of
        the closest code in the code book.

        Parameters
        ----------
        X : array-like shape=(n_samples, n_features)
            A single timeseries. If it is an ``md.Trajectory``,
            ``center_coordinates()`` is called on it before predicting.
        y : ignored
        sample_weight : ignored
            Accepted for signature symmetry; the superclass ``predict`` is
            called with ``X`` only.

        Returns
        -------
        Y : array, shape=(n_samples,)
            Index of the cluster that each sample belongs to
        """
        if isinstance(X, md.Trajectory):
            X.center_coordinates()
        return super(MultiSequenceClusterMixin_update, self).predict(X)

    def fit_predict(self, sequences, y=None, sample_weight=None):
        """Performs clustering on X and returns cluster labels.

        Parameters
        ----------
        sequences : list of array-like, each of shape [sequence_length, n_features]
            A list of multivariate timeseries. Each sequence may have
            a different length, but they all must have the same number
            of features.
        y : ignored
        sample_weight : optional
            When not ``None`` it is forwarded by keyword to the superclass
            ``fit_predict`` if one exists, otherwise to ``self.fit``.

        Returns
        -------
        Y : list of ndarray, each of shape [sequence_length, ]
            Cluster labels
        """
        parent = super(MultiSequenceClusterMixin_update, self)
        # Only mention the keyword when the caller actually supplied weights.
        weight_kwargs = {}
        if sample_weight is not None:
            weight_kwargs['sample_weight'] = sample_weight

        if hasattr(parent, 'fit_predict'):
            check_iter_of_sequences(sequences, allow_trajectory=self._allow_trajectory)
            labels = parent.fit_predict(sequences, **weight_kwargs)
        else:
            self.fit(sequences, **weight_kwargs)
            labels = self.predict(sequences)

        # A flat label array still has to be cut back into per-sequence pieces.
        if not isinstance(labels, list):
            labels = self._split(labels)
        return labels

    def transform(self, sequences):
        """Alias for predict"""
        return self.predict(sequences)

    def partial_transform(self, X):
        """Alias for partial_predict"""
        return self.partial_predict(X)

    def fit_transform(self, sequences, y=None, sample_weight=None):
        """Alias for fit_predict"""
        return self.fit_predict(sequences, y, sample_weight=sample_weight)
174+
0 commit comments