Skip to content

Commit 571cbac

Browse files
authored
Back to msmbuilder-3.8.0
1 parent 34917e2 commit 571cbac

File tree

1 file changed

+1
-159
lines changed

1 file changed

+1
-159
lines changed

msmbuilder/cluster/base.py

Lines changed: 1 addition & 159 deletions
Original file line number | Diff line number | Diff line change
@@ -171,162 +171,4 @@ def partial_transform(self, X):
171171
def fit_transform(self, sequences, y=None):
    """Fit on ``sequences`` and return the labels; delegates to ``fit_predict``.

    Parameters
    ----------
    sequences : list of array-like
        Passed through unchanged to ``fit_predict``.
    y : optional
        Passed through unchanged to ``fit_predict``.

    Returns
    -------
    Whatever ``self.fit_predict(sequences, y)`` returns.
    """
    labels = self.fit_predict(sequences, y)
    return labels
174-
175-
# Add sample_weight for methods
176-
class MultiSequenceClusterMixin_update(object):
    """Mixin adapting a scikit-learn style clusterer to lists of sequences.

    Same adaptor idea as a plain multi-sequence mixin, but ``fit``,
    ``fit_predict`` and ``fit_transform`` additionally accept a
    ``sample_weight`` argument and forward it to the wrapped estimator.
    """

    # The API for the scikit-learn Cluster object is, in fit(), that
    # they take a single 2D array of shape (n_data_points, n_features).
    #
    # For clustering a collection of timeseries, we need to preserve
    # the structure of which data_point came from which sequence. If
    # we concatenate the sequences together, we lose that information.
    #
    # This mixin is basically a little "adaptor" that changes fit()
    # so that it accepts a list of sequences. Its implementation
    # concatenates the sequences, calls the superclass fit(), and
    # then splits the labels_ back into the sequenced form.

    # Whether ``md.Trajectory`` inputs are accepted by the sequence check.
    _allow_trajectory = False

    def fit(self, sequences, y=None, sample_weight=None):
        """Fit the clustering on the data

        Parameters
        ----------
        sequences : list of array-like, each of shape [sequence_length, n_features]
            A list of multivariate timeseries. Each sequence may have
            a different length, but they all must have the same number
            of features.
        y : None
            Ignored; present for API compatibility.
        sample_weight : array-like or list of array-like, optional
            Per-sample weights, given either as one flat array covering
            the concatenation of all sequences or as one array per
            sequence. Forwarded to the superclass ``fit`` only when it is
            not None, so superclasses without a ``sample_weight`` keyword
            keep working as long as no weights are supplied.

        Returns
        -------
        self
        """
        check_iter_of_sequences(sequences, allow_trajectory=self._allow_trajectory)
        concat = self._concat(sequences)
        parent = super(MultiSequenceClusterMixin_update, self)
        if sample_weight is None:
            parent.fit(concat)
        else:
            parent.fit(concat,
                       sample_weight=self._concat_weights(sample_weight))

        if hasattr(self, 'labels_'):
            # the superclass produced labels in concatenated space
            self.labels_ = self._split(self.labels_)

        return self

    def _concat(self, sequences):
        """Concatenate ``sequences`` into one object and remember their lengths.

        Raises
        ------
        TypeError
            If ``sequences`` is empty or its first element is neither a
            numpy array nor an ``md.Trajectory``.
        """
        if len(sequences) == 0:
            # Previously this fell through to ``sequences[0]`` and died
            # with an IndexError instead of the intended TypeError.
            raise TypeError('sequences must be a non-empty list of numpy '
                            'arrays or ``md.Trajectory``s')

        self.__lengths = [len(s) for s in sequences]
        first = sequences[0]
        if isinstance(first, np.ndarray):
            concat = np.ascontiguousarray(np.concatenate(sequences))
        elif isinstance(first, md.Trajectory):
            # if the input sequences are not numpy arrays, we need to guess
            # how to concatenate them. this operation below works for mdtraj
            # trajectories (which is the use case that I want to be sure to
            # support), but in general the python container protocol doesn't
            # give us a generic way to make sure we merged sequences
            trajs = sequences[:]
            concat = trajs[0]
            if len(sequences) > 1:
                concat = concat.join(trajs[1:])
            # NOTE(review): with a single trajectory this presumably centers
            # the caller's object in place -- confirm against mdtraj.
            concat.center_coordinates()
        else:
            raise TypeError('sequences must be a list of numpy arrays '
                            'or ``md.Trajectory``s')

        assert sum(self.__lengths) == len(concat)
        return concat

    def _concat_weights(self, sample_weight):
        """Flatten ``sample_weight`` to line up with the concatenated samples.

        Must be called after ``_concat`` so the sequence lengths are known.
        Accepts None or a scalar (returned unchanged), a flat array-like,
        or a list with one array-like per sequence.

        Raises
        ------
        ValueError
            If the flattened weights do not have one entry per sample.
        """
        if sample_weight is None or np.isscalar(sample_weight):
            return sample_weight

        if len(sample_weight) > 0 and np.ndim(sample_weight[0]) > 0:
            # one weight array per sequence
            weights = np.concatenate([np.asarray(w) for w in sample_weight])
        else:
            weights = np.asarray(sample_weight)

        n_samples = sum(self.__lengths)
        if len(weights) != n_samples:
            raise ValueError('sample_weight has %d entries but sequences '
                             'contain %d samples' % (len(weights), n_samples))
        return weights

    def _split(self, concat):
        """Cut ``concat`` back into per-sequence pieces using the stored lengths."""
        ends = np.cumsum(self.__lengths, dtype=int)
        return [concat[end - length: end]
                for end, length in zip(ends, self.__lengths)]

    def _split_indices(self, concat_inds):
        """Take indices in 'concatenated space' and return as pairs
        of (traj_i, frame_i)
        """
        # dtype=int keeps the offsets integral even for an empty lengths list
        clengths = np.append([0], np.cumsum(self.__lengths, dtype=int))
        mapping = np.zeros((clengths[-1], 2), dtype=int)
        for traj_i, (start, end) in enumerate(zip(clengths[:-1], clengths[1:])):
            mapping[start:end, 0] = traj_i
            mapping[start:end, 1] = np.arange(end - start)
        return mapping[concat_inds]

    def predict(self, sequences, y=None, sample_weight=None):
        """Predict the closest cluster each sample in each sequence in
        sequences belongs to.

        In the vector quantization literature, `cluster_centers_` is called
        the code book and each value returned by `predict` is the index of
        the closest code in the code book.

        Parameters
        ----------
        sequences : list of array-like, each of shape [sequence_length, n_features]
            A list of multivariate timeseries. Each sequence may have
            a different length, but they all must have the same number
            of features.
        y : None
            Ignored; present for API compatibility.
        sample_weight : optional
            Accepted for signature compatibility only; it is not used
            when assigning samples to clusters.

        Returns
        -------
        Y : list of arrays, each of shape [sequence_length,]
            Index of the closest center each sample belongs to.
        """
        check_iter_of_sequences(sequences, allow_trajectory=self._allow_trajectory)
        # The weights used to be passed positionally here and landed in
        # ``partial_predict``'s ``y`` slot; they are simply not forwarded now.
        return [self.partial_predict(X) for X in sequences]

    def partial_predict(self, X, y=None, sample_weight=None):
        """Predict the closest cluster each sample in X belongs to.

        In the vector quantization literature, `cluster_centers_` is called
        the code book and each value returned by `predict` is the index of
        the closest code in the code book.

        Parameters
        ----------
        X : array-like shape=(n_samples, n_features)
            A single timeseries.
        y : None
            Ignored; present for API compatibility.
        sample_weight : optional
            Accepted for signature compatibility only; not used.

        Returns
        -------
        Y : array, shape=(n_samples,)
            Index of the cluster that each sample belongs to
        """
        if isinstance(X, md.Trajectory):
            X.center_coordinates()
        return super(MultiSequenceClusterMixin_update, self).predict(X)

    def fit_predict(self, sequences, y=None, sample_weight=None):
        """Performs clustering on X and returns cluster labels.

        Parameters
        ----------
        sequences : list of array-like, each of shape [sequence_length, n_features]
            A list of multivariate timeseries. Each sequence may have
            a different length, but they all must have the same number
            of features.
        y : None
            Ignored; present for API compatibility.
        sample_weight : array-like or list of array-like, optional
            Per-sample weights; see ``fit``. Only forwarded when not None.

        Returns
        -------
        Y : list of ndarray, each of shape [sequence_length, ]
            Cluster labels
        """
        # Only pass the keyword along when weights were actually supplied,
        # so estimators without ``sample_weight`` support are unaffected.
        extra = {} if sample_weight is None else {'sample_weight': sample_weight}

        parent = super(MultiSequenceClusterMixin_update, self)
        if hasattr(parent, 'fit_predict'):
            check_iter_of_sequences(sequences, allow_trajectory=self._allow_trajectory)
            # The raw sequences are passed on purpose: a superclass
            # ``fit_predict`` that calls ``self.fit`` re-enters this
            # mixin's ``fit``, which does the concatenation itself.
            labels = parent.fit_predict(sequences, **extra)
        else:
            self.fit(sequences, **extra)
            labels = self.predict(sequences)

        if not isinstance(labels, list):
            labels = self._split(labels)
        return labels

    def transform(self, sequences):
        """Alias for predict"""
        return self.predict(sequences)

    def partial_transform(self, X):
        """Alias for partial_predict"""
        return self.partial_predict(X)

    def fit_transform(self, sequences, y=None, sample_weight=None):
        """Alias for fit_predict"""
        if sample_weight is None:
            return self.fit_predict(sequences, y)
        return self.fit_predict(sequences, y, sample_weight=sample_weight)
174+

0 commit comments

Comments
 (0)