Skip to content

Commit 5394f16

Browse files
Add algorithms docs (#916) (#922)
* added algorithms docs
* fix whitespace
* fix pep8
* fix docs for version < 0.23

(cherry picked from commit 6f01ba6)

Co-authored-by: Vladislav <[email protected]>
1 parent d4cd845 commit 5394f16

File tree

15 files changed

+948
-0
lines changed

15 files changed

+948
-0
lines changed

daal4py/sklearn/cluster/_dbscan.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,4 +272,29 @@ def fit(self, X, y=None, sample_weight=None):
272272

273273
@support_usm_ndarray()
274274
def fit_predict(self, X, y=None, sample_weight=None):
275+
"""
276+
Compute clusters from a data or distance matrix and predict labels.
277+
278+
Parameters
279+
----------
280+
X : {array-like, sparse matrix} of shape (n_samples, n_features), or \
281+
(n_samples, n_samples)
282+
Training instances to cluster, or distances between instances if
283+
``metric='precomputed'``. If a sparse matrix is provided, it will
284+
be converted into a sparse ``csr_matrix``.
285+
286+
y : Ignored
287+
Not used, present here for API consistency by convention.
288+
289+
sample_weight : array-like of shape (n_samples,), default=None
290+
Weight of each sample, such that a sample with a weight of at least
291+
``min_samples`` is by itself a core sample; a sample with a
292+
negative weight may inhibit its eps-neighbor from being core.
293+
Note that weights are absolute, and default to 1.
294+
295+
Returns
296+
-------
297+
labels : ndarray of shape (n_samples,)
298+
Cluster labels. Noisy samples are given the label -1.
299+
"""
275300
return super().fit_predict(X, y, sample_weight)

daal4py/sklearn/cluster/_k_means_0_22.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,12 +331,82 @@ def __init__(self, n_clusters=8, init='k-means++', n_init=10,
331331

332332
@support_usm_ndarray()
333333
def fit(self, X, y=None, sample_weight=None):
334+
"""
335+
Compute k-means clustering.
336+
337+
Parameters
338+
----------
339+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
340+
Training instances to cluster. It must be noted that the data
341+
will be converted to C ordering, which will cause a memory
342+
copy if the given data is not C-contiguous.
343+
If a sparse matrix is passed, a copy will be made if it's not in
344+
CSR format.
345+
346+
y : Ignored
347+
Not used, present here for API consistency by convention.
348+
349+
sample_weight : array-like of shape (n_samples,), default=None
350+
The weights for each observation in X. If None, all observations
351+
are assigned equal weight.
352+
353+
.. versionadded:: 0.20
354+
355+
Returns
356+
-------
357+
self : object
358+
Fitted estimator.
359+
"""
334360
return _fit(self, X, y=y, sample_weight=sample_weight)
335361

336362
@support_usm_ndarray()
337363
def predict(self, X, sample_weight=None):
364+
"""
365+
Predict the closest cluster each sample in X belongs to.
366+
367+
In the vector quantization literature, `cluster_centers_` is called
368+
the code book and each value returned by `predict` is the index of
369+
the closest code in the code book.
370+
371+
Parameters
372+
----------
373+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
374+
New data to predict.
375+
376+
sample_weight : array-like of shape (n_samples,), default=None
377+
The weights for each observation in X. If None, all observations
378+
are assigned equal weight.
379+
380+
Returns
381+
-------
382+
labels : ndarray of shape (n_samples,)
383+
Index of the cluster each sample belongs to.
384+
"""
338385
return _predict(self, X, sample_weight=sample_weight)
339386

340387
@support_usm_ndarray()
341388
def fit_predict(self, X, y=None, sample_weight=None):
389+
"""
390+
Compute cluster centers and predict cluster index for each sample.
391+
392+
Convenience method; equivalent to calling fit(X) followed by
393+
predict(X).
394+
395+
Parameters
396+
----------
397+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
398+
New data to cluster and assign labels to.
399+
400+
y : Ignored
401+
Not used, present here for API consistency by convention.
402+
403+
sample_weight : array-like of shape (n_samples,), default=None
404+
The weights for each observation in X. If None, all observations
405+
are assigned equal weight.
406+
407+
Returns
408+
-------
409+
labels : ndarray of shape (n_samples,)
410+
Index of the cluster each sample belongs to.
411+
"""
342412
return super().fit_predict(X, y, sample_weight)

daal4py/sklearn/cluster/_k_means_0_23.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,12 +448,82 @@ def __init__(
448448

449449
@support_usm_ndarray()
450450
def fit(self, X, y=None, sample_weight=None):
451+
"""
452+
Compute k-means clustering.
453+
454+
Parameters
455+
----------
456+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
457+
Training instances to cluster. It must be noted that the data
458+
will be converted to C ordering, which will cause a memory
459+
copy if the given data is not C-contiguous.
460+
If a sparse matrix is passed, a copy will be made if it's not in
461+
CSR format.
462+
463+
y : Ignored
464+
Not used, present here for API consistency by convention.
465+
466+
sample_weight : array-like of shape (n_samples,), default=None
467+
The weights for each observation in X. If None, all observations
468+
are assigned equal weight.
469+
470+
.. versionadded:: 0.20
471+
472+
Returns
473+
-------
474+
self : object
475+
Fitted estimator.
476+
"""
451477
return _fit(self, X, y=y, sample_weight=sample_weight)
452478

453479
@support_usm_ndarray()
454480
def predict(self, X, sample_weight=None):
481+
"""
482+
Predict the closest cluster each sample in X belongs to.
483+
484+
In the vector quantization literature, `cluster_centers_` is called
485+
the code book and each value returned by `predict` is the index of
486+
the closest code in the code book.
487+
488+
Parameters
489+
----------
490+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
491+
New data to predict.
492+
493+
sample_weight : array-like of shape (n_samples,), default=None
494+
The weights for each observation in X. If None, all observations
495+
are assigned equal weight.
496+
497+
Returns
498+
-------
499+
labels : ndarray of shape (n_samples,)
500+
Index of the cluster each sample belongs to.
501+
"""
455502
return _predict(self, X, sample_weight=sample_weight)
456503

457504
@support_usm_ndarray()
458505
def fit_predict(self, X, y=None, sample_weight=None):
506+
"""
507+
Compute cluster centers and predict cluster index for each sample.
508+
509+
Convenience method; equivalent to calling fit(X) followed by
510+
predict(X).
511+
512+
Parameters
513+
----------
514+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
515+
New data to cluster and assign labels to.
516+
517+
y : Ignored
518+
Not used, present here for API consistency by convention.
519+
520+
sample_weight : array-like of shape (n_samples,), default=None
521+
The weights for each observation in X. If None, all observations
522+
are assigned equal weight.
523+
524+
Returns
525+
-------
526+
labels : ndarray of shape (n_samples,)
527+
Index of the cluster each sample belongs to.
528+
"""
459529
return super().fit_predict(X, y, sample_weight)

daal4py/sklearn/decomposition/_pca.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343

4444

4545
class PCA(PCA_original):
46+
__doc__ = PCA_original.__doc__
47+
4648
def __init__(
4749
self,
4850
n_components=None,
@@ -291,6 +293,24 @@ def _transform_daal4py(self, X, whiten=False, scale_eigenvalues=True, check_X=Tr
291293

292294
@support_usm_ndarray()
293295
def transform(self, X):
296+
"""
297+
Apply dimensionality reduction to X.
298+
299+
X is projected on the first principal components previously extracted
300+
from a training set.
301+
302+
Parameters
303+
----------
304+
X : array-like of shape (n_samples, n_features)
305+
New data, where `n_samples` is the number of samples
306+
and `n_features` is the number of features.
307+
308+
Returns
309+
-------
310+
X_new : array-like of shape (n_samples, n_components)
311+
Projection of X onto the first principal components, where `n_samples`
312+
is the number of samples and `n_components` is the number of the components.
313+
"""
294314
_patching_status = PatchingConditionsChain(
295315
"sklearn.decomposition.PCA.transform")
296316
_dal_ready = _patching_status.and_conditions([
@@ -305,6 +325,28 @@ def transform(self, X):
305325

306326
@support_usm_ndarray()
307327
def fit_transform(self, X, y=None):
328+
"""
329+
Fit the model with X and apply the dimensionality reduction on X.
330+
331+
Parameters
332+
----------
333+
X : array-like of shape (n_samples, n_features)
334+
Training data, where `n_samples` is the number of samples
335+
and `n_features` is the number of features.
336+
337+
y : Ignored
338+
Not used, present here for API consistency by convention.
339+
340+
Returns
341+
-------
342+
X_new : ndarray of shape (n_samples, n_components)
343+
Transformed values.
344+
345+
Notes
346+
-----
347+
This method returns a Fortran-ordered array. To convert it to a
348+
C-ordered array, use 'np.ascontiguousarray'.
349+
"""
308350
U, S, _ = self._fit(X)
309351

310352
_patching_status = PatchingConditionsChain(

0 commit comments

Comments
 (0)