@@ -727,9 +727,11 @@ def _fit_resample(self, X, y):
727
727
return X_resampled , y_resampled
728
728
729
729
730
- # @Substitution(
731
- # sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
732
- # random_state=_random_state_docstring)
730
+ @Substitution (
731
+ sampling_strategy = BaseOverSampler ._sampling_strategy_docstring ,
732
+ n_jobs = _n_jobs_docstring ,
733
+ random_state = _random_state_docstring ,
734
+ )
733
735
class SMOTENC (SMOTE ):
734
736
"""Synthetic Minority Over-sampling Technique for Nominal and Continuous.
735
737
@@ -748,64 +750,17 @@ class SMOTENC(SMOTE):
748
750
- mask array of shape (n_features, ) and ``bool`` dtype for which
749
751
``True`` indicates the categorical features.
750
752
751
- sampling_strategy : float, str, dict or callable, default='auto'
752
- Sampling information to resample the data set.
753
-
754
- - When ``float``, it corresponds to the desired ratio of the number of
755
- samples in the minority class over the number of samples in the
756
- majority class after resampling. Therefore, the ratio is expressed as
757
- :math:`\\alpha_{os} = N_{rm} / N_{M}` where :math:`N_{rm}` is the
758
- number of samples in the minority class after resampling and
759
- :math:`N_{M}` is the number of samples in the majority class.
760
-
761
- .. warning::
762
- ``float`` is only available for **binary** classification. An
763
- error is raised for multi-class classification.
764
-
765
- - When ``str``, specify the class targeted by the resampling. The
766
- number of samples in the different classes will be equalized.
767
- Possible choices are:
768
-
769
- ``'minority'``: resample only the minority class;
770
-
771
- ``'not minority'``: resample all classes but the minority class;
772
-
773
- ``'not majority'``: resample all classes but the majority class;
774
-
775
- ``'all'``: resample all classes;
776
-
777
- ``'auto'``: equivalent to ``'not majority'``.
778
-
779
- - When ``dict``, the keys correspond to the targeted classes. The
780
- values correspond to the desired number of samples for each targeted
781
- class.
782
-
783
- - When callable, function taking ``y`` and returns a ``dict``. The keys
784
- correspond to the targeted classes. The values correspond to the
785
- desired number of samples for each class.
786
-
787
- random_state : int, RandomState instance, default=None
788
- Control the randomization of the algorithm.
753
+ {sampling_strategy}
789
754
790
- - If int, ``random_state`` is the seed used by the random number
791
- generator;
792
- - If ``RandomState`` instance, random_state is the random number
793
- generator;
794
- - If ``None``, the random number generator is the ``RandomState``
795
- instance used by ``np.random``.
755
+ {random_state}
796
756
797
757
k_neighbors : int or object, default=5
798
758
If ``int``, number of nearest neighbours to use to construct synthetic
799
759
samples. If object, an estimator that inherits from
800
760
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
801
761
find the k_neighbors.
802
762
803
- n_jobs : int, default=None
804
- Number of CPU cores used during the cross-validation loop.
805
- ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
806
- ``-1`` means using all processors. See
807
- `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
808
- for more details.
763
+ {n_jobs}
809
764
810
765
See Also
811
766
--------
@@ -846,16 +801,16 @@ class SMOTENC(SMOTE):
846
801
>>> X, y = make_classification(n_classes=2, class_sep=2,
847
802
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
848
803
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
849
- >>> print('Original dataset shape (%s, %s)' % X.shape)
804
+ >>> print(f'Original dataset shape {{X.shape}}')
850
805
Original dataset shape (1000, 20)
851
- >>> print(f'Original dataset samples per class {Counter(y)}')
852
- Original dataset samples per class Counter({1: 900, 0: 100})
806
+ >>> print(f'Original dataset samples per class {{Counter(y)}}')
807
+ Original dataset samples per class Counter({{1: 900, 0: 100}})
853
808
>>> # simulate the 2 last columns to be categorical features
854
809
>>> X[:, -2:] = RandomState(10).randint(0, 4, size=(1000, 2))
855
810
>>> sm = SMOTENC(random_state=42, categorical_features=[18, 19])
856
811
>>> X_res, y_res = sm.fit_resample(X, y)
857
- >>> print(f'Resampled dataset samples per class {Counter(y_res)}')
858
- Resampled dataset samples per class Counter({0: 900, 1: 900})
812
+ >>> print(f'Resampled dataset samples per class {{Counter(y_res)}}')
813
+ Resampled dataset samples per class Counter({{0: 900, 1: 900}})
859
814
"""
860
815
861
816
_required_parameters = ["categorical_features" ]
0 commit comments