DOC fix the documentation of SMOTENC to use substitution (#805)

glemaitre · web-flow · commit b6621f907c19 · 2021-02-15T23:09:38.000+01:00
diff --git a/imblearn/over_sampling/_smote.py b/imblearn/over_sampling/_smote.py
@@ -727,9 +727,11 @@ def _fit_resample(self, X, y):
         return X_resampled, y_resampled
 
 
-# @Substitution(
-#     sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
-#     random_state=_random_state_docstring)
+@Substitution(
+    sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
+    n_jobs=_n_jobs_docstring,
+    random_state=_random_state_docstring,
+)
 class SMOTENC(SMOTE):
     """Synthetic Minority Over-sampling Technique for Nominal and Continuous.
 
@@ -748,64 +750,17 @@ class SMOTENC(SMOTE):
         - mask array of shape (n_features, ) and ``bool`` dtype for which
           ``True`` indicates the categorical features.
 
-    sampling_strategy : float, str, dict or callable, default='auto'
-        Sampling information to resample the data set.
-
-        - When ``float``, it corresponds to the desired ratio of the number of
-          samples in the minority class over the number of samples in the
-          majority class after resampling. Therefore, the ratio is expressed as
-          :math:`\\alpha_{os} = N_{rm} / N_{M}` where :math:`N_{rm}` is the
-          number of samples in the minority class after resampling and
-          :math:`N_{M}` is the number of samples in the majority class.
-
-            .. warning::
-               ``float`` is only available for **binary** classification. An
-               error is raised for multi-class classification.
-
-        - When ``str``, specify the class targeted by the resampling. The
-          number of samples in the different classes will be equalized.
-          Possible choices are:
-
-            ``'minority'``: resample only the minority class;
-
-            ``'not minority'``: resample all classes but the minority class;
-
-            ``'not majority'``: resample all classes but the majority class;
-
-            ``'all'``: resample all classes;
-
-            ``'auto'``: equivalent to ``'not majority'``.
-
-        - When ``dict``, the keys correspond to the targeted classes. The
-          values correspond to the desired number of samples for each targeted
-          class.
-
-        - When callable, function taking ``y`` and returns a ``dict``. The keys
-          correspond to the targeted classes. The values correspond to the
-          desired number of samples for each class.
-
-    random_state : int, RandomState instance, default=None
-        Control the randomization of the algorithm.
+    {sampling_strategy}
 
-        - If int, ``random_state`` is the seed used by the random number
-          generator;
-        - If ``RandomState`` instance, random_state is the random number
-          generator;
-        - If ``None``, the random number generator is the ``RandomState``
-          instance used by ``np.random``.
+    {random_state}
 
     k_neighbors : int or object, default=5
         If ``int``, number of nearest neighbours to use to construct synthetic
         samples.  If object, an estimator that inherits from
         :class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
         find the k_neighbors.
 
-    n_jobs : int, default=None
-        Number of CPU cores used during the cross-validation loop.
-        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
-        ``-1`` means using all processors. See
-        `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
-        for more details.
+    {n_jobs}
 
     See Also
     --------
@@ -846,16 +801,16 @@ class SMOTENC(SMOTE):
     >>> X, y = make_classification(n_classes=2, class_sep=2,
     ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
     ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
-    >>> print('Original dataset shape (%s, %s)' % X.shape)
+    >>> print(f'Original dataset shape {{X.shape}}')
     Original dataset shape (1000, 20)
-    >>> print(f'Original dataset samples per class {Counter(y)}')
-    Original dataset samples per class Counter({1: 900, 0: 100})
+    >>> print(f'Original dataset samples per class {{Counter(y)}}')
+    Original dataset samples per class Counter({{1: 900, 0: 100}})
     >>> # simulate the 2 last columns to be categorical features
     >>> X[:, -2:] = RandomState(10).randint(0, 4, size=(1000, 2))
     >>> sm = SMOTENC(random_state=42, categorical_features=[18, 19])
     >>> X_res, y_res = sm.fit_resample(X, y)
-    >>> print(f'Resampled dataset samples per class {Counter(y_res)}')
-    Resampled dataset samples per class Counter({0: 900, 1: 900})
+    >>> print(f'Resampled dataset samples per class {{Counter(y_res)}}')
+    Resampled dataset samples per class Counter({{0: 900, 1: 900}})
     """
 
     _required_parameters = ["categorical_features"]