
Commit 04ccce5

DOC fixes some documentation glitches (#876)
1 parent 67d2122 commit 04ccce5

11 files changed, +30 -17 lines

build_tools/circle/build_doc.sh

Lines changed: 4 additions & 1 deletion
@@ -78,8 +78,11 @@ make_args="SPHINXOPTS=-T $make_args" # show full traceback on exception
 # Installing required system packages to support the rendering of math
 # notation in the HTML documentation and to optimize the image files
 sudo -E apt-get -yq update --allow-releaseinfo-change
+sudo -E apt-get -yq remove texlive-binaries --purge
 sudo -E apt-get -yq --no-install-suggests --no-install-recommends \
-    install dvipng gsfonts ccache zip optipng
+    install dvipng texlive-latex-base texlive-latex-extra \
+    texlive-latex-recommended texlive-fonts-recommended \
+    latexmk gsfonts ccache zip optipng

 # deactivate circleci virtualenv and setup a miniconda env instead
 if [[ `type -t deactivate` ]]; then

examples/api/plot_sampling_strategy_usage.py

Lines changed: 3 additions & 3 deletions
@@ -112,7 +112,7 @@
 ros = RandomOverSampler(sampling_strategy=sampling_strategy)
 X_res, y_res = ros.fit_resample(X, y)
 y_res.value_counts().plot.pie(autopct=autopct, ax=axs[1])
-axs[1].set_title("Over-sampling")
+_ = axs[1].set_title("Over-sampling")

 # %% [markdown]
 # With **cleaning method**, the number of samples in each class will not be
@@ -122,7 +122,7 @@
 from imblearn.under_sampling import TomekLinks

 sampling_strategy = "not minority"
-tl = TomekLinks(sampling_strategy)
+tl = TomekLinks(sampling_strategy=sampling_strategy)
 X_res, y_res = tl.fit_resample(X, y)
 ax = y_res.value_counts().plot.pie(autopct=autopct)
 _ = ax.set_title("Cleaning")
@@ -149,7 +149,7 @@
 ros = RandomOverSampler(sampling_strategy=sampling_strategy)
 X_res, y_res = ros.fit_resample(X, y)
 y_res.value_counts().plot.pie(autopct=autopct, ax=axs[1])
-axs[1].set_title("Under-sampling")
+_ = axs[1].set_title("Under-sampling")

 # %% [markdown]
 # `sampling_strategy` as a `list`
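Note on the change above: imbalanced-learn samplers expect `sampling_strategy` to be passed by keyword, and assigning plot calls to `_` keeps the rendered gallery from printing the returned matplotlib artists. A minimal sketch of the keyword-argument pattern, on a synthetic dataset that is not the one used in the example:

from collections import Counter

from sklearn.datasets import make_classification

from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import TomekLinks

# Illustrative data only (not the dataset from the example).
X, y = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)

# Over-sampling: resample every class but the majority one.
ros = RandomOverSampler(sampling_strategy="not majority", random_state=0)
X_over, y_over = ros.fit_resample(X, y)
print(Counter(y_over))

# Cleaning: sampling_strategy is passed by keyword, as in the fixed example.
tl = TomekLinks(sampling_strategy="not minority")
X_clean, y_clean = tl.fit_resample(X, y)
print(Counter(y_clean))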

examples/applications/plot_over_sampling_benchmark_lfw.py

Lines changed: 5 additions & 3 deletions
@@ -37,7 +37,7 @@
 george_bush_id = 1871 # Photos of George W. Bush
 bill_clinton_id = 531 # Photos of Bill Clinton
 classes = [george_bush_id, bill_clinton_id]
-classes_name = np.array(["B. Clinton", "G.W. Bush"], dtype=np.object)
+classes_name = np.array(["B. Clinton", "G.W. Bush"], dtype=object)

 # %%
 mask_photos = np.isin(data.target, classes)
@@ -49,12 +49,14 @@
 # We can check the ratio between the two classes.

 # %%
+import matplotlib.pyplot as plt
 import pandas as pd

 class_distribution = pd.Series(y).value_counts(normalize=True)
 ax = class_distribution.plot.barh()
 ax.set_title("Class distribution")
 pos_label = class_distribution.idxmin()
+plt.tight_layout()
 print(f"The positive label considered as the minority class is {pos_label}")

 # %% [markdown]
@@ -96,7 +98,6 @@
 # cross-validation.

 # %%
-import matplotlib.pyplot as plt
 from sklearn.metrics import RocCurveDisplay, roc_curve, auc

 disp = []
@@ -139,10 +140,11 @@
     d.plot(ax=ax, linestyle="--")
 ax.plot([0, 1], [0, 1], linestyle="--", color="k")
 ax.axis("square")
-fig.suptitle("Comparison of over-sampling methods with a 3NN classifier")
+fig.suptitle("Comparison of over-sampling methods \nwith a 3NN classifier")
 ax.set_xlim([0, 1])
 ax.set_ylim([0, 1])
 sns.despine(offset=10, ax=ax)
+plt.tight_layout()
 plt.show()

 # %% [markdown]
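The `dtype=np.object` fix above is needed because NumPy deprecated the `np.object` alias (NumPy 1.20) in favour of the builtin `object`. A tiny illustration:

import numpy as np

# The builtin ``object`` replaces the deprecated ``np.object`` alias.
classes_name = np.array(["B. Clinton", "G.W. Bush"], dtype=object)
print(classes_name.dtype)  # object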

examples/applications/plot_topic_classication.py

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@
 print(f"Training class distributions summary: {Counter(y_train)}")
 print(f"Test class distributions summary: {Counter(y_test)}")

-# % [markdown]
+# %% [markdown]
 # The usual scikit-learn pipeline
 # -------------------------------
 #
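This one-character fix matters because the gallery tooling only recognizes `# %%` as a cell separator; a single `# %` is treated as an ordinary comment, so the markdown cell was not rendered. A short, illustrative snippet of the percent cell format:

# %% [markdown]
# This text is rendered as a markdown cell by sphinx-gallery / jupytext.

# %%
# This is a regular code cell.
print("code cell")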

examples/combine/plot_comparison_combine.py

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@

 # %%
 _, ax = plt.subplots(figsize=(6, 6))
-ax.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8, edgecolor="k")
+_ = ax.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8, edgecolor="k")

 # %% [markdown]
 # The following function will be used to plot the sample space after resampling

examples/datasets/plot_make_imbalance.py

Lines changed: 2 additions & 1 deletion
@@ -29,6 +29,7 @@
 # original dataset.

 # %%
+import matplotlib.pyplot as plt
 import pandas as pd
 from sklearn.datasets import make_moons

@@ -42,6 +43,7 @@
     colorbar=False,
 )
 sns.despine(ax=ax, offset=10)
+plt.tight_layout()

 # %% [markdown]
 # Make a dataset imbalanced
@@ -61,7 +63,6 @@ def ratio_func(y, multiplier, minority_class):


 # %%
-import matplotlib.pyplot as plt
 from imblearn.datasets import make_imbalance

 fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
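For context, `make_imbalance` (used later in this example together with the `ratio_func` helper visible in the hunk header) derates chosen classes of a balanced dataset. A small sketch with an explicit dict, using illustrative numbers rather than the example's own:

from collections import Counter

from sklearn.datasets import make_moons

from imblearn.datasets import make_imbalance

# Balanced toy data (illustrative parameters only).
X, y = make_moons(n_samples=200, noise=0.2, random_state=0)
print(Counter(y))  # Counter({0: 100, 1: 100})

# Keep 100 samples of class 0 and only 30 of class 1.
X_imb, y_imb = make_imbalance(X, y, sampling_strategy={0: 100, 1: 30})
print(Counter(y_imb))  # Counter({0: 100, 1: 30})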

examples/evaluation/plot_classification_report.py

Lines changed: 4 additions & 1 deletion
@@ -14,6 +14,7 @@


 from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
 from sklearn.svm import LinearSVC
 from sklearn.model_selection import train_test_split

@@ -40,7 +41,9 @@
 )

 pipeline = pl.make_pipeline(
-    os.SMOTE(random_state=RANDOM_STATE), LinearSVC(random_state=RANDOM_STATE)
+    StandardScaler(),
+    os.SMOTE(random_state=RANDOM_STATE),
+    LinearSVC(max_iter=10_000, random_state=RANDOM_STATE),
 )

 # Split the data
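The change above adds a `StandardScaler` step in front of `SMOTE` and raises `max_iter` so that `LinearSVC` converges on the scaled features. A self-contained sketch of the same pipeline shape, on synthetic data rather than the example's dataset:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline

RANDOM_STATE = 42

# Synthetic, imbalanced data (illustrative only).
X, y = make_classification(n_samples=1_000, weights=[0.9, 0.1], random_state=RANDOM_STATE)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=RANDOM_STATE)

model = make_pipeline(
    StandardScaler(),  # scale before resampling and the linear SVM
    SMOTE(random_state=RANDOM_STATE),  # resampling is applied only when fitting, not at predict time
    LinearSVC(max_iter=10_000, random_state=RANDOM_STATE),
)
model.fit(X_train, y_train)
print(f"Test accuracy: {model.score(X_test, y_test):.3f}")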

examples/evaluation/plot_metrics.py

Lines changed: 4 additions & 1 deletion
@@ -52,11 +52,14 @@

 # %%
 from imblearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
 from imblearn.over_sampling import SMOTE
 from sklearn.svm import LinearSVC

 model = make_pipeline(
-    SMOTE(random_state=RANDOM_STATE), LinearSVC(random_state=RANDOM_STATE)
+    StandardScaler(),
+    SMOTE(random_state=RANDOM_STATE),
+    LinearSVC(max_iter=10_000, random_state=RANDOM_STATE),
 )

 # %% [markdown]
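Since this example is about metrics, a natural companion to the pipeline fix above is imbalanced-learn's `classification_report_imbalanced`, which reports per-class sensitivity and specificity. A self-contained sketch on synthetic data, not the example's own dataset:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

from imblearn.metrics import classification_report_imbalanced
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline

RANDOM_STATE = 42

# Synthetic, imbalanced data (illustrative only).
X, y = make_classification(n_samples=1_000, weights=[0.95, 0.05], random_state=RANDOM_STATE)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=RANDOM_STATE)

model = make_pipeline(
    StandardScaler(),
    SMOTE(random_state=RANDOM_STATE),
    LinearSVC(max_iter=10_000, random_state=RANDOM_STATE),
)
model.fit(X_train, y_train)
print(classification_report_imbalanced(y_test, model.predict(X_test)))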

examples/model_selection/plot_validation_curve.py

Lines changed: 1 addition & 1 deletion
@@ -118,5 +118,5 @@
 ax.set_xlim([1, 10])
 ax.set_ylim([0.4, 0.8])
 ax.legend(loc="lower right")
-
+plt.tight_layout()
 plt.show()

examples/under-sampling/plot_comparison_under_sampling.py

Lines changed: 2 additions & 2 deletions
@@ -266,10 +266,10 @@ def plot_decision_function(X, y, clf, ax, title=None):
 for ax, sampler in zip(axs, samplers):
     model = make_pipeline(sampler, clf).fit(X, y)
     plot_decision_function(
-        X, y, clf, ax[0], title=f"Decision function for {sampler.__class__.__name__}"
+        X, y, clf, ax[0], title=f"Decision function for \n{sampler.__class__.__name__}"
     )
     plot_resampling(
-        X, y, sampler, ax[1], title=f"Resampling using {sampler.__class__.__name__}"
+        X, y, sampler, ax[1], title=f"Resampling using \n{sampler.__class__.__name__}"
     )
 fig.tight_layout()
