Skip to content

Commit 2c7ed0f

Browse files
authored
MNT add sphinxcontrib-bibtex to manage citation (#638)
1 parent 0238083 commit 2c7ed0f

File tree

13 files changed

+260
-141
lines changed

13 files changed

+260
-141
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,4 +118,5 @@ cythonize.dat
118118
# build documentation
119119
doc/_build/
120120
doc/auto_examples/
121-
doc/generated/
121+
doc/generated/
122+
doc/bibtex/auto

build_tools/circle/build_doc.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,12 @@ conda update --yes --quiet conda
100100
conda create -n $CONDA_ENV_NAME --yes --quiet python=3.7
101101
source activate $CONDA_ENV_NAME
102102

103-
conda install --yes pip numpy scipy joblib pillow matplotlib sphinx \
104-
memory_profiler sphinx_rtd_theme pandas keras tensorflow=1
103+
conda install --yes pip numpy scipy joblib pillow matplotlib memory_profiler \
104+
sphinx sphinx_rtd_theme pandas keras tensorflow=1
105105
pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
106106
pip install -U git+https://github.com/sphinx-gallery/sphinx-gallery.git
107107
pip install -U git+https://github.com/numpy/numpydoc.git
108+
pip install -U git+https://github.com/mcmtroffaes/sphinxcontrib-bibtex.git
108109

109110
# Build and install imbalanced-learn in dev mode
110111
ls -l

doc/bibtex/refs.bib

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
@article{batista2004study,
2+
title={A study of the behavior of several methods for balancing machine learning training data},
3+
author={Batista, Gustavo EAPA and Prati, Ronaldo C and Monard, Maria Carolina},
4+
journal={ACM SIGKDD Explorations Newsletter},
5+
volume={6},
6+
number={1},
7+
pages={20--29},
8+
year={2004},
9+
publisher={ACM}
10+
}
11+
12+
@inproceedings{batista2003balancing,
13+
title={Balancing Training Data for Automated Annotation of Keywords: a Case Study},
14+
author={Batista, Gustavo EAPA and Bazzan, Ana LC and Monard, Maria Carolina},
15+
booktitle={WOB},
16+
pages={10--18},
17+
year={2003}
18+
}
19+
20+
@article{chen2004using,
21+
title={Using random forest to learn imbalanced data},
22+
author={Chen, Chao and Liaw, Andy and Breiman, Leo and others},
23+
journal={University of California, Berkeley},
24+
volume={110},
25+
number={1-12},
26+
pages={24},
27+
year={2004}
28+
}
29+
30+
@article{liu2008exploratory,
31+
title={Exploratory undersampling for class-imbalance learning},
32+
author={Liu, Xu-Ying and Wu, Jianxin and Zhou, Zhi-Hua},
33+
journal={IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics)},
34+
volume={39},
35+
number={2},
36+
pages={539--550},
37+
year={2008},
38+
publisher={IEEE}
39+
}
40+
41+
@article{seiffert2009rusboost,
42+
title={{RUSBoost}: A hybrid approach to alleviating class imbalance},
43+
author={Seiffert, Chris and Khoshgoftaar, Taghi M and Van Hulse, Jason and Napolitano, Amri},
44+
journal={IEEE Transactions on Systems, Man, and Cybernetics-Part A: Systems and Humans},
45+
volume={40},
46+
number={1},
47+
pages={185--197},
48+
year={2009},
49+
publisher={IEEE}
50+
}
51+
52+
@inproceedings{kubat1997addressing,
53+
title={Addressing the curse of imbalanced training sets: one-sided selection},
54+
author={Kubat, Miroslav and Matwin, Stan and others},
55+
booktitle={ICML},
56+
volume={97},
57+
pages={179--186},
58+
year={1997},
59+
organization={Nashville, USA}
60+
}
61+
62+
@article{barandela2003strategies,
63+
title={Strategies for learning in class imbalance problems},
64+
author={Barandela, Ricardo and S{\'a}nchez, Jos{\'e} Salvador and Garc{\'\i}a, V and Rangel, Edgar},
65+
journal={Pattern Recognition},
66+
volume={36},
67+
number={3},
68+
pages={849--851},
69+
year={2003},
70+
publisher={Elsevier Science Publishing Company, Inc.}
71+
}
72+
73+
@article{garcia2012effectiveness,
74+
title={On the effectiveness of preprocessing methods when dealing with different levels of class imbalance},
75+
author={Garc{\'\i}a, Vicente and S{\'a}nchez, Jos{\'e} Salvador and Mollineda, Ram{\'o}n Alberto},
76+
journal={Knowledge-Based Systems},
77+
volume={25},
78+
number={1},
79+
pages={13--21},
80+
year={2012},
81+
publisher={Elsevier}
82+
}
83+
84+
@inproceedings{he2008adasyn,
85+
title={{ADASYN}: Adaptive synthetic sampling approach for imbalanced learning},
86+
author={He, Haibo and Bai, Yang and Garcia, Edwardo A and Li, Shutao},
87+
booktitle={2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)},
88+
pages={1322--1328},
89+
year={2008},
90+
organization={IEEE}
91+
}
92+
93+
@article{chawla2002smote,
94+
title={{SMOTE}: synthetic minority over-sampling technique},
95+
author={Chawla, Nitesh V and Bowyer, Kevin W and Hall, Lawrence O and Kegelmeyer, W Philip},
96+
journal={Journal of Artificial Intelligence Research},
97+
volume={16},
98+
pages={321--357},
99+
year={2002}
100+
}
101+
102+
@inproceedings{han2005borderline,
103+
title={{Borderline-SMOTE}: a new over-sampling method in imbalanced data sets learning},
104+
author={Han, Hui and Wang, Wen-Yuan and Mao, Bing-Huan},
105+
booktitle={International conference on intelligent computing},
106+
pages={878--887},
107+
year={2005},
108+
organization={Springer}
109+
}
110+
111+
@inproceedings{nguyen2009borderline,
112+
title={Borderline over-sampling for imbalanced data classification},
113+
author={Nguyen, Hien M and Cooper, Eric W and Kamei, Katsuari},
114+
booktitle={Proceedings: Fifth International Workshop on Computational Intelligence \& Applications},
115+
volume={2009},
116+
number={1},
117+
pages={24--29},
118+
year={2009},
119+
organization={IEEE SMC Hiroshima Chapter}
120+
}
121+
122+
@article{last2017oversampling,
123+
title={Oversampling for Imbalanced Learning Based on {K-Means} and {SMOTE}},
124+
author={Last, Felix and Douzas, Georgios and Bacao, Fernando},
125+
journal={arXiv preprint arXiv:1711.00837},
126+
year={2017}
127+
}
128+
129+
@inproceedings{mani2003knn,
130+
title={{kNN} approach to unbalanced data distributions: a case study involving information extraction},
131+
author={Mani, Inderjeet and Zhang, I},
132+
booktitle={Proceedings of workshop on learning from imbalanced datasets},
133+
volume={126},
134+
year={2003}
135+
}
136+
137+
@article{tomek1976two,
138+
title={Two modifications of {CNN}},
139+
author={Tomek, Ivan},
140+
journal={IEEE Transactions on Systems, Man, and Cybernetics},
141+
volume={6},
142+
pages={769--772},
143+
year={1976}
144+
}
145+
146+
@article{wilson1972asymptotic,
147+
title={Asymptotic properties of nearest neighbor rules using edited data},
148+
author={Wilson, Dennis L},
149+
journal={IEEE Transactions on Systems, Man, and Cybernetics},
150+
number={3},
151+
pages={408--421},
152+
year={1972},
153+
publisher={IEEE}
154+
}
155+
156+
@article{tomek1976experiment,
157+
title={An experiment with the edited nearest-neighbor rule},
158+
author={Tomek, Ivan},
159+
journal={IEEE Transactions on Systems, Man, and Cybernetics},
160+
volume={6},
161+
number={6},
162+
pages={448--452},
163+
year={1976}
164+
}
165+
166+
@article{hart1968condensed,
167+
title={The condensed nearest neighbor rule (Corresp.)},
168+
author={Hart, Peter},
169+
journal={IEEE Transactions on Information Theory},
170+
volume={14},
171+
number={3},
172+
pages={515--516},
173+
year={1968},
174+
publisher={IEEE}
175+
}
176+
177+
@inproceedings{laurikkala2001improving,
178+
title={Improving identification of difficult small classes by balancing class distribution},
179+
author={Laurikkala, Jorma},
180+
booktitle={Conference on Artificial Intelligence in Medicine in Europe},
181+
pages={63--66},
182+
year={2001},
183+
organization={Springer}
184+
}
185+
186+
@article{smith2014instance,
187+
title={An instance level analysis of data complexity},
188+
author={Smith, Michael R and Martinez, Tony and Giraud-Carrier, Christophe},
189+
journal={Machine Learning},
190+
volume={95},
191+
number={2},
192+
pages={225--256},
193+
year={2014},
194+
publisher={Springer}
195+
}

doc/combine.rst

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ In this regard, Tomek's link and edited nearest-neighbours are the two cleaning
1717
methods that have been added to the pipeline after applying SMOTE over-sampling
1818
to obtain a cleaner space. The two ready-to-use classes imbalanced-learn
1919
implements for combining over- and undersampling methods are: (i)
20-
:class:`SMOTETomek` [BPM2004]_ and (ii) :class:`SMOTEENN` [BBM2003]_.
20+
:class:`SMOTETomek` :cite:`batista2004study` and (ii) :class:`SMOTEENN`
21+
:cite:`batista2003balancing`.
2122

2223
Those two classes can be used like any other sampler with parameters identical
2324
to their former samplers::
@@ -53,13 +54,3 @@ noisy samples than :class:`SMOTETomek`.
5354
.. topic:: Examples
5455

5556
* :ref:`sphx_glr_auto_examples_combine_plot_comparison_combine.py`
56-
57-
.. topic:: References
58-
59-
.. [BPM2004] G. Batista, R. C. Prati, M. C. Monard. "A study of the behavior
60-
of several methods for balancing machine learning training
61-
data," ACM Sigkdd Explorations Newsletter 6 (1), 20-29, 2004.
62-
63-
.. [BBM2003] G. Batista, B. Bazzan, M. Monard, "Balancing Training Data for
64-
Automated Annotation of Keywords: a Case Study," In WOB, 10-18,
65-
2003.

doc/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
'sphinx.ext.doctest',
3838
'sphinx.ext.intersphinx',
3939
'sphinx.ext.linkcode',
40+
'sphinxcontrib.bibtex',
4041
'numpydoc',
4142
'sphinx_issues',
4243
'sphinx_gallery.gen_gallery',

doc/ensemble.rst

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ Forest of randomized trees
6868

6969
:class:`BalancedRandomForestClassifier` is another ensemble method in which
7070
each tree of the forest will be provided a balanced bootstrap sample
71-
[1CLB2004]_. This class provides all functionality of the
71+
:cite:`chen2004using`. This class provides all functionality of the
7272
:class:`sklearn.ensemble.RandomForestClassifier` and notably the
7373
`feature_importances_` attributes::
7474

@@ -88,7 +88,7 @@ Boosting
8888
Several methods taking advantage of boosting have been designed.
8989

9090
:class:`RUSBoostClassifier` randomly under-samples the dataset before performing
91-
a boosting iteration [SKHN2010]_::
91+
a boosting iteration :cite:`seiffert2009rusboost`::
9292

9393
>>> from imblearn.ensemble import RUSBoostClassifier
9494
>>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
@@ -101,9 +101,9 @@ a boosting iteration [SKHN2010]_::
101101

102102
A specific method which uses ``AdaBoost`` as learners in the bagging classifier
103103
is called EasyEnsemble. The :class:`EasyEnsembleClassifier` allows bagging
104-
AdaBoost learners which are trained on balanced bootstrap samples [LWZ2009]_.
105-
Similarly to the :class:`BalancedBaggingClassifier` API, one can construct the
106-
ensemble as::
104+
AdaBoost learners which are trained on balanced bootstrap samples
105+
:cite:`liu2008exploratory`. Similarly to the :class:`BalancedBaggingClassifier`
106+
API, one can construct the ensemble as::
107107

108108
>>> from imblearn.ensemble import EasyEnsembleClassifier
109109
>>> eec = EasyEnsembleClassifier(random_state=0)
@@ -116,19 +116,3 @@ ensemble as::
116116
.. topic:: Examples
117117

118118
* :ref:`sphx_glr_auto_examples_ensemble_plot_comparison_ensemble_classifier.py`
119-
120-
.. topic:: References
121-
122-
.. [1CLB2004] Chen, Chao, Andy Liaw, and Leo Breiman. "Using random forest to
123-
learn imbalanced data." University of California, Berkeley 110
124-
(2004): 1-12.
125-
126-
.. [LWZ2009] X. Y. Liu, J. Wu and Z. H. Zhou, "Exploratory Undersampling for
127-
Class-Imbalance Learning," in IEEE Transactions on Systems, Man,
128-
and Cybernetics, Part B (Cybernetics), vol. 39, no. 2, pp.
129-
539-550, April 2009.
130-
131-
.. [SKHN2010] Seiffert, C., Khoshgoftaar, T. M., Van Hulse, J., &
132-
Napolitano, A. "RUSBoost: A hybrid approach to alleviating
133-
class imbalance." IEEE Transactions on Systems, Man, and
134-
Cybernetics-Part A: Systems and Humans 40.1 (2010): 185-197.

doc/metrics.rst

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@ use those metrics.
3636
Additional metrics specific to imbalanced datasets
3737
--------------------------------------------------
3838

39-
The :func:`geometric_mean_score` is the root of the product of class-wise
40-
sensitivity. This measure tries to maximize the accuracy on each of the classes
41-
while keeping these accuracies balanced.
42-
43-
The :func:`make_index_balanced_accuracy` can wrap any metric and give more
44-
importance to a specific class using the parameter ``alpha``.
39+
The :func:`geometric_mean_score`
40+
:cite:`barandela2003strategies,kubat1997addressing` is the root of the product
41+
of class-wise sensitivity. This measure tries to maximize the accuracy on each
42+
of the classes while keeping these accuracies balanced.
43+
44+
The :func:`make_index_balanced_accuracy` :cite:`garcia2012effectiveness` can
45+
wrap any metric and give more importance to a specific class using the
46+
parameter ``alpha``.

0 commit comments

Comments
 (0)