Skip to content

Commit 81b4360

Browse files
committed
Gini impurity: formula and docstring fixed.
1 parent f370866 commit 81b4360

File tree

3 files changed

+8 -7 lines changed

3 files changed

+8 -7 lines changed

Orange/preprocess/score.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -205,9 +205,9 @@ def _entropy(D):
205205

206206
def _gini(D):
207207
"""Gini index of class-distribution matrix"""
208-
P = D / np.sum(D, axis=0)
209-
return sum((np.ones(1 if len(D.shape) == 1 else D.shape[1]) - np.sum(np.square(P), axis=0))
210-
* 0.5 * np.sum(D, axis=0) / np.sum(D))
208+
P = np.asarray(D / np.sum(D, axis=0))
209+
return np.sum((1 - np.sum(P ** 2, axis=0)) *
210+
np.sum(D, axis=0) / np.sum(D))
211211

212212

213213
def _symmetrical_uncertainty(X, Y):
@@ -287,8 +287,9 @@ def from_contingency(self, cont, nan_adjustment):
287287

288288
class Gini(ClassificationScorer):
289289
"""
290-
Gini index is the probability that two randomly chosen instances will have different
291-
classes. See `Wikipedia entry on gini index <http://en.wikipedia.org/wiki/Gini_coefficient>`_.
290+
Gini impurity is the probability that two randomly chosen instances will have different
291+
classes. See `Wikipedia entry on Gini impurity
292+
<https://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity>`_.
292293
"""
293294
def from_contingency(self, cont, nan_adjustment):
294295
return (_gini(np.sum(cont, axis=1)) - _gini(cont)) * nan_adjustment

Orange/tests/test_score_feature.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def test_gain_ratio(self):
3434

3535
def test_gini(self):
3636
scorer = Gini()
37-
correct = [0.11893, 0.10427, 0.13117, 0.14650, 0.05973]
37+
correct = [0.23786, 0.20855, 0.26235, 0.29300, 0.11946]
3838
np.testing.assert_almost_equal([scorer(self.zoo, a) for a in range(5)],
3939
correct, decimal=5)
4040

Orange/widgets/data/owrank.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def table(shape, fill=None):
4141
SCORES = [
4242
score_meta("Information Gain", "Inf. gain", score.InfoGain),
4343
score_meta("Gain Ratio", "Gain Ratio", score.GainRatio),
44-
score_meta("Gini Gain", "Gini", score.Gini),
44+
score_meta("Gini Decrease", "Gini", score.Gini),
4545
score_meta("ANOVA", "ANOVA", score.ANOVA),
4646
score_meta("Chi2", "Chi2", score.Chi2),
4747
score_meta("Univariate Linear Regression", "Univar. Lin. Reg.", score.UnivariateLinearRegression),

0 commit comments

Comments
 (0)