-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmetrics.py
More file actions
180 lines (132 loc) · 4.33 KB
/
metrics.py
File metadata and controls
180 lines (132 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
Custom metrics.
"""
from collections import OrderedDict
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
def confusion_dataframe(y_true, y_pred,
                        columns=('P', 'N', 'PP', 'NP', 'TP', 'TN', 'FP', 'FN'),
                        orderby='PP'):
    """Builds a confusion dataframe.

    Each row corresponds to a unique label X. Values of the row contain
    elements of the confusion matrix of binary classification with labels
    ["X", "not X"]. The condition for this classification is "name == X".

    P - condition positive
    N - condition negative
    PP - predicted condition positive
    NP - predicted condition negative
    TP - true positives
    TN - true negatives
    FP - false positives (type I error)
    FN - false negatives (type II error)

    More information here: https://en.wikipedia.org/wiki/Confusion_matrix

    Parameters
    ----------
    y_true : array-like
        True labels
    y_pred : array-like
        Predicted labels
    columns : array-like
        Columns to be included in the dataframe, in the specified order
    orderby : str or array-like
        Column name or list of names to specify the column(s) by which
        the dataframe should be ordered (descending).

    Returns
    -------
    confusion_df : pd.DataFrame
        Confusion dataframe

    Examples
    --------
    >>> from metrics import confusion_dataframe
    >>> y_true = ['cat', 'dog', 'mouse', 'cat']
    >>> y_pred = ['cat', 'mouse', 'dog', 'dog']
    >>> confusion_dataframe(y_true, y_pred)
           P  N  PP  NP  TP  TN  FP  FN
    dog    1  3   2   2   0   1   2   1
    cat    2  2   1   3   1   2   0   1
    mouse  1  3   1   3   0   2   1   1

    Order by true positives and then predicted positives and show only
    false negatives and false positives in the specified order.

    >>> confusion_dataframe(y_true, y_pred, columns=['FN', 'FP'], orderby=['TP', 'PP'])
           FN  FP
    cat     1   0
    dog     1   2
    mouse   1   1
    """
    # Convert all labels to str so mixed-type label sequences compare safely.
    y_true = np.array(y_true, dtype=str)
    y_pred = np.array(y_pred, dtype=str)

    confusion = confusion_matrix(y_true, y_pred)
    labels = unique_labels(y_true, y_pred)

    # Derive every confusion-matrix quantity from the multiclass matrix:
    # rows of `confusion` are true labels, columns are predictions.
    P = confusion.sum(axis=1)    # condition positive (row sums)
    PP = confusion.sum(axis=0)   # predicted positive (column sums)
    TP = confusion.diagonal()
    N = len(y_true) - P
    NP = len(y_pred) - PP
    FP = PP - TP
    TN = N - FP
    FN = NP - TN

    confusion_df = pd.DataFrame(OrderedDict([
        ('name', labels),
        ('P', P),
        ('N', N),
        ('PP', PP),
        ('NP', NP),
        ('TP', TP),
        ('TN', TN),
        ('FP', FP),
        ('FN', FN)
    ]))
    confusion_df = confusion_df.set_index('name')
    # `del confusion_df.index.name` raises AttributeError on modern pandas;
    # assigning None is the supported way to clear the index name.
    confusion_df.index.name = None
    # `list(columns)` so tuple defaults (and any iterable) select columns,
    # not a single tuple key.
    return confusion_df.sort_values(orderby, ascending=False)[list(columns)]
def bleu(reference, candidate):
    """Calculates the BLEU score of candidate sentence.

    Candidate sentence is compared to reference sentence using a modified
    form of precision:

        BLEU = m / w

    Where m is the number of words in the candidate that were found in
    reference and w is the total number of words in the candidate.

    More information here: https://en.wikipedia.org/wiki/BLEU

    Parameters
    ----------
    reference : array-like
        A list of words of a reference sentence. The true sentence that
        is considered the ground truth.
    candidate : array-like
        A list of words of a candidate sentence. A sentence generated by
        the algorithm that needs to be evaluated.

    Returns
    -------
    bleu : float
        BLEU score

    Examples
    --------
    >>> from metrics import bleu
    >>> reference = ['test', 'basic']
    >>> candidate = ['test', 'add']
    >>> bleu(reference, candidate)
    0.5
    """
    # An empty candidate has no words to match: score is 0 by definition
    # (avoids division by zero).
    if not candidate:
        return 0.0
    matched = sum(1 for word in candidate if word in set(reference))
    return matched / len(candidate)
def rouge(reference, candidate):
    """Calculates a ROUGE-style recall score of the candidate sentence.

    ROUGE = m / w

    Where m is the number of words in the candidate that were found in
    the reference and w is the total number of words in the reference.

    Parameters
    ----------
    reference : array-like
        A list of words of a reference sentence (the ground truth).
    candidate : array-like
        A list of words of a candidate sentence to be evaluated.

    Returns
    -------
    rouge : float
        Recall score; 0.0 when the reference is empty.
    """
    # An empty reference means nothing can be recalled: score is 0
    # (avoids division by zero; m is necessarily 0 in this case too).
    if not reference:
        return 0.0
    matched = sum(1 for word in candidate if word in set(reference))
    return matched / len(reference)
def f1_score(reference, candidate):
    """Calculates the F1 score: harmonic mean of BLEU (precision) and
    ROUGE (recall) of the candidate sentence against the reference.

    F1 = 2 * (precision * recall) / (precision + recall)

    Parameters
    ----------
    reference : array-like
        A list of words of a reference sentence (the ground truth).
    candidate : array-like
        A list of words of a candidate sentence to be evaluated.

    Returns
    -------
    f1 : float
        F1 score; 0.0 when both precision and recall are 0.
    """
    precision = bleu(reference, candidate)
    recall = rouge(reference, candidate)
    # When both scores are 0 the harmonic mean is defined as 0
    # (avoids division by zero; the numerator is also 0 here).
    if precision + recall == 0:
        return 0.0
    return 2 * (precision * recall) / (precision + recall)