Commit 797dda3
Merge pull request #57 from PenGuln/1.3.0
Fix import error when importing metrics
2 parents b382fd4 + b6466e5

File tree

3 files changed: +107 -88 lines changed

libauc/metrics/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -1 +1,2 @@
+from .metrics_k import *
 from .metrics import *
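
With metrics_k.py now wildcard-imported alongside metrics.py, the @K ranking metrics (moved into the new metrics_k.py below) are exported from libauc.metrics again. A minimal usage sketch, assuming libauc is installed; the toy arrays are illustrative and not taken from the repository:

import numpy as np
from libauc.metrics import precision_at_k, recall_at_k

# toy binary labels and prediction scores (illustrative only)
y_true = np.array([1, 0, 1, 1, 0])
y_pred = np.array([0.9, 0.8, 0.7, 0.2, 0.1])

# the top-3 scored items contain 2 of the 3 positives
print(precision_at_k(y_true, y_pred, k=3))  # about 0.67 for this toy case
print(recall_at_k(y_true, y_pred, k=3))     # about 0.67 for this toy case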

libauc/metrics/metrics.py

Lines changed: 0 additions & 88 deletions
@@ -81,94 +81,6 @@ def pauc_roc_score(y_true, y_pred, max_fpr=1.0, min_tpr=0.0, reduction='mean', *
     selected_pred = np.concatenate((y_pred[pos_idx][selected_pos], y_pred[neg_idx][selected_neg]))
     return roc_auc_score(selected_target, selected_pred, **kwargs)
 
-
-# Reference: https://www.kaggle.com/code/nandeshwar/mean-average-precision-map-k-metric-explained-code
-def precision_and_recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs):
-    # referece: https://github.com/NicolasHug/Surprise/blob/master/examples/precision_recall_at_k.py
-    def calc_metrics(y_true, y_pred):
-        y_true = y_true == pos_label
-        desc_sort_order = np.argsort(y_pred)[::-1]
-        y_true_sorted = y_true[desc_sort_order]
-        true_positives = y_true_sorted[:k].sum()
-        total_positives = sum(y_true)
-
-        precision_k = true_positives / min(k, total_positives)
-        recall_k = true_positives / total_positives
-        return precision_k, recall_k
-
-    y_true = check_array_shape(y_true, (-1, 1))
-    y_pred = check_array_shape(y_pred, (-1, 1))
-
-    if y_true.shape[-1] != 1 and len(y_true.shape) > 1:
-        metrics_list = [calc_metrics(y_true[:, i], y_pred[:, i]) for i in range(y_true.shape[-1])]
-        precision_k_list, recall_k_list = zip(*metrics_list)
-        return precision_k_list, recall_k_list
-    else:
-        y_true = y_true.flatten()
-        y_pred = y_pred.flatten()
-        precision_k, recall_k = calc_metrics(y_true, y_pred)
-        return precision_k, recall_k
-
-def precision_at_k(y_true, y_pred, k, pos_label=1, **kwargs):
-    r"""Evaluation function of Precision@K"""
-    precision_k, _ = precision_and_recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs)
-    return precision_k
-
-def recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs):
-    r"""Evaluation function of Recall@K"""
-    _, recall_k = precision_and_recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs)
-    return recall_k
-
-def ap_at_k(y_true, y_pred, k=10):
-    r"""Evaluation function of AveragePrecision@K"""
-    # adapted from https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py
-    y_true = check_array_shape(y_true, (-1,))
-    y_pred = check_array_shape(y_pred, (-1,))
-    if len(y_pred)>k:
-        y_pred = y_pred[:k]
-    score = 0.0
-    num_hits = 0.0
-    for i,p in enumerate(y_pred):
-        if p in y_true and p not in y_pred[:i]:
-            num_hits += 1.0
-            score += num_hits / (i+1.0)
-    return score / min(len(y_true), k)
-
-def map_at_k(y_true, y_pred, k=10):
-    r"""Evaluation function of meanAveragePrecision@K"""
-    # adapted from https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py
-    assert len(y_true.shape) == 2 and len(y_true.shape) == 2
-    assert k > 0, 'Value of k is not valid!'
-    if isinstance(y_true, np.ndarray):
-        y_true = y_true.tolist()
-    if isinstance(y_pred, np.ndarray):
-        y_pred = y_pred.tolist()
-    return np.mean([ap_at_k(a,p,k) for a,p in zip(y_true, y_pred)])
-
-
-def ndcg_at_k(y_true, y_pred, k=5):
-    r"""
-    Evaluation function of NDCG@K
-    """
-    assert isinstance(y_pred, np.ndarray)
-    assert isinstance(y_true, np.ndarray)
-    assert len(y_pred.shape) == 2 and len(y_pred.shape) == 2
-
-    num_of_users, num_pos_items = y_true.shape
-    sorted_ratings = -np.sort(-y_true)  # descending order !!
-    discounters = np.tile([np.log2(i+1) for i in range(1, 1+num_pos_items)], (num_of_users, 1))
-    normalizer_mat = (np.exp2(sorted_ratings) - 1) / discounters
-
-    sort_idx = (-y_pred).argsort(axis=1)  # index of sorted predictions (max->min)
-    gt_rank = np.array([np.argwhere(sort_idx == i)[:, 1]+1 for i in range(num_pos_items)]).T  # rank of the ground-truth (start from 1)
-    hit = (gt_rank <= k)
-
-    # calculate the normalizer first
-    normalizer = np.sum(normalizer_mat[:, :k], axis=1)
-    # calculate DCG
-    DCG = np.sum(((np.exp2(y_true) - 1) / np.log2(gt_rank+1)) * hit.astype(float), axis=1)
-    return np.mean(DCG / normalizer)
-
 # TODO: automatic detect classificaiton task or ranking task?
 def evaluator(y_true, y_pred, metrics=['auroc', 'auprc', 'pauroc'], return_str=False, format='%.4f(%s)', **kwargs):
     results = {}
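
The evaluator helper shown in the trailing context lines stays in metrics.py; only the @K functions move into metrics_k.py. A hedged sketch of calling it after this commit; since only its signature and first line are visible in this diff, the structure of the returned results is an assumption on my part:

import numpy as np
from libauc.metrics import evaluator

y_true = np.array([1, 0, 1, 1, 0])
y_pred = np.array([0.9, 0.8, 0.7, 0.2, 0.1])

# metric names taken from the default argument in the signature above
results = evaluator(y_true, y_pred, metrics=['auroc', 'auprc'])
print(results)  # assumed to be a mapping from metric name to value (body not shown here)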

libauc/metrics/metrics_k.py

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
+import numpy as np
+
+def check_array_type(array):
+    # convert to array type
+    if not isinstance(array, (np.ndarray, np.generic)):
+        array = np.array(array)
+    return array
+
+def check_array_shape(array, shape):
+    # check array shape
+    array = check_array_type(array)
+    if array.size == 0:
+        raise ValueError("Array is empty.")
+    if array.shape != shape and len(array.shape) != 1:
+        try:
+            array = array.reshape(shape)
+        except ValueError as e:
+            raise ValueError(f"Could not reshape array of shape {array.shape} to {shape}.") from e
+    return array
+
+# Reference: https://www.kaggle.com/code/nandeshwar/mean-average-precision-map-k-metric-explained-code
+def precision_and_recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs):
+    # referece: https://github.com/NicolasHug/Surprise/blob/master/examples/precision_recall_at_k.py
+    def calc_metrics(y_true, y_pred):
+        y_true = y_true == pos_label
+        desc_sort_order = np.argsort(y_pred)[::-1]
+        y_true_sorted = y_true[desc_sort_order]
+        true_positives = y_true_sorted[:k].sum()
+        total_positives = sum(y_true)
+
+        precision_k = true_positives / min(k, total_positives)
+        recall_k = true_positives / total_positives
+        return precision_k, recall_k
+
+    y_true = check_array_shape(y_true, (-1, 1))
+    y_pred = check_array_shape(y_pred, (-1, 1))
+
+    if y_true.shape[-1] != 1 and len(y_true.shape) > 1:
+        metrics_list = [calc_metrics(y_true[:, i], y_pred[:, i]) for i in range(y_true.shape[-1])]
+        precision_k_list, recall_k_list = zip(*metrics_list)
+        return precision_k_list, recall_k_list
+    else:
+        y_true = y_true.flatten()
+        y_pred = y_pred.flatten()
+        precision_k, recall_k = calc_metrics(y_true, y_pred)
+        return precision_k, recall_k
+
+def precision_at_k(y_true, y_pred, k, pos_label=1, **kwargs):
+    r"""Evaluation function of Precision@K"""
+    precision_k, _ = precision_and_recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs)
+    return precision_k
+
+def recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs):
+    r"""Evaluation function of Recall@K"""
+    _, recall_k = precision_and_recall_at_k(y_true, y_pred, k, pos_label=1, **kwargs)
+    return recall_k
+
+def ap_at_k(y_true, y_pred, k=10):
+    r"""Evaluation function of AveragePrecision@K"""
+    # adapted from https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py
+    y_true = check_array_shape(y_true, (-1,))
+    y_pred = check_array_shape(y_pred, (-1,))
+    if len(y_pred)>k:
+        y_pred = y_pred[:k]
+    score = 0.0
+    num_hits = 0.0
+    for i,p in enumerate(y_pred):
+        if p in y_true and p not in y_pred[:i]:
+            num_hits += 1.0
+            score += num_hits / (i+1.0)
+    return score / min(len(y_true), k)
+
+def map_at_k(y_true, y_pred, k=10):
+    r"""Evaluation function of meanAveragePrecision@K"""
+    # adapted from https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py
+    assert len(y_true.shape) == 2 and len(y_true.shape) == 2
+    assert k > 0, 'Value of k is not valid!'
+    if isinstance(y_true, np.ndarray):
+        y_true = y_true.tolist()
+    if isinstance(y_pred, np.ndarray):
+        y_pred = y_pred.tolist()
+    return np.mean([ap_at_k(a,p,k) for a,p in zip(y_true, y_pred)])
+
+
+def ndcg_at_k(y_true, y_pred, k=5):
+    r"""
+    Evaluation function of NDCG@K
+    """
+    assert isinstance(y_pred, np.ndarray)
+    assert isinstance(y_true, np.ndarray)
+    assert len(y_pred.shape) == 2 and len(y_pred.shape) == 2
+
+    num_of_users, num_pos_items = y_true.shape
+    sorted_ratings = -np.sort(-y_true)  # descending order !!
+    discounters = np.tile([np.log2(i+1) for i in range(1, 1+num_pos_items)], (num_of_users, 1))
+    normalizer_mat = (np.exp2(sorted_ratings) - 1) / discounters
+
+    sort_idx = (-y_pred).argsort(axis=1)  # index of sorted predictions (max->min)
+    gt_rank = np.array([np.argwhere(sort_idx == i)[:, 1]+1 for i in range(num_pos_items)]).T  # rank of the ground-truth (start from 1)
+    hit = (gt_rank <= k)
+
+    # calculate the normalizer first
+    normalizer = np.sum(normalizer_mat[:, :k], axis=1)
+    # calculate DCG
+    DCG = np.sum(((np.exp2(y_true) - 1) / np.log2(gt_rank+1)) * hit.astype(float), axis=1)
+    return np.mean(DCG / normalizer)
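
Note the two input conventions in this new module: precision_and_recall_at_k and ndcg_at_k pair relevance labels (binary or graded) with prediction scores, whereas ap_at_k and map_at_k compare a list of relevant item IDs against a ranked list of predicted item IDs (they test membership via p in y_true). A small sketch with made-up data, assuming libauc is installed; the arrays and the quoted outputs apply only to this toy case:

import numpy as np
from libauc.metrics import ndcg_at_k, map_at_k

# NDCG@K: one row per user, graded relevance vs. prediction scores
rel_grades  = np.array([[3.0, 2.0, 0.0, 1.0]])   # made-up relevance grades
pred_scores = np.array([[0.4, 0.9, 0.1, 0.3]])   # made-up model scores
print(ndcg_at_k(rel_grades, pred_scores, k=2))   # about 0.83 for this toy case

# MAP@K: one row per user, relevant item IDs vs. ranked predicted item IDs
relevant_items  = np.array([[1, 2, 3]])
predicted_items = np.array([[2, 5, 3]])
print(map_at_k(relevant_items, predicted_items, k=3))  # about 0.56 for this toy case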
